001package org.xbib.standardnumber;
002
003import java.net.URI;
004import java.util.Arrays;
005import java.util.Collection;
006import java.util.Locale;
007import java.util.regex.Matcher;
008import java.util.regex.Pattern;
009
010/**
011 * ISO 26324: Digital Object Identifier System (DOI)
012 *
013 * Z39.50 BIB-1 Use Attribute 1094
014 *
015 * DOI is an acronym for "digital object identifier", meaning a "digital identifier of an object"
016 * rather than an "identifier of a digital object". The DOI system was initiated by the
017 * International DOI Foundation in 1998, and initially developed with the collaboration
018 * of some participants in ISO/TC46/SC9. Due to its application in the fields of
019 * information and documentation and previous collaboration with some ISO/TC46/SC9 participants,
020 * it was introduced as a possible work item in 2004 and further developed from 2006 to 2010.
021 *
022 * The DOI system is designed to work over the Internet. A DOI name is permanently assigned
023 * to an object to provide a resolvable persistent network link to current information about
024 * that object, including where the object, or information about it, can be found on the
025 * Internet. While information about an object can change over time, its DOI name will not
026 * change. A DOI name can be resolved within the DOI system to values of one or more types
027 * of data relating to the object identified by that DOI name, such as a URL, an e-mail address,
028 * other identifiers and descriptive metadata.
029 *
030 * The DOI system enables the construction of automated services and transactions.
031 * Applications of the DOI system include but are not limited to managing information
032 * and documentation location and access; managing metadata; facilitating electronic
033 * transactions; persistent unique identification of any form of any data; and commercial
034 * and non-commercial transactions.
035 *
036 * The content of an object associated with a DOI name is described unambiguously
037 * by DOI metadata, based on a structured extensible data model that enables the object
038 * to be associated with metadata of any desired degree of precision and granularity
039 * to support description and services. The data model supports interoperability
040 * between DOI applications.
041 *
042 * The scope of the DOI system is not defined by reference to the type of content
043 * (format, etc.) of the referent, but by reference to the functionalities it provides
044 * and the context of use. The DOI system provides, within networks of DOI applications,
045 * for unique identification, persistence, resolution, metadata and semantic interoperability.
046 *
047 */
048public class DOI extends AbstractStandardNumber implements Comparable<DOI>, StandardNumber {
049
050    private static final Pattern DOI_PATTERN = Pattern.compile("\\b10\\.\\d{4}([.][0-9]+)*/[a-z0-9/\\-.()<>_:;\\\\]+\\b");
051
052    private static final Pattern DOI_URI_PATTERN = Pattern.compile("\\b(https?)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]\\b");
053
054    private Object raw;
055
056    private String value;
057
058    private URI infoURI;
059    private URI httpDoi;
060    private URI httpDxDoi;
061
062    @Override
063    public String type() {
064        return "doi";
065    }
066
067    @Override
068    public int compareTo(DOI doi) {
069        return doi != null ? normalizedValue().compareTo(doi.normalizedValue()) : -1;
070    }
071
072    @Override
073    public DOI set(CharSequence value) {
074        this.raw = value;
075        return this;
076    }
077
078    @Override
079    public DOI createChecksum(boolean checksum) {
080        return this;
081    }
082
083    @Override
084    public DOI normalize() {
085        if (raw == null) {
086            return this;
087        }
088        make(raw);
089        return this;
090    }
091
092    @Override
093    public boolean isValid() {
094        return value != null;
095    }
096
097    @Override
098    public DOI verify() throws NumberFormatException {
099        if (value == null) {
100            throw new NumberFormatException();
101        }
102        return this;
103    }
104
105    @Override
106    public String normalizedValue() {
107        return value;
108    }
109
110    @Override
111    public String format() {
112        return httpDoi != null ? httpDoi.toString() : ""; // preferred form
113    }
114
115    @Override
116    public Collection<String> getTypedVariants() {
117        return Arrays.asList(
118                value != null ? type().toUpperCase() + " " + value : null,
119                infoURI != null ? type().toUpperCase() + " " + infoURI.toString() : null,
120                httpDoi != null ? type().toUpperCase() + " " + httpDoi.toString() : null,
121                httpDxDoi != null ? type().toUpperCase() + " " + httpDxDoi.toString() : null
122        );
123    }
124
125    public DOI reset() {
126        this.value = null;
127        return this;
128    }
129
130    private void make(Object o) {
131        // DOIs are case insensitive in ASCII
132        // DOI service only use upper casing, we use lowercasing (better for search engines)
133        String content = o.toString().toLowerCase(Locale.US);
134        // is it an already a DOI URI?
135        Matcher m = DOI_URI_PATTERN.matcher(content);
136        if (m.find()) {
137            URI u = URI.create(content.substring(m.start(), m.end()));
138            if ("http".equals(u.getScheme()) && ("dx.doi.org".equals(u.getHost()) || "doi.org".equals(u.getHost()))) {
139                content = u.getRawPath();
140            } else {
141                return;
142            }
143        }
144        m = DOI_PATTERN.matcher(content);
145        if (m.find()) {
146            this.value = content.substring(m.start(), m.end());
147            this.infoURI = URI.create("info:doi:" + value);
148            this.httpDoi = URI.create("http://doi.org/" + value);
149            this.httpDxDoi = URI.create("http://dx.doi.org/" + value);
150        }
151    }
152
153}