package org.xbib.standardnumber;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * ISO 26324: Digital Object Identifier System (DOI)
 *
 * Z39.50 BIB-1 Use Attribute 1094
 *
 * DOI is an acronym for "digital object identifier", meaning a "digital identifier of an object"
 * rather than an "identifier of a digital object". The DOI system was initiated by the
 * International DOI Foundation in 1998, and initially developed with the collaboration
 * of some participants in ISO/TC46/SC9. Due to its application in the fields of
 * information and documentation and previous collaboration with some ISO/TC46/SC9 participants,
 * it was introduced as a possible work item in 2004 and further developed from 2006 to 2010.
 *
 * The DOI system is designed to work over the Internet. A DOI name is permanently assigned
 * to an object to provide a resolvable persistent network link to current information about
 * that object, including where the object, or information about it, can be found on the
 * Internet. While information about an object can change over time, its DOI name will not
 * change. A DOI name can be resolved within the DOI system to values of one or more types
 * of data relating to the object identified by that DOI name, such as a URL, an e-mail address,
 * other identifiers and descriptive metadata.
 *
 * The DOI system enables the construction of automated services and transactions.
 * Applications of the DOI system include but are not limited to managing information
 * and documentation location and access; managing metadata; facilitating electronic
 * transactions; persistent unique identification of any form of any data; and commercial
 * and non-commercial transactions.
 *
 * The content of an object associated with a DOI name is described unambiguously
 * by DOI metadata, based on a structured extensible data model that enables the object
 * to be associated with metadata of any desired degree of precision and granularity
 * to support description and services. The data model supports interoperability
 * between DOI applications.
 *
 * The scope of the DOI system is not defined by reference to the type of content
 * (format, etc.) of the referent, but by reference to the functionalities it provides
 * and the context of use. The DOI system provides, within networks of DOI applications,
 * for unique identification, persistence, resolution, metadata and semantic interoperability.
 *
 * <p>Usage: call {@link #set(CharSequence)} with the raw input, then {@link #normalize()}
 * to extract the DOI. The extracted DOI is lower-cased. Instances are mutable.
 */
public class DOI extends AbstractStandardNumber implements Comparable<DOI>, StandardNumber {

    /** Matches a DOI name (prefix {@code 10.NNNN[.N...]}, a slash, and a suffix) in lower-cased text. */
    private static final Pattern DOI_PATTERN = Pattern.compile("\\b10\\.\\d{4}([.][0-9]+)*/[a-z0-9/\\-.()<>_:;\\\\]+\\b");

    /** Matches an {@code http} or {@code https} URL embedded in the text. */
    private static final Pattern DOI_URI_PATTERN = Pattern.compile("\\b(https?)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]\\b");

    /** The unprocessed input handed to {@link #set(CharSequence)}. */
    private CharSequence raw;

    /** The extracted, lower-cased DOI name, or {@code null} if none was found. */
    private String value;

    private URI infoURI;
    private URI httpDoi;
    private URI httpDxDoi;

    @Override
    public String type() {
        return "doi";
    }

    /**
     * Compares by normalized value. A {@code null} argument, or a DOI without a normalized
     * value, is ordered after any DOI that has one; two DOIs without a value compare as equal.
     *
     * @param doi the DOI to compare with, may be {@code null}
     * @return negative, zero or positive as this DOI sorts before, with, or after {@code doi}
     */
    @Override
    public int compareTo(DOI doi) {
        if (doi == null) {
            return -1;
        }
        String mine = normalizedValue();
        String theirs = doi.normalizedValue();
        if (mine == null) {
            return theirs == null ? 0 : 1;
        }
        if (theirs == null) {
            return -1;
        }
        return mine.compareTo(theirs);
    }

    /**
     * Stores the raw input. Nothing is parsed until {@link #normalize()} is called.
     *
     * @param value the raw text that contains a DOI name or a DOI resolver URL
     * @return this DOI
     */
    @Override
    public DOI set(CharSequence value) {
        this.raw = value;
        return this;
    }

    /**
     * No-op: this implementation does not compute a checksum.
     *
     * @param checksum ignored
     * @return this DOI
     */
    @Override
    public DOI createChecksum(boolean checksum) {
        return this;
    }

    /**
     * Extracts the DOI from the raw input and derives its URI forms. Any result of an
     * earlier call is discarded first, so the state always reflects the current raw input.
     *
     * @return this DOI
     */
    @Override
    public DOI normalize() {
        clearDerivedState();
        if (raw != null) {
            make(raw);
        }
        return this;
    }

    /**
     * @return {@code true} if a DOI has been extracted by {@link #normalize()}
     */
    @Override
    public boolean isValid() {
        return value != null;
    }

    /**
     * @return this DOI
     * @throws NumberFormatException if no DOI has been extracted
     */
    @Override
    public DOI verify() throws NumberFormatException {
        if (value == null) {
            throw new NumberFormatException("no valid DOI found in: " + raw);
        }
        return this;
    }

    /**
     * @return the extracted lower-cased DOI name, or {@code null} if there is none
     */
    @Override
    public String normalizedValue() {
        return value;
    }

    /**
     * @return the {@code http://doi.org/} URL of the DOI, or an empty string if there is none
     */
    @Override
    public String format() {
        return httpDoi != null ? httpDoi.toString() : ""; // preferred form
    }

    /**
     * Returns the DOI name and its three URI forms, each prefixed with the upper-cased type
     * and a blank. The list always has four positions; a position is {@code null} when the
     * corresponding form is not available.
     *
     * @return a fixed-size list of four typed variants, possibly containing {@code null}
     */
    @Override
    public Collection<String> getTypedVariants() {
        // Locale.ROOT: the default locale must not influence the prefix (Turkish "i" -> "İ").
        final String prefix = type().toUpperCase(Locale.ROOT) + " ";
        return Arrays.asList(
                value != null ? prefix + value : null,
                infoURI != null ? prefix + infoURI : null,
                httpDoi != null ? prefix + httpDoi : null,
                httpDxDoi != null ? prefix + httpDxDoi : null
        );
    }

    /**
     * Discards the extracted DOI and all URI forms derived from it. The raw input is kept.
     *
     * @return this DOI
     */
    public DOI reset() {
        clearDerivedState();
        return this;
    }

    /** Clears everything that {@link #make(CharSequence)} computes. */
    private void clearDerivedState() {
        this.value = null;
        this.infoURI = null;
        this.httpDoi = null;
        this.httpDxDoi = null;
    }

    /**
     * Parses the input and, if a DOI is found, fills in the value and URI fields.
     * Leaves the fields untouched if the input contains a URL that is not a DOI resolver
     * URL, or if no DOI name is found.
     */
    private void make(CharSequence input) {
        // DOIs are case insensitive in ASCII
        // DOI service only use upper casing, we use lowercasing (better for search engines)
        String content = input.toString().toLowerCase(Locale.US);
        // is it already a DOI URI?
        Matcher m = DOI_URI_PATTERN.matcher(content);
        if (m.find()) {
            URI u;
            try {
                u = URI.create(m.group());
            } catch (IllegalArgumentException e) {
                // looks like a URL but cannot be parsed (e.g. malformed % escape): not a DOI
                return;
            }
            if (!isResolverUri(u)) {
                return;
            }
            content = u.getRawPath();
        }
        m = DOI_PATTERN.matcher(content);
        if (!m.find()) {
            return;
        }
        String doi = m.group();
        try {
            // The multi-argument constructors percent-encode characters that are legal in a
            // DOI name but illegal in a URI ('<', '>', '\'), where URI.create would throw.
            URI info = new URI("info", "doi:" + doi, null);
            URI http = new URI("http", "doi.org", "/" + doi, null);
            URI httpDx = new URI("http", "dx.doi.org", "/" + doi, null);
            this.value = doi;
            this.infoURI = info;
            this.httpDoi = http;
            this.httpDxDoi = httpDx;
        } catch (URISyntaxException e) {
            // cannot be expressed as a URI: treat as invalid, fields stay cleared
            clearDerivedState();
        }
    }

    /** Tells whether the URI is an http(s) URL on one of the DOI resolver hosts. */
    private static boolean isResolverUri(URI u) {
        String scheme = u.getScheme();
        String host = u.getHost();
        boolean web = "http".equals(scheme) || "https".equals(scheme);
        return web && ("doi.org".equals(host) || "dx.doi.org".equals(host));
    }

}