package org.xbib.elasticsearch.index.analysis.standardnumber;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.collect.Sets;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.xbib.standardnumber.ISBN;
import org.xbib.standardnumber.StandardNumber;

import java.util.Arrays;
import java.util.Collection;
import java.util.Set;

import static org.elasticsearch.common.collect.Lists.newLinkedList;

/**
 * Component that runs character content against a set of {@link StandardNumber}
 * implementations, either to collect the numbers that verify ({@link #detect})
 * or to collect textual variants of the valid ones ({@link #lookup}).
 *
 * <p>The set of standard numbers is built lazily, once per thread, from the
 * {@code "types"} setting of this component.
 */
public class Detector extends AbstractLifecycleComponent<Detector> {

    /**
     * Per-thread set of standard number instances. The instances are reset and
     * re-populated on every call, so they are kept thread-confined.
     *
     * <p>This is an instance field (not static) because the set is derived from
     * this component's own {@code settings}; a static cache would make a second
     * component with different settings reuse the first one's set on the same thread.
     */
    private final ThreadLocal<Set<StandardNumber>> stdnums = new ThreadLocal<Set<StandardNumber>>();

    /**
     * Creates the detector.
     *
     * @param settings the settings; the {@code "types"} array is read lazily by {@link #getStdNums()}
     */
    @Inject
    public Detector(Settings settings) {
        super(settings);
    }

    /** No start-up work is required. */
    @Override
    protected void doStart() throws ElasticsearchException {
    }

    /** No stop work is required. */
    @Override
    protected void doStop() throws ElasticsearchException {
    }

    /** No resources need to be released. */
    @Override
    protected void doClose() throws ElasticsearchException {
    }

    /**
     * Returns the standard number instances for the calling thread, creating them
     * on first use.
     *
     * <p>If the {@code "types"} setting is absent, {@code StandardNumberService.create()}
     * is used; otherwise the configured type names are passed as a sorted set to
     * {@code StandardNumberService.create(types)}.
     *
     * @return the calling thread's standard numbers, in the order delivered by the service
     */
    protected Collection<StandardNumber> getStdNums() {
        Set<StandardNumber> cached = stdnums.get();
        if (cached == null) {
            String[] s = settings.getAsArray("types", null);
            Set<String> types = s != null ? Sets.newTreeSet(Arrays.asList(s)) : null;
            cached = Sets.newLinkedHashSet();
            cached.addAll(types == null ?
                    StandardNumberService.create() :
                    StandardNumberService.create(types));
            stdnums.set(cached);
        }
        return cached;
    }

    /**
     * Tries every configured standard number against the content and collects
     * those for which set / normalize / verify completes without a
     * {@link NumberFormatException}.
     *
     * <p>NOTE(review): the collected elements are whatever {@code verify()} returns,
     * presumably the thread-cached instances themselves, which are reset on the
     * next call - confirm before holding on to the result.
     *
     * @param content the text to examine
     * @return the standard numbers that verified, possibly empty, never {@code null}
     */
    public Collection<StandardNumber> detect(CharSequence content) {
        Collection<StandardNumber> candidates = newLinkedList();
        for (StandardNumber stdnum : getStdNums()) {
            stdnum.reset();
            try {
                candidates.add(stdnum.set(content).normalize().verify());
            } catch (NumberFormatException e) {
                // content is not a number of this type: skip it
            }
        }
        return candidates;
    }

    /**
     * Collects textual variants for every configured standard number that is
     * valid for the content. ISBNs get dedicated treatment (see
     * {@link #handleISBN}); for all other types the non-null entries of
     * {@code getTypedVariants()} are added.
     *
     * <p>Unlike {@link #detect}, this method does not catch
     * {@link NumberFormatException}; if a standard number implementation throws
     * it, it propagates to the caller.
     *
     * @param content the text to examine
     * @return the variants found, possibly empty, never {@code null}
     */
    public Collection<CharSequence> lookup(CharSequence content) {
        Collection<CharSequence> variants = newLinkedList();
        for (StandardNumber stdnum : getStdNums()) {
            stdnum.reset();
            if (stdnum instanceof ISBN) {
                handleISBN((ISBN) stdnum, content, variants);
            } else {
                stdnum = stdnum.set(content).normalize();
                if (stdnum.isValid()) {
                    for (String s : stdnum.getTypedVariants()) {
                        if (s != null) {
                            variants.add(s);
                        }
                    }
                }
            }
        }
        return variants;
    }

    /**
     * Adds "ISBN "-prefixed variants of a valid ISBN to {@code variants}.
     *
     * <p>For input that is not an EAN, the formatted and normalized values are
     * added with {@code ean(false)}, then the content is re-evaluated with
     * {@code ean(true)} and, if valid, its formatted and normalized values are
     * added as well (up to four variants). For input that already is an EAN,
     * only the two {@code ean(true)} variants are added.
     *
     * @param stdnum   the ISBN instance to use (already reset by the caller)
     * @param content  the text to examine
     * @param variants the collection receiving the variants
     * @throws NumberFormatException declared by the original author; which call raises it
     *                               is not visible here
     */
    private void handleISBN(ISBN stdnum, CharSequence content, Collection<CharSequence> variants) throws NumberFormatException {
        ISBN isbn = stdnum.set(content).normalize();
        if (isbn.isValid()) {
            if (!isbn.isEAN()) {
                // create up to 4 variants: ISBN, ISBN normalized, ISBN-13, ISBN-13 normalized
                variants.add("ISBN " + isbn.ean(false).format());
                variants.add("ISBN " + isbn.ean(false).normalizedValue());
                isbn = isbn.ean(true).set(content).normalize();
                if (isbn.isValid()) {
                    variants.add("ISBN " + isbn.format());
                    variants.add("ISBN " + isbn.normalizedValue());
                }
            } else {
                // 2 variants, do not create ISBN-10 for an ISBN-13
                variants.add("ISBN " + isbn.ean(true).format());
                variants.add("ISBN " + isbn.ean(true).normalizedValue());
            }
        }
    }

}