001package org.xbib.elasticsearch.index.analysis.standardnumber;
002
003import org.elasticsearch.ElasticsearchException;
004import org.elasticsearch.common.collect.Sets;
005import org.elasticsearch.common.component.AbstractLifecycleComponent;
006import org.elasticsearch.common.inject.Inject;
007import org.elasticsearch.common.settings.Settings;
008import org.xbib.standardnumber.ISBN;
009import org.xbib.standardnumber.StandardNumber;
010
011import java.util.Arrays;
012import java.util.Collection;
013import java.util.Set;
014
015import static org.elasticsearch.common.collect.Lists.newLinkedList;
016
017public class Detector extends AbstractLifecycleComponent<Detector>  {
018
019    private final static ThreadLocal<Set<StandardNumber>> stdnums = new ThreadLocal<Set<StandardNumber>>();
020
021    @Inject
022    public Detector(Settings settings) {
023        super(settings);
024    }
025
026    @Override
027    protected void doStart() throws ElasticsearchException {
028    }
029
030    @Override
031    protected void doStop() throws ElasticsearchException {
032    }
033
034    @Override
035    protected void doClose() throws ElasticsearchException {
036    }
037
038    protected Collection<StandardNumber> getStdNums() {
039        if (stdnums.get() == null) {
040            String[] s = settings.getAsArray("types", null);
041            Set<String> types = s != null ? Sets.newTreeSet(Arrays.asList(s)) : null;
042            Set<StandardNumber> set = Sets.newLinkedHashSet();
043            set.addAll(types == null ?
044                    StandardNumberService.create() :
045                    StandardNumberService.create(types));
046            stdnums.set(set);
047        }
048        return stdnums.get();
049    }
050
051    public Collection<StandardNumber> detect(CharSequence content) {
052        Collection<StandardNumber> candidates = newLinkedList();
053        for (StandardNumber stdnum : getStdNums()) {
054            stdnum.reset();
055            try {
056                candidates.add(stdnum.set(content).normalize().verify());
057            } catch (NumberFormatException e) {
058                // skip
059            }
060        }
061        return candidates;
062    }
063
064    public Collection<CharSequence> lookup(CharSequence content) {
065        Collection<CharSequence> variants = newLinkedList();
066        for (StandardNumber stdnum : getStdNums()) {
067            stdnum.reset();
068            if (stdnum instanceof ISBN) {
069                handleISBN((ISBN) stdnum, content, variants);
070            } else {
071                stdnum = stdnum.set(content).normalize();
072                if (stdnum.isValid()) {
073                    for (String s : stdnum.getTypedVariants()) {
074                        if (s != null) {
075                            variants.add(s);
076                        }
077                    }
078                }
079            }
080        }
081        return variants;
082    }
083
084    private void handleISBN(ISBN stdnum, CharSequence content, Collection<CharSequence> variants) throws NumberFormatException {
085        ISBN isbn = stdnum.set(content).normalize();
086        if (isbn.isValid()) {
087            if (!isbn.isEAN()) {
088                // create up to 4 variants: ISBN, ISBN normalized, ISBN-13, ISBN-13 normalized
089                variants.add("ISBN " + isbn.ean(false).format());
090                variants.add("ISBN " + isbn.ean(false).normalizedValue());
091                isbn = isbn.ean(true).set(content).normalize();
092                if (isbn.isValid()) {
093                    variants.add("ISBN " + isbn.format());
094                    variants.add("ISBN " + isbn.normalizedValue());
095                }
096            } else {
097                // 2 variants, do not create ISBN-10 for an ISBN-13
098                variants.add("ISBN " + isbn.ean(true).format());
099                variants.add("ISBN " + isbn.ean(true).normalizedValue());
100            }
101        }
102    }
103
104}