package org.biopax.paxtools.search;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Version;
import org.biopax.paxtools.controller.Fetcher;
import org.biopax.paxtools.controller.ModelUtils;
import org.biopax.paxtools.controller.SimpleEditorMap;
import org.biopax.paxtools.model.BioPAXElement;
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level3.BioSource;
import org.biopax.paxtools.model.level3.Level3Element;
import org.biopax.paxtools.model.level3.Named;
import org.biopax.paxtools.model.level3.Pathway;
import org.biopax.paxtools.model.level3.Process;
import org.biopax.paxtools.model.level3.Provenance;
import org.biopax.paxtools.model.level3.UnificationXref;
import org.biopax.paxtools.model.level3.XReferrable;
import org.biopax.paxtools.model.level3.Xref;
import org.biopax.paxtools.util.ClassFilterSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/biopax/paxtools/search/SearchEngine.class */
public class SearchEngine implements Indexer, Searcher {
    // ---- Names of the Lucene document fields written by index(BioPAXElement, IndexWriter) ----

    /** Stored-only field holding the RDF id (URI) of the indexed BioPAX element. */
    public static final String FIELD_URI = "uri";
    /** Indexed (not stored) field holding the lower-cased database name of an Xref. */
    public static final String FIELD_XREFDB = "xrefdb";
    /** Parent pathway field: pathway URIs are stored; pathway names and unification xref ids are indexed. */
    public static final String FIELD_PATHWAY = "pathway";
    /** Organism field: BioSource URIs are stored; names, unification xref ids, tissue and cell type terms are indexed. */
    public static final String FIELD_ORGANISM = "organism";
    /** Data source field: Provenance URIs are stored and indexed as-is; provenance names are indexed lower-cased. */
    public static final String FIELD_DATASOURCE = "datasource";
    /** Lower-cased simple name of the element's BioPAX model interface; used for type queries and filters. */
    public static final String FIELD_TYPE = "type";

    /** The BioPAX model that is indexed and from which search hits are resolved by URI. */
    private final Model model;
    /** Page size applied to every search; see setMaxHitsPerPage(int). */
    private int maxHitsPerPage;
    /** Analyzer shared by the index writer, the query parser and the highlighter. */
    private final Analyzer analyzer;
    /** Location of the Lucene index on disk. */
    private final File indexFile;
    /** Source of index searchers; stays null while the index location does not exist or cannot be opened. */
    private SearcherManager searcherManager;

    /** Default number of hits per page; the constructor initializes the page size to this value. */
    public static final int DEFAULT_MAX_HITS_PER_PAGE = 100;
    private static final Logger LOG = LoggerFactory.getLogger(SearchEngine.class);

    /** Stored and indexed keywords of an element; also the field the hit excerpt is highlighted from. */
    public static final String FIELD_KEYWORD = "keyword";
    /** Indexed (not stored), boosted names of Named elements. */
    public static final String FIELD_NAME = "name";
    /** Indexed (not stored) lower-cased xref ids, matched as exact terms. */
    public static final String FIELD_XREFID = "xrefid";
    /** Stored integer written for Process elements (number of processes fetched from the element). */
    public static final String FIELD_SIZE = "size";
    /** Fields a user query is parsed against when it does not name a field explicitly. */
    public static final String[] DEFAULT_FIELDS = {FIELD_KEYWORD, FIELD_NAME, FIELD_XREFID, FIELD_SIZE};

    /* loaded from: input_file:org/biopax/paxtools/search/SearchEngine$HitAnnotation.class */
    public enum HitAnnotation {
        HIT_EXCERPT,
        HIT_SIZE,
        HIT_ORGANISM,
        HIT_DATASOURCE,
        HIT_PATHWAY
    }

    /**
     * Creates a search engine for the given model, backed by a Lucene index
     * at the given location.
     *
     * <p>If the index location already exists, a searcher is opened on it
     * right away; otherwise searching becomes possible after {@link #index()}.
     *
     * @param model the BioPAX model to index and to resolve search hits from
     * @param str   path of the index directory
     */
    public SearchEngine(Model model, String str) {
        this.model = model;
        this.indexFile = new File(str);
        this.analyzer = new StandardAnalyzer();
        this.maxHitsPerPage = DEFAULT_MAX_HITS_PER_PAGE;
        // Only reads indexFile; invoked last so the instance is fully set up first.
        initSearcherManager();
    }

    /**
     * Opens a {@link SearcherManager} on the index location if it exists.
     *
     * <p>A missing location is only logged, as is a failure to open the
     * index; in both cases {@code searcherManager} is left unchanged.
     */
    private void initSearcherManager() {
        if (!this.indexFile.exists()) {
            LOG.info(this.indexFile.getPath() + " does not exist.");
            return;
        }
        try {
            this.searcherManager = new SearcherManager(MMapDirectory.open(this.indexFile), new SearcherFactory());
        } catch (IOException openFailure) {
            LOG.warn("Could not create a searcher: " + openFailure);
        }
    }

    /**
     * Sets the page size used by subsequent searches.
     *
     * @param i maximum number of hits returned per result page; the value is stored without validation
     */
    public void setMaxHitsPerPage(int i) {
        this.maxHitsPerPage = i;
    }

    /**
     * Returns the page size currently applied to searches.
     *
     * @return maximum number of hits returned per result page
     */
    public int getMaxHitsPerPage() {
        return this.maxHitsPerPage;
    }

    /**
     * Runs a full-text search over the index and returns one page of hits.
     *
     * <p>The query {@code "*"} (ignoring surrounding whitespace) matches every
     * indexed element of the requested type, or of any Level3 type when no
     * type is given. Any other query is parsed with Lucene's classic syntax
     * against {@link #DEFAULT_FIELDS}, with leading wildcards allowed.
     *
     * @param str     the query string; must not be null
     * @param i       page number; values of zero or less return the first page
     * @param cls     restricts hits to this BioPAX type and its known sub-types; may be null
     * @param strArr  data source filter values (any may match); may be null or empty
     * @param strArr2 organism filter values (any may match); may be null or empty
     * @return the requested page of hits together with the total hit count
     * @throws IllegalStateException if no index is available to search
     * @throws RuntimeException      if the query cannot be parsed or the index cannot be read
     */
    @Override // org.biopax.paxtools.search.Searcher
    public SearchResult search(String str, int i, Class<? extends BioPAXElement> cls, String[] strArr, String[] strArr2) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("search: " + str + ", page: " + i + ", filterBy: " + cls + "; extra filters: ds in (" + Arrays.toString(strArr) + "), org. in (" + Arrays.toString(strArr2) + ")");
        }
        // Work on a local reference so acquire and release always hit the same manager.
        final SearcherManager manager = this.searcherManager;
        if (manager == null) {
            throw new IllegalStateException("search: no index is available at " + this.indexFile.getPath() + "; call index() first.");
        }
        SearchResult result;
        IndexSearcher indexSearcher = null;
        try {
            MultiFieldQueryParser queryParser = new MultiFieldQueryParser(DEFAULT_FIELDS, this.analyzer);
            queryParser.setAllowLeadingWildcard(true);
            indexSearcher = manager.acquire();
            if (str.trim().equals("*")) {
                // "Match all": the type restriction is expressed by the query itself,
                // so the filter carries only the data source and organism constraints.
                Class<? extends BioPAXElement> type = (cls != null) ? cls : Level3Element.class;
                BooleanQuery typeQuery = new BooleanQuery();
                for (Class<?> subType : SimpleEditorMap.L3.getKnownSubClassesOf(type)) {
                    typeQuery.add(new TermQuery(new Term(FIELD_TYPE, subType.getSimpleName().toLowerCase())), BooleanClause.Occur.SHOULD);
                }
                Filter filter = createFilter(null, strArr, strArr2);
                result = transform(typeQuery, indexSearcher, false, fetchPage(indexSearcher, typeQuery, filter, i));
            } else {
                Query parsedQuery = queryParser.parse(str);
                LOG.debug("parsed lucene query is " + parsedQuery.getClass().getSimpleName());
                Filter filter = createFilter(cls, strArr, strArr2);
                result = transform(parsedQuery, indexSearcher, true, fetchPage(indexSearcher, parsedQuery, filter, i));
            }
        } catch (ParseException e) {
            throw new RuntimeException("getTopDocs: failed to parse the query string.", e);
        } catch (IOException e) {
            throw new RuntimeException("getTopDocs: failed.", e);
        } finally {
            // Always hand the searcher back, also when parsing or searching failed.
            if (indexSearcher != null) {
                try {
                    manager.release(indexSearcher);
                } catch (IOException e) {
                    LOG.warn("search: failed to release the index searcher.", e);
                }
            }
        }
        result.setPage(i);
        return result;
    }

    /**
     * Fetches the hits of a single result page, running the query exactly once.
     *
     * @param searcher the searcher to query
     * @param query    the query to run
     * @param filter   optional filter; may be null
     * @param page     page number; zero or less selects the first page
     * @return the top documents of the requested page
     * @throws IOException if the index cannot be read
     */
    private TopDocs fetchPage(IndexSearcher searcher, Query query, Filter filter, int page) throws IOException {
        if (page <= 0) {
            return searcher.search(query, filter, this.maxHitsPerPage);
        }
        // Collect everything up to the end of the requested page, then cut out that page.
        TopScoreDocCollector collector = TopScoreDocCollector.create(this.maxHitsPerPage * (page + 1), true);
        searcher.search(query, filter, collector);
        return collector.topDocs(page * this.maxHitsPerPage, this.maxHitsPerPage);
    }

    /**
     * Converts Lucene hits into a {@link SearchResult} of BioPAX elements,
     * annotating each element with hit details (see {@link HitAnnotation}).
     *
     * <p>Organism, data source, pathway and size annotations do not depend on
     * the query and are computed only if not already present on the element.
     * The excerpt is query-specific, so any excerpt left on the (shared) model
     * element by an earlier search is discarded first. Without that, stale
     * excerpts would be reused and the score/explanation suffix would grow with
     * every search.
     *
     * @param query         the executed query (used for highlighting and score explanation)
     * @param indexSearcher the searcher the hits came from
     * @param z             whether to highlight matching keyword fragments
     * @param topDocs       the hits to convert
     * @return the search result; hits whose URI is not in the model are skipped
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the index cannot be read
     */
    private SearchResult transform(Query query, IndexSearcher indexSearcher, boolean z, TopDocs topDocs) throws CorruptIndexException, IOException {
        SearchResult searchResult = new SearchResult();
        ArrayList<BioPAXElement> hits = new ArrayList<BioPAXElement>();
        searchResult.setMaxHitsPerPage(this.maxHitsPerPage);
        searchResult.setHits(hits);
        final String excerptKey = HitAnnotation.HIT_EXCERPT.name();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            String uri = doc.get(FIELD_URI);
            BioPAXElement element = this.model.getByID(uri);
            LOG.debug("transform: doc:" + scoreDoc.doc + ", uri:" + uri);
            if (element == null) {
                // The index and the model are out of sync; one bad hit must not fail the whole search.
                LOG.warn("transform: skipped a hit that is not in the model (stale index?); uri: " + uri);
                continue;
            }
            // The excerpt belongs to the current query only.
            element.getAnnotations().remove(excerptKey);
            if (z && doc.get(FIELD_KEYWORD) != null) {
                String fragments = highlightKeywords(query, doc.getValues(FIELD_KEYWORD));
                if (fragments != null && !fragments.isEmpty()) {
                    element.getAnnotations().put(excerptKey, fragments);
                }
            } else if (z) {
                LOG.warn("Highlighter skipped, because KEYWORD field was null; hit: " + uri + ", " + element.getModelInterface().getSimpleName());
            }
            if (doc.get(FIELD_ORGANISM) != null && !element.getAnnotations().containsKey(HitAnnotation.HIT_ORGANISM.name())) {
                element.getAnnotations().put(HitAnnotation.HIT_ORGANISM.name(), sortedValues(doc.getValues(FIELD_ORGANISM)));
            }
            if (doc.get(FIELD_DATASOURCE) != null && !element.getAnnotations().containsKey(HitAnnotation.HIT_DATASOURCE.name())) {
                element.getAnnotations().put(HitAnnotation.HIT_DATASOURCE.name(), sortedValues(doc.getValues(FIELD_DATASOURCE)));
            }
            if (doc.get(FIELD_PATHWAY) != null && !element.getAnnotations().containsKey(HitAnnotation.HIT_PATHWAY.name())) {
                TreeSet<String> parentPathways = new TreeSet<String>();
                for (String pathwayUri : doc.getValues(FIELD_PATHWAY)) {
                    // A pathway is not reported as its own parent.
                    if (!pathwayUri.equals(uri)) {
                        parentPathways.add(pathwayUri);
                    }
                }
                element.getAnnotations().put(HitAnnotation.HIT_PATHWAY.name(), parentPathways);
            }
            if (doc.get(FIELD_SIZE) != null && !element.getAnnotations().containsKey(HitAnnotation.HIT_SIZE.name())) {
                element.getAnnotations().put(HitAnnotation.HIT_SIZE.name(), Integer.valueOf(doc.get(FIELD_SIZE)));
            }
            // Append the score and its explanation to the excerpt (diagnostic information).
            String excerpt = (String) element.getAnnotations().get(excerptKey);
            if (excerpt == null) {
                excerpt = "";
            }
            element.getAnnotations().put(excerptKey, excerpt + " -SCORE- " + scoreDoc.score + " -EXPLANATION- " + indexSearcher.explain(query, scoreDoc.doc));
            hits.add(element);
        }
        searchResult.setTotalHits(topDocs.totalHits);
        return searchResult;
    }

    /**
     * Builds an HTML-highlighted excerpt of the stored keywords that match the query.
     *
     * @param query    the executed query
     * @param keywords stored keyword values of the hit
     * @return the best matching fragments joined by "...", possibly empty
     * @throws RuntimeException if highlighting fails
     */
    private String highlightKeywords(Query query, String[] keywords) {
        QueryScorer scorer = new QueryScorer(query, FIELD_KEYWORD);
        // Expand wildcard/prefix queries so that their matches are highlighted as well.
        scorer.setExpandMultiTermQuery(true);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<span class='hitHL'>", "</span>"), scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 80));
        String text = StringUtils.join(keywords, " ");
        try {
            return highlighter.getBestFragments(this.analyzer.tokenStream("", new StringReader(text)), text, 7, "...");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the given stored field values as a sorted, duplicate-free set. */
    private TreeSet<String> sortedValues(String[] values) {
        return new TreeSet<String>(Arrays.asList(values));
    }

    /**
     * Rebuilds the whole index from the model: closes the current searcher,
     * deletes all existing documents, indexes every BioPAX object using a
     * pool of worker threads, and finally re-opens the searcher.
     *
     * <p>If the run is aborted (interruption or I/O failure) the worker pool
     * is stopped and the index writer is closed, so its resources (including
     * the index write lock) are released. On interruption the thread's
     * interrupt flag is restored before the exception is thrown. A failure to
     * index a single element is logged and does not abort the run.
     *
     * @throws RuntimeException if the writer cannot be created or closed, the
     *                          index cannot be cleared, or the thread is interrupted
     */
    @Override // org.biopax.paxtools.search.Indexer
    public void index() {
        int size = this.model.getObjects().size();
        LOG.info("index(), there are " + size + " BioPAX objects to be (re-)indexed.");
        final IndexWriter indexWriter;
        try {
            if (this.searcherManager != null) {
                this.searcherManager.close();
                this.searcherManager = null;
            }
            indexWriter = new IndexWriter(FSDirectory.open(this.indexFile), new IndexWriterConfig(Version.LATEST, this.analyzer));
        } catch (IOException e) {
            throw new RuntimeException("Failed to create a new IndexWriter.", e);
        }

        final ExecutorService workers = Executors.newFixedThreadPool(30);
        InterruptedException interruption = null;
        boolean finished = false;
        try {
            indexWriter.deleteAll();
            indexWriter.commit();
            final AtomicInteger remaining = new AtomicInteger(size);
            for (final BioPAXElement bioPAXElement : this.model.getObjects()) {
                workers.execute(new Runnable() {
                    @Override // java.lang.Runnable
                    public void run() {
                        try {
                            annotateAndIndex(bioPAXElement, indexWriter);
                        } catch (RuntimeException e) {
                            // Report through the logger instead of losing the failure in the worker thread.
                            LOG.error("index(), failed to index " + bioPAXElement.getRDFId(), e);
                        } finally {
                            int left = remaining.decrementAndGet();
                            if (left % 10000 == 0) {
                                LOG.info("index(), biopax objects left to index: " + left);
                            }
                        }
                    }
                });
            }
            workers.shutdown();
            try {
                workers.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                interruption = e;
            }
            finished = (interruption == null);
        } catch (IOException e) {
            throw new RuntimeException("Failed to create a new IndexWriter.", e);
        } finally {
            if (!finished) {
                // Aborted run: stop the workers and release the writer.
                workers.shutdownNow();
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    LOG.warn("index(), failed to close IndexWriter after an aborted run.", e);
                }
            }
        }
        if (interruption != null) {
            // Restored only after the clean-up above, so closing the writer is not disturbed.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Interrupted!", interruption);
        }
        try {
            indexWriter.close();
        } catch (IOException e) {
            throw new RuntimeException("Failed to close IndexWriter.", e);
        }
        initSearcherManager();
    }

    /**
     * Computes the temporary index annotations of one element (keywords,
     * data sources, organisms, parent pathways and, for processes, size)
     * and adds the element to the index.
     *
     * @param element the BioPAX element to index
     * @param writer  the writer to add the document to
     */
    private void annotateAndIndex(BioPAXElement element, IndexWriter writer) {
        Set<String> keywords = ModelUtils.getKeywords(element, 3);
        // Iterate over a copy, because matching entries are removed from the original set.
        for (String keyword : new HashSet<String>(keywords)) {
            if (keyword.startsWith("REPLACED ") || keyword.contains("ADDED")) {
                keywords.remove(keyword);
            }
        }
        element.getAnnotations().put(FIELD_KEYWORD, keywords);
        element.getAnnotations().put(FIELD_DATASOURCE, ModelUtils.getDatasources(element));
        element.getAnnotations().put(FIELD_ORGANISM, ModelUtils.getOrganisms(element));
        element.getAnnotations().put(FIELD_PATHWAY, ModelUtils.getParentPathways(element));
        if (element instanceof Process) {
            Fetcher fetcher = new Fetcher(SimpleEditorMap.L3, new org.biopax.paxtools.util.Filter[]{Fetcher.nextStepFilter});
            int processCount = fetcher.fetch(element, Process.class).size();
            element.getAnnotations().put(FIELD_SIZE, Integer.toString(processCount));
        }
        index(element, writer);
    }

    /**
     * Builds the Lucene document of a single BioPAX element and adds it to
     * the index.
     *
     * <p>Pathway, organism, data source, keyword and size values are read
     * from the element's annotations, where the caller is expected to have
     * put them under the field names; those annotations are removed again
     * once consumed. Names, xref ids and the xref database name are taken
     * from the element itself.
     *
     * @param bioPAXElement the element to index
     * @param indexWriter   the writer to add the document to
     * @throws RuntimeException if the document cannot be added
     */
    @SuppressWarnings("unchecked") // annotation values are typed by the convention used when they were put
    void index(BioPAXElement bioPAXElement, IndexWriter indexWriter) {
        final Document luceneDoc = new Document();
        luceneDoc.add(new StoredField(FIELD_URI, bioPAXElement.getRDFId()));
        luceneDoc.add(new StringField(FIELD_TYPE, bioPAXElement.getModelInterface().getSimpleName().toLowerCase(), Field.Store.YES));

        // Fields computed up-front and handed over via the annotations.
        if (bioPAXElement.getAnnotations().containsKey(FIELD_PATHWAY)) {
            addPathways((Set<Pathway>) bioPAXElement.getAnnotations().get(FIELD_PATHWAY), luceneDoc);
        }
        if (bioPAXElement.getAnnotations().containsKey(FIELD_ORGANISM)) {
            addOrganisms((Set<BioSource>) bioPAXElement.getAnnotations().get(FIELD_ORGANISM), luceneDoc);
        }
        if (bioPAXElement.getAnnotations().containsKey(FIELD_DATASOURCE)) {
            addDatasources((Set<Provenance>) bioPAXElement.getAnnotations().get(FIELD_DATASOURCE), luceneDoc);
        }
        if (bioPAXElement.getAnnotations().containsKey(FIELD_KEYWORD)) {
            addKeywords((Set<String>) bioPAXElement.getAnnotations().get(FIELD_KEYWORD), luceneDoc);
        }
        if (bioPAXElement.getAnnotations().containsKey(FIELD_SIZE)) {
            int processCount = Integer.parseInt((String) bioPAXElement.getAnnotations().get(FIELD_SIZE));
            luceneDoc.add(new IntField(FIELD_SIZE, processCount, Field.Store.YES));
        }

        // The temporary annotations have served their purpose.
        for (String consumedKey : new String[] {FIELD_KEYWORD, FIELD_DATASOURCE, FIELD_ORGANISM, FIELD_PATHWAY, FIELD_SIZE}) {
            bioPAXElement.getAnnotations().remove(consumedKey);
        }

        if (bioPAXElement instanceof Named) {
            Named namedElement = (Named) bioPAXElement;
            String standardName = namedElement.getStandardName();
            String displayName = namedElement.getDisplayName();
            if (standardName != null) {
                addNameField(luceneDoc, standardName, 3.5f);
            }
            if (displayName != null && !displayName.equalsIgnoreCase(standardName)) {
                addNameField(luceneDoc, displayName, 3.0f);
            }
            for (String otherName : namedElement.getName()) {
                boolean alreadyAdded = otherName.equalsIgnoreCase(displayName) || otherName.equalsIgnoreCase(standardName);
                if (!alreadyAdded) {
                    addNameField(luceneDoc, otherName.toLowerCase(), 2.5f);
                }
            }
        }

        if (bioPAXElement instanceof XReferrable) {
            for (Xref referenced : ((XReferrable) bioPAXElement).getXref()) {
                if (referenced.getId() != null) {
                    luceneDoc.add(new StringField(FIELD_XREFID, referenced.getId().toLowerCase(), Field.Store.NO));
                }
            }
        }

        if (bioPAXElement instanceof Xref) {
            Xref self = (Xref) bioPAXElement;
            if (self.getId() != null) {
                luceneDoc.add(new StringField(FIELD_XREFID, self.getId().toLowerCase(), Field.Store.NO));
            }
            if (self.getDb() != null) {
                luceneDoc.add(new TextField(FIELD_XREFDB, self.getDb().toLowerCase(), Field.Store.NO));
            }
        }

        try {
            indexWriter.addDocument(luceneDoc);
        } catch (IOException e) {
            throw new RuntimeException("Failed to index; " + bioPAXElement.getRDFId(), e);
        }
    }

    /** Adds an indexed, non-stored name field with the given boost to the document. */
    private void addNameField(Document luceneDoc, String value, float boost) {
        TextField nameField = new TextField(FIELD_NAME, value, Field.Store.NO);
        nameField.setBoost(boost);
        luceneDoc.add(nameField);
    }

    /**
     * Adds every keyword, lower-cased, to the document as a stored and
     * indexed keyword field.
     *
     * @param set      the keywords of the element
     * @param document the document to add the fields to
     */
    private void addKeywords(Set<String> set, Document document) {
        for (String keyword : set) {
            String lowerCased = keyword.toLowerCase();
            document.add(new TextField(FIELD_KEYWORD, lowerCased, Field.Store.YES));
        }
    }

    /**
     * Adds the data sources of an element to the document: each provenance
     * URI as a stored, exactly-matched value and each provenance name,
     * lower-cased, as indexed text.
     *
     * @param set      the data sources of the element
     * @param document the document to add the fields to
     */
    private void addDatasources(Set<Provenance> set, Document document) {
        for (Provenance datasource : set) {
            document.add(new StringField(FIELD_DATASOURCE, datasource.getRDFId(), Field.Store.YES));
            for (String datasourceName : datasource.getName()) {
                document.add(new TextField(FIELD_DATASOURCE, datasourceName.toLowerCase(), Field.Store.NO));
            }
        }
    }

    /**
     * Adds the organisms of an element to the document: each BioSource URI
     * as a stored value, and its names, unification xref ids, tissue terms
     * and cell type terms (all lower-cased) as indexed text.
     *
     * @param set      the organisms of the element
     * @param document the document to add the fields to
     */
    private void addOrganisms(Set<BioSource> set, Document document) {
        for (BioSource organism : set) {
            document.add(new StoredField(FIELD_ORGANISM, organism.getRDFId()));
            addOrganismTerms(organism.getName(), document);
            for (UnificationXref taxonXref : new ClassFilterSet<Xref, UnificationXref>(organism.getXref(), UnificationXref.class)) {
                if (taxonXref.getId() != null) {
                    document.add(new TextField(FIELD_ORGANISM, taxonXref.getId().toLowerCase(), Field.Store.NO));
                }
            }
            if (organism.getTissue() != null) {
                addOrganismTerms(organism.getTissue().getTerm(), document);
            }
            if (organism.getCellType() != null) {
                addOrganismTerms(organism.getCellType().getTerm(), document);
            }
        }
    }

    /** Adds each term, lower-cased, as indexed (not stored) organism text. */
    private void addOrganismTerms(Set<String> terms, Document document) {
        for (String term : terms) {
            document.add(new TextField(FIELD_ORGANISM, term.toLowerCase(), Field.Store.NO));
        }
    }

    /**
     * Adds the parent pathways of an element to the document: each pathway
     * URI as a stored value, and its names and unification xref ids
     * (lower-cased) as indexed pathway text. Every name and xref id is
     * additionally stored, without indexing, under the keyword field.
     *
     * @param set      the parent pathways of the element
     * @param document the document to add the fields to
     */
    private void addPathways(Set<Pathway> set, Document document) {
        for (Pathway parent : set) {
            document.add(new StoredField(FIELD_PATHWAY, parent.getRDFId()));
            for (String pathwayName : parent.getName()) {
                String lowerCasedName = pathwayName.toLowerCase();
                document.add(new TextField(FIELD_PATHWAY, lowerCasedName, Field.Store.NO));
                document.add(new StoredField(FIELD_KEYWORD, lowerCasedName));
            }
            for (UnificationXref pathwayXref : new ClassFilterSet<Xref, UnificationXref>(parent.getXref(), UnificationXref.class)) {
                if (pathwayXref.getId() == null) {
                    continue;
                }
                document.add(new TextField(FIELD_PATHWAY, pathwayXref.getId().toLowerCase(), Field.Store.NO));
                document.add(new StoredField(FIELD_KEYWORD, pathwayXref.getId().toLowerCase()));
            }
        }
    }

    /**
     * Looks up the taxonomy id of an organism.
     *
     * @param bioSource the organism
     * @return the id of the first unification xref whose database name is
     *         "taxonomy" (ignoring case), or null if there is none
     */
    private String getTaxonId(BioSource bioSource) {
        if (bioSource.getXref().isEmpty()) {
            return null;
        }
        for (UnificationXref candidate : new ClassFilterSet<Xref, UnificationXref>(bioSource.getXref(), UnificationXref.class)) {
            if ("taxonomy".equalsIgnoreCase(candidate.getDb())) {
                return candidate.getId();
            }
        }
        return null;
    }

    /**
     * Builds the search filter from the optional type, data source and
     * organism restrictions; all given restrictions must hold for a hit.
     *
     * @param cls     restricts hits to this type or any of its known sub-types; may be null
     * @param strArr  data source values, any of which may match; may be null or empty
     * @param strArr2 organism values, any of which may match; may be null or empty
     * @return a cached filter, or null when no restriction was given
     */
    private Filter createFilter(Class<? extends BioPAXElement> cls, String[] strArr, String[] strArr2) {
        final BooleanQuery restrictions = new BooleanQuery();
        final boolean byDatasource = strArr != null && strArr.length > 0;
        final boolean byOrganism = strArr2 != null && strArr2.length > 0;
        if (byDatasource) {
            restrictions.add(subQuery(strArr, FIELD_DATASOURCE), BooleanClause.Occur.MUST);
        }
        if (byOrganism) {
            restrictions.add(subQuery(strArr2, FIELD_ORGANISM), BooleanClause.Occur.MUST);
        }
        if (cls != null) {
            // The type itself or any of its known sub-types is acceptable.
            BooleanQuery anyAcceptedType = new BooleanQuery();
            anyAcceptedType.add(new TermQuery(new Term(FIELD_TYPE, cls.getSimpleName().toLowerCase())), BooleanClause.Occur.SHOULD);
            for (Class<?> subType : SimpleEditorMap.L3.getKnownSubClassesOf(cls)) {
                anyAcceptedType.add(new TermQuery(new Term(FIELD_TYPE, subType.getSimpleName().toLowerCase())), BooleanClause.Occur.SHOULD);
            }
            restrictions.add(anyAcceptedType, BooleanClause.Occur.MUST);
        }
        if (!restrictions.clauses().isEmpty()) {
            LOG.debug("filterQuery: " + restrictions.toString());
            return new CachingWrapperFilter(new QueryWrapperFilter(restrictions));
        }
        return null;
    }

    /**
     * Builds a query that matches documents whose given field matches any of
     * the filter values. A value made of several whitespace-separated words
     * matches only if all of its words match; all words are lower-cased.
     *
     * <p>Values are trimmed before they are split, so surrounding whitespace
     * does not produce an empty word that could never match. Null and blank
     * values are ignored.
     *
     * @param strArr the filter values
     * @param str    the name of the field to match against
     * @return the combined query
     */
    private Query subQuery(String[] strArr, String str) {
        BooleanQuery anyValue = new BooleanQuery();
        Pattern whitespace = Pattern.compile("\\s+");
        for (String value : strArr) {
            if (value == null) {
                continue;
            }
            String trimmed = value.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            String[] words = whitespace.split(trimmed);
            if (words.length == 1) {
                anyValue.add(new TermQuery(new Term(str, words[0].toLowerCase())), BooleanClause.Occur.SHOULD);
            } else {
                BooleanQuery allWords = new BooleanQuery();
                for (String word : words) {
                    allWords.add(new TermQuery(new Term(str, word.toLowerCase())), BooleanClause.Occur.MUST);
                    LOG.debug("subQuery, add part: " + word.toLowerCase());
                }
                anyValue.add(allWords, BooleanClause.Occur.SHOULD);
            }
        }
        return anyValue;
    }
}
