Example 81 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.

the class IdfPassageScorer method score.

@Override
public void score(String query, Map<String, Float> sentences) throws Exception {
    EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer(StopFilter.makeStopSet(stopWords));
    QueryParser queryParser = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, englishAnalyzer);
    ClassicSimilarity similarity = new ClassicSimilarity();
    String escapedQuery = QueryParser.escape(query);
    Query question = queryParser.parse(escapedQuery);
    HashSet<String> questionTerms = new HashSet<>(Arrays.asList(question.toString().trim().toLowerCase().split("\\s+")));
    EnglishAnalyzer englishAnalyzerWithStop = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser queryParserWithStop = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, englishAnalyzerWithStop);
    Query questionWithStopWords = queryParserWithStop.parse(escapedQuery);
    HashSet<String> questionTermsIDF = new HashSet<>(Arrays.asList(questionWithStopWords.toString().trim().toLowerCase().split("\\s+")));
    // add the question terms to the termIDF Map
    for (String questionTerm : questionTermsIDF) {
        try {
            TermQuery q = (TermQuery) queryParserWithStop.parse(questionTerm);
            Term t = q.getTerm();
            double termIDF = similarity.idf(reader.docFreq(t), reader.numDocs());
            termIdfMap.put(questionTerm, String.valueOf(termIDF));
        } catch (Exception e) {
            continue;
        }
    }
    // avoid duplicate passages
    HashSet<String> seenSentences = new HashSet<>();
    for (Map.Entry<String, Float> sent : sentences.entrySet()) {
        double idf = 0.0;
        HashSet<String> seenTerms = new HashSet<>();
        String[] terms = sent.getKey().toLowerCase().split("\\s+");
        for (String term : terms) {
            try {
                TermQuery q = (TermQuery) queryParser.parse(term);
                Term t = q.getTerm();
                double termIDF = similarity.idf(reader.docFreq(t), reader.numDocs());
                if (questionTerms.contains(t.toString()) && !seenTerms.contains(t.toString())) {
                    idf += termIDF;
                    seenTerms.add(t.toString());
                }
                TermQuery q2 = (TermQuery) queryParserWithStop.parse(term);
                Term t2 = q2.getTerm();
                double termIDFwithStop = similarity.idf(reader.docFreq(t2), reader.numDocs());
                termIdfMap.put(term, String.valueOf(termIDFwithStop));
            } catch (Exception e) {
                continue;
            }
        }
        double weightedScore = idf + 0.0001 * sent.getValue();
        ScoredPassage scoredPassage = new ScoredPassage(sent.getKey(), weightedScore, sent.getValue());
        if ((scoredPassageHeap.size() < topPassages || weightedScore > scoredPassageHeap.peekLast().getScore()) && !seenSentences.contains(sent.getKey())) {
            if (scoredPassageHeap.size() == topPassages) {
                scoredPassageHeap.pollLast();
            }
            scoredPassageHeap.add(scoredPassage);
            seenSentences.add(sent.getKey());
        }
    }
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)
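
The core idiom in this example is worth isolating: escape raw user text, parse it, and look up each term's inverse document frequency. A minimal, self-contained sketch of that pattern (not Anserini code; the index path and the "contents" field name are placeholder assumptions):

import java.nio.file.Paths;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.FSDirectory;

public class IdfSketch {
    public static void main(String[] args) throws Exception {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            QueryParser parser = new QueryParser("contents", new EnglishAnalyzer());
            ClassicSimilarity similarity = new ClassicSimilarity();
            // escape() is static: it neutralizes Lucene query-syntax characters in raw user input
            String escaped = QueryParser.escape("what is idf?");
            // toString("contents") renders the parsed query without the field prefix
            for (String token : parser.parse(escaped).toString("contents").toLowerCase().split("\\s+")) {
                double idf = similarity.idf(reader.docFreq(new Term("contents", token)), reader.numDocs());
                System.out.printf("%s -> %.4f%n", token, idf);
            }
        }
    }
}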

Example 82 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.

the class SearchWebCollection method search.

/**
 * Writes a TREC submission (run) file for the given topics.
 *
 * @param topics         queries, keyed by topic number
 * @param submissionFile path of the run file to write
 * @param similarity     similarity (scoring function) to use
 * @param numHits        number of hits to retrieve per topic
 * @param cascade        reranker cascade applied to the initial results
 * @param useQueryParser whether to parse topics with QueryParser instead of building a bag-of-words query
 * @param keepstopwords  whether to retain stopwords during analysis
 * @throws IOException    if the index cannot be read or the run file cannot be written
 * @throws ParseException if a topic cannot be parsed
 */
public void search(SortedMap<Integer, String> topics, String submissionFile, Similarity similarity, int numHits, RerankerCascade cascade, boolean useQueryParser, boolean keepstopwords) throws IOException, ParseException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(similarity);
    final String runTag = "BM25_EnglishAnalyzer_" + (keepstopwords ? "KeepStopwords_" : "") + FIELD_BODY + "_" + similarity.toString();
    PrintWriter out = new PrintWriter(Files.newBufferedWriter(Paths.get(submissionFile), StandardCharsets.US_ASCII));
    EnglishAnalyzer ea = keepstopwords ? new EnglishAnalyzer(CharArraySet.EMPTY_SET) : new EnglishAnalyzer();
    QueryParser queryParser = new QueryParser(FIELD_BODY, ea);
    queryParser.setDefaultOperator(QueryParser.Operator.OR);
    for (Map.Entry<Integer, String> entry : topics.entrySet()) {
        int qID = entry.getKey();
        String queryString = entry.getValue();
        Query query = useQueryParser ? queryParser.parse(queryString) : AnalyzerUtils.buildBagOfWordsQuery(FIELD_BODY, ea, queryString);
        /**
         * For Web Tracks 2010, 2011, and 2012, an experimental run consists of the top 10,000 documents for each topic query.
         */
        TopDocs rs = searcher.search(query, numHits);
        ScoreDoc[] hits = rs.scoreDocs;
        List<String> queryTokens = AnalyzerUtils.tokenize(ea, queryString);
        RerankerContext context = new RerankerContext(searcher, query, String.valueOf(qID), queryString, queryTokens, FIELD_BODY, null);
        ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);
        /**
         * the first column is the topic number.
         * the second column is currently unused and should always be "Q0".
         * the third column is the official document identifier of the retrieved document.
         * the fourth column is the rank at which the document was retrieved.
         * the fifth column shows the score (integer or floating point) that generated the ranking.
         * the sixth column is called the "run tag" and should be a unique identifier for the run.
         */
        for (int i = 0; i < docs.documents.length; i++) {
            out.println(String.format("%d Q0 %s %d %f %s", qID, docs.documents[i].getField(FIELD_ID).stringValue(), (i + 1), docs.scores[i], runTag));
        }
    }
    out.flush();
    out.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) ScoredDocuments(io.anserini.rerank.ScoredDocuments) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Map(java.util.Map) SortedMap(java.util.SortedMap) RerankerContext(io.anserini.rerank.RerankerContext) PrintWriter(java.io.PrintWriter)
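
Stripped of the reranker, the method above is the standard parse-search-print loop for producing a TREC run. A minimal sketch under the same conventions (the index path, the "contents" body field, and the "id" docid field are placeholder assumptions):

import java.nio.file.Paths;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class TrecRunSketch {
    public static void main(String[] args) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("contents", new EnglishAnalyzer());
            parser.setDefaultOperator(QueryParser.Operator.OR); // any topic term may match
            Query query = parser.parse(QueryParser.escape("obama family tree"));
            TopDocs rs = searcher.search(query, 1000);
            int qID = 1; // topic number
            for (int i = 0; i < rs.scoreDocs.length; i++) {
                ScoreDoc hit = rs.scoreDocs[i];
                String docid = searcher.doc(hit.doc).get("id");
                System.out.println(String.format("%d Q0 %s %d %f %s", qID, docid, i + 1, hit.score, "myRunTag"));
            }
        }
    }
}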

Example 83 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project jspwiki by apache.

the class LuceneSearchProvider method findPages.

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags, WikiContext wikiContext) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;
    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_47, queryfields, getLuceneAnalyzer());
        // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"), new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }
        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = DirectoryReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }
        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;
        AuthorizationManager mgr = m_engine.getAuthorizationManager();
        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);
            if (page != null) {
                if (page instanceof Attachment) {
                // Currently attachments don't look nice on the search-results page
                // When the search-results are cleaned up this can be enabled again.
                }
                PagePermission pp = new PagePermission(page, PagePermission.VIEW_ACTION);
                if (mgr.checkPermission(wikiContext.getWikiSession(), pp)) {
                    int score = (int) (hits[curr].score * 100);
                    // Get highlighted search contexts
                    String text = doc.get(LUCENE_PAGE_CONTENTS);
                    String[] fragments = new String[0];
                    if (text != null && highlighter != null) {
                        TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
                        fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                    }
                    SearchResult result = new SearchResultImpl(page, score, fragments);
                    list.add(result);
                }
            } else {
                log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.getIndexReader().close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
    return list;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) ProviderException(org.apache.wiki.api.exceptions.ProviderException) WikiPage(org.apache.wiki.WikiPage) Attachment(org.apache.wiki.attachment.Attachment) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) StringReader(java.io.StringReader) Highlighter(org.apache.lucene.search.highlight.Highlighter) Directory(org.apache.lucene.store.Directory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) SimpleHTMLEncoder(org.apache.lucene.search.highlight.SimpleHTMLEncoder) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) IOException(java.io.IOException) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NoRequiredPropertyException(org.apache.wiki.api.exceptions.NoRequiredPropertyException) InternalWikiException(org.apache.wiki.InternalWikiException) ParseException(org.apache.lucene.queryparser.classic.ParseException) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) IOException(java.io.IOException) ProviderException(org.apache.wiki.api.exceptions.ProviderException) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) AuthorizationManager(org.apache.wiki.auth.AuthorizationManager) ParseException(org.apache.lucene.queryparser.classic.ParseException) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) File(java.io.File) PagePermission(org.apache.wiki.auth.permissions.PagePermission)
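
The reusable pattern here is MultiFieldQueryParser plus Highlighter. A condensed, runnable sketch against the same Lucene 4.7 API (the sample text and field names are placeholders):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

public class HighlightSketch {
    public static void main(String[] args) throws Exception {
        String text = "JSPWiki stores pages and searches them with Lucene.";
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        // one parsed query matched against several fields at once
        String[] fields = { "contents", "title" };
        MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_47, fields, analyzer);
        Query query = qp.parse("lucene");
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"), new SimpleHTMLEncoder(), new QueryScorer(query));
        TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text));
        for (String fragment : highlighter.getBestFragments(tokenStream, text, 2)) {
            System.out.println(fragment);
        }
    }
}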

Example 84 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project derby by apache.

the class LuceneQueryVTI method initScan.

// ///////////////////////////////////////////////////////////////////
// 
// MINIONS
// 
// ///////////////////////////////////////////////////////////////////
/**
 * Initialize the metadata and scan
 */
private void initScan() throws SQLException {
    try {
        // read the execution context for this AwareVTI
        VTIContext context = getContext();
        _schema = context.vtiSchema();
        String[] nameParts = LuceneSupport.decodeFunctionName(context.vtiTable());
        _table = nameParts[LuceneSupport.TABLE_PART];
        _column = nameParts[LuceneSupport.COLUMN_PART];
        // divine the column names
        VTITemplate.ColumnDescriptor[] returnColumns = getReturnTableSignature(_connection);
        String[] columnNames = new String[returnColumns.length];
        for (int i = 0; i < returnColumns.length; i++) {
            columnNames[i] = returnColumns[i].columnName;
        }
        setColumnNames(columnNames);
        _scoreColumnID = getColumnCount();
        _docIDColumnID = _scoreColumnID - 1;
        _maxKeyID = _docIDColumnID - 1;
        _minKeyID = 1;
        // make sure the user has SELECT privilege on all relevant columns of the underlying table
        vetPrivileges();
        String delimitedColumnName = LuceneSupport.delimitID(_column);
        DerbyLuceneDir derbyLuceneDir = LuceneSupport.getDerbyLuceneDir(_connection, _schema, _table, delimitedColumnName);
        StorageFile propertiesFile = LuceneSupport.getIndexPropertiesFile(derbyLuceneDir);
        Properties indexProperties = readIndexProperties(propertiesFile);
        String indexDescriptorMaker = indexProperties.getProperty(LuceneSupport.INDEX_DESCRIPTOR_MAKER);
        LuceneIndexDescriptor indexDescriptor = getIndexDescriptor(indexDescriptorMaker);
        Analyzer analyzer = indexDescriptor.getAnalyzer();
        QueryParser qp = indexDescriptor.getQueryParser();
        vetLuceneVersion(indexProperties.getProperty(LuceneSupport.LUCENE_VERSION));
        _indexReader = getIndexReader(derbyLuceneDir);
        _searcher = new IndexSearcher(_indexReader);
        Query luceneQuery = qp.parse(_queryText);
        TopScoreDocCollector tsdc = TopScoreDocCollector.create(_windowSize, true);
        if (_scoreCeiling != null) {
            tsdc = TopScoreDocCollector.create(_windowSize, new ScoreDoc(0, _scoreCeiling), true);
        }
        searchAndScore(luceneQuery, tsdc);
    } catch (IOException ioe) {
        throw ToolUtilities.wrap(ioe);
    } catch (ParseException pe) {
        throw ToolUtilities.wrap(pe);
    } catch (PrivilegedActionException pae) {
        throw ToolUtilities.wrap(pae);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) VTIContext(org.apache.derby.vti.VTIContext) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) PrivilegedActionException(java.security.PrivilegedActionException) IOException(java.io.IOException) Properties(java.util.Properties) Analyzer(org.apache.lucene.analysis.Analyzer) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) LuceneIndexDescriptor(org.apache.derby.optional.api.LuceneIndexDescriptor) StorageFile(org.apache.derby.io.StorageFile) ParseException(org.apache.lucene.queryparser.classic.ParseException)
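
The query execution at the heart of initScan() is a plain collector-based search; the ScoreDoc passed to TopScoreDocCollector.create is how the score ceiling takes effect. A minimal sketch with the same Lucene 4.x signatures (the index path and field name are placeholders):

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class CollectorSketch {
    public static void main(String[] args) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser qp = new QueryParser(Version.LUCENE_47, "contents", new StandardAnalyzer(Version.LUCENE_47));
            Query query = qp.parse("database");
            // the ScoreDoc acts as an upper bound: only hits ranked after it are collected
            TopScoreDocCollector tsdc = TopScoreDocCollector.create(100, new ScoreDoc(0, 2.0f), true);
            searcher.search(query, tsdc);
            for (ScoreDoc hit : tsdc.topDocs().scoreDocs) {
                System.out.println(hit.doc + " " + hit.score);
            }
        }
    }
}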

Example 85 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project jackrabbit-oak by apache.

the class LuceneIndex method query.

@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    final Set<String> relPaths = getRelativePaths(ft);
    if (relPaths.size() > 1) {
        return new MultiLuceneIndex(filter, rootState, relPaths).query();
    }
    final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
    // we only restrict non-full-text conditions if there is
    // no relative property in the full-text constraint
    final boolean nonFullTextConstraints = parent.isEmpty();
    final int parentDepth = getDepth(parent);
    QueryLimits settings = filter.getQueryLimits();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {

        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();

        private final Set<String> seenPaths = Sets.newHashSet();

        private ScoreDoc lastDoc;

        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;

        private boolean noDocs = false;

        private long lastSearchIndexerVersion;

        private int reloadCount;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (!parent.isEmpty()) {
                    // TODO OAK-828 this breaks node aggregation
                    // get the base path
                    // ensure the path ends with the given
                    // relative path
                    // if (!path.endsWith("/" + parent)) {
                    // continue;
                    // }
                    path = getAncestorPath(path, parentDepth);
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        return null;
                    }
                    seenPaths.add(path);
                }
                return new LuceneResultRow(path, doc.score, excerpt);
            }
            return null;
        }

        /**
         * Loads the Lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TopDocs docs;
                    long time = System.currentTimeMillis();
                    checkForIndexVersionChange(searcher);
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            docs = searcher.search(query, nextBatchSize);
                        }
                        time = System.currentTimeMillis() - time;
                        LOG.debug("... took {} ms", time);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryConstants.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                        }
                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(analyzer, searcher, doc);
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
                    // ACL filter spellchecks
                    Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(suggestion.string);
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
                    // ACL filter suggestions
                    Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT, indexNode.getDefinition().getAnalyzer());
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add("{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                }
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = LucenePropertyIndex.getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                reloadCount++;
                if (reloadCount > MAX_RELOAD_COUNT) {
                    LOG.error("More than {} index version changes detected for query {}", MAX_RELOAD_COUNT, plan);
                    throw new IllegalStateException("Too many version changes");
                }
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without " + "offset; reload {}", currentVersion, lastSearchIndexerVersion, reloadCount);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {

        @Override
        public long getSize() {
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, settings, sizeEstimator, filter);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Set(java.util.Set) HashSet(java.util.HashSet) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryLimits(org.apache.jackrabbit.oak.spi.query.QueryLimits) TopDocs(org.apache.lucene.search.TopDocs) PathStoredFieldVisitor(org.apache.jackrabbit.oak.plugins.index.lucene.util.PathStoredFieldVisitor) Lookup(org.apache.lucene.search.suggest.Lookup) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) AbstractIterator(com.google.common.collect.AbstractIterator) PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) SuggestHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper) IOException(java.io.IOException) Deque(java.util.Deque) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Filter(org.apache.jackrabbit.oak.spi.query.Filter) FullTextExpression(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression) IndexReader(org.apache.lucene.index.IndexReader) SuggestWord(org.apache.lucene.search.spell.SuggestWord) SpellcheckHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper) Collection(java.util.Collection)

Aggregations

QueryParser (org.apache.lucene.queryparser.classic.QueryParser)114 Query (org.apache.lucene.search.Query)79 IndexSearcher (org.apache.lucene.search.IndexSearcher)54 Document (org.apache.lucene.document.Document)47 TopDocs (org.apache.lucene.search.TopDocs)39 ParseException (org.apache.lucene.queryparser.classic.ParseException)38 IOException (java.io.IOException)35 Analyzer (org.apache.lucene.analysis.Analyzer)34 ScoreDoc (org.apache.lucene.search.ScoreDoc)31 IndexReader (org.apache.lucene.index.IndexReader)27 ArrayList (java.util.ArrayList)25 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)24 BooleanQuery (org.apache.lucene.search.BooleanQuery)23 TermQuery (org.apache.lucene.search.TermQuery)19 MultiFieldQueryParser (org.apache.lucene.queryparser.classic.MultiFieldQueryParser)12 HashSet (java.util.HashSet)10 Directory (org.apache.lucene.store.Directory)10 IndexWriter (org.apache.lucene.index.IndexWriter)9 IndexableField (org.apache.lucene.index.IndexableField)8 KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer)7