Example 66 with SpanQuery

Use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

The class SynonymTokenizer, method testGetBestFragmentsFilteredQuery.

public void testGetBestFragmentsFilteredQuery() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            numHighlights = 0;
            SpanQuery[] clauses = { new SpanTermQuery(new Term("contents", "john")), new SpanTermQuery(new Term("contents", "kennedy")) };
            SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            bq.add(snq, Occur.MUST);
            bq.add(TermRangeQuery.newStringRange("contents", "john", "john", true, true), Occur.FILTER);
            doSearching(bq.build());
            doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
            // Currently highlights "John" and "Kennedy" separately
            assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
        }
    };
    helper.start();
}
Also used: BooleanQuery (org.apache.lucene.search.BooleanQuery), TestHighlightRunner (org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner), SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery), Builder (org.apache.lucene.search.PhraseQuery.Builder), DocumentBuilder (javax.xml.parsers.DocumentBuilder), Term (org.apache.lucene.index.Term), SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery), SpanQuery (org.apache.lucene.search.spans.SpanQuery)
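
The test above hides the search-and-highlight plumbing inside the harness methods doSearching and doStandardHighlights. As a rough standalone sketch of the same flow (a Lucene 6.x/7.x-era API is assumed, and the class name, field name, and hit limit are illustrative), a query like the one built above could be run and highlighted like this:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class HighlightHitsSketch {

    // Runs the query and prints the best fragment for each hit.
    // Assumes "contents" is a stored text field; the 10-hit limit is arbitrary.
    static void highlightHits(IndexReader reader, Analyzer analyzer, Query query) throws IOException, InvalidTokenOffsetsException {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.search(query, 10);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            String text = doc.get("contents");
            // getBestFragment re-analyzes the stored text with the given analyzer
            System.out.println(highlighter.getBestFragment(analyzer, "contents", text));
        }
    }
}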

Example 67 with SpanQuery

Use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

The class HighlighterPhraseTest, method testConcurrentSpan.

public void testConcurrentSpan() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox jumped";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectors(true);
        document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "fox")), new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
        final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
        indexSearcher.search(phraseQuery, new SimpleCollector() {

            private int baseDoc;

            @Override
            public void collect(int i) {
                bitset.set(this.baseDoc + i);
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                this.baseDoc = context.docBase;
            }

            @Override
            public void setScorer(org.apache.lucene.search.Scorer scorer) {
                // Do Nothing
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
        assertEquals(1, bitset.cardinality());
        final int maxDoc = indexReader.maxDoc();
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        for (int position = bitset.nextSetBit(0); position < maxDoc - 1; position = bitset.nextSetBit(position + 1)) {
            assertEquals(0, position);
            final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(position), -1);
            assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
        }
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), TokenStream (org.apache.lucene.analysis.TokenStream), Query (org.apache.lucene.search.Query), SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery), PhraseQuery (org.apache.lucene.search.PhraseQuery), SpanQuery (org.apache.lucene.search.spans.SpanQuery), SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery), Document (org.apache.lucene.document.Document), Field (org.apache.lucene.document.Field), TextField (org.apache.lucene.document.TextField), SimpleCollector (org.apache.lucene.search.SimpleCollector), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), FixedBitSet (org.apache.lucene.util.FixedBitSet), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), Directory (org.apache.lucene.store.Directory), Term (org.apache.lucene.index.Term), IOException (java.io.IOException), FieldType (org.apache.lucene.document.FieldType), IndexWriter (org.apache.lucene.index.IndexWriter), IndexReader (org.apache.lucene.index.IndexReader)
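
The noteworthy part of this test is that the TokenStream handed to the highlighter comes from stored term vectors rather than from re-analyzing the text. A minimal sketch of that pattern, assuming the same Lucene-era TokenSources API used above (the class name and parameters are illustrative):

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;

public class TermVectorHighlightSketch {

    // Highlights one document from its term vector, so the stored text is not re-analyzed.
    // Returns null when the field was not indexed with term vectors plus positions and offsets.
    static String highlightFromTermVector(IndexReader reader, int docId, String field, String storedText, Query query)
            throws IOException, InvalidTokenOffsetsException {
        TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(field, reader.getTermVectors(docId), -1);
        if (tokenStream == null) {
            return null;
        }
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        return highlighter.getBestFragment(tokenStream, storedText);
    }
}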

Example 68 with SpanQuery

Use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

The class WeightedSpanTermExtractor, method extractWeightedSpanTerms.

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>SpanQuery</code>.
   * 
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @param spanQuery
   *          SpanQuery to extract Terms from
   * @param boost
   *          amount to boost each created WeightedSpanTerm by
   * @throws IOException If there is a low-level I/O error
   */
protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery, float boost) throws IOException {
    Set<String> fieldNames;
    if (fieldName == null) {
        fieldNames = new HashSet<>();
        collectSpanQueryFields(spanQuery, fieldNames);
    } else {
        fieldNames = new HashSet<>(1);
        fieldNames.add(fieldName);
    }
    // To support the use of the default field name
    if (defaultField != null) {
        fieldNames.add(defaultField);
    }
    Map<String, SpanQuery> queries = new HashMap<>();
    Set<Term> nonWeightedTerms = new HashSet<>();
    final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
    final IndexSearcher searcher = new IndexSearcher(getLeafContext());
    searcher.setQueryCache(null);
    if (mustRewriteQuery) {
        for (final String field : fieldNames) {
            final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
            queries.put(field, rewrittenQuery);
            rewrittenQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
        }
    } else {
        spanQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
    }
    List<PositionSpan> spanPositions = new ArrayList<>();
    for (final String field : fieldNames) {
        final SpanQuery q;
        if (mustRewriteQuery) {
            q = queries.get(field);
        } else {
            q = spanQuery;
        }
        LeafReaderContext context = getLeafContext();
        SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(q, false);
        Bits acceptDocs = context.reader().getLiveDocs();
        final Spans spans = w.getSpans(context, SpanWeight.Postings.POSITIONS);
        if (spans == null) {
            return;
        }
        // collect span positions
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            if (acceptDocs != null && acceptDocs.get(spans.docID()) == false) {
                continue;
            }
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
            }
        }
    }
    if (spanPositions.size() == 0) {
        // no spans found
        return;
    }
    for (final Term queryTerm : nonWeightedTerms) {
        if (fieldNameComparator(queryTerm.field())) {
            WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());
            if (weightedSpanTerm == null) {
                weightedSpanTerm = new WeightedSpanTerm(boost, queryTerm.text());
                weightedSpanTerm.addPositionSpans(spanPositions);
                weightedSpanTerm.positionSensitive = true;
                terms.put(queryTerm.text(), weightedSpanTerm);
            } else {
                if (spanPositions.size() > 0) {
                    weightedSpanTerm.addPositionSpans(spanPositions);
                }
            }
        }
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Term (org.apache.lucene.index.Term), FieldMaskingSpanQuery (org.apache.lucene.search.spans.FieldMaskingSpanQuery), SpanQuery (org.apache.lucene.search.spans.SpanQuery), Spans (org.apache.lucene.search.spans.Spans), SpanWeight (org.apache.lucene.search.spans.SpanWeight), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), Bits (org.apache.lucene.util.Bits), HashSet (java.util.HashSet)
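
extractWeightedSpanTerms is an internal step; client code normally reaches it through getWeightedSpanTerms or, more commonly, through QueryScorer, which uses the same machinery. A hedged sketch of the direct entry point, assuming the Lucene 6.x-style signature getWeightedSpanTerms(Query, float, TokenStream, String) (the class and method names below are illustrative):

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;

public class WeightedSpanTermsSketch {

    // Extracts per-term weights (with position spans for positional queries)
    // for one field's token stream.
    static Map<String, WeightedSpanTerm> extract(Analyzer analyzer, String field, String text, Query query) throws IOException {
        WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor(field);
        try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
            return extractor.getWeightedSpanTerms(query, 1f, tokenStream, field);
        }
    }
}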

Example 69 with SpanQuery

Use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

The class MemoryIndexOffsetStrategy, method buildCombinedAutomaton.

/**
   * Build one {@link CharacterRunAutomaton} matching any term the query might match.
   */
private static CharacterRunAutomaton buildCombinedAutomaton(Predicate<String> fieldMatcher, BytesRef[] terms, CharacterRunAutomaton[] automata, PhraseHelper strictPhrases, Function<Query, Collection<Query>> multiTermQueryRewrite) {
    List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
    if (terms.length > 0) {
        allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
    }
    Collections.addAll(allAutomata, automata);
    for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
        Collections.addAll(allAutomata,
            // true == lookInSpan
            MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));
    }
    if (allAutomata.size() == 1) {
        return allAutomata.get(0);
    }
    // Return an aggregate CharacterRunAutomaton of others
    // (the makeEmpty() is bogus; won't be used)
    return new CharacterRunAutomaton(Automata.makeEmpty()) {

        @Override
        public boolean run(char[] chars, int offset, int length) {
            for (int i = 0; i < allAutomata.size(); i++) {
                // don't use foreach to avoid Iterator allocation
                if (allAutomata.get(i).run(chars, offset, length)) {
                    return true;
                }
            }
            return false;
        }
    };
}
Also used: CharacterRunAutomaton (org.apache.lucene.util.automaton.CharacterRunAutomaton), ArrayList (java.util.ArrayList), SpanQuery (org.apache.lucene.search.spans.SpanQuery)
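
The aggregate automaton above simply ORs together the individual CharacterRunAutomaton instances. As a small self-contained sketch of the underlying building block (the terms are made up), Automata.makeStringUnion builds one automaton over a sorted list of terms and CharacterRunAutomaton matches char data against it:

import java.util.Arrays;
import java.util.List;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;

public class TermAutomatonSketch {

    public static void main(String[] args) {
        // makeStringUnion expects its input in sorted (UTF-8) order
        List<BytesRef> terms = Arrays.asList(new BytesRef("fox"), new BytesRef("jumped"));
        CharacterRunAutomaton automaton = new CharacterRunAutomaton(Automata.makeStringUnion(terms));
        System.out.println(automaton.run("fox"));     // true
        System.out.println(automaton.run("jumping")); // false
    }
}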

Example 70 with SpanQuery

Use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

The class SynonymTokenizer, method testNearSpanSimpleQuery.

public void testNearSpanSimpleQuery() throws Exception {
    doSearching(new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD_NAME, "beginning")), new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false));
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            mode = QUERY;
            doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
        }
    };
    helper.run();
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
}
Also used: TestHighlightRunner (org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner), SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery), Term (org.apache.lucene.index.Term), SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery), SpanQuery (org.apache.lucene.search.spans.SpanQuery)
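
Because every SpanQuery composes with every other, the clauses of a SpanNearQuery do not have to be plain SpanTermQuery instances. A brief illustrative sketch (the field name and terms are made up) that nests a SpanOrQuery inside a SpanNearQuery:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class NestedSpanQuerySketch {

    // Matches "beginning" within 3 positions of either "kennedy" or "johnson", in any order.
    static SpanQuery build(String field) {
        SpanQuery either = new SpanOrQuery(
            new SpanTermQuery(new Term(field, "kennedy")),
            new SpanTermQuery(new Term(field, "johnson")));
        return new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(field, "beginning")), either }, 3, false);
    }
}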

Aggregations

SpanQuery (org.apache.lucene.search.spans.SpanQuery): 81
SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 53
Term (org.apache.lucene.index.Term): 51
SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery): 43
Query (org.apache.lucene.search.Query): 27
SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 25
IndexReader (org.apache.lucene.index.IndexReader): 18
SpanBoostQuery (org.apache.lucene.search.spans.SpanBoostQuery): 18
Document (org.apache.lucene.document.Document): 17
IndexSearcher (org.apache.lucene.search.IndexSearcher): 16
BooleanQuery (org.apache.lucene.search.BooleanQuery): 15
TopDocs (org.apache.lucene.search.TopDocs): 15
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 13
TermQuery (org.apache.lucene.search.TermQuery): 13
ArrayList (java.util.ArrayList): 12
Spans (org.apache.lucene.search.spans.Spans): 12
PhraseQuery (org.apache.lucene.search.PhraseQuery): 11
DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery): 10
SpanNotQuery (org.apache.lucene.search.spans.SpanNotQuery): 10
Directory (org.apache.lucene.store.Directory): 10