Search in sources :

Example 16 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TokenSourcesTest method testOverlapWithPositionsAndOffsetExactPhrase.

public void testOverlapWithPositionsAndOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectorOffsets(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        // final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
        // query.add(new SpanTermQuery(new Term(FIELD, "the")));
        // query.add(new SpanTermQuery(new Term(FIELD, "fox")));
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Directory(org.apache.lucene.store.Directory)

Example 17 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class PayloadSpanUtil method queryToSpanQuery.

private void queryToSpanQuery(Query query, Collection<byte[]> payloads) throws IOException {
    if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                queryToSpanQuery(clause.getQuery(), payloads);
            }
        }
    } else if (query instanceof PhraseQuery) {
        Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
        for (int i = 0; i < phraseQueryTerms.length; i++) {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }
        int slop = ((PhraseQuery) query).getSlop();
        boolean inorder = false;
        if (slop == 0) {
            inorder = true;
        }
        SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
        getPayloads(payloads, sp);
    } else if (query instanceof TermQuery) {
        SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm());
        getPayloads(payloads, stq);
    } else if (query instanceof SpanQuery) {
        getPayloads(payloads, (SpanQuery) query);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext(); ) {
            queryToSpanQuery(iterator.next(), payloads);
        }
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            @SuppressWarnings({ "rawtypes", "unchecked" }) final List<Query>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<Query> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (final Term term : termArray) {
                    disjuncts.add(new SpanTermQuery(term));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.length; ++i) {
                List<Query> disjuncts = disjunctLists[i];
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            final int slop = mpq.getSlop();
            final boolean inorder = (slop == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            getPayloads(payloads, sp);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Query(org.apache.lucene.search.Query) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) ArrayList(java.util.ArrayList) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ArrayList(java.util.ArrayList) List(java.util.List) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 18 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterStrictPhrases method testMtq.

/**
   * Like {@link #testRewriteAndMtq} but no rewrite.
   */
public void testMtq() throws IOException {
    indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
    initReaderSearcherHighlighter();
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("body", "bravo")), // does NOT rewrite
    new SpanTermQuery(new Term("body", "charlie")) }, 0, true);
    BooleanQuery query = new BooleanQuery.Builder().add(snq, BooleanClause.Occur.MUST).add(new PrefixQuery(new Term("body", "al")), // MTQ
    BooleanClause.Occur.MUST).add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST).add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertArrayEquals(new String[] { "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>" }, snippets);
    // do again, this time with MTQ disabled.
    //disable but leave phrase processing enabled
    highlighter.setHandleMultiTermQuery(false);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    snippets = highlighter.highlight("body", query, topDocs);
    assertArrayEquals(new String[] { "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha" }, snippets);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 19 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class BoostingTermBuilder method getSpanQuery.

@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    String value = DOMUtils.getNonBlankTextOrFail(e);
    SpanQuery btq = new PayloadScoreQuery(new SpanTermQuery(new Term(fieldName, value)), new AveragePayloadFunction());
    btq = new SpanBoostQuery(btq, DOMUtils.getAttribute(e, "boost", 1.0f));
    return btq;
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) Term(org.apache.lucene.index.Term) AveragePayloadFunction(org.apache.lucene.queries.payloads.AveragePayloadFunction) SpanQuery(org.apache.lucene.search.spans.SpanQuery) PayloadScoreQuery(org.apache.lucene.queries.payloads.PayloadScoreQuery)

Example 20 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestQueryParser method testMultiWordSynonyms.

// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
public void testMultiWordSynonyms() throws Exception {
    QueryParser dumb = new QueryParser("field", new Analyzer1());
    dumb.setSplitOnWhitespace(false);
    TermQuery guinea = new TermQuery(new Term("field", "guinea"));
    TermQuery pig = new TermQuery(new Term("field", "pig"));
    TermQuery cavy = new TermQuery(new Term("field", "cavy"));
    // A multi-word synonym source will form a graph query for synonyms that formed the graph token stream
    BooleanQuery.Builder synonym = new BooleanQuery.Builder();
    synonym.add(guinea, BooleanClause.Occur.MUST);
    synonym.add(pig, BooleanClause.Occur.MUST);
    BooleanQuery guineaPig = synonym.build();
    PhraseQuery phraseGuineaPig = new PhraseQuery.Builder().add(new Term("field", "guinea")).add(new Term("field", "pig")).build();
    BooleanQuery graphQuery = new BooleanQuery.Builder().add(guineaPig, BooleanClause.Occur.SHOULD).add(cavy, BooleanClause.Occur.SHOULD).build();
    assertEquals(graphQuery, dumb.parse("guinea pig"));
    // With the phrase operator, a multi-word synonym source will form span near queries.
    SpanNearQuery spanGuineaPig = SpanNearQuery.newOrderedNearQuery("field").addClause(new SpanTermQuery(new Term("field", "guinea"))).addClause(new SpanTermQuery(new Term("field", "pig"))).setSlop(0).build();
    SpanTermQuery spanCavy = new SpanTermQuery(new Term("field", "cavy"));
    SpanOrQuery spanPhrase = new SpanOrQuery(new SpanQuery[] { spanGuineaPig, spanCavy });
    assertEquals(spanPhrase, dumb.parse("\"guinea pig\""));
    // custom behavior, the synonyms are expanded, unless you use quote operator
    QueryParser smart = new SmartQueryParser();
    smart.setSplitOnWhitespace(false);
    graphQuery = new BooleanQuery.Builder().add(guineaPig, BooleanClause.Occur.SHOULD).add(cavy, BooleanClause.Occur.SHOULD).build();
    assertEquals(graphQuery, smart.parse("guinea pig"));
    assertEquals(phraseGuineaPig, smart.parse("\"guinea pig\""));
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)196 Term (org.apache.lucene.index.Term)191 SpanQuery (org.apache.lucene.search.spans.SpanQuery)121 Test (org.junit.Test)103 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)59 KrillIndex (de.ids_mannheim.korap.KrillIndex)57 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)35 SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery)34 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)31 Result (de.ids_mannheim.korap.response.Result)30 ArrayList (java.util.ArrayList)27 Document (org.apache.lucene.document.Document)24 IndexReader (org.apache.lucene.index.IndexReader)24 BooleanQuery (org.apache.lucene.search.BooleanQuery)22 Query (org.apache.lucene.search.Query)22 TermQuery (org.apache.lucene.search.TermQuery)22 TopDocs (org.apache.lucene.search.TopDocs)21 SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery)20 IndexSearcher (org.apache.lucene.search.IndexSearcher)20 SpanRelationQuery (de.ids_mannheim.korap.query.SpanRelationQuery)18