Search in sources :

Example 61 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadSpans method testSpanNot.

public void testSpanNot() throws Exception {
    SpanQuery[] clauses = new SpanQuery[2];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
    SpanQuery spq = new SpanNearQuery(clauses, 5, true);
    SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")));
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
    Document doc = new Document();
    doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES));
    writer.addDocument(doc);
    IndexReader reader = getOnlyLeafReader(writer.getReader());
    writer.close();
    checkSpans(snq.createWeight(newSearcher(reader, false), false, 1f).getSpans(reader.leaves().get(0), SpanWeight.Postings.PAYLOADS), 1, new int[] { 2 });
    reader.close();
    directory.close();
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Directory(org.apache.lucene.store.Directory)

Example 62 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadSpans method testSpanFirst.

public void testSpanFirst() throws IOException {
    SpanQuery match;
    SpanFirstQuery sfq;
    match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    sfq = new SpanFirstQuery(match, 2);
    Spans spans = sfq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    checkSpans(spans, 109, 1, 1, 1);
    //Test more complicated subclause
    SpanQuery[] clauses = new SpanQuery[2];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
    match = new SpanNearQuery(clauses, 0, true);
    sfq = new SpanFirstQuery(match, 2);
    checkSpans(sfq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
    match = new SpanNearQuery(clauses, 0, false);
    sfq = new SpanFirstQuery(match, 2);
    checkSpans(sfq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
}
Also used : SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Spans(org.apache.lucene.search.spans.Spans)

Example 63 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class QueryBuilder method createSpanQuery.

/**
   * Creates a span query from the tokenstream.  In the case of a single token, a simple <code>SpanTermQuery</code> is
   * returned.  When multiple tokens, an ordered <code>SpanNearQuery</code> with slop of 0 is returned.
   */
protected final SpanQuery createSpanQuery(TokenStream in, String field) throws IOException {
    TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class);
    if (termAtt == null) {
        return null;
    }
    List<SpanTermQuery> terms = new ArrayList<>();
    while (in.incrementToken()) {
        terms.add(new SpanTermQuery(new Term(field, termAtt.getBytesRef())));
    }
    if (terms.isEmpty()) {
        return null;
    } else if (terms.size() == 1) {
        return terms.get(0);
    } else {
        return new SpanNearQuery(terms.toArray(new SpanTermQuery[0]), 0, true);
    }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 64 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestDisjunctionMaxQuery method testBooleanSpanQuery.

// LUCENE-4477 / LUCENE-4401:
public void testBooleanSpanQuery() throws Exception {
    int hits = 0;
    Directory directory = newDirectory();
    Analyzer indexerAnalyzer = new MockAnalyzer(random());
    IndexWriterConfig config = new IndexWriterConfig(indexerAnalyzer);
    IndexWriter writer = new IndexWriter(directory, config);
    String FIELD = "content";
    Document d = new Document();
    d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
    writer.addDocument(d);
    writer.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(indexReader);
    DisjunctionMaxQuery query = new DisjunctionMaxQuery(Arrays.asList(new SpanTermQuery(new Term(FIELD, "clockwork")), new SpanTermQuery(new Term(FIELD, "clckwork"))), 1.0f);
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000);
    searcher.search(query, collector);
    hits = collector.topDocs().scoreDocs.length;
    for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs) {
        System.out.println(scoreDoc.doc);
    }
    indexReader.close();
    assertEquals(hits, 1);
    directory.close();
}
Also used : Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 65 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class HighlighterPhraseTest method testSparseSpan.

public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectors(true);
        document.add(new Field(FIELD, new TokenStreamSparse(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "did")), new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(0, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals(highlighter.getBestFragment(new TokenStreamSparse(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Directory(org.apache.lucene.store.Directory)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)87 Term (org.apache.lucene.index.Term)84 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)52 SpanQuery (org.apache.lucene.search.spans.SpanQuery)48 Document (org.apache.lucene.document.Document)24 IndexReader (org.apache.lucene.index.IndexReader)23 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)23 TopDocs (org.apache.lucene.search.TopDocs)20 Query (org.apache.lucene.search.Query)19 BooleanQuery (org.apache.lucene.search.BooleanQuery)18 IndexSearcher (org.apache.lucene.search.IndexSearcher)18 TermQuery (org.apache.lucene.search.TermQuery)18 Directory (org.apache.lucene.store.Directory)17 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)15 TextField (org.apache.lucene.document.TextField)14 ArrayList (java.util.ArrayList)12 PhraseQuery (org.apache.lucene.search.PhraseQuery)11 Spans (org.apache.lucene.search.spans.Spans)11 TokenStream (org.apache.lucene.analysis.TokenStream)10 IndexWriter (org.apache.lucene.index.IndexWriter)10