Use of org.apache.lucene.search.spans.SpanQuery in the Apache lucene-solr project.
Class TestPayloadSpans, method testNestedSpans.
/**
 * Builds several flat and nested {@link SpanNearQuery} combinations over the terms "rr", "yy"
 * and "xx", fetches payload-enabled {@link Spans} for the first leaf of the searcher returned
 * by {@code getSearcher()}, and passes each result to {@code checkSpans}. The shared reader
 * and directory are closed at the end of the test.
 */
public void testNestedSpans() throws Exception {
  IndexSearcher searcher = getSearcher();

  // The test expects no Spans object at all for the term "mark".
  SpanTermQuery markQuery = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
  Spans spans = markQuery.createWeight(searcher, false, 1f)
      .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
  assertNull(spans);

  // Three terms, unordered, slop 12.
  SpanQuery[] unorderedClauses = {
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")),
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")),
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"))
  };
  SpanNearQuery unorderedNear = new SpanNearQuery(unorderedClauses, 12, false);
  spans = unorderedNear.createWeight(searcher, false, 1f)
      .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
  assertTrue("spans is null and it shouldn't be", spans != null);
  checkSpans(spans, 2, new int[] { 3, 3 });

  // Same three terms in the order xx, rr, yy; ordered, slop 6.
  SpanQuery[] orderedClauses = {
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")),
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")),
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"))
  };
  SpanNearQuery orderedNear = new SpanNearQuery(orderedClauses, 6, true);
  spans = orderedNear.createWeight(searcher, false, 1f)
      .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
  assertTrue("spans is null and it shouldn't be", spans != null);
  checkSpans(spans, 1, new int[] { 3 });

  // Inner query: xx within 6 of rr (ordered).
  SpanQuery[] innerClauses = {
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")),
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"))
  };
  SpanNearQuery innerNear = new SpanNearQuery(innerClauses, 6, true);

  // Outer query: yy within 6 of (xx within 6 of rr), unordered.
  SpanQuery[] outerClauses = {
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")),
    innerNear
  };
  SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(outerClauses, 6, false);
  spans = nestedSpanNearQuery.createWeight(searcher, false, 1f)
      .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
  assertTrue("spans is null and it shouldn't be", spans != null);
  checkSpans(spans, 2, new int[] { 3, 3 });

  closeIndexReader.close();
  directory.close();
}
Use of org.apache.lucene.search.spans.SpanQuery in the Apache lucene-solr project.
Class TestPayloadSpans, method testShrinkToAfterShortestMatch3.
/**
 * Indexes one document in the "content" field with {@code TestPayloadAnalyzer}, runs an ordered,
 * zero-slop {@link SpanNearQuery} for "a" followed by "k", and collects the payloads of every
 * matching span position. The test expects exactly two distinct payloads: "a:Noise:10" and
 * "k:Noise:11".
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that they are
 * released even when an assertion fails or indexing throws.
 *
 * @throws IOException if indexing, searching or closing a resource fails
 */
public void testShrinkToAfterShortestMatch3() throws IOException {
  try (Directory directory = newDirectory()) {
    final IndexReader opened;
    try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()))) {
      Document doc = new Document();
      doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
      writer.addDocument(doc);
      opened = writer.getReader();
    }

    // The writer is closed at this point; the reader is closed before the directory.
    try (IndexReader reader = opened) {
      IndexSearcher is = newSearcher(getOnlyLeafReader(reader), false);

      SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
      SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
      SpanQuery[] sqs = { stq1, stq2 };
      SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);

      Spans spans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
      // Fail with a readable message rather than a NullPointerException in the loop below.
      assertTrue("spans is null and it shouldn't be", spans != null);

      TopDocs topDocs = is.search(snq, 1);
      Set<String> payloadSet = new HashSet<>();
      VerifyingCollector collector = new VerifyingCollector();
      // At most one hit was requested, so this outer loop runs at most once; the inner
      // loops walk every document and position of the spans.
      for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
          while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            collector.reset();
            spans.collect(collector);
            for (final BytesRef payload : collector.payloads) {
              payloadSet.add(Term.toString(payload));
            }
          }
        }
      }

      assertEquals(2, payloadSet.size());
      if (VERBOSE) {
        for (final String payload : payloadSet) {
          System.out.println("match:" + payload);
        }
      }
      assertTrue(payloadSet.contains("a:Noise:10"));
      assertTrue(payloadSet.contains("k:Noise:11"));
    }
  }
}
Use of org.apache.lucene.search.spans.SpanQuery in the Apache lucene-solr project.
Class TestPayloadSpans, method testSpanNot.
/**
 * Indexes the text "one two three one four three" with {@code PayloadAnalyzer} and checks the
 * payload spans of a {@link SpanNotQuery}: an ordered "one" ... "three" near query (slop 5)
 * excluding the term "two". The resulting spans are handed to {@code checkSpans} with the
 * expected values {@code 1} and {@code {2}}.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that they are
 * released even when {@code checkSpans} fails or indexing throws.
 *
 * @throws Exception if indexing, span creation or closing a resource fails
 */
public void testSpanNot() throws Exception {
  SpanQuery[] clauses = new SpanQuery[2];
  clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
  clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
  SpanQuery spq = new SpanNearQuery(clauses, 5, true);
  SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")));

  try (Directory directory = newDirectory()) {
    final IndexReader opened;
    try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity))) {
      Document doc = new Document();
      doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES));
      writer.addDocument(doc);
      opened = getOnlyLeafReader(writer.getReader());
    }

    // The writer is closed at this point; the reader is closed before the directory.
    try (IndexReader reader = opened) {
      checkSpans(snq.createWeight(newSearcher(reader, false), false, 1f).getSpans(reader.leaves().get(0), SpanWeight.Postings.PAYLOADS), 1, new int[] { 2 });
    }
  }
}
Use of org.apache.lucene.search.spans.SpanQuery in the Apache lucene-solr project.
Class TestPayloadSpans, method testSpanFirst.
/**
 * Exercises {@link SpanFirstQuery} (end limit 2) over the shared {@code searcher}: first around
 * a single term query for "one", then around ordered and unordered zero-slop near queries for
 * "one" and "hundred". Each resulting payload-enabled spans object is passed to
 * {@code checkSpans} together with the expected values for that case.
 *
 * @throws IOException if creating the weight or spans fails
 */
public void testSpanFirst() throws IOException {
  // Simple sub-clause: a single term.
  SpanQuery termMatch = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
  SpanFirstQuery firstTerm = new SpanFirstQuery(termMatch, 2);
  checkSpans(
      firstTerm.createWeight(searcher, false, 1f)
          .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS),
      109, 1, 1, 1);

  // More complicated sub-clause: "one" near "hundred" with slop 0.
  SpanQuery[] clauses = {
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")),
    new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"))
  };

  // Ordered variant.
  SpanQuery orderedMatch = new SpanNearQuery(clauses, 0, true);
  SpanFirstQuery firstOrdered = new SpanFirstQuery(orderedMatch, 2);
  checkSpans(
      firstOrdered.createWeight(searcher, false, 1f)
          .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS),
      100, 2, 1, 1);

  // Unordered variant; the same values are expected.
  SpanQuery unorderedMatch = new SpanNearQuery(clauses, 0, false);
  SpanFirstQuery firstUnordered = new SpanFirstQuery(unorderedMatch, 2);
  checkSpans(
      firstUnordered.createWeight(searcher, false, 1f)
          .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS),
      100, 2, 1, 1);
}
Use of org.apache.lucene.search.spans.SpanQuery in the Apache lucene-solr project.
Class HighlighterPhraseTest, method testSparseSpan.
/**
 * Indexes a single document whose field is fed from a {@code TokenStreamSparse} with term
 * vectors (positions and offsets) enabled, then checks two things for an ordered, zero-slop
 * span query "did" followed by "jump": the search reports zero hits, and highlighting the
 * term-vector token stream yields the same best fragment as highlighting a fresh
 * {@code TokenStreamSparse}.
 *
 * @throws IOException if indexing, reading or closing the index fails
 * @throws InvalidTokenOffsetsException if the highlighter rejects the token offsets
 */
public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
  final String text = "the fox did not jump";
  final Directory directory = newDirectory();

  // Write phase: one document with a term-vector-enabled, non-stored text field.
  final IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorPositions(true);
    vectorType.setStoreTermVectorOffsets(true);

    final Document doc = new Document();
    doc.add(new Field(FIELD, new TokenStreamSparse(), vectorType));
    writer.addDocument(doc);
  } finally {
    writer.close();
  }

  // Read phase: search and highlight against the freshly written index.
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);

    final SpanQuery[] clauses = {
      new SpanTermQuery(new Term(FIELD, "did")),
      new SpanTermQuery(new Term(FIELD, "jump"))
    };
    final Query phraseQuery = new SpanNearQuery(clauses, 0, true);

    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(0, hits.totalHits);

    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
    final TokenStream vectorStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);

    // Reference fragment from the raw sparse stream, then the one from the term vectors.
    final String expectedFragment = highlighter.getBestFragment(new TokenStreamSparse(), text);
    final String actualFragment = highlighter.getBestFragment(vectorStream, text);
    assertEquals(expectedFragment, actualFragment);
  } finally {
    indexReader.close();
    directory.close();
  }
}
Aggregations