Use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
In class SynonymTokenizer, method testPayloadQuery:
/** We can highlight based on payloads. It's supported both via term vectors and MemoryIndex since Lucene 5. */
public void testPayloadQuery() throws IOException, InvalidTokenOffsetsException {
  // "words" at positions 1 & 4
  final String text = "random words and words";
  // MockPayloadAnalyzer sets each token's payload to "pos: X" (where X is the position)
  Analyzer analyzer = new MockPayloadAnalyzer();
  try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
    writer.deleteAll();
    Document doc = new Document();
    doc.add(new Field(FIELD_NAME, text, fieldType));
    writer.addDocument(doc);
    writer.commit();
  }
  try (IndexReader reader = DirectoryReader.open(dir)) {
    Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")), // just match the first "words" occurrence
        Collections.singletonList(new BytesRef("pos: 1")));
    IndexSearcher searcher = newSearcher(reader);
    QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
    scorer.setUsePayloads(true);
    Highlighter h = new Highlighter(scorer);
    TopDocs hits = searcher.search(query, 10);
    assertEquals(1, hits.scoreDocs.length);
    TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), 0, FIELD_NAME, analyzer);
    if (random().nextBoolean()) {
      // conceals detection of TokenStreamFromTermVector
      stream = new CachingTokenFilter(stream);
    }
    String result = h.getBestFragment(stream, text);
    // only highlight the first "words"
    assertEquals("random <B>words</B> and words", result);
  }
}
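The test relies on MockPayloadAnalyzer attaching a "pos: X" payload to every token, which SpanPayloadCheckQuery then matches against. For readers unfamiliar with payload-producing token filters, here is a minimal sketch of one; it is not the actual MockPayloadAnalyzer (which also manipulates position increments), and the class name is made up for illustration:

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;

// Sketch only: attaches a "pos: N" payload to each token it passes through.
final class PositionPayloadFilter extends TokenFilter {
  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
  private int pos;

  PositionPayloadFilter(TokenStream in) {
    super(in);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    // payload of this shape is what SpanPayloadCheckQuery compares against above
    payloadAtt.setPayload(new BytesRef("pos: " + pos++));
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    pos = 0; // restart position counting when the stream is reused
  }
}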
Use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
In class TestSolrCoreParser, method checkApacheLuceneSolr:
private static void checkApacheLuceneSolr(Query query, String fieldName) {
  assertTrue(query instanceof SpanNearQuery);
  final SpanNearQuery snq = (SpanNearQuery) query;
  assertEquals(fieldName, snq.getField());
  assertEquals(42, snq.getSlop());
  assertFalse(snq.isInOrder());
  assertEquals(3, snq.getClauses().length);
  assertTrue(snq.getClauses()[0] instanceof SpanTermQuery);
  assertTrue(snq.getClauses()[1] instanceof SpanTermQuery);
  assertTrue(snq.getClauses()[2] instanceof SpanTermQuery);
}
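For context, a query that would pass these assertions could be built directly as below. The test actually obtains the query from Solr's XML query parser, so the term texts here ("apache", "lucene", "solr") are assumptions for illustration only:

SpanQuery[] clauses = {
    new SpanTermQuery(new Term(fieldName, "apache")),  // hypothetical terms; the real ones
    new SpanTermQuery(new Term(fieldName, "lucene")),  // come from the test's XML query
    new SpanTermQuery(new Term(fieldName, "solr"))     // fixture, not shown here
};
// slop 42, unordered: matches exactly what the assertions above check
Query query = new SpanNearQuery(clauses, 42, false);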
Use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
In class TestSolrCoreParser, method checkChooseOneWordQuery:
private static void checkChooseOneWordQuery(boolean span, Query query, String fieldName, String... expectedTermTexts) {
  final Term term;
  if (span) {
    assertTrue(query instanceof SpanTermQuery);
    final SpanTermQuery stq = (SpanTermQuery) query;
    term = stq.getTerm();
  } else {
    assertTrue(query instanceof TermQuery);
    final TermQuery tq = (TermQuery) query;
    term = tq.getTerm();
  }
  final String text = term.text();
  boolean foundExpected = false;
  for (String expected : expectedTermTexts) {
    foundExpected |= expected.equals(text);
  }
  assertEquals(fieldName, term.field());
  // Arrays.toString (java.util.Arrays) renders the array contents; concatenating the
  // array directly would only print its identity hash in the failure message.
  assertTrue("expected term text (" + text + ") not found in " + Arrays.toString(expectedTermTexts), foundExpected);
}
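A hypothetical call site, assuming a query already parsed over a field named "text"; the candidate term texts are illustrative only:

// span=true: expect a SpanTermQuery whose term text is one of the listed candidates
checkChooseOneWordQuery(true, parsedQuery, "text", "bleach", "peach", "teach");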
Use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
In class TestPositionIncrement, method testPayloadsPos0:
public void testPayloadsPos0() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
  Document doc = new Document();
  doc.add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
  writer.addDocument(doc);
  final IndexReader readerFromWriter = writer.getReader();
  LeafReader r = getOnlyLeafReader(readerFromWriter);
  PostingsEnum tp = r.postings(new Term("content", "a"), PostingsEnum.ALL);
  int count = 0;
  assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  // "a" occurs 4 times
  assertEquals(4, tp.freq());
  assertEquals(0, tp.nextPosition());
  assertEquals(1, tp.nextPosition());
  assertEquals(3, tp.nextPosition());
  assertEquals(6, tp.nextPosition());
  // only one doc has "a"
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, tp.nextDoc());
  IndexSearcher is = newSearcher(getOnlyLeafReader(readerFromWriter));
  SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
  SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
  SpanQuery[] sqs = { stq1, stq2 };
  SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
  count = 0;
  boolean sawZero = false;
  if (VERBOSE) {
    System.out.println("\ngetPayloadSpans test");
  }
  PayloadSpanCollector collector = new PayloadSpanCollector();
  Spans pspans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
  while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
    while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
      if (VERBOSE) {
        System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition() + " to " + pspans.endPosition());
      }
      collector.reset();
      pspans.collect(collector);
      sawZero |= pspans.startPosition() == 0;
      for (BytesRef payload : collector.payloads) {
        count++;
        if (VERBOSE) {
          System.out.println("  payload: " + Term.toString(payload));
        }
      }
    }
  }
  assertTrue(sawZero);
  assertEquals(8, count);
  // System.out.println("\ngetSpans test");
  Spans spans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
  count = 0;
  sawZero = false;
  while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
      count++;
      sawZero |= spans.startPosition() == 0;
      // System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
    }
  }
  assertEquals(4, count);
  assertTrue(sawZero);
  writer.close();
  is.getIndexReader().close();
  dir.close();
}
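Both traversals above follow the same two-level Spans protocol: advance documents with nextDoc(), then advance match positions within the current document with nextStartPosition(). Distilled into a minimal skeleton (spanWeight and leafContext are assumed to be in scope):

Spans spans = spanWeight.getSpans(leafContext, SpanWeight.Postings.POSITIONS);
if (spans != null) {                                 // null means no matches in this segment
  while (spans.nextDoc() != Spans.NO_MORE_DOCS) {    // outer loop: documents
    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { // inner loop: matches
      int start = spans.startPosition();  // first matching position (inclusive)
      int end = spans.endPosition();      // one past the last matching position (exclusive)
      // process the match [start, end) in document spans.docID()
    }
  }
}

The count of 8 in the payload pass is consistent with the 4 spans found in the positions pass: each matched span covers one "a" and one "k", contributing one payload per clause.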
Use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
In class TestPayloadExplanations, method testSimpleTerm:
public void testSimpleTerm() throws Exception {
  SpanTermQuery q = new SpanTermQuery(new Term(FIELD, "w2"));
  testAllFunctions(q, new int[] { 0, 1, 2, 3 });
}
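The helper testAllFunctions is not shown here; in TestPayloadExplanations it exercises the span query against the various payload scoring functions. As a rough sketch of the kind of wrapping involved (the exact wiring inside testAllFunctions is an assumption; PayloadScoreQuery and MaxPayloadFunction live in the lucene/queries module):

// Sketch: wrap the span query so payload values contribute to the score
SpanTermQuery span = new SpanTermQuery(new Term(FIELD, "w2"));
Query payloadScored = new PayloadScoreQuery(span, new MaxPayloadFunction(), true); // true: include the span score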