Use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
From the class TestSpanNotQuery, method testNoPositions.
public void testNoPositions() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(new StringField("foo", "bar", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = new IndexSearcher(ir);
  SpanTermQuery query = new SpanTermQuery(new Term("foo", "bar"));
  SpanTermQuery query2 = new SpanTermQuery(new Term("foo", "baz"));
  IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
    is.search(new SpanNotQuery(query, query2), 5);
  });
  assertTrue(expected.getMessage().contains("was indexed without position data"));
  ir.close();
  dir.close();
}
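The IllegalStateException is expected here because StringField indexes its value as a single token with IndexOptions.DOCS, i.e. without the position data that span queries require. A minimal sketch of the variant that would succeed, assuming the same test scaffolding (TextField tokenizes and records positions by default):

// Hypothetical variant, not part of the test above: a TextField indexes
// positions, so the same span query runs without throwing.
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new TextField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher is = new IndexSearcher(ir);
SpanNotQuery q = new SpanNotQuery(new SpanTermQuery(new Term("foo", "bar")),
                                  new SpanTermQuery(new Term("foo", "baz")));
is.search(q, 5); // succeeds: position data is available
ir.close();
dir.close();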
Use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
From the class TokenSourcesTest, method testMaxStartOffsetConsistency.
public void testMaxStartOffsetConsistency() throws IOException {
  FieldType tvFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  tvFieldType.setStoreTermVectors(true);
  tvFieldType.setStoreTermVectorOffsets(true);
  tvFieldType.setStoreTermVectorPositions(true);
  Directory dir = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  // we don't necessarily consume the whole stream because of limiting by startOffset
  analyzer.setEnableChecks(false);
  Document doc = new Document();
  final String TEXT = " f gg h";
  doc.add(new Field("fld_tv", analyzer.tokenStream("fooFld", TEXT), tvFieldType));
  doc.add(new TextField("fld_notv", analyzer.tokenStream("barFld", TEXT)));
  IndexReader reader;
  try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
    writer.addDocument(doc);
    reader = writer.getReader();
  }
  try {
    Fields tvFields = reader.getTermVectors(0);
    for (int maxStartOffset = -1; maxStartOffset <= TEXT.length(); maxStartOffset++) {
      TokenStream tvStream = TokenSources.getTokenStream("fld_tv", tvFields, TEXT, analyzer, maxStartOffset);
      TokenStream anaStream = TokenSources.getTokenStream("fld_notv", tvFields, TEXT, analyzer, maxStartOffset);
      // assert have same tokens, none of which has a start offset > maxStartOffset
      final OffsetAttribute tvOffAtt = tvStream.addAttribute(OffsetAttribute.class);
      final OffsetAttribute anaOffAtt = anaStream.addAttribute(OffsetAttribute.class);
      tvStream.reset();
      anaStream.reset();
      while (tvStream.incrementToken()) {
        assertTrue(anaStream.incrementToken());
        assertEquals(tvOffAtt.startOffset(), anaOffAtt.startOffset());
        if (maxStartOffset >= 0) {
          assertTrue(tvOffAtt.startOffset() <= maxStartOffset);
        }
      }
      assertFalse(anaStream.incrementToken());
      tvStream.end();
      anaStream.end();
      tvStream.close();
      anaStream.close();
    }
  } finally {
    reader.close();
  }
  dir.close();
}
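The reset/incrementToken/end/close sequence above is the standard TokenStream consumption contract; MockAnalyzer asserts on violations of it, which is why the test calls setEnableChecks(false) when streams are deliberately not fully consumed. A minimal sketch of the contract in isolation, assuming an arbitrary analyzer and field name:

// Sketch of the TokenStream workflow used in the test above.
try (TokenStream ts = analyzer.tokenStream("anyField", "some text")) {
  OffsetAttribute offAtt = ts.addAttribute(OffsetAttribute.class);
  ts.reset();                    // mandatory before the first incrementToken()
  while (ts.incrementToken()) {  // one call per token
    int start = offAtt.startOffset();
  }
  ts.end();                      // records end-of-stream offset/position state
}                                // try-with-resources calls close()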
Use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
From the class TokenSourcesTest, method testPayloads.
// LUCENE-5294
public void testPayloads() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(true);
  myFieldType.setStoreTermVectorPayloads(true);
  curOffset = 0;
  Token[] tokens = new Token[] { getToken("foxes"), getToken("can"), getToken("jump"), getToken("high") };
  Document doc = new Document();
  doc.add(new Field("field", new CannedTokenStream(tokens), myFieldType));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());
  TokenStream ts = TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);
  CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
  PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
  ts.reset();
  for (Token token : tokens) {
    assertTrue(ts.incrementToken());
    assertEquals(token.toString(), termAtt.toString());
    assertEquals(token.getPositionIncrement(), posIncAtt.getPositionIncrement());
    assertEquals(token.getPayload(), payloadAtt.getPayload());
    assertEquals(token.startOffset(), offsetAtt.startOffset());
    assertEquals(token.endOffset(), offsetAtt.endOffset());
  }
  assertFalse(ts.incrementToken());
  reader.close();
  dir.close();
}
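This snippet relies on a curOffset field and a getToken helper defined elsewhere in TokenSourcesTest and not shown here. A plausible, hypothetical reconstruction of such a helper, assuming it builds a Token whose payload mirrors its text and advances offsets left to right:

private int curOffset;

// Hypothetical sketch of the elided helper: a Token carrying its own text
// as the payload, with offsets advancing by token length plus a separator.
private Token getToken(String text) {
  Token t = new Token(text, curOffset, curOffset + text.length());
  t.setPayload(new BytesRef(text));
  curOffset += text.length() + 1; // +1 for an implied token separator
  return t;
}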
Use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
From the class TestUnifiedHighlighter, method testBasics.
//
// Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer tests.
//
public void testBasics() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
  iw.addDocument(doc);
  body.setStringValue("Highlighting the first term. Hope it works.");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new TermQuery(new Term("body", "highlighting"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
  ir.close();
}
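Both testBasics and the testMultipleTerms example that follows assume dir, indexAnalyzer, and fieldType fields initialized in the test class setup, which is not shown on this page. A minimal, hypothetical sketch of such fixtures, assuming the highlighter is driven by offsets stored in the postings (as in the original PostingsHighlighter tests):

// Hypothetical fixtures; the real setup in TestUnifiedHighlighter randomizes more.
Directory dir = newDirectory();
Analyzer indexAnalyzer = new MockAnalyzer(random());
FieldType fieldType = new FieldType(TextField.TYPE_STORED);
// org.apache.lucene.index.IndexOptions: store offsets in the postings so the
// highlighter can find match positions without re-analyzing the text.
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
fieldType.freeze();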
Use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
From the class TestUnifiedHighlighter, method testMultipleTerms.
public void testMultipleTerms() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
  iw.addDocument(doc);
  body.setStringValue("Highlighting the first term. Hope it works.");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD)
      .build();
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
  ir.close();
}
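Across these examples RandomIndexWriter is constructed two ways: from just a Random and a Directory (a randomly chosen analyzer and configuration) or with an explicit Analyzer. A sketch of the variants, including the explicit-config form the test framework also provides (newIndexWriterConfig is a LuceneTestCase helper; iw3's argument is an assumption about how one would typically pass a full config):

RandomIndexWriter iw1 = new RandomIndexWriter(random(), dir);                // random analyzer and config
RandomIndexWriter iw2 = new RandomIndexWriter(random(), dir, indexAnalyzer); // fixed analyzer
RandomIndexWriter iw3 = new RandomIndexWriter(random(), dir,
    newIndexWriterConfig(indexAnalyzer));                                    // explicit IndexWriterConfig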