Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
The class TestMultiPhraseQuery, method doTestZeroPosIncrSloppy.
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
  // random dir
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(null);
  IndexWriter writer = new IndexWriter(dir, cfg);
  Document doc = new Document();
  doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
  writer.addDocument(doc);
  IndexReader r = DirectoryReader.open(writer);
  writer.close();
  IndexSearcher s = newSearcher(r);
  if (VERBOSE) {
    System.out.println("QUERY=" + q);
  }
  TopDocs hits = s.search(q, 1);
  assertEquals("wrong number of results", nExpected, hits.totalHits);
  if (VERBOSE) {
    for (int hit = 0; hit < hits.totalHits; hit++) {
      ScoreDoc sd = hits.scoreDocs[hit];
      System.out.println("  hit doc=" + sd.doc + " score=" + sd.score);
    }
  }
  r.close();
  dir.close();
}
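The INCR_0_DOC_TOKENS constant is defined elsewhere in TestMultiPhraseQuery and is not shown here. As a rough illustration of the pattern (the token texts, count, and helper name below are hypothetical, not the actual constant), a zero-position-increment token array for a canned stream can be built like this:

// Hypothetical sketch: tokens with position increment 0 stack on the same position,
// which is what the sloppy-phrase tests exercise. Texts and values are illustrative only.
private static Token makeToken(String text, int posIncr) {
  Token t = new Token();
  t.append(text);
  t.setPositionIncrement(posIncr);
  return t;
}

private static final Token[] ZERO_INCR_TOKENS = new Token[] {
  makeToken("a", 1),
  makeToken("synonymOfA", 0), // increment 0: same position as "a"
  makeToken("b", 1)
};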
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
The class TestTrimFilter, method testTrim.
public void testTrim() throws Exception {
  char[] a = " a ".toCharArray();
  char[] b = "b ".toCharArray();
  char[] ccc = "cCc".toCharArray();
  char[] whitespace = " ".toCharArray();
  char[] empty = "".toCharArray();
  TokenStream ts = new CannedTokenStream(
      new Token(new String(a, 0, a.length), 1, 5),
      new Token(new String(b, 0, b.length), 6, 10),
      new Token(new String(ccc, 0, ccc.length), 11, 15),
      new Token(new String(whitespace, 0, whitespace.length), 16, 20),
      new Token(new String(empty, 0, empty.length), 21, 21));
  ts = new TrimFilter(ts);
  assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", "" });
}
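Outside of a canned stream, TrimFilter is normally wired into an analysis chain. A minimal sketch, assuming a KeywordTokenizer so the whole input becomes one token whose leading and trailing whitespace TrimFilter can strip (the analyzer below is illustrative, not part of the test):

// Minimal sketch (illustrative, not from the test): TrimFilter in a custom Analyzer chain.
Analyzer analyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new KeywordTokenizer();      // whole input as one token
    TokenStream result = new TrimFilter(source);    // strip leading/trailing whitespace
    return new TokenStreamComponents(source, result);
  }
};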
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
The class TestPayloadsOnVectors, method testMixupDocs.
/** some docs have payload att, some not */
public void testMixupDocs() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorPayloads(true);
  customType.setStoreTermVectorOffsets(random().nextBoolean());
  Field field = new Field("field", "", customType);
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("here we go"));
  field.setTokenStream(ts);
  doc.add(field);
  writer.addDocument(doc);
  Token withPayload = new Token("withPayload", 0, 11);
  withPayload.setPayload(new BytesRef("test"));
  ts = new CannedTokenStream(withPayload);
  assertTrue(ts.hasAttribute(PayloadAttribute.class));
  field.setTokenStream(ts);
  writer.addDocument(doc);
  ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("another"));
  field.setTokenStream(ts);
  writer.addDocument(doc);
  DirectoryReader reader = writer.getReader();
  Terms terms = reader.getTermVector(1, "field");
  assert terms != null;
  TermsEnum termsEnum = terms.iterator();
  assertTrue(termsEnum.seekExact(new BytesRef("withPayload")));
  PostingsEnum de = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(0, de.nextDoc());
  assertEquals(0, de.nextPosition());
  assertEquals(new BytesRef("test"), de.getPayload());
  writer.close();
  reader.close();
  dir.close();
}
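The test only checks the first position of one term. A more general sketch, assuming the same term-vector termsEnum as above, would walk every position of the current term and read its payload (getPayload() returns null for positions that carry none):

// Sketch (assumed, not from the test): enumerate all positions and payloads of a term.
PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
  int freq = postings.freq();
  for (int i = 0; i < freq; i++) {
    int position = postings.nextPosition();
    BytesRef payload = postings.getPayload(); // null when this position has no payload
    System.out.println("pos=" + position + " payload=" + payload);
  }
}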
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
The class TestMaxPosition, method testTooBigPosition.
public void testTooBigPosition() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  // This is at position 1:
  Token t1 = new Token("foo", 0, 3);
  t1.setPositionIncrement(2);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef(new byte[] { 0x1 }));
  }
  Token t2 = new Token("foo", 4, 7);
  // This should overflow max:
  t2.setPositionIncrement(IndexWriter.MAX_POSITION);
  if (random().nextBoolean()) {
    t2.setPayload(new BytesRef(new byte[] { 0x1 }));
  }
  doc.add(new TextField("foo", new CannedTokenStream(new Token[] { t1, t2 })));
  expectThrows(IllegalArgumentException.class, () -> {
    iw.addDocument(doc);
  });
  // Document should not be visible:
  IndexReader r = DirectoryReader.open(iw);
  assertEquals(0, r.numDocs());
  r.close();
  iw.close();
  dir.close();
}
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
The class TestFlattenGraphFilter, method testNonGraph.
public void testNonGraph() throws Exception {
  TokenStream in = new CannedTokenStream(0, 22, new Token[] {
      token("hello", 1, 1, 0, 5),
      token("pseudo", 1, 1, 6, 12),
      token("world", 1, 1, 13, 18),
      token("fun", 1, 1, 19, 22) });
  TokenStream out = new FlattenGraphFilter(in);
  // The input is already flat (every token has position length 1), so the output is unchanged:
  assertTokenStreamContents(out,
      new String[] { "hello", "pseudo", "world", "fun" },
      new int[] { 0, 6, 13, 19 },
      new int[] { 5, 12, 18, 22 },
      new int[] { 1, 1, 1, 1 },
      new int[] { 1, 1, 1, 1 },
      22);
}
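The token(...) helper is defined elsewhere in TestFlattenGraphFilter and is not shown here; presumably it builds a Token with the given position increment, position length, and offsets, roughly along these lines (a sketch of the assumed shape, not the actual helper):

// Assumed shape of the test's token(...) helper; the real implementation may differ in detail.
private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) {
  Token t = new Token(term, startOffset, endOffset);
  t.setPositionIncrement(posInc);
  t.setPositionLength(posLength);
  return t;
}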