Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
From the class TestPayloadsOnVectors, method testMixupMultiValued.
/** Some field instances have a payload attribute, some not. */
public void testMixupMultiValued() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorPayloads(true);
  customType.setStoreTermVectorOffsets(random().nextBoolean());
  // First value: an ordinary tokenized stream, no payloads.
  Field field = new Field("field", "", customType);
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("here we go"));
  field.setTokenStream(ts);
  doc.add(field);
  // Second value: a single pre-built token carrying a payload.
  Field field2 = new Field("field", "", customType);
  Token withPayload = new Token("withPayload", 0, 11);
  withPayload.setPayload(new BytesRef("test"));
  ts = new CannedTokenStream(withPayload);
  assertTrue(ts.hasAttribute(PayloadAttribute.class));
  field2.setTokenStream(ts);
  doc.add(field2);
  // Third value: tokenized again, no payloads.
  Field field3 = new Field("field", "", customType);
  ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("nopayload"));
  field3.setTokenStream(ts);
  doc.add(field3);
  writer.addDocument(doc);
  DirectoryReader reader = writer.getReader();
  Terms terms = reader.getTermVector(0, "field");
  assert terms != null;
  TermsEnum termsEnum = terms.iterator();
  assertTrue(termsEnum.seekExact(new BytesRef("withPayload")));
  PostingsEnum de = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(0, de.nextDoc());
  // "here we go" fills positions 0-2 of the multi-valued field, so
  // "withPayload" is indexed at position 3.
  assertEquals(3, de.nextPosition());
  assertEquals(new BytesRef("test"), de.getPayload());
  writer.close();
  reader.close();
  dir.close();
}
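The core pattern the test exercises, a single pre-built Token carrying a payload and replayed by a CannedTokenStream, can be isolated from the test harness. Below is a minimal sketch; the class and method names (PayloadStreamSketch, payloadStream) are illustrative, not from lucene-solr:

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;

public class PayloadStreamSketch {
  // Builds a one-token stream whose token carries the given payload.
  static CannedTokenStream payloadStream(String term, BytesRef payload) {
    // Token(text, startOffset, endOffset); the offsets span the term text.
    Token token = new Token(term, 0, term.length());
    token.setPayload(payload);
    // CannedTokenStream replays the given tokens verbatim.
    return new CannedTokenStream(token);
  }

  public static void main(String[] args) {
    CannedTokenStream ts = payloadStream("withPayload", new BytesRef("test"));
    // The stream exposes PayloadAttribute, which the tests assert on.
    System.out.println(ts.hasAttribute(PayloadAttribute.class)); // prints true
  }
}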
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
From the class TestPayloads, method testMixupDocs.
/** Some docs have a payload attribute, some not. */
public void testMixupDocs() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  // The same doc/field instances are reused; only the token stream is
  // swapped between addDocument calls.
  Document doc = new Document();
  Field field = new TextField("field", "", Field.Store.NO);
  // Doc 0: an ordinary tokenized stream, no payloads.
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("here we go"));
  field.setTokenStream(ts);
  doc.add(field);
  writer.addDocument(doc);
  // Doc 1: a single pre-built token carrying a payload.
  Token withPayload = new Token("withPayload", 0, 11);
  withPayload.setPayload(new BytesRef("test"));
  ts = new CannedTokenStream(withPayload);
  assertTrue(ts.hasAttribute(PayloadAttribute.class));
  field.setTokenStream(ts);
  writer.addDocument(doc);
  // Doc 2: tokenized again, no payloads.
  ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("another"));
  field.setTokenStream(ts);
  writer.addDocument(doc);
  DirectoryReader reader = writer.getReader();
  TermsEnum te = MultiFields.getFields(reader).terms("field").iterator();
  assertTrue(te.seekExact(new BytesRef("withPayload")));
  PostingsEnum de = te.postings(null, PostingsEnum.PAYLOADS);
  de.nextDoc();
  de.nextPosition();
  assertEquals(new BytesRef("test"), de.getPayload());
  writer.close();
  reader.close();
  dir.close();
}
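The read side of this test generalizes into a small lookup helper. Below is a minimal sketch under the same Lucene version assumptions as the test; the class and method names (PayloadLookup, firstPayload) are illustrative:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

final class PayloadLookup {
  // Returns the payload at the first position of the first document
  // containing the term, or null if the field or term is absent or the
  // position carries no payload.
  static BytesRef firstPayload(IndexReader reader, String field, String term) throws IOException {
    Terms terms = MultiFields.getFields(reader).terms(field);
    if (terms == null) {
      return null;
    }
    TermsEnum te = terms.iterator();
    if (!te.seekExact(new BytesRef(term))) {
      return null;
    }
    // Request only payloads; PostingsEnum.ALL would also ask for offsets.
    PostingsEnum pe = te.postings(null, PostingsEnum.PAYLOADS);
    if (pe.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
      return null;
    }
    pe.nextPosition(); // payloads are per position
    return pe.getPayload();
  }
}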
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
From the class TestPayloads, method testMixupMultiValued.
/** Some field instances have a payload attribute, some not. */
public void testMixupMultiValued() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  // First value: an ordinary tokenized stream, no payloads.
  Field field = new TextField("field", "", Field.Store.NO);
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("here we go"));
  field.setTokenStream(ts);
  doc.add(field);
  // Second value: a single pre-built token carrying a payload.
  Field field2 = new TextField("field", "", Field.Store.NO);
  Token withPayload = new Token("withPayload", 0, 11);
  withPayload.setPayload(new BytesRef("test"));
  ts = new CannedTokenStream(withPayload);
  assertTrue(ts.hasAttribute(PayloadAttribute.class));
  field2.setTokenStream(ts);
  doc.add(field2);
  // Third value: tokenized again, no payloads.
  Field field3 = new TextField("field", "", Field.Store.NO);
  ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  ((Tokenizer) ts).setReader(new StringReader("nopayload"));
  field3.setTokenStream(ts);
  doc.add(field3);
  writer.addDocument(doc);
  DirectoryReader reader = writer.getReader();
  LeafReader sr = getOnlyLeafReader(reader);
  // LeafReader.postings(Term, flags) seeks the term and returns its
  // postings in one call.
  PostingsEnum de = sr.postings(new Term("field", "withPayload"), PostingsEnum.PAYLOADS);
  de.nextDoc();
  de.nextPosition();
  assertEquals(new BytesRef("test"), de.getPayload());
  writer.close();
  reader.close();
  dir.close();
}
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
From the class ShingleFilterTest, method testTwoTrailingHolesTriShingle.
public void testTwoTrailingHolesTriShingle() throws IOException {
  // Analyzing "purple wizard of the", where "of" and "the" are removed as
  // stopwords, leaving two trailing holes (final position increment 2, final offset 20).
  Token[] inputTokens = new Token[] { createToken("purple", 0, 6), createToken("wizard", 7, 13) };
  // Min/max shingle size (2, 3): bigrams and trigrams, plus the default unigrams;
  // "_" is the default filler token standing in for each hole.
  ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
  assertTokenStreamContents(filter,
      new String[] { "purple", "purple wizard", "purple wizard _", "wizard", "wizard _", "wizard _ _" }, // terms
      new int[] { 0, 0, 0, 7, 7, 7 },      // start offsets
      new int[] { 6, 13, 20, 13, 20, 20 }, // end offsets
      new int[] { 1, 0, 0, 1, 0, 0 },      // position increments
      20);                                 // final offset
}
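Both shingle tests call createToken, a private helper defined elsewhere in ShingleFilterTest and not included in this excerpt. A plausible sketch, consistent with how it is called here (term text plus start and end offsets):

// Plausible reconstruction of the helper, not the verbatim original.
// Token(text, startOffset, endOffset) leaves the position increment at its
// default of 1, matching the expected increments in the assertions.
private static Token createToken(String term, int startOffset, int endOffset) {
  return new Token(term, startOffset, endOffset);
}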
Use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.
From the class ShingleFilterTest, method testTwoTrailingHoles.
public void testTwoTrailingHoles() throws IOException {
  // Analyzing "purple wizard of the", where "of" and "the" are removed as
  // stopwords, leaving two trailing holes (final position increment 2, final offset 20).
  Token[] inputTokens = new Token[] { createToken("purple", 0, 6), createToken("wizard", 7, 13) };
  // Min/max shingle size (2, 2): bigrams only, plus the default unigrams.
  ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 2);
  assertTokenStreamContents(filter,
      new String[] { "purple", "purple wizard", "wizard", "wizard _" }, // terms
      new int[] { 0, 0, 7, 7 },    // start offsets
      new int[] { 6, 13, 13, 20 }, // end offsets
      new int[] { 1, 0, 1, 0 },    // position increments
      20);                         // final offset
}