Example 66 with CannedTokenStream

Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.

From the class TestPayloads, method testMixupDocs.

/** some docs have payload att, some not */
public void testMixupDocs() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    ((Tokenizer) ts).setReader(new StringReader("here we go"));
    field.setTokenStream(ts);
    doc.add(field);
    writer.addDocument(doc);
    // second doc: a canned token stream whose single token carries a payload
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.setPayload(new BytesRef("test"));
    ts = new CannedTokenStream(withPayload);
    assertTrue(ts.hasAttribute(PayloadAttribute.class));
    field.setTokenStream(ts);
    writer.addDocument(doc);
    // third doc: back to a plain tokenizer with no payload attribute
    ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    ((Tokenizer) ts).setReader(new StringReader("another"));
    field.setTokenStream(ts);
    writer.addDocument(doc);
    // verify the payload survived indexing on the "withPayload" term
    DirectoryReader reader = writer.getReader();
    TermsEnum te = MultiFields.getFields(reader).terms("field").iterator();
    assertTrue(te.seekExact(new BytesRef("withPayload")));
    PostingsEnum de = te.postings(null, PostingsEnum.PAYLOADS);
    de.nextDoc();
    de.nextPosition();
    assertEquals(new BytesRef("test"), de.getPayload());
    writer.close();
    reader.close();
    dir.close();
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StringReader(java.io.StringReader) Tokenizer(org.apache.lucene.analysis.Tokenizer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)
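
The payload carried by the canned token can also be observed by consuming the stream directly, before any indexing happens. Below is a minimal sketch (not part of the test above) using the standard TokenStream attribute API; CharTermAttribute comes from org.apache.lucene.analysis.tokenattributes:

    Token t = new Token("withPayload", 0, 11);
    t.setPayload(new BytesRef("test"));
    TokenStream canned = new CannedTokenStream(t);
    CharTermAttribute termAtt = canned.addAttribute(CharTermAttribute.class);
    PayloadAttribute payloadAtt = canned.addAttribute(PayloadAttribute.class);
    canned.reset();
    while (canned.incrementToken()) {
        // prints: withPayload -> test
        System.out.println(termAtt.toString() + " -> " + payloadAtt.getPayload().utf8ToString());
    }
    canned.end();
    canned.close();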

Example 67 with CannedTokenStream

Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.

From the class TestPayloads, method testMixupMultiValued.

/** some field instances have payload att, some not */
public void testMixupMultiValued() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    ((Tokenizer) ts).setReader(new StringReader("here we go"));
    field.setTokenStream(ts);
    doc.add(field);
    // second value of the multi-valued field: a canned token carrying a payload
    Field field2 = new TextField("field", "", Field.Store.NO);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.setPayload(new BytesRef("test"));
    ts = new CannedTokenStream(withPayload);
    assertTrue(ts.hasAttribute(PayloadAttribute.class));
    field2.setTokenStream(ts);
    doc.add(field2);
    Field field3 = new TextField("field", "", Field.Store.NO);
    ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    ((Tokenizer) ts).setReader(new StringReader("nopayload"));
    field3.setTokenStream(ts);
    doc.add(field3);
    writer.addDocument(doc);
    // the payload must still be readable from the postings of the multi-valued field
    DirectoryReader reader = writer.getReader();
    LeafReader sr = getOnlyLeafReader(reader);
    PostingsEnum de = sr.postings(new Term("field", "withPayload"), PostingsEnum.PAYLOADS);
    de.nextDoc();
    de.nextPosition();
    assertEquals(new BytesRef("test"), de.getPayload());
    writer.close();
    reader.close();
    dir.close();
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StringReader(java.io.StringReader) Tokenizer(org.apache.lucene.analysis.Tokenizer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 68 with CannedTokenStream

Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.

From the class TestFieldReuse, method testStringField.

public void testStringField() throws IOException {
    StringField stringField = new StringField("foo", "bar", Field.Store.NO);
    // passing null
    TokenStream ts = stringField.tokenStream(null, null);
    assertTokenStreamContents(ts, new String[] { "bar" }, new int[] { 0 }, new int[] { 3 });
    // now reuse previous stream
    stringField = new StringField("foo", "baz", Field.Store.NO);
    TokenStream ts2 = stringField.tokenStream(null, ts);
    assertSame(ts, ts2);
    assertTokenStreamContents(ts, new String[] { "baz" }, new int[] { 0 }, new int[] { 3 });
    // pass a bogus stream and ensure it's still ok
    stringField = new StringField("foo", "beer", Field.Store.NO);
    TokenStream bogus = new CannedTokenStream();
    ts = stringField.tokenStream(null, bogus);
    assertNotSame(ts, bogus);
    assertTokenStreamContents(ts, new String[] { "beer" }, new int[] { 0 }, new int[] { 4 });
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) StringField(org.apache.lucene.document.StringField)
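
For reference, the assertTokenStreamContents helper used above comes from Lucene's BaseTokenStreamTestCase. Outside the test framework the same check can be written by hand; the following is a rough sketch (illustrative only, not code from TestFieldReuse) that reads the term and offsets from the stream a StringField produces:

    StringField f = new StringField("foo", "bar", Field.Store.NO);
    TokenStream stream = f.tokenStream(null, null);
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        // expected single token: bar [0,3)
        System.out.println(termAtt.toString() + " [" + offsetAtt.startOffset() + "," + offsetAtt.endOffset() + ")");
    }
    stream.end();
    stream.close();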

Example 69 with CannedTokenStream

Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.

From the class TestGraphTokenStreamFiniteStrings, method testGraphAndGapSameTokenTerm.

public void testGraphAndGapSameTokenTerm() throws Exception {
    // token(term, posInc, posLength): "c" starts after a position gap (posInc=2), and the second "a" (posInc=0, posLength=2) forms a side path spanning "c" and "d"
    TokenStream ts = new CannedTokenStream(token("a", 1, 1), token("b", 1, 1), token("c", 2, 1), token("a", 0, 2), token("d", 1, 1));
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);
    Iterator<TokenStream> it = graph.getFiniteStrings();
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "a", "b", "c", "d" }, new int[] { 1, 1, 2, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "a", "b", "a" }, new int[] { 1, 1, 2 });
    assertFalse(it.hasNext());
    int[] points = graph.articulationPoints();
    assertArrayEquals(points, new int[] { 1, 2 });
    assertFalse(graph.hasSidePath(0));
    it = graph.getFiniteStrings(0, 1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "a" }, new int[] { 1 });
    assertFalse(it.hasNext());
    Term[] terms = graph.getTerms("field", 0);
    assertArrayEquals(terms, new Term[] { new Term("field", "a") });
    assertFalse(graph.hasSidePath(1));
    it = graph.getFiniteStrings(1, 2);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "b" }, new int[] { 1 });
    assertFalse(it.hasNext());
    terms = graph.getTerms("field", 1);
    assertArrayEquals(terms, new Term[] { new Term("field", "b") });
    assertTrue(graph.hasSidePath(2));
    it = graph.getFiniteStrings(2, -1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "c", "d" }, new int[] { 2, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "a" }, new int[] { 2 });
    assertFalse(it.hasNext());
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Term(org.apache.lucene.index.Term)
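
The token(...) calls in this test refer to a private helper in TestGraphTokenStreamFiniteStrings that is not shown in the snippet. It presumably builds a Token with the given position increment and position length, roughly like this (a sketch, not the verbatim source):

    private static Token token(String term, int posInc, int posLength) {
        final Token t = new Token(term, 0, term.length());
        t.setPositionIncrement(posInc);
        t.setPositionLength(posLength);
        return t;
    }

The same helper is assumed in testMultipleSidePaths in the next example.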

Example 70 with CannedTokenStream

Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.

From the class TestGraphTokenStreamFiniteStrings, method testMultipleSidePaths.

public void testMultipleSidePaths() throws Exception {
    TokenStream ts = new CannedTokenStream(token("the", 1, 1), token("ny", 1, 4), token("new", 0, 1), token("york", 1, 1), token("wifi", 1, 4), token("wi", 0, 1), token("fi", 1, 3), token("wifi", 2, 2), token("wi", 0, 1), token("fi", 1, 1), token("network", 1, 1));
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);
    Iterator<TokenStream> it = graph.getFiniteStrings();
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "the", "ny", "wifi", "network" }, new int[] { 1, 1, 2, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "the", "ny", "wi", "fi", "network" }, new int[] { 1, 1, 2, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "the", "new", "york", "wifi", "network" }, new int[] { 1, 1, 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "the", "new", "york", "wi", "fi", "network" }, new int[] { 1, 1, 1, 1, 1, 1 });
    assertFalse(it.hasNext());
    int[] points = graph.articulationPoints();
    assertArrayEquals(points, new int[] { 1, 7 });
    assertFalse(graph.hasSidePath(0));
    it = graph.getFiniteStrings(0, 1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "the" }, new int[] { 1 });
    assertFalse(it.hasNext());
    Term[] terms = graph.getTerms("field", 0);
    assertArrayEquals(terms, new Term[] { new Term("field", "the") });
    assertTrue(graph.hasSidePath(1));
    it = graph.getFiniteStrings(1, 7);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "ny", "wifi" }, new int[] { 1, 2 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "ny", "wi", "fi" }, new int[] { 1, 2, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "new", "york", "wifi" }, new int[] { 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "new", "york", "wi", "fi" }, new int[] { 1, 1, 1, 1 });
    assertFalse(it.hasNext());
    assertFalse(graph.hasSidePath(7));
    it = graph.getFiniteStrings(7, -1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "network" }, new int[] { 1 });
    assertFalse(it.hasNext());
    terms = graph.getTerms("field", 7);
    assertArrayEquals(terms, new Term[] { new Term("field", "network") });
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Term(org.apache.lucene.index.Term)

Aggregations

CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 78
TokenStream (org.apache.lucene.analysis.TokenStream): 43
Token (org.apache.lucene.analysis.Token): 37
Directory (org.apache.lucene.store.Directory): 33
Document (org.apache.lucene.document.Document): 26
TextField (org.apache.lucene.document.TextField): 22
Field (org.apache.lucene.document.Field): 15
MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 14
BytesRef (org.apache.lucene.util.BytesRef): 14
FieldType (org.apache.lucene.document.FieldType): 13
Term (org.apache.lucene.index.Term): 13
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 11
Tokenizer (org.apache.lucene.analysis.Tokenizer): 11
IndexReader (org.apache.lucene.index.IndexReader): 10
Analyzer (org.apache.lucene.analysis.Analyzer): 9
StringField (org.apache.lucene.document.StringField): 8
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 8
Reader (java.io.Reader): 7
StringReader (java.io.StringReader): 7
Input (org.apache.lucene.search.suggest.Input): 7