Search in sources :

Example 6 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestMultiPhraseQuery method doTestZeroPosIncrSloppy.

private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
    // random dir
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(null);
    IndexWriter writer = new IndexWriter(dir, cfg);
    Document doc = new Document();
    doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
    writer.addDocument(doc);
    IndexReader r = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher s = newSearcher(r);
    if (VERBOSE) {
        System.out.println("QUERY=" + q);
    }
    TopDocs hits = s.search(q, 1);
    assertEquals("wrong number of results", nExpected, hits.totalHits);
    if (VERBOSE) {
        for (int hit = 0; hit < hits.totalHits; hit++) {
            ScoreDoc sd = hits.scoreDocs[hit];
            System.out.println("  hit doc=" + sd.doc + " score=" + sd.score);
        }
    }
    r.close();
    dir.close();
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 7 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestTrimFilter method testTrim.

public void testTrim() throws Exception {
    char[] a = " a ".toCharArray();
    char[] b = "b   ".toCharArray();
    char[] ccc = "cCc".toCharArray();
    char[] whitespace = "   ".toCharArray();
    char[] empty = "".toCharArray();
    TokenStream ts = new CannedTokenStream(new Token(new String(a, 0, a.length), 1, 5), new Token(new String(b, 0, b.length), 6, 10), new Token(new String(ccc, 0, ccc.length), 11, 15), new Token(new String(whitespace, 0, whitespace.length), 16, 20), new Token(new String(empty, 0, empty.length), 21, 21));
    ts = new TrimFilter(ts);
    assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", "" });
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 8 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestPayloadsOnVectors method testMixupDocs.

/** some docs have payload att, some not */
public void testMixupDocs() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorPayloads(true);
    customType.setStoreTermVectorOffsets(random().nextBoolean());
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    ((Tokenizer) ts).setReader(new StringReader("here we go"));
    field.setTokenStream(ts);
    doc.add(field);
    writer.addDocument(doc);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.setPayload(new BytesRef("test"));
    ts = new CannedTokenStream(withPayload);
    assertTrue(ts.hasAttribute(PayloadAttribute.class));
    field.setTokenStream(ts);
    writer.addDocument(doc);
    ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    ((Tokenizer) ts).setReader(new StringReader("another"));
    field.setTokenStream(ts);
    writer.addDocument(doc);
    DirectoryReader reader = writer.getReader();
    Terms terms = reader.getTermVector(1, "field");
    assert terms != null;
    TermsEnum termsEnum = terms.iterator();
    assertTrue(termsEnum.seekExact(new BytesRef("withPayload")));
    PostingsEnum de = termsEnum.postings(null, PostingsEnum.ALL);
    assertEquals(0, de.nextDoc());
    assertEquals(0, de.nextPosition());
    assertEquals(new BytesRef("test"), de.getPayload());
    writer.close();
    reader.close();
    dir.close();
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringReader(java.io.StringReader) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 9 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestMaxPosition method testTooBigPosition.

public void testTooBigPosition() throws Exception {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    Document doc = new Document();
    // This is at position 1:
    Token t1 = new Token("foo", 0, 3);
    t1.setPositionIncrement(2);
    if (random().nextBoolean()) {
        t1.setPayload(new BytesRef(new byte[] { 0x1 }));
    }
    Token t2 = new Token("foo", 4, 7);
    // This should overflow max:
    t2.setPositionIncrement(IndexWriter.MAX_POSITION);
    if (random().nextBoolean()) {
        t2.setPayload(new BytesRef(new byte[] { 0x1 }));
    }
    doc.add(new TextField("foo", new CannedTokenStream(new Token[] { t1, t2 })));
    expectThrows(IllegalArgumentException.class, () -> {
        iw.addDocument(doc);
    });
    // Document should not be visible:
    IndexReader r = DirectoryReader.open(iw);
    assertEquals(0, r.numDocs());
    r.close();
    iw.close();
    dir.close();
}
Also used : TextField(org.apache.lucene.document.TextField) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 10 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestFlattenGraphFilter method testNonGraph.

public void testNonGraph() throws Exception {
    TokenStream in = new CannedTokenStream(0, 22, new Token[] { token("hello", 1, 1, 0, 5), token("pseudo", 1, 1, 6, 12), token("world", 1, 1, 13, 18), token("fun", 1, 1, 19, 22) });
    TokenStream out = new FlattenGraphFilter(in);
    // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
    assertTokenStreamContents(out, new String[] { "hello", "pseudo", "world", "fun" }, new int[] { 0, 6, 13, 19 }, new int[] { 5, 12, 18, 22 }, new int[] { 1, 1, 1, 1 }, new int[] { 1, 1, 1, 1 }, 22);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Aggregations

CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)78 TokenStream (org.apache.lucene.analysis.TokenStream)43 Token (org.apache.lucene.analysis.Token)37 Directory (org.apache.lucene.store.Directory)33 Document (org.apache.lucene.document.Document)26 TextField (org.apache.lucene.document.TextField)22 Field (org.apache.lucene.document.Field)15 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)14 BytesRef (org.apache.lucene.util.BytesRef)14 FieldType (org.apache.lucene.document.FieldType)13 Term (org.apache.lucene.index.Term)13 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)11 IndexReader (org.apache.lucene.index.IndexReader)10 Analyzer (org.apache.lucene.analysis.Analyzer)9 StringField (org.apache.lucene.document.StringField)8 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)8 Reader (java.io.Reader)7 StringReader (java.io.StringReader)7 Input (org.apache.lucene.search.suggest.Input)7