Search in sources :

Example 41 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

Source: method assertConvertsTo of class TestSuggestSpellingConverter.

/**
 * Runs {@code text} through the converter under test and asserts that the
 * converted tokens, replayed as a canned stream, yield exactly the
 * {@code expected} terms in order.
 *
 * @param text     input handed to {@code converter.convert}
 * @param expected terms the resulting token stream must produce
 * @throws IOException if consuming the token stream fails
 */
public void assertConvertsTo(String text, String[] expected) throws IOException {
    Collection<Token> converted = converter.convert(text);
    Token[] asArray = converted.toArray(new Token[0]);
    assertTokenStreamContents(new CannedTokenStream(asArray), expected);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 42 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

Source: method shingleFilterTest of class ShingleFilterTest.

/**
 * Builds a {@code ShingleFilter} over the canned {@code tokensToShingle} input
 * with the given shingle size bounds and unigram flags, then delegates token
 * verification to {@code shingleFilterTestCommon}.
 *
 * @param minSize                    minimum shingle size
 * @param maxSize                    maximum shingle size
 * @param tokensToShingle            input tokens fed to the filter
 * @param tokensToCompare            expected output tokens
 * @param positionIncrements         expected position increments
 * @param types                      expected token types
 * @param outputUnigrams             whether unigrams are emitted alongside shingles
 * @param outputUnigramsIfNoShingles whether unigrams are emitted when no shingle is possible
 * @throws IOException if consuming the token stream fails
 */
protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle, Token[] tokensToCompare, int[] positionIncrements, String[] types, boolean outputUnigrams, boolean outputUnigramsIfNoShingles) throws IOException {
    TokenStream source = new CannedTokenStream(tokensToShingle);
    ShingleFilter shingles = new ShingleFilter(source, minSize, maxSize);
    shingles.setOutputUnigrams(outputUnigrams);
    shingles.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    shingleFilterTestCommon(shingles, tokensToCompare, positionIncrements, types);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 43 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

Source: method testNumericReuse of class TestLegacyFieldReuse.

/**
 * Verifies the token-stream reuse contract of {@code LegacyIntField.tokenStream}:
 * a null reuse argument yields a fresh numeric stream at the default precision
 * step; a compatible previous stream is reused as-is; and incompatible streams
 * (non-numeric, or numeric with a different precision step) are discarded in
 * favor of a new instance.
 *
 * @throws IOException if consuming a token stream fails
 */
public void testNumericReuse() throws IOException {
    LegacyIntField field = new LegacyIntField("foo", 5, Field.Store.NO);
    // Passing null for reuse: expect a brand-new numeric stream with the 32-bit default step.
    TokenStream first = field.tokenStream(null, null);
    assertTrue(first instanceof LegacyNumericTokenStream);
    assertEquals(LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, ((LegacyNumericTokenStream) first).getPrecisionStep());
    assertNumericContents(5, first);
    // Handing the previous stream back: the very same instance must be reused.
    field = new LegacyIntField("foo", 20, Field.Store.NO);
    TokenStream second = field.tokenStream(null, first);
    assertSame(first, second);
    assertNumericContents(20, first);
    // A non-numeric stream is not reusable: a different stream must come back.
    field = new LegacyIntField("foo", 2343, Field.Store.NO);
    TokenStream nonNumeric = new CannedTokenStream(new Token("bogus", 0, 5));
    TokenStream replacement = field.tokenStream(null, nonNumeric);
    assertNotSame(nonNumeric, replacement);
    assertNumericContents(2343, replacement);
    // A numeric stream built with a different precision step is likewise rejected.
    field = new LegacyIntField("foo", 42, Field.Store.NO);
    assert 3 != LegacyNumericUtils.PRECISION_STEP_DEFAULT;
    TokenStream wrongStep = new LegacyNumericTokenStream(3);
    replacement = field.tokenStream(null, wrongStep);
    assertNotSame(wrongStep, replacement);
    assertNumericContents(42, replacement);
}
Also used : LegacyNumericTokenStream(org.apache.solr.legacy.LegacyNumericTokenStream) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) LegacyIntField(org.apache.solr.legacy.LegacyIntField) LegacyNumericTokenStream(org.apache.solr.legacy.LegacyNumericTokenStream)

Example 44 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

Source: method testWTF1 of class TestFlattenGraphFilter.

/**
 * Tests flattening of a token graph in which "wow that's funny" and
 * "what the fudge" are parallel side paths alongside the single token
 * "wtf", followed by "happened".
 *
 * @throws Exception if consuming the token stream fails
 */
public void testWTF1() throws Exception {
    // Input graph: three parallel paths over offsets 0-3 ("wtf" spans 3 positions),
    // then "happened" over offsets 4-12.
    Token[] graph = new Token[] {
        token("wtf", 1, 5, 0, 3),
        token("what", 0, 1, 0, 3),
        token("wow", 0, 3, 0, 3),
        token("the", 1, 1, 0, 3),
        token("fudge", 1, 3, 0, 3),
        token("that's", 1, 1, 0, 3),
        token("funny", 1, 1, 0, 3),
        token("happened", 1, 1, 4, 12)
    };
    TokenStream flattened = new FlattenGraphFilter(new CannedTokenStream(0, 12, graph));
    // On output the graph is flattened to wtf/what/wow that's/the fudge/funny happened.
    assertTokenStreamContents(flattened,
        new String[] { "wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened" },
        new int[] { 0, 0, 0, 0, 0, 0, 0, 4 },
        new int[] { 3, 3, 3, 3, 3, 3, 3, 12 },
        new int[] { 1, 0, 0, 1, 0, 1, 0, 1 },
        new int[] { 3, 1, 1, 1, 1, 1, 1, 1 },
        12);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 45 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

Source: method testSimpleHole of class TestFlattenGraphFilter.

// Verifies that FlattenGraphFilter preserves a position hole (the posInc=2 on
// "hole") in a simple linear token stream.
public void testSimpleHole() throws Exception {
    TokenStream in = new CannedTokenStream(0, 13, new Token[] { token("hello", 1, 1, 0, 5), token("hole", 2, 1, 6, 10), token("fun", 1, 1, 11, 13) });
    TokenStream out = new FlattenGraphFilter(in);
    // The hole must survive flattening: expected position increments are 1, 2, 1.
    assertTokenStreamContents(out, new String[] { "hello", "hole", "fun" }, new int[] { 0, 6, 11 }, new int[] { 5, 10, 13 }, new int[] { 1, 2, 1 }, new int[] { 1, 1, 1 }, 13);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Aggregations

CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)78 TokenStream (org.apache.lucene.analysis.TokenStream)43 Token (org.apache.lucene.analysis.Token)37 Directory (org.apache.lucene.store.Directory)33 Document (org.apache.lucene.document.Document)26 TextField (org.apache.lucene.document.TextField)22 Field (org.apache.lucene.document.Field)15 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)14 BytesRef (org.apache.lucene.util.BytesRef)14 FieldType (org.apache.lucene.document.FieldType)13 Term (org.apache.lucene.index.Term)13 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)11 IndexReader (org.apache.lucene.index.IndexReader)10 Analyzer (org.apache.lucene.analysis.Analyzer)9 StringField (org.apache.lucene.document.StringField)8 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)8 Reader (java.io.Reader)7 StringReader (java.io.StringReader)7 Input (org.apache.lucene.search.suggest.Input)7