Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project (Apache):
class TestFlattenGraphFilter, method testStrangelyNumberedNodes.
public void testStrangelyNumberedNodes() throws Exception {
  // The input graph assigns only nodes 0, 2 and 3; node 1 is simply never
  // used (it is NOT a hole), and flattening must still renumber cleanly.
  Token[] tokens = {
    token("dog", 1, 3, 0, 5),
    token("puppy", 0, 3, 0, 5),
    token("flies", 3, 1, 6, 11)
  };
  TokenStream flattened = new FlattenGraphFilter(new CannedTokenStream(0, 27, tokens));
  assertTokenStreamContents(
      flattened,
      new String[] {"dog", "puppy", "flies"},
      new int[] {0, 0, 6},
      new int[] {5, 5, 11},
      new int[] {1, 0, 1},
      new int[] {1, 1, 1},
      27);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project (Apache):
class TestFlattenGraphFilter, method testAlreadyFlatten.
// Verify the filter is a no-op when the incoming graph is already flat.
public void testAlreadyFlatten() throws Exception {
  Token[] tokens = {
    token("wtf", 1, 1, 0, 3),
    token("what", 0, 1, 0, 3),
    token("wow", 0, 1, 0, 3),
    token("the", 1, 1, 0, 3),
    token("that's", 0, 1, 0, 3),
    token("fudge", 1, 1, 0, 3),
    token("funny", 0, 1, 0, 3),
    token("happened", 1, 1, 4, 12)
  };
  TokenStream flattened = new FlattenGraphFilter(new CannedTokenStream(0, 12, tokens));
  // Output must be unchanged: wtf/what/wow that's/the fudge/funny happened.
  assertTokenStreamContents(
      flattened,
      new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"},
      new int[] {0, 0, 0, 0, 0, 0, 0, 4},
      new int[] {3, 3, 3, 3, 3, 3, 3, 12},
      new int[] {1, 0, 0, 1, 0, 1, 0, 1},
      new int[] {1, 1, 1, 1, 1, 1, 1, 1},
      12);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project (Apache):
class TestFlattenGraphFilter, method testHoleUnderSyn.
public void testHoleUnderSyn() throws Exception {
  // Simulates a StopFilter running after a SynonymFilter, where a stopword
  // inside a synonym's span was removed:
  //
  //   "wizard of oz" matched the synonym "woz", then "of" was dropped,
  //   leaving a hole underneath the synonym.
  Token[] tokens = {
    token("wizard", 1, 1, 0, 6),
    token("woz", 0, 3, 0, 12),
    token("oz", 2, 1, 10, 12)
  };
  TokenStream flattened = new FlattenGraphFilter(new CannedTokenStream(0, 12, tokens));
  assertTokenStreamContents(
      flattened,
      new String[] {"wizard", "woz", "oz"},
      new int[] {0, 0, 10},
      new int[] {6, 12, 12},
      new int[] {1, 0, 2},
      new int[] {1, 3, 1},
      12);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project (Apache):
class TestFlattenGraphFilter, method testWTF2.
/** Same as testWTF1 except the "wtf" token comes out later */
public void testWTF2() throws Exception {
  // On input, "wow that's funny" and "what the fudge" are separate side
  // paths running in parallel with the single token "wtf":
  Token[] tokens = {
    token("what", 1, 1, 0, 3),
    token("wow", 0, 3, 0, 3),
    token("wtf", 0, 5, 0, 3),
    token("the", 1, 1, 0, 3),
    token("fudge", 1, 3, 0, 3),
    token("that's", 1, 1, 0, 3),
    token("funny", 1, 1, 0, 3),
    token("happened", 1, 1, 4, 12)
  };
  TokenStream flattened = new FlattenGraphFilter(new CannedTokenStream(0, 12, tokens));
  // ... but on output the graph is flattened to
  // wtf/what/wow that's/the fudge/funny happened:
  assertTokenStreamContents(
      flattened,
      new String[] {"what", "wow", "wtf", "the", "that's", "fudge", "funny", "happened"},
      new int[] {0, 0, 0, 0, 0, 0, 0, 4},
      new int[] {3, 3, 3, 3, 3, 3, 3, 12},
      new int[] {1, 0, 0, 1, 0, 1, 0, 1},
      new int[] {1, 1, 3, 1, 1, 1, 1, 1},
      12);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project (Apache):
class TestFlattenGraphFilter, method testNonGreedySynonyms.
public void testNonGreedySynonyms() throws Exception {
  // This input is only "hypothetical" for Lucene today, because SynFilter
  // is greedy: when two synonym rules match overlapping tokens, exactly one
  // wins (greedily). This test pretends all synonym matches could coexist:
  Token[] tokens = {
    token("wizard", 1, 1, 0, 6),
    token("wizard_of_oz", 0, 3, 0, 12),
    token("of", 1, 1, 7, 9),
    token("oz", 1, 1, 10, 12),
    token("oz_screams", 0, 2, 10, 20),
    token("screams", 1, 1, 13, 20)
  };
  TokenStream flattened = new FlattenGraphFilter(new CannedTokenStream(0, 20, tokens));
  // Flattening preserves every token here; only the graph node numbering is
  // normalized, so positions/offsets below match the input exactly:
  assertTokenStreamContents(
      flattened,
      new String[] {"wizard", "wizard_of_oz", "of", "oz", "oz_screams", "screams"},
      new int[] {0, 0, 7, 10, 10, 13},
      new int[] {6, 12, 9, 12, 20, 20},
      new int[] {1, 0, 1, 1, 0, 1},
      new int[] {1, 3, 1, 1, 2, 1},
      20);
}
Aggregations of usages end here.