Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Class TestFlattenGraphFilter, method testTwoLongParallelPaths.
/**
 * Runs two six-token paths that sit side by side ("a a a a a a" next to
 * "b b b b b b") through {@link FlattenGraphFilter} and verifies the flattened result.
 */
public void testTwoLongParallelPaths() throws Exception {
  // The same value is handed to the canned stream and expected back from the filter.
  final int finalOffset = 11;

  // NOTE(review): token(...) is defined outside this view; its arguments look like
  // (term, posInc, posLength, startOffset, endOffset) -- confirm against the helper.
  Token[] graphTokens = {
    token("a", 1, 1, 0, 1),
    token("b", 0, 2, 0, 1),
    token("a", 1, 2, 2, 3),
    token("b", 1, 2, 2, 3),
    token("a", 1, 2, 4, 5),
    token("b", 1, 2, 4, 5),
    token("a", 1, 2, 6, 7),
    token("b", 1, 2, 6, 7),
    token("a", 1, 2, 8, 9),
    token("b", 1, 2, 8, 9),
    token("a", 1, 2, 10, 11),
    token("b", 1, 2, 10, 11)
  };
  TokenStream in = new CannedTokenStream(0, finalOffset, graphTokens);
  TokenStream out = new FlattenGraphFilter(in);

  // After flattening there is a single path, with an a/b pair overlapping
  // between each pair of neighbouring nodes.
  String[] expectedTerms = {"a", "b", "a", "b", "a", "b", "a", "b", "a", "b", "a", "b"};
  int[] expectedStartOffsets = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10};
  int[] expectedEndOffsets = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11};
  int[] expectedPosIncrements = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
  int[] expectedPosLengths = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

  assertTokenStreamContents(
      out,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      expectedPosIncrements,
      expectedPosLengths,
      finalOffset);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Class ShingleFilterTest, method testTwoTrailingHolesTriShingle.
/**
 * Shingles "purple wizard of the" with shingle size arguments 2 and 3, where "of" and
 * "the" have been dropped as stopwords so the stream ends with two trailing holes.
 */
public void testTwoTrailingHolesTriShingle() throws IOException {
  // Only the two surviving words are supplied as tokens; the removed stopwords are
  // represented by the first two CannedTokenStream arguments (2 and the final offset).
  final int finalOffset = 20;
  Token[] inputTokens = {
    createToken("purple", 0, 6),
    createToken("wizard", 7, 13)
  };
  CannedTokenStream source = new CannedTokenStream(2, finalOffset, inputTokens);
  ShingleFilter filter = new ShingleFilter(source, 2, 3);

  String[] expectedTerms = {
    "purple", "purple wizard", "purple wizard _", "wizard", "wizard _", "wizard _ _"
  };
  int[] expectedStartOffsets = {0, 0, 0, 7, 7, 7};
  int[] expectedEndOffsets = {6, 13, 20, 13, 20, 20};
  int[] expectedPosIncrements = {1, 0, 0, 1, 0, 0};

  assertTokenStreamContents(
      filter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      expectedPosIncrements,
      finalOffset);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Class ShingleFilterTest, method testTwoTrailingHoles.
/**
 * Shingles "purple wizard of the" with both shingle size arguments set to 2, where "of"
 * and "the" have been dropped as stopwords so the stream ends with two trailing holes.
 */
public void testTwoTrailingHoles() throws IOException {
  // Only the two surviving words are supplied as tokens; the removed stopwords are
  // represented by the first two CannedTokenStream arguments (2 and the final offset).
  final int finalOffset = 20;
  Token[] inputTokens = {
    createToken("purple", 0, 6),
    createToken("wizard", 7, 13)
  };
  CannedTokenStream source = new CannedTokenStream(2, finalOffset, inputTokens);
  ShingleFilter filter = new ShingleFilter(source, 2, 2);

  String[] expectedTerms = {"purple", "purple wizard", "wizard", "wizard _"};
  int[] expectedStartOffsets = {0, 0, 7, 7};
  int[] expectedEndOffsets = {6, 13, 13, 20};
  int[] expectedPosIncrements = {1, 0, 1, 0};

  assertTokenStreamContents(
      filter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      expectedPosIncrements,
      finalOffset);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Class ShingleFilterTest, method testTrailingHole2.
/**
 * Shingles "purple wizard of" with both shingle size arguments set to 2, where "of" has
 * been dropped as a stopword so the stream ends with a single trailing hole.
 */
public void testTrailingHole2() throws IOException {
  // Only the two surviving words are supplied as tokens; the removed stopword is
  // represented by the first two CannedTokenStream arguments (1 and the final offset).
  final int finalOffset = 16;
  Token[] inputTokens = {
    createToken("purple", 0, 6),
    createToken("wizard", 7, 13)
  };
  CannedTokenStream source = new CannedTokenStream(1, finalOffset, inputTokens);
  ShingleFilter filter = new ShingleFilter(source, 2, 2);

  String[] expectedTerms = {"purple", "purple wizard", "wizard", "wizard _"};
  int[] expectedStartOffsets = {0, 0, 7, 7};
  int[] expectedEndOffsets = {6, 13, 13, 16};
  int[] expectedPosIncrements = {1, 0, 1, 0};

  assertTokenStreamContents(
      filter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      expectedPosIncrements,
      finalOffset);
}
Use of org.apache.lucene.analysis.CannedTokenStream in the lucene-solr project by Apache.
Class ShingleFilterTest, method testTrailingHole1.
/**
 * Shingles "wizard of" with both shingle size arguments set to 2, where "of" has been
 * dropped as a stopword so the stream ends with a single trailing hole.
 */
public void testTrailingHole1() throws IOException {
  // Only the surviving word is supplied as a token; the removed stopword is
  // represented by the first two CannedTokenStream arguments (1 and the final offset).
  final int finalOffset = 9;
  Token[] inputTokens = {createToken("wizard", 0, 6)};
  CannedTokenStream source = new CannedTokenStream(1, finalOffset, inputTokens);
  ShingleFilter filter = new ShingleFilter(source, 2, 2);

  String[] expectedTerms = {"wizard", "wizard _"};
  int[] expectedStartOffsets = {0, 0};
  int[] expectedEndOffsets = {6, 9};
  int[] expectedPosIncrements = {1, 0};

  assertTokenStreamContents(
      filter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      expectedPosIncrements,
      finalOffset);
}
Aggregations