Example usage of org.apache.lucene.analysis.CannedTokenStream from the Apache lucene-solr project: class TestGraphTokenStreamFiniteStrings, method testSingleGraphWithGap.
/**
 * Verifies finite-string expansion and articulation points for a token graph
 * containing a position gap (a removed stopword) and a multi-position synonym.
 * Input models "hey the fast wifi network" with "the" removed, so "fast" carries
 * posInc=2; "wifi" (posLength=2) is a side path over "wi"/"fi".
 */
public void testSingleGraphWithGap() throws Exception {
  // token(term, posInc, posLength) — helper defined elsewhere in this test class.
  TokenStream ts = new CannedTokenStream(
      token("hey", 1, 1),
      token("fast", 2, 1),   // posInc=2: gap left by the removed "the"
      token("wi", 1, 1),
      token("wifi", 0, 2),   // stacked synonym spanning "wi fi"
      token("fi", 1, 1),
      token("network", 1, 1));
  GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);

  // The full graph expands to exactly two paths: one through "wi fi", one through "wifi".
  Iterator<TokenStream> it = graph.getFiniteStrings();
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "hey", "fast", "wi", "fi", "network" }, new int[] { 1, 2, 1, 1, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "hey", "fast", "wifi", "network" }, new int[] { 1, 2, 1 , 1 });
  assertFalse(it.hasNext());

  // Articulation points split the graph where all paths converge.
  // NOTE: JUnit's assertArrayEquals contract is (expected, actual) — expected first.
  int[] points = graph.articulationPoints();
  assertArrayEquals(new int[] { 1, 2, 4 }, points);

  // Segment [0,1): single token "hey", no side path.
  assertFalse(graph.hasSidePath(0));
  it = graph.getFiniteStrings(0, 1);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "hey" }, new int[] { 1 });
  assertFalse(it.hasNext());
  Term[] terms = graph.getTerms("field", 0);
  assertArrayEquals(new Term[] { new Term("field", "hey") }, terms);

  // Segment [1,2): single token "fast" (still carrying the gap's posInc=2).
  assertFalse(graph.hasSidePath(1));
  it = graph.getFiniteStrings(1, 2);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast" }, new int[] { 2 });
  assertFalse(it.hasNext());
  terms = graph.getTerms("field", 1);
  assertArrayEquals(new Term[] { new Term("field", "fast") }, terms);

  // Segment [2,4): the "wi fi" / "wifi" side path.
  assertTrue(graph.hasSidePath(2));
  it = graph.getFiniteStrings(2, 4);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "wi", "fi" }, new int[] { 1, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "wifi" }, new int[] { 1 });
  assertFalse(it.hasNext());

  // Tail segment [4, end): single token "network".
  assertFalse(graph.hasSidePath(4));
  it = graph.getFiniteStrings(4, -1);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "network" }, new int[] { 1 });
  assertFalse(it.hasNext());
  terms = graph.getTerms("field", 4);
  assertArrayEquals(new Term[] { new Term("field", "network") }, terms);
}
Example usage of org.apache.lucene.analysis.CannedTokenStream from the Apache lucene-solr project: class TestGraphTokenStreamFiniteStrings, method testStackedGraphWithGap.
/**
 * Verifies finite-string expansion when a position gap precedes a stack of
 * synonyms: "wi"/"wifi"/"wireless" all start at the same position (posInc=2
 * reflects a removed token before them), and "wifi"/"wireless" span two positions.
 */
public void testStackedGraphWithGap() throws Exception {
  // token(term, posInc, posLength) — helper defined elsewhere in this test class.
  TokenStream ts = new CannedTokenStream(
      token("fast", 1, 1),
      token("wi", 2, 1),        // posInc=2: gap before the synonym stack
      token("wifi", 0, 2),      // stacked synonym spanning "wi fi"
      token("wireless", 0, 2),  // second stacked synonym, same span
      token("fi", 1, 1),
      token("network", 1, 1));
  GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);

  // Three total paths: through "wi fi", "wifi", and "wireless".
  Iterator<TokenStream> it = graph.getFiniteStrings();
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast", "wi", "fi", "network" }, new int[] { 1, 2, 1, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast", "wifi", "network" }, new int[] { 1, 2, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast", "wireless", "network" }, new int[] { 1, 2, 1 });
  assertFalse(it.hasNext());

  // NOTE: JUnit's assertArrayEquals contract is (expected, actual) — expected first.
  int[] points = graph.articulationPoints();
  assertArrayEquals(new int[] { 1, 3 }, points);

  // Segment [0,1): single token "fast", no side path.
  assertFalse(graph.hasSidePath(0));
  it = graph.getFiniteStrings(0, 1);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast" }, new int[] { 1 });
  assertFalse(it.hasNext());
  Term[] terms = graph.getTerms("field", 0);
  assertArrayEquals(new Term[] { new Term("field", "fast") }, terms);

  // Segment [1,3): the synonym stack; each alternative keeps the gap's posInc=2.
  assertTrue(graph.hasSidePath(1));
  it = graph.getFiniteStrings(1, 3);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "wi", "fi" }, new int[] { 2, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "wifi" }, new int[] { 2 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "wireless" }, new int[] { 2 });
  assertFalse(it.hasNext());

  // Tail segment [3, end): single token "network".
  assertFalse(graph.hasSidePath(3));
  it = graph.getFiniteStrings(3, -1);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "network" }, new int[] { 1 });
  assertFalse(it.hasNext());
  terms = graph.getTerms("field", 3);
  assertArrayEquals(new Term[] { new Term("field", "network") }, terms);
}
Example usage of org.apache.lucene.analysis.CannedTokenStream from the Apache lucene-solr project: class TestGraphTokenStreamFiniteStrings, method testGraphWithRegularSynonym.
/**
 * Verifies finite-string expansion when a single-position synonym
 * ("fast"/"speedy") is combined with a multi-position synonym
 * ("wifi" over "wi fi"): the graph expands to the 2x2 = 4 paths.
 */
public void testGraphWithRegularSynonym() throws Exception {
  // token(term, posInc, posLength) — helper defined elsewhere in this test class.
  TokenStream ts = new CannedTokenStream(
      token("fast", 1, 1),
      token("speedy", 0, 1),  // single-position synonym of "fast"
      token("wi", 1, 1),
      token("wifi", 0, 2),    // stacked synonym spanning "wi fi"
      token("fi", 1, 1),
      token("network", 1, 1));
  GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);

  // Four paths: {fast, speedy} x {wi fi, wifi}.
  Iterator<TokenStream> it = graph.getFiniteStrings();
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast", "wi", "fi", "network" }, new int[] { 1, 1, 1, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast", "wifi", "network" }, new int[] { 1, 1, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "speedy", "wi", "fi", "network" }, new int[] { 1, 1, 1, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "speedy", "wifi", "network" }, new int[] { 1, 1, 1 });
  assertFalse(it.hasNext());

  // NOTE: JUnit's assertArrayEquals contract is (expected, actual) — expected first.
  int[] points = graph.articulationPoints();
  assertArrayEquals(new int[] { 1, 3 }, points);

  // Segment [0,1): the single-position synonym pair — not a side path,
  // both alternatives occupy one position.
  assertFalse(graph.hasSidePath(0));
  it = graph.getFiniteStrings(0, 1);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "fast" }, new int[] { 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "speedy" }, new int[] { 1 });
  assertFalse(it.hasNext());
  Term[] terms = graph.getTerms("field", 0);
  assertArrayEquals(new Term[] { new Term("field", "fast"), new Term("field", "speedy") }, terms);

  // Segment [1,3): the "wi fi" / "wifi" side path.
  assertTrue(graph.hasSidePath(1));
  it = graph.getFiniteStrings(1, 3);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "wi", "fi" }, new int[] { 1, 1 });
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "wifi" }, new int[] { 1 });
  assertFalse(it.hasNext());

  // Tail segment [3, end): single token "network".
  assertFalse(graph.hasSidePath(3));
  it = graph.getFiniteStrings(3, -1);
  assertTrue(it.hasNext());
  assertTokenStream(it.next(), new String[] { "network" }, new int[] { 1 });
  assertFalse(it.hasNext());
  terms = graph.getTerms("field", 3);
  assertArrayEquals(new Term[] { new Term("field", "network") }, terms);
}
Example usage of org.apache.lucene.analysis.CannedTokenStream from the Elastic elasticsearch project: class FlattenGraphTokenFilterFactoryTests, method testBasic.
/**
 * Feeds a token graph with parallel side paths through the flatten-graph
 * filter and checks that the output is collapsed into a single-level stream.
 */
public void testBasic() throws IOException {
  final Index index = new Index("test", "_na_");
  final String name = "ngr";
  final Settings indexSettings = newAnalysisSettingsBuilder().build();
  final IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
  final Settings settings = newAnalysisSettingsBuilder().build();

  // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input:
  final Token[] graphTokens = new Token[] {
      token("wtf", 1, 5, 0, 3),
      token("what", 0, 1, 0, 3),
      token("wow", 0, 3, 0, 3),
      token("the", 1, 1, 0, 3),
      token("fudge", 1, 3, 0, 3),
      token("that's", 1, 1, 0, 3),
      token("funny", 1, 1, 0, 3),
      token("happened", 1, 1, 4, 12)
  };
  final TokenStream in = new CannedTokenStream(0, 12, graphTokens);
  final TokenStream tokens =
      new FlattenGraphTokenFilterFactory(indexProperties, null, name, settings).create(in);

  // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
  final String[] expectedTerms = { "wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened" };
  final int[] expectedStartOffsets = { 0, 0, 0, 0, 0, 0, 0, 4 };
  final int[] expectedEndOffsets = { 3, 3, 3, 3, 3, 3, 3, 12 };
  final int[] expectedPosIncrements = { 1, 0, 0, 1, 0, 1, 0, 1 };
  final int[] expectedPosLengths = { 3, 1, 1, 1, 1, 1, 1, 1 };
  assertTokenStreamContents(tokens, expectedTerms, expectedStartOffsets, expectedEndOffsets,
      expectedPosIncrements, expectedPosLengths, 12);
}
Example usage of org.apache.lucene.analysis.CannedTokenStream from the Elastic elasticsearch project: class TokenCountFieldMapperTests, method testCountPositions.
/**
 * Verifies that {@code TokenCountFieldMapper.countPositions} sums position
 * increments rather than counting tokens: a zero-increment token adds nothing,
 * a normal token adds one, a multi-increment token adds its full increment,
 * and the stream's final-token increment is included.
 */
public void testCountPositions() throws IOException {
  // We're looking to make sure that we:
  // Don't count tokens without an increment
  Token t1 = new Token();
  t1.setPositionIncrement(0);
  Token t2 = new Token();
  // Count normal tokens with one increment
  t2.setPositionIncrement(1);
  Token t3 = new Token();
  // Count funny tokens with more than one increment
  // BUG FIX: original set t2's increment a second time here, leaving t3 at its
  // default; the multi-increment case was never exercised as a distinct token.
  t3.setPositionIncrement(2);
  // Count the final token increment on the rare token streams that have them
  int finalTokenIncrement = 4;
  Token[] tokens = new Token[] { t1, t2, t3 };
  // Shuffle to prove that counting is order-independent.
  Collections.shuffle(Arrays.asList(tokens), random());
  final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
  // TODO: we have no CannedAnalyzer?
  Analyzer analyzer = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      return new TokenStreamComponents(new MockTokenizer(), tokenStream);
    }
  };
  // Expected: 0 (t1) + 1 (t2) + 2 (t3) + 4 (final increment) = 7.
  assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}
Aggregations