Search in sources :

Example 71 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestGraphTokenStreamFiniteStrings method testStackedGraph.

/**
 * Checks the finite strings, articulation points, side-path flags and single terms that
 * {@code GraphTokenStreamFiniteStrings} reports for a stream in which "wifi" and
 * "wireless" are supplied next to the two-token sequence "wi", "fi".
 *
 * <p>NOTE(review): the two numeric arguments of the {@code token(...)} helper are
 * presumably (position increment, position length) -- the helper is not visible here,
 * confirm before relying on that reading.
 *
 * @throws Exception if building or consuming any of the token streams fails
 */
public void testStackedGraph() throws Exception {
    TokenStream ts = new CannedTokenStream(token("fast", 1, 1), token("wi", 1, 1), token("wifi", 0, 2), token("wireless", 0, 2), token("fi", 1, 1), token("network", 1, 1));
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);

    // Whole graph: exactly three alternatives are expected, in this order.
    Iterator<TokenStream> it = graph.getFiniteStrings();
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "fast", "wi", "fi", "network" }, new int[] { 1, 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "fast", "wifi", "network" }, new int[] { 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "fast", "wireless", "network" }, new int[] { 1, 1, 1 });
    assertFalse(it.hasNext());

    // JUnit's assertArrayEquals takes (expecteds, actuals); keeping that order makes a
    // failure message label the two arrays correctly.
    int[] points = graph.articulationPoints();
    assertArrayEquals(new int[] { 1, 3 }, points);

    // Segment 0..1: no side path, the single term "fast".
    assertFalse(graph.hasSidePath(0));
    it = graph.getFiniteStrings(0, 1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "fast" }, new int[] { 1 });
    assertFalse(it.hasNext());
    Term[] terms = graph.getTerms("field", 0);
    assertArrayEquals(new Term[] { new Term("field", "fast") }, terms);

    // Segment 1..3: has a side path with three alternatives.
    assertTrue(graph.hasSidePath(1));
    it = graph.getFiniteStrings(1, 3);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "wi", "fi" }, new int[] { 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "wifi" }, new int[] { 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "wireless" }, new int[] { 1 });
    assertFalse(it.hasNext());

    // Segment starting at 3: no side path, the single term "network".
    // NOTE(review): an end state of -1 presumably means "up to the end of the graph" -- confirm.
    assertFalse(graph.hasSidePath(3));
    it = graph.getFiniteStrings(3, -1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "network" }, new int[] { 1 });
    assertFalse(it.hasNext());
    terms = graph.getTerms("field", 3);
    assertArrayEquals(new Term[] { new Term("field", "network") }, terms);
}
Also used: CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream), Term (org.apache.lucene.index.Term)

Example 72 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestGraphTokenStreamFiniteStrings method testStackedGraphWithRepeat.

/**
 * Checks the finite strings, articulation points, side-path flags and single terms that
 * {@code GraphTokenStreamFiniteStrings} reports for a stream in which "ny" is supplied
 * next to "new york city" and "new york", so the terms "new" and "york" each occur twice
 * in the input.
 *
 * <p>NOTE(review): the two numeric arguments of the {@code token(...)} helper are
 * presumably (position increment, position length) -- the helper is not visible here,
 * confirm before relying on that reading.
 *
 * @throws Exception if building or consuming any of the token streams fails
 */
public void testStackedGraphWithRepeat() throws Exception {
    TokenStream ts = new CannedTokenStream(token("ny", 1, 4), token("new", 0, 1), token("new", 0, 3), token("york", 1, 1), token("city", 1, 2), token("york", 1, 1), token("is", 1, 1), token("great", 1, 1));
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);

    // Whole graph: exactly three alternatives are expected, in this order.
    Iterator<TokenStream> it = graph.getFiniteStrings();
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "ny", "is", "great" }, new int[] { 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "new", "york", "city", "is", "great" }, new int[] { 1, 1, 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "new", "york", "is", "great" }, new int[] { 1, 1, 1, 1 });
    assertFalse(it.hasNext());

    // JUnit's assertArrayEquals takes (expecteds, actuals); keeping that order makes a
    // failure message label the two arrays correctly.
    int[] points = graph.articulationPoints();
    assertArrayEquals(new int[] { 4, 5 }, points);

    // Segment 0..4: has a side path with three alternatives.
    assertTrue(graph.hasSidePath(0));
    it = graph.getFiniteStrings(0, 4);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "ny" }, new int[] { 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "new", "york", "city" }, new int[] { 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "new", "york" }, new int[] { 1, 1 });
    assertFalse(it.hasNext());

    // Segment 4..5: no side path, the single term "is".
    assertFalse(graph.hasSidePath(4));
    it = graph.getFiniteStrings(4, 5);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "is" }, new int[] { 1 });
    assertFalse(it.hasNext());
    Term[] terms = graph.getTerms("field", 4);
    assertArrayEquals(new Term[] { new Term("field", "is") }, terms);

    // Segment starting at 5: no side path, the single term "great".
    // NOTE(review): an end state of -1 presumably means "up to the end of the graph" -- confirm.
    assertFalse(graph.hasSidePath(5));
    it = graph.getFiniteStrings(5, -1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "great" }, new int[] { 1 });
    assertFalse(it.hasNext());
    terms = graph.getTerms("field", 5);
    assertArrayEquals(new Term[] { new Term("field", "great") }, terms);
}
Also used: CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream), Term (org.apache.lucene.index.Term)

Example 73 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestGraphTokenStreamFiniteStrings method testSingleGraph.

/**
 * Checks the finite strings, articulation points, side-path flags and single terms that
 * {@code GraphTokenStreamFiniteStrings} reports for a stream in which "wifi" is supplied
 * next to the two-token sequence "wi", "fi".
 *
 * <p>NOTE(review): the two numeric arguments of the {@code token(...)} helper are
 * presumably (position increment, position length) -- the helper is not visible here,
 * confirm before relying on that reading.
 *
 * @throws Exception if building or consuming any of the token streams fails
 */
public void testSingleGraph() throws Exception {
    TokenStream ts = new CannedTokenStream(token("fast", 1, 1), token("wi", 1, 1), token("wifi", 0, 2), token("fi", 1, 1), token("network", 1, 1));
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);

    // Whole graph: exactly two alternatives are expected, in this order.
    Iterator<TokenStream> it = graph.getFiniteStrings();
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "fast", "wi", "fi", "network" }, new int[] { 1, 1, 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "fast", "wifi", "network" }, new int[] { 1, 1, 1 });
    assertFalse(it.hasNext());

    // JUnit's assertArrayEquals takes (expecteds, actuals); keeping that order makes a
    // failure message label the two arrays correctly.
    int[] points = graph.articulationPoints();
    assertArrayEquals(new int[] { 1, 3 }, points);

    // Segment 0..1: no side path, the single term "fast".
    assertFalse(graph.hasSidePath(0));
    it = graph.getFiniteStrings(0, 1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "fast" }, new int[] { 1 });
    assertFalse(it.hasNext());
    Term[] terms = graph.getTerms("field", 0);
    assertArrayEquals(new Term[] { new Term("field", "fast") }, terms);

    // Segment 1..3: has a side path with two alternatives.
    assertTrue(graph.hasSidePath(1));
    it = graph.getFiniteStrings(1, 3);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "wi", "fi" }, new int[] { 1, 1 });
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "wifi" }, new int[] { 1 });
    assertFalse(it.hasNext());

    // Segment starting at 3: no side path, the single term "network".
    // NOTE(review): an end state of -1 presumably means "up to the end of the graph" -- confirm.
    assertFalse(graph.hasSidePath(3));
    it = graph.getFiniteStrings(3, -1);
    assertTrue(it.hasNext());
    assertTokenStream(it.next(), new String[] { "network" }, new int[] { 1 });
    assertFalse(it.hasNext());
    terms = graph.getTerms("field", 3);
    assertArrayEquals(new Term[] { new Term("field", "network") }, terms);
}
Also used: CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream), Term (org.apache.lucene.index.Term)

Example 74 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class TestGraphTokenStreamFiniteStrings method testIllegalState.

/**
 * Expects an {@code IllegalStateException} when a {@code GraphTokenStreamFiniteStrings}
 * is built over the canned tokens "a" and "b" and asked for its finite strings.
 *
 * <p>NOTE(review): the trigger is presumably the first token's leading numeric argument
 * of 0 (a position increment, if the {@code token(...)} helper follows that order) --
 * confirm against the helper and the class under test.
 *
 * @throws Exception if the expectation itself fails in an unexpected way
 */
public void testIllegalState() throws Exception {
    expectThrows(IllegalStateException.class, () -> {
        // Construction and enumeration both stay inside the lambda so that an exception
        // from either step is the one observed by expectThrows.
        new GraphTokenStreamFiniteStrings(
            new CannedTokenStream(token("a", 0, 1), token("b", 1, 1))
        ).getFiniteStrings();
    });
}
Also used: CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream)

Example 75 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testTooManyExpansions.

/**
 * Builds an {@code AnalyzingSuggester} with an analyzer whose token stream always yields
 * the same two canned tokens ("a" with position increment 1, then "b" with position
 * increment 0), indexes the single input "a" with weight 1, and checks that looking up
 * "a" returns {@code [a/1]}.
 *
 * <p>NOTE(review): judging by the test name, the {@code 1} passed near the end of the
 * suggester constructor presumably caps the number of graph expansions -- confirm against
 * the {@code AnalyzingSuggester} constructor, which is not visible here.
 *
 * @throws Exception if building the suggester, the lookup, or closing resources fails
 */
public void testTooManyExpansions() throws Exception {
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(source) {

                @Override
                public TokenStream getTokenStream() {
                    // The returned stream is built only from these two fixed tokens.
                    final Token first = new Token("a", 0, 1);
                    first.setPositionIncrement(1);
                    final Token second = new Token("b", 0, 1);
                    second.setPositionIncrement(0);
                    final Token[] canned = { first, second };
                    return new CannedTokenStream(canned);
                }

                @Override
                protected void setReader(final Reader reader) {
                    // Deliberately empty: the reader is not used by this override.
                }
            };
        }
    };

    final Directory tempDir = getDirectory();
    // The same analyzer instance is passed for both analyzer arguments.
    final AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer, analyzer, 0, 256, 1, true);
    final Input[] entries = { new Input("a", 1) };
    suggester.build(new InputArrayIterator(entries));

    final String lookedUp = suggester.lookup("a", false, 1).toString();
    assertEquals("[a/1]", lookedUp);

    IOUtils.close(analyzer, tempDir);
}
Also used: MockTokenizer (org.apache.lucene.analysis.MockTokenizer), Input (org.apache.lucene.search.suggest.Input), InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator), Reader (java.io.Reader), Token (org.apache.lucene.analysis.Token), BinaryToken (org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken), Analyzer (org.apache.lucene.analysis.Analyzer), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), Tokenizer (org.apache.lucene.analysis.Tokenizer), Directory (org.apache.lucene.store.Directory)

Aggregations

CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 78, TokenStream (org.apache.lucene.analysis.TokenStream): 43, Token (org.apache.lucene.analysis.Token): 37, Directory (org.apache.lucene.store.Directory): 33, Document (org.apache.lucene.document.Document): 26, TextField (org.apache.lucene.document.TextField): 22, Field (org.apache.lucene.document.Field): 15, MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 14, BytesRef (org.apache.lucene.util.BytesRef): 14, FieldType (org.apache.lucene.document.FieldType): 13, Term (org.apache.lucene.index.Term): 13, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 11, Tokenizer (org.apache.lucene.analysis.Tokenizer): 11, IndexReader (org.apache.lucene.index.IndexReader): 10, Analyzer (org.apache.lucene.analysis.Analyzer): 9, StringField (org.apache.lucene.document.StringField): 8, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 8, Reader (java.io.Reader): 7, StringReader (java.io.StringReader): 7, Input (org.apache.lucene.search.suggest.Input): 7