
Example 71 with Token

Use of org.apache.lucene.analysis.Token in project lucene-solr by Apache.

From class ShingleFilterTest, method testTrailingHole1:

public void testTrailingHole1() throws IOException {
    // Analyzing "wizard of", where of is removed as a
    // stopword leaving a trailing hole:
    Token[] inputTokens = new Token[] { createToken("wizard", 0, 6) };
    // CannedTokenStream(finalPosInc=1, finalOffset=9, tokens) models the hole
    // left by the removed stopword; shingle size is fixed at 2.
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 9, inputTokens), 2, 2);
    assertTokenStreamContents(filter,
        new String[] { "wizard", "wizard _" },  // expected terms ("_" is the filler token)
        new int[] { 0, 0 }, new int[] { 6, 9 }, // start/end offsets
        new int[] { 1, 0 },                     // position increments
        9);                                     // final offset
}
Also used: Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)
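
This test (and the SynonymTokenizer examples below) relies on a createToken helper that this page does not show. A plausible sketch, assuming it simply wraps the Token(CharSequence, startOffset, endOffset) constructor and keeps the default position increment of 1; the class and method names here are illustrative, not the actual test helper:

import org.apache.lucene.analysis.Token;

// Hypothetical helper: the real test classes define their own variants,
// but something along these lines reproduces the calls shown above.
final class TokenFactory {
    static Token createToken(String term, int startOffset, int endOffset) {
        // Token(CharSequence, int, int) sets the term text and both offsets;
        // the position increment defaults to 1.
        return new Token(term, startOffset, endOffset);
    }
}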

Example 72 with Token

Use of org.apache.lucene.analysis.Token in project lucene-solr by Apache.

From class SpellCheckComponent, method addOriginalTermsToResponse:

private void addOriginalTermsToResponse(NamedList response, Collection<Token> originalTerms) {
    List<String> originalTermStr = new ArrayList<String>();
    for (Token t : originalTerms) {
        originalTermStr.add(t.toString());
    }
    response.add("originalTerms", originalTermStr);
}
Also used: ArrayList(java.util.ArrayList) Token(org.apache.lucene.analysis.Token)
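
For context, a minimal sketch of how a consumer might read that "originalTerms" entry back out of the response. The key name matches the code above; the class name, method name, and the assumption that the response is handled as a NamedList<Object> are illustrative:

import java.util.Collections;
import java.util.List;
import org.apache.solr.common.util.NamedList;

final class OriginalTermsReader {
    // Pulls the "originalTerms" list written by addOriginalTermsToResponse.
    @SuppressWarnings("unchecked")
    static List<String> readOriginalTerms(NamedList<Object> response) {
        Object value = response.get("originalTerms");
        return value instanceof List ? (List<String>) value : Collections.emptyList();
    }
}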

Example 73 with Token

Use of org.apache.lucene.analysis.Token in project lucene-solr by Apache.

From class SynonymTokenizer, method getTS2:

protected TokenStream getTS2() {
    // String s = "Hi-Speed10 foo";
    return new TokenStream() {

        Iterator<Token> iter;
        List<Token> lst;

        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

        // Instance initializer: builds the token list for "Hi-Speed10 foo",
        // stacking "speed" (posInc=0) on the same position as "hispeed".
        {
            lst = new ArrayList<>();
            Token t;
            t = createToken("hi", 0, 2);
            t.setPositionIncrement(1);
            lst.add(t);
            t = createToken("hispeed", 0, 8);
            t.setPositionIncrement(1);
            lst.add(t);
            t = createToken("speed", 3, 8);
            t.setPositionIncrement(0);
            lst.add(t);
            t = createToken("10", 8, 10);
            t.setPositionIncrement(1);
            lst.add(t);
            t = createToken("foo", 11, 14);
            t.setPositionIncrement(1);
            lst.add(t);
            iter = lst.iterator();
        }

        @Override
        public boolean incrementToken() {
            if (iter.hasNext()) {
                Token token = iter.next();
                clearAttributes();
                termAtt.setEmpty().append(token);
                posIncrAtt.setPositionIncrement(token.getPositionIncrement());
                offsetAtt.setOffset(token.startOffset(), token.endOffset());
                return true;
            }
            return false;
        }

        @Override
        public void reset() throws IOException {
            super.reset();
            iter = lst.iterator();
        }
    };
}
Also used: CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) Iterator(java.util.Iterator) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) List(java.util.List) ArrayList(java.util.ArrayList) NodeList(org.w3c.dom.NodeList) Token(org.apache.lucene.analysis.Token) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)
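
Since CannedTokenStream appears in the imports above, the same token graph can be expressed more compactly with it. This is a sketch, not the original test code; it keeps "speed" stacked on "hispeed" by giving it a position increment of 0:

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

final class CannedTS2 {
    static TokenStream ts2() {
        Token hi = new Token("hi", 0, 2);           // position increment defaults to 1
        Token hispeed = new Token("hispeed", 0, 8);
        Token speed = new Token("speed", 3, 8);
        speed.setPositionIncrement(0);              // same position as "hispeed"
        Token ten = new Token("10", 8, 10);
        Token foo = new Token("foo", 11, 14);
        return new CannedTokenStream(hi, hispeed, speed, ten, foo);
    }
}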

Example 74 with Token

Use of org.apache.lucene.analysis.Token in project lucene-solr by Apache.

From class SynonymTokenizer, method getTS2a:

// same token-stream as above, but the bigger token comes first this time
protected TokenStream getTS2a() {
    // String s = "Hi-Speed10 foo";
    return new TokenStream() {

        Iterator<Token> iter;
        List<Token> lst;

        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

        // Instance initializer: same tokens as getTS2(), but the longer token
        // "hispeed" comes first and "hi" (posInc=0) is stacked on it.
        {
            lst = new ArrayList<>();
            Token t;
            t = createToken("hispeed", 0, 8);
            t.setPositionIncrement(1);
            lst.add(t);
            t = createToken("hi", 0, 2);
            t.setPositionIncrement(0);
            lst.add(t);
            t = createToken("speed", 3, 8);
            t.setPositionIncrement(1);
            lst.add(t);
            t = createToken("10", 8, 10);
            t.setPositionIncrement(1);
            lst.add(t);
            t = createToken("foo", 11, 14);
            t.setPositionIncrement(1);
            lst.add(t);
            iter = lst.iterator();
        }

        @Override
        public boolean incrementToken() {
            if (iter.hasNext()) {
                Token token = iter.next();
                clearAttributes();
                termAtt.setEmpty().append(token);
                posIncrAtt.setPositionIncrement(token.getPositionIncrement());
                offsetAtt.setOffset(token.startOffset(), token.endOffset());
                return true;
            }
            return false;
        }

        @Override
        public void reset() throws IOException {
            super.reset();
            iter = lst.iterator();
        }
    };
}
Also used: CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) Iterator(java.util.Iterator) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) List(java.util.List) ArrayList(java.util.ArrayList) NodeList(org.w3c.dom.NodeList) Token(org.apache.lucene.analysis.Token) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)
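
Both getTS2() and getTS2a() follow the standard TokenStream consumer contract: reset(), then incrementToken() until it returns false, then end() and close(). A small sketch that dumps any such stream; the class and method names are illustrative:

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

final class TokenStreamDump {
    static void dump(TokenStream ts) throws IOException {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.printf("%s posInc=%d offsets=[%d,%d]%n",
                term.toString(), posInc.getPositionIncrement(),
                offset.startOffset(), offset.endOffset());
        }
        ts.end();
        ts.close();
    }
}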

Example 75 with Token

Use of org.apache.lucene.analysis.Token in project lucene-solr by Apache.

From class SynonymTokenizer, method testNotRewriteMultiTermQuery:

public void testNotRewriteMultiTermQuery() throws IOException {
    // field "bar": (not the field we ultimately want to extract)
    MultiTermQuery mtq = new TermRangeQuery("bar", new BytesRef("aa"), new BytesRef("zz"), true, true);
    WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor() {

        @Override
        protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
            assertEquals(mtq, query);
            super.extract(query, boost, terms);
        }
    };
    extractor.setExpandMultiTermQuery(true);
    extractor.setMaxDocCharsToAnalyze(51200);
    // extract against field "foo" (not the query's field "bar")
    extractor.getWeightedSpanTerms(mtq, 3, new CannedTokenStream(new Token("aa", 0, 2), new Token("bb", 2, 4)), "foo");
}
Also used: MultiTermQuery(org.apache.lucene.search.MultiTermQuery) Query(org.apache.lucene.search.Query) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Map(java.util.Map) HashMap(java.util.HashMap) BytesRef(org.apache.lucene.util.BytesRef)
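
The four-argument getWeightedSpanTerms overload used above returns the same Map<String, WeightedSpanTerm> that the overridden extract callback populates. A sketch of inspecting that map outside the test; the class and method names are illustrative, and no particular map contents are assumed:

import java.util.Map;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
import org.apache.lucene.util.BytesRef;

final class SpanTermInspection {
    static void inspect() throws java.io.IOException {
        MultiTermQuery mtq = new TermRangeQuery("bar", new BytesRef("aa"), new BytesRef("zz"), true, true);
        WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor();
        extractor.setExpandMultiTermQuery(true);
        Map<String, WeightedSpanTerm> terms = extractor.getWeightedSpanTerms(
            mtq, 3, new CannedTokenStream(new Token("aa", 0, 2), new Token("bb", 2, 4)), "foo");
        for (Map.Entry<String, WeightedSpanTerm> e : terms.entrySet()) {
            // WeightedSpanTerm extends WeightedTerm, which carries the weight.
            System.out.println(e.getKey() + " -> " + e.getValue().getWeight());
        }
    }
}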

Aggregations

Token (org.apache.lucene.analysis.Token): 100
CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 39
TokenStream (org.apache.lucene.analysis.TokenStream): 31
Directory (org.apache.lucene.store.Directory): 24
Test (org.junit.Test): 23
Document (org.apache.lucene.document.Document): 19
TextField (org.apache.lucene.document.TextField): 19
BytesRef (org.apache.lucene.util.BytesRef): 16
NamedList (org.apache.solr.common.util.NamedList): 16
StringReader (java.io.StringReader): 15
CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 15
Analyzer (org.apache.lucene.analysis.Analyzer): 14
ArrayList (java.util.ArrayList): 13
Map (java.util.Map): 13
Field (org.apache.lucene.document.Field): 13
FieldType (org.apache.lucene.document.FieldType): 11
IndexReader (org.apache.lucene.index.IndexReader): 11
MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 10
Tokenizer (org.apache.lucene.analysis.Tokenizer): 9
Date (java.util.Date): 8