Example 26 with PositionIncrementAttribute

Use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

The class TestGraphTokenStreamFiniteStrings, method assertTokenStream.

private void assertTokenStream(TokenStream ts, String[] terms, int[] increments) throws Exception {
    // verify no nulls and arrays same length
    assertNotNull(ts);
    assertNotNull(terms);
    assertNotNull(increments);
    assertEquals(terms.length, increments.length);
    BytesTermAttribute termAtt = ts.getAttribute(BytesTermAttribute.class);
    PositionIncrementAttribute incrAtt = ts.getAttribute(PositionIncrementAttribute.class);
    int offset = 0;
    while (ts.incrementToken()) {
        // verify term and increment
        assert offset < terms.length;
        assertEquals(terms[offset], termAtt.getBytesRef().utf8ToString());
        assertEquals(increments[offset], incrAtt.getPositionIncrement());
        offset++;
    }
    // make sure we processed all items
    assertEquals(offset, terms.length);
}
Also used : BytesTermAttribute(org.apache.lucene.analysis.tokenattributes.BytesTermAttribute) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)
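
For orientation, here is a minimal, self-contained sketch of the standard TokenStream consumption contract (reset, incrementToken, end, close) that a helper like assertTokenStream relies on its caller to follow; the analyzer, field name, and input text are illustrative assumptions and are not taken from the test above.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class ConsumeTokenStreamSketch {

    public static void main(String[] args) throws Exception {
        // illustrative analyzer and input; any Analyzer producing position increments works here
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             TokenStream ts = analyzer.tokenStream("body", "a quick brown fox")) {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute incrAtt = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();
            int position = -1;
            while (ts.incrementToken()) {
                // increments greater than 1 indicate holes (e.g. removed stop words)
                position += incrAtt.getPositionIncrement();
                System.out.println(position + " -> " + termAtt.toString());
            }
            ts.end();
        }
    }
}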

Example 27 with PositionIncrementAttribute

Use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

The class SpellCheckComponent, method getTokens.

private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<>();
    assert analyzer != null;
    try (TokenStream ts = analyzer.tokenStream("", q)) {
        ts.reset();
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        while (ts.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            token.setType(typeAtt.type());
            token.setFlags(flagsAtt.getFlags());
            token.setPayload(payloadAtt.getPayload());
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
            result.add(token);
        }
        ts.end();
        return result;
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) FlagsAttribute(org.apache.lucene.analysis.tokenattributes.FlagsAttribute) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) ArrayList(java.util.ArrayList) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Token(org.apache.lucene.analysis.Token) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)
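
Downstream consumers of the returned Tokens typically reconstruct absolute positions from the copied increments. The following minimal sketch (the token values are illustrative assumptions) shows that interpretation: an increment of 0 marks a token stacked at the previous position, such as a synonym.

import java.util.Arrays;
import java.util.Collection;
import org.apache.lucene.analysis.Token;

public class TokenPositionsSketch {

    public static void main(String[] args) {
        Token quick = new Token("quick", 0, 5);
        Token fast = new Token("fast", 0, 5);
        // stacked synonym of "quick": same position, increment 0
        fast.setPositionIncrement(0);
        Token fox = new Token("fox", 6, 9);
        Collection<Token> tokens = Arrays.asList(quick, fast, fox);

        int position = -1;
        for (Token token : tokens) {
            position += token.getPositionIncrement();
            System.out.println("pos=" + position + " term=" + token
                + " offsets=[" + token.startOffset() + "," + token.endOffset() + "]");
        }
    }
}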

Example 28 with PositionIncrementAttribute

Use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

The class SimplePreAnalyzedParser, method toFormattedString.

@Override
public String toFormattedString(Field f) throws IOException {
    StringBuilder sb = new StringBuilder();
    sb.append(VERSION + " ");
    if (f.fieldType().stored()) {
        String s = f.stringValue();
        if (s != null) {
            // encode the equals sign
            s = s.replaceAll("=", "\\\\=");
            sb.append('=');
            sb.append(s);
            sb.append('=');
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        StringBuilder tok = new StringBuilder();
        boolean next = false;
        while (ts.incrementToken()) {
            if (next) {
                sb.append(' ');
            } else {
                next = true;
            }
            tok.setLength(0);
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null;
            String tTerm = null;
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                Attribute att = ts.getAttribute(cl);
                if (att == null) {
                    continue;
                }
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = escape(catt.buffer(), catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray();
                    tTerm = escape(tTermChars, tTermChars.length);
                } else {
                    if (tok.length() > 0)
                        tok.append(',');
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.append("f=" + Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.append("s=" + ((OffsetAttribute) att).startOffset() + ",e=" + ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.append("p=" + bytesToHex(p.bytes, p.offset, p.length));
                        } else if (tok.length() > 0) {
                            // remove the last comma
                            tok.setLength(tok.length() - 1);
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.append("i=" + ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.append("y=" + escape(((TypeAttribute) att).type()));
                    } else {
                        tok.append(cl.getName() + "=" + escape(att.toString()));
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                if (tok.length() > 0) {
                    tok.insert(0, term + ",");
                } else {
                    tok.insert(0, term);
                }
            }
            sb.append(tok);
        }
    }
    return sb.toString();
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) FlagsAttribute(org.apache.lucene.analysis.tokenattributes.FlagsAttribute) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Attribute(org.apache.lucene.util.Attribute) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) BytesRef(org.apache.lucene.util.BytesRef)
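
The payload branch above calls a bytesToHex helper that is not reproduced in this snippet. A hypothetical sketch of an implementation compatible with the call site bytesToHex(p.bytes, p.offset, p.length) could look like this; the actual SimplePreAnalyzedParser ships its own version, which is not shown here.

public class BytesToHexSketch {

    // hypothetical helper matching the call site bytesToHex(p.bytes, p.offset, p.length)
    static String bytesToHex(byte[] bytes, int offset, int length) {
        StringBuilder sb = new StringBuilder(length * 2);
        for (int i = offset; i < offset + length; i++) {
            sb.append(String.format("%02x", bytes[i] & 0xff));
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        byte[] payload = { 0x01, (byte) 0xab, 0x7f };
        System.out.println(bytesToHex(payload, 0, payload.length)); // prints 01ab7f
    }
}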

Example 29 with PositionIncrementAttribute

Use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-skos by behas.

The class SKOSQueryNodeProcessor, method postProcessNode.

@Override
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node.getParent() instanceof RangeQueryNode)) {
        FieldQueryNode fieldNode = ((FieldQueryNode) node);
        String text = fieldNode.getTextAsString();
        String field = fieldNode.getFieldAsString();
        CachingTokenFilter buffer = null;
        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;
        try {
            try (TokenStream source = this.analyzer.tokenStream(field, text)) {
                buffer = new CachingTokenFilter(source);
                buffer.reset();
                if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
                    posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
                }
                try {
                    while (buffer.incrementToken()) {
                        numTokens++;
                        int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                        if (positionIncrement != 0) {
                            positionCount += positionIncrement;
                        } else {
                            severalTokensAtSamePosition = true;
                        }
                    }
                } catch (IOException e) {
                // ignore
                }
                // rewind the buffer stream
                // will never throw on subsequent reset calls
                buffer.reset();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            if (!buffer.hasAttribute(CharTermAttribute.class)) {
                return new NoTokenFoundQueryNode();
            }
            CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
            if (numTokens == 0) {
                return new NoTokenFoundQueryNode();
            } else if (numTokens == 1) {
                String term = null;
                try {
                    boolean hasNext;
                    hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
                }
                fieldNode.setText(term);
                return fieldNode;
            } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
                if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
                    if (positionCount == 1) {
                        // simple case: only one position, with synonyms
                        LinkedList<QueryNode> children = new LinkedList<>();
                        for (int i = 0; i < numTokens; i++) {
                            String term = null;
                            try {
                                boolean hasNext = buffer.incrementToken();
                                assert hasNext == true;
                                term = termAtt.toString();
                            } catch (IOException e) {
                            // safe to ignore, because we know the number of tokens
                            }
                            if (buffer.hasAttribute(SKOSTypeAttribute.class) && boosts != null) {
                                SKOSTypeAttribute skosAttr = buffer.getAttribute(SKOSTypeAttribute.class);
                                children.add(new BoostQueryNode(new FieldQueryNode(field, term, -1, -1), getBoost(skosAttr.getSkosType())));
                            } else {
                                children.add(new FieldQueryNode(field, term, -1, -1));
                            }
                        }
                        return new GroupQueryNode(new StandardBooleanQueryNode(children, positionCount == 1));
                    } else {
                        // multiple positions
                        QueryNode q = new StandardBooleanQueryNode(Collections.<QueryNode>emptyList(), false);
                        QueryNode currentQuery = null;
                        for (int i = 0; i < numTokens; i++) {
                            String term = null;
                            try {
                                boolean hasNext = buffer.incrementToken();
                                assert hasNext == true;
                                term = termAtt.toString();
                            } catch (IOException e) {
                            // safe to ignore, because we know the number of tokens
                            }
                            if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) {
                                if (!(currentQuery instanceof BooleanQueryNode)) {
                                    QueryNode t = currentQuery;
                                    currentQuery = new StandardBooleanQueryNode(Collections.<QueryNode>emptyList(), true);
                                    ((BooleanQueryNode) currentQuery).add(t);
                                }
                                ((BooleanQueryNode) currentQuery).add(new FieldQueryNode(field, term, -1, -1));
                            } else {
                                if (currentQuery != null) {
                                    if (this.defaultOperator == Operator.OR) {
                                        q.add(currentQuery);
                                    } else {
                                        q.add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                                    }
                                }
                                currentQuery = new FieldQueryNode(field, term, -1, -1);
                            }
                        }
                        if (this.defaultOperator == Operator.OR) {
                            q.add(currentQuery);
                        } else {
                            q.add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                        }
                        if (q instanceof BooleanQueryNode) {
                            q = new GroupQueryNode(q);
                        }
                        return q;
                    }
                } else {
                    // phrase query:
                    MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
                    List<FieldQueryNode> multiTerms = new ArrayList<>();
                    int position = -1;
                    int i = 0;
                    int termGroupCount = 0;
                    for (; i < numTokens; i++) {
                        String term = null;
                        int positionIncrement = 1;
                        try {
                            boolean hasNext = buffer.incrementToken();
                            assert hasNext == true;
                            term = termAtt.toString();
                            if (posIncrAtt != null) {
                                positionIncrement = posIncrAtt.getPositionIncrement();
                            }
                        } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                        }
                        if (positionIncrement > 0 && multiTerms.size() > 0) {
                            for (FieldQueryNode termNode : multiTerms) {
                                if (this.positionIncrementsEnabled) {
                                    termNode.setPositionIncrement(position);
                                } else {
                                    termNode.setPositionIncrement(termGroupCount);
                                }
                                mpq.add(termNode);
                            }
                            // Only increment once for each "group" of
                            // terms that were in the same position:
                            termGroupCount++;
                            multiTerms.clear();
                        }
                        position += positionIncrement;
                        multiTerms.add(new FieldQueryNode(field, term, -1, -1));
                    }
                    for (FieldQueryNode termNode : multiTerms) {
                        if (this.positionIncrementsEnabled) {
                            termNode.setPositionIncrement(position);
                        } else {
                            termNode.setPositionIncrement(termGroupCount);
                        }
                        mpq.add(termNode);
                    }
                    return mpq;
                }
            } else {
                TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                    }
                    FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                    if (this.positionIncrementsEnabled) {
                        position += positionIncrement;
                        newFieldNode.setPositionIncrement(position);
                    } else {
                        newFieldNode.setPositionIncrement(i);
                    }
                    pq.add(newFieldNode);
                }
                return pq;
            }
        } finally {
            if (buffer != null) {
                try {
                    buffer.close();
                } catch (IOException e) {
                // safe to ignore
                }
            }
        }
    }
    return node;
}
Also used : FuzzyQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode) TokenStream(org.apache.lucene.analysis.TokenStream) SKOSTypeAttribute(at.ac.univie.mminf.luceneSKOS.analysis.SKOSTypeAttribute) QuotedFieldQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode) ArrayList(java.util.ArrayList) GroupQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode) WildcardQueryNode(org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode) FieldQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode) NoTokenFoundQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode) BoostQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode) RegexpQueryNode(org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode) IOException(java.io.IOException) LinkedList(java.util.LinkedList) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) StandardBooleanQueryNode(org.apache.lucene.queryparser.flexible.standard.nodes.StandardBooleanQueryNode) TokenizedPhraseQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode) RangeQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.RangeQueryNode) ModifierQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode) MultiPhraseQueryNode(org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) BooleanQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode) QueryNode(org.apache.lucene.queryparser.flexible.core.nodes.QueryNode) TextableQueryNode(org.apache.lucene.queryparser.flexible.core.nodes.TextableQueryNode)
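
To see the numTokens / positionCount / severalTokensAtSamePosition bookkeeping in isolation, the following minimal sketch uses the Lucene test-framework's CannedTokenStream (it also appears in the aggregations below) with illustrative token values: a zero increment marks several tokens stacked at one position, and the CachingTokenFilter is rewound for a second pass just as the processor does.

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class PositionCountSketch {

    public static void main(String[] args) throws Exception {
        Token fast = new Token("fast", 0, 4);
        Token quick = new Token("quick", 0, 4);
        // stacked on the same position as "fast"
        quick.setPositionIncrement(0);
        Token fox = new Token("fox", 5, 8);

        try (TokenStream source = new CannedTokenStream(fast, quick, fox);
             CachingTokenFilter buffer = new CachingTokenFilter(source)) {
            PositionIncrementAttribute posIncrAtt = buffer.addAttribute(PositionIncrementAttribute.class);
            CharTermAttribute termAtt = buffer.addAttribute(CharTermAttribute.class);

            buffer.reset();
            int numTokens = 0;
            int positionCount = 0;
            boolean severalTokensAtSamePosition = false;
            while (buffer.incrementToken()) {
                numTokens++;
                int increment = posIncrAtt.getPositionIncrement();
                if (increment != 0) {
                    positionCount += increment;
                } else {
                    severalTokensAtSamePosition = true;
                }
            }

            // rewind the cached stream and read the terms a second time, as the processor does
            buffer.reset();
            while (buffer.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            // expected: numTokens == 3, positionCount == 2, severalTokensAtSamePosition == true
            System.out.println(numTokens + " tokens, " + positionCount
                + " positions, stacked=" + severalTokensAtSamePosition);
        }
    }
}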

Example 30 with PositionIncrementAttribute

Use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project elasticsearch by elastic.

The class SimpleQueryParser, method newPossiblyAnalyzedQuery.

/**
 * Analyze the given string using its analyzer, constructing either a
 * {@code PrefixQuery} or a {@code BooleanQuery} made up
 * of {@code TermQuery}s and {@code PrefixQuery}s.
 */
private Query newPossiblyAnalyzedQuery(String field, String termStr) {
    List<List<BytesRef>> tlist = new ArrayList<>();
    // get Analyzer from superclass and tokenize the term
    try (TokenStream source = getAnalyzer().tokenStream(field, termStr)) {
        source.reset();
        List<BytesRef> currentPos = new ArrayList<>();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);
        try {
            boolean hasMoreTokens = source.incrementToken();
            while (hasMoreTokens) {
                if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
                    tlist.add(currentPos);
                    currentPos = new ArrayList<>();
                }
                final BytesRef term = getAnalyzer().normalize(field, termAtt.toString());
                currentPos.add(term);
                hasMoreTokens = source.incrementToken();
            }
            if (currentPos.isEmpty() == false) {
                tlist.add(currentPos);
            }
        } catch (IOException e) {
        // ignore
        // TODO: we should not ignore the exception and return a prefix query with the original term ?
        }
    } catch (IOException e) {
        // Bail on any exceptions, going with a regular prefix query
        return new PrefixQuery(new Term(field, termStr));
    }
    if (tlist.size() == 0) {
        return null;
    }
    if (tlist.size() == 1 && tlist.get(0).size() == 1) {
        return new PrefixQuery(new Term(field, tlist.get(0).get(0)));
    }
    // build a boolean query with prefix on the last position only.
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (int pos = 0; pos < tlist.size(); pos++) {
        List<BytesRef> plist = tlist.get(pos);
        boolean isLastPos = (pos == tlist.size() - 1);
        Query posQuery;
        if (plist.size() == 1) {
            if (isLastPos) {
                posQuery = new PrefixQuery(new Term(field, plist.get(0)));
            } else {
                posQuery = newTermQuery(new Term(field, plist.get(0)));
            }
        } else if (isLastPos == false) {
            // build a synonym query for terms in the same position.
            Term[] terms = new Term[plist.size()];
            for (int i = 0; i < plist.size(); i++) {
                terms[i] = new Term(field, plist.get(i));
            }
            posQuery = new SynonymQuery(terms);
        } else {
            BooleanQuery.Builder innerBuilder = new BooleanQuery.Builder();
            for (BytesRef token : plist) {
                innerBuilder.add(new BooleanClause(new PrefixQuery(new Term(field, token)), BooleanClause.Occur.SHOULD));
            }
            posQuery = innerBuilder.setDisableCoord(true).build();
        }
        builder.add(new BooleanClause(posQuery, getDefaultOperator()));
    }
    return builder.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) BooleanClause(org.apache.lucene.search.BooleanClause) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) List(java.util.List) BytesRef(org.apache.lucene.util.BytesRef)
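
For a concrete picture of the query shape this method builds, here is an illustrative sketch (not the Elasticsearch code; the field name, terms, and SHOULD occurs are assumptions reflecting the default OR operator) of the boolean-plus-prefix structure produced for a two-position input where only the last position is treated as a prefix.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class PrefixShapeSketch {

    public static void main(String[] args) {
        // analyzed positions for an input like "quick bro": [["quick"], ["bro"]]
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        // non-final position: exact term query
        builder.add(new BooleanClause(new TermQuery(new Term("body", "quick")), BooleanClause.Occur.SHOULD));
        // final position: prefix query
        builder.add(new BooleanClause(new PrefixQuery(new Term("body", "bro")), BooleanClause.Occur.SHOULD));
        Query q = builder.build();
        System.out.println(q); // prints something like: body:quick body:bro*
    }
}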

Aggregations

PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 50
CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 33
TokenStream (org.apache.lucene.analysis.TokenStream): 28
OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 25
IOException (java.io.IOException): 14
ArrayList (java.util.ArrayList): 14
BytesRef (org.apache.lucene.util.BytesRef): 14
PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute): 11
TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute): 11
TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute): 11
StringReader (java.io.StringReader): 8
Term (org.apache.lucene.index.Term): 8
Token (org.apache.lucene.analysis.Token): 7
FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute): 7
PositionLengthAttribute (org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute): 7
List (java.util.List): 6
LinkedList (java.util.LinkedList): 4
CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 4
Document (org.apache.lucene.document.Document): 4
Iterator (java.util.Iterator): 3