Search in sources :

Example 21 with TypeAttribute

use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project SearchServices by Alfresco.

the class PathTokenFilterTest method tokenise.

/**
 * Consumes {@code ts} and compares the path-element tokens it emits with {@code tokens}.
 *
 * <p>Only tokens typed as namespace, namespace prefix or element name are compared; tokens
 * of any other type are skipped. Namespace and namespace-prefix tokens are expected at even
 * positions of {@code tokens}, name tokens at odd positions.
 *
 * @param ts the stream to check; it is reset, read to exhaustion, ended and always closed here
 * @param tokens the expected term texts, in the order the stream should emit them
 * @throws IOException if the stream fails while being read
 */
private void tokenise(TokenStream ts, String[] tokens) throws IOException {
    int i = 0;
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    try {
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println("token: " + ts.reflectAsString(true));
            String termText = termAtt.toString();
            String type = typeAtt.type();
            boolean namespaceLike = type.equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE)
                    || type.equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX);
            boolean elementName = !namespaceLike
                    && type.equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAME);
            if (!namespaceLike && !elementName) {
                // Not a path element token: nothing to compare.
                continue;
            }
            assert (i % 2 == (namespaceLike ? 0 : 1));
            if (i >= tokens.length) {
                // Report surplus tokens as a test failure rather than an index error.
                fail("Unexpected extra token '" + termText + "', expected only " + tokens.length + " tokens");
            }
            // JUnit convention: expected value first, actual value second.
            assertEquals(tokens[i++], termText);
        }
        ts.end();
    } finally {
        ts.close();
    }
    if (i != tokens.length) {
        fail("Invalid number of tokens, found " + i + " and expected " + tokens.length);
    }
}
Also used : CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute)

Example 22 with TypeAttribute

use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project SearchServices by Alfresco.

the class PathTokenFilterTest method testAttributesAfterStreamEnd.

/**
 * Checks the attribute values left on a {@code PathTokenFilter} after the stream has been
 * consumed to its end: empty term, default type, zero position increment, and both
 * offsets equal to the length of the input.
 */
public void testAttributesAfterStreamEnd() throws IOException {
    final String path = "uri1:one";
    final int endOfInput = path.length();

    PathTokenFilter filter = new PathTokenFilter(PathTokenFilter.PATH_SEPARATOR, PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT, PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);
    filter.setReader(new StringReader(path));

    CharTermAttribute term = filter.addAttribute(CharTermAttribute.class);
    TypeAttribute type = filter.addAttribute(TypeAttribute.class);
    OffsetAttribute offsets = filter.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute increment = filter.addAttribute(PositionIncrementAttribute.class);

    // tokenise() drains the stream, so PathTokenFilter.end() has run by the time it returns.
    String[] expectedTokens = { "uri1", "one" };
    tokenise(filter, expectedTokens);

    // Term, type and position increment must be back at their cleared values
    // ("word" being the default token type).
    assertEquals("", term.toString());
    assertEquals("word", type.type());
    assertEquals(0, increment.getPositionIncrement());

    // Both offsets must point at the end of the input.
    assertEquals(endOfInput, offsets.startOffset());
    assertEquals(endOfInput, offsets.endOffset());
}
Also used : CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 23 with TypeAttribute

use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project SearchServices by Alfresco.

the class Solr4QueryParser method getToken.

/**
 * Runs {@code value} through the analyzer for {@code field} and returns the string form of
 * the last token produced.
 *
 * <p>Each token is copied into a {@link PackedTokenAttributeImpl} (term, offsets and, when
 * the stream provides them, type and position increment); the result is that copy's
 * {@code toString()} for the final token.
 *
 * @param field the field name passed to the analyzer
 * @param value the text to analyse
 * @param analysisMode not read by this method body
 * @return the string form of the last token, or {@code null} if no token is produced
 * @throws ParseException if reading the token stream fails with an {@link IOException};
 *         the original exception is attached as the cause
 */
protected String getToken(String field, String value, AnalysisMode analysisMode) throws ParseException {
    try (TokenStream source = getAnalyzer().tokenStream(field, new StringReader(value))) {
        // The TokenStream contract requires reset() before the first incrementToken().
        source.reset();
        String tokenised = null;
        while (source.incrementToken()) {
            CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = null;
            if (source.hasAttribute(TypeAttribute.class)) {
                typeAtt = source.getAttribute(TypeAttribute.class);
            }
            PositionIncrementAttribute posIncAtt = null;
            if (source.hasAttribute(PositionIncrementAttribute.class)) {
                posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
            }
            PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.setEmpty().copyBuffer(cta.buffer(), 0, cta.length());
            token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            if (typeAtt != null) {
                token.setType(typeAtt.type());
            }
            if (posIncAtt != null) {
                token.setPositionIncrement(posIncAtt.getPositionIncrement());
            }
            // Overwritten on every iteration: only the last token is returned.
            tokenised = token.toString();
        }
        // Complete the consumer workflow; close() is handled by try-with-resources.
        source.end();
        return tokenised;
    } catch (IOException e) {
        // ParseException has no cause-taking constructor, so attach the cause explicitly.
        ParseException failure = new ParseException("IO" + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}
Also used : PackedTokenAttributeImpl(org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) IOException(java.io.IOException) ParseException(org.apache.lucene.queryparser.classic.ParseException) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 24 with TypeAttribute

use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project SearchServices by Alfresco.

the class Solr4QueryParser method getFirstTokenForRange.

/**
 * Analyses {@code string} with the analyzer for {@code field} and returns the string form
 * of the first token only.
 *
 * <p>The first token is copied into a {@link PackedTokenAttributeImpl} (term, offsets and,
 * when the stream provides them, type and position increment) and that copy's
 * {@code toString()} is returned. The stream is always closed; an {@link IOException}
 * raised by {@code close()} is ignored.
 *
 * @param string the text to analyse
 * @param field supplies the field name passed to the analyzer
 * @return the string form of the first token, or {@code null} if the stream yields none
 * @throws IOException if resetting or reading the stream fails
 */
private String getFirstTokenForRange(String string, FieldInstance field) throws IOException {
    TokenStream stream = null;
    try {
        stream = getAnalyzer().tokenStream(field.getField(), new StringReader(string));
        stream.reset();
        if (!stream.incrementToken()) {
            // No tokens at all.
            return null;
        }

        CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
        OffsetAttribute offsets = stream.getAttribute(OffsetAttribute.class);
        // Type and position increment are optional on the stream.
        TypeAttribute type = stream.hasAttribute(TypeAttribute.class)
                ? stream.getAttribute(TypeAttribute.class)
                : null;
        PositionIncrementAttribute increment = stream.hasAttribute(PositionIncrementAttribute.class)
                ? stream.getAttribute(PositionIncrementAttribute.class)
                : null;

        PackedTokenAttributeImpl first = new PackedTokenAttributeImpl();
        first.setEmpty().copyBuffer(term.buffer(), 0, term.length());
        first.setOffset(offsets.startOffset(), offsets.endOffset());
        if (type != null) {
            first.setType(type.type());
        }
        if (increment != null) {
            first.setPositionIncrement(increment.getPositionIncrement());
        }
        return first.toString();
    } finally {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // ignore
            }
        }
    }
}
Also used : PackedTokenAttributeImpl(org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) IOException(java.io.IOException) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 25 with TypeAttribute

use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project jackrabbit by apache.

the class JackrabbitQueryParser method getPrefixQuery.

/**
 * {@inheritDoc}
 * <p>
 * The term is first run through the analyzer. If it yields more than one token and the
 * last token has the {@code StandardTokenizer} CJ type, the term is handed to
 * {@code getFieldQuery}; otherwise a wildcard query for {@code termStr + "*"} is built.
 *
 * @throws ParseException if analysing the term fails with an {@link IOException} (the
 *         original exception is attached as the cause), or if the delegate query
 *         construction fails
 */
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
    // only create a prefix query when the term is a single word / token
    Analyzer a = getAnalyzer();
    TokenStream ts = a.tokenStream(field, new StringReader(termStr));
    int count = 0;
    boolean isCJ = false;
    try {
        TypeAttribute t = ts.addAttribute(TypeAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            count++;
            // Overwritten for every token, so this ends up describing the last token only.
            isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type());
        }
        ts.end();
    } catch (IOException e) {
        // ParseException has no cause-taking constructor, so attach the cause explicitly.
        ParseException failure = new ParseException(e.getMessage());
        failure.initCause(e);
        throw failure;
    } finally {
        try {
            ts.close();
        } catch (IOException e) {
            // ignore: a failure to close must not mask the analysis outcome
        }
    }
    if (count > 1 && isCJ) {
        return getFieldQuery(field, termStr);
    } else {
        return getWildcardQuery(field, termStr + "*");
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) StringReader(java.io.StringReader) IOException(java.io.IOException) ParseException(org.apache.lucene.queryParser.ParseException) Analyzer(org.apache.lucene.analysis.Analyzer)

Aggregations

TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute): 25, CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 21, OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 17, PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 17, TokenStream (org.apache.lucene.analysis.TokenStream): 12, IOException (java.io.IOException): 10, PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute): 10, StringReader (java.io.StringReader): 8, FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute): 7, BytesRef (org.apache.lucene.util.BytesRef): 6, ArrayList (java.util.ArrayList): 5, PackedTokenAttributeImpl (org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl): 5, TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute): 4, LinkedList (java.util.LinkedList): 3, Token (org.apache.lucene.analysis.Token): 3, LinkedHashMap (java.util.LinkedHashMap): 2, Map (java.util.Map): 2, TreeMap (java.util.TreeMap): 2, Term (org.apache.lucene.index.Term): 2, SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 2