Search in sources :

Example 16 with TypeAttribute

use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project lucene-solr by apache.

the class SimplePreAnalyzedParser method createState.

/**
 * Populates {@code a} with the attributes described by a parsed token and returns a
 * captured snapshot of them.
 *
 * <p>The term text is taken from {@code state.token}. Each entry of {@code state.attr} is
 * interpreted by its key:
 * <ul>
 *   <li>{@code "i"} - position increment (decimal integer)</li>
 *   <li>{@code "s"} - start offset (decimal integer)</li>
 *   <li>{@code "e"} - end offset (decimal integer)</li>
 *   <li>{@code "y"} - token type</li>
 *   <li>{@code "f"} - flags (hexadecimal integer)</li>
 *   <li>{@code "p"} - payload, decoded with {@code hexToBytes}; only set when the decoded
 *       array is non-null and non-empty</li>
 * </ul>
 * Entries with any other key are ignored.
 *
 * @param a        attribute source to fill; its attributes are cleared on entry and again
 *                 before returning
 * @param state    parsed token carrying the term text and the key/value attribute map
 * @param tokenEnd end offset used when no {@code "e"} entry is present; the default start
 *                 offset is this value minus the token length
 * @return the captured state holding the attributes set by this method
 * @throws NumberFormatException if a numeric attribute value cannot be parsed
 */
private static AttributeSource.State createState(AttributeSource a, Tok state, int tokenEnd) {
    a.clearAttributes();

    // Term text.
    CharTermAttribute term = a.addAttribute(CharTermAttribute.class);
    char[] termChars = state.token.toString().toCharArray();
    term.copyBuffer(termChars, 0, termChars.length);

    // Offsets default to the span implied by the caller-supplied end and the token length;
    // explicit "s"/"e" entries below override them independently.
    int startOffset = tokenEnd - state.token.length();
    int endOffset = tokenEnd;

    for (Entry<String, String> entry : state.attr.entrySet()) {
        switch (entry.getKey()) {
            case "i": {
                // position increment
                int increment = Integer.parseInt(entry.getValue());
                a.addAttribute(PositionIncrementAttribute.class).setPositionIncrement(increment);
                break;
            }
            case "s":
                startOffset = Integer.parseInt(entry.getValue());
                break;
            case "e":
                endOffset = Integer.parseInt(entry.getValue());
                break;
            case "y":
                a.addAttribute(TypeAttribute.class).setType(entry.getValue());
                break;
            case "f": {
                // flags are encoded as a hexadecimal number
                FlagsAttribute flagsAtt = a.addAttribute(FlagsAttribute.class);
                flagsAtt.setFlags(Integer.parseInt(entry.getValue(), 16));
                break;
            }
            case "p": {
                PayloadAttribute payloadAtt = a.addAttribute(PayloadAttribute.class);
                byte[] bytes = hexToBytes(entry.getValue());
                // leave the payload unset when nothing was decoded
                if (bytes != null && bytes.length > 0) {
                    payloadAtt.setPayload(new BytesRef(bytes));
                }
                break;
            }
            default:
                // unknown attribute
                break;
        }
    }

    // The offset attribute is always present, whether or not "s"/"e" were supplied.
    a.addAttribute(OffsetAttribute.class).setOffset(startOffset, endOffset);

    State captured = a.captureState();
    a.clearAttributes();
    return captured;
}
Also used: FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute), PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute), CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute), TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute), State (org.apache.lucene.util.AttributeSource.State), OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute), BytesRef (org.apache.lucene.util.BytesRef), PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 17 with TypeAttribute

use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project lucene-solr by apache.

the class TypeAsPayloadTokenFilterTest method test.

/**
 * Runs a fixed ten-word sentence through {@code TypeAsPayloadTokenFilter} and checks, for
 * every token, that the type equals the upper-cased first character of the term and that
 * the payload is present and decodes (as UTF-8) to that same type string.
 *
 * @throws IOException if the token stream fails while being consumed or closed
 */
public void test() throws IOException {
    String test = "The quick red fox jumped over the lazy brown dogs";
    int count = 0;
    // try-with-resources guarantees close() even when an assertion fails mid-stream; a
    // failure thrown by close() in that case is attached as suppressed, not masking it.
    try (TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(whitespaceMockTokenizer(test)))) {
        CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
        PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
        nptf.reset();
        while (nptf.incrementToken()) {
            String expectedType = String.valueOf(Character.toUpperCase(termAtt.buffer()[0]));
            assertTrue(typeAtt.type() + " is not equal to " + expectedType, typeAtt.type().equals(expectedType));
            assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
            String type = payloadAtt.getPayload().utf8ToString();
            assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()));
            count++;
        }
        // Complete the consumer workflow: end() after the last incrementToken(), then close().
        nptf.end();
    }
    assertTrue(count + " does not equal: " + 10, count == 10);
}
Also used: PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute), CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute), TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute)

Aggregations

TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute): 17; CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 13; PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 11; OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 10; PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute): 10; TokenStream (org.apache.lucene.analysis.TokenStream): 8; FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute): 7; BytesRef (org.apache.lucene.util.BytesRef): 6; IOException (java.io.IOException): 5; TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute): 4; ArrayList (java.util.ArrayList): 3; Token (org.apache.lucene.analysis.Token): 3; StringReader (java.io.StringReader): 2; LinkedHashMap (java.util.LinkedHashMap): 2; LinkedList (java.util.LinkedList): 2; Map (java.util.Map): 2; TreeMap (java.util.TreeMap): 2; Attribute (org.apache.lucene.util.Attribute): 2; State (org.apache.lucene.util.AttributeSource.State): 2; LegacyNumericTokenStream (org.apache.solr.legacy.LegacyNumericTokenStream): 2