Example 6 with TypeAttribute

Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the lucene-solr project by Apache.

From the class TestNumericTokenStream, method testLongStream:

public void testLongStream() throws Exception {
    @SuppressWarnings("resource") final LegacyNumericTokenStream stream = new LegacyNumericTokenStream().setLongValue(lvalue);
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    assertNotNull(bytesAtt);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    assertNotNull(typeAtt);
    final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
    assertNotNull(numericAtt);
    stream.reset();
    assertEquals(64, numericAtt.getValueSize());
    for (int shift = 0; shift < 64; shift += LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
        assertTrue("New token is available", stream.incrementToken());
        assertEquals("Shift value wrong", shift, numericAtt.getShift());
        assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), LegacyNumericUtils.prefixCodedToLong(bytesAtt.getBytesRef()));
        assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
        assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }
    assertFalse("More tokens available", stream.incrementToken());
    stream.end();
    stream.close();
}
Also used: TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute), TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute), LegacyNumericTokenStream (org.apache.solr.legacy.LegacyNumericTokenStream)
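
The assertions above verify Lucene's legacy trie encoding: each successive token keeps only the high-order bits of the value, zeroing out the lowest shift bits. A minimal worked example of the masking (the value here is a made-up constant; the precision step of 16 matches the LegacyNumericUtils default, and the real test uses its own lvalue):

public void printPrefixTerms() {
    // Hypothetical value for illustration; the real test uses its own constant lvalue.
    final long value = 0x0123456789ABCDEFL;
    // With LegacyNumericUtils.PRECISION_STEP_DEFAULT == 16, a 64-bit value yields four tokens.
    for (int shift = 0; shift < 64; shift += 16) {
        // Zero out the lowest 'shift' bits, which is exactly what the assertions above expect.
        final long prefix = value & ~((1L << shift) - 1L);
        System.out.println("shift=" + shift + " prefix=0x" + Long.toHexString(prefix));
    }
}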

Example 7 with TypeAttribute

Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the lucene-solr project by Apache.

From the class TestNumericTokenStream, method testIntStream:

public void testIntStream() throws Exception {
    @SuppressWarnings("resource") final LegacyNumericTokenStream stream = new LegacyNumericTokenStream().setIntValue(ivalue);
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    assertNotNull(bytesAtt);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    assertNotNull(typeAtt);
    final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
    assertNotNull(numericAtt);
    stream.reset();
    assertEquals(32, numericAtt.getValueSize());
    for (int shift = 0; shift < 32; shift += LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
        assertTrue("New token is available", stream.incrementToken());
        assertEquals("Shift value wrong", shift, numericAtt.getShift());
        assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), LegacyNumericUtils.prefixCodedToInt(bytesAtt.getBytesRef()));
        assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
        assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }
    assertFalse("More tokens available", stream.incrementToken());
    stream.end();
    stream.close();
}
Also used: TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute), TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute), LegacyNumericTokenStream (org.apache.solr.legacy.LegacyNumericTokenStream)
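
Both tests follow the standard TokenStream consumer contract: reset() before the first incrementToken(), then end() and close() once the stream is exhausted. A generic consumer looks like this (a sketch, assuming the usual org.apache.lucene.analysis imports; the printed attribute is just an example):

void consumeAllTokens(TokenStream stream) throws IOException {
    // Declare the attributes of interest before reset().
    final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    // reset() is mandatory before the first call to incrementToken().
    stream.reset();
    while (stream.incrementToken()) {
        // The attribute instance now reflects the current token.
        System.out.println(termAtt.toString());
    }
    // end() records end-of-stream state (e.g. the final offset); close() releases resources.
    stream.end();
    stream.close();
}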

Example 8 with TypeAttribute

Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the lucene-solr project by Apache.

From the class JsonPreAnalyzedParser, method parse:

@SuppressWarnings("unchecked")
@Override
public ParseResult parse(Reader reader, AttributeSource parent) throws IOException {
    ParseResult res = new ParseResult();
    StringBuilder sb = new StringBuilder();
    char[] buf = new char[128];
    int cnt;
    while ((cnt = reader.read(buf)) > 0) {
        sb.append(buf, 0, cnt);
    }
    String val = sb.toString();
    // empty string - accept even without version number
    if (val.length() == 0) {
        return res;
    }
    Object o = ObjectBuilder.fromJSON(val);
    if (!(o instanceof Map)) {
        throw new IOException("Invalid JSON type " + o.getClass().getName() + ", expected Map");
    }
    Map<String, Object> map = (Map<String, Object>) o;
    // check version
    String version = (String) map.get(VERSION_KEY);
    if (version == null) {
        throw new IOException("Missing VERSION key");
    }
    if (!VERSION.equals(version)) {
        throw new IOException("Unknown VERSION '" + version + "', expected " + VERSION);
    }
    if (map.containsKey(STRING_KEY) && map.containsKey(BINARY_KEY)) {
        throw new IOException("Field cannot have both stringValue and binaryValue");
    }
    res.str = (String) map.get(STRING_KEY);
    String bin = (String) map.get(BINARY_KEY);
    if (bin != null) {
        byte[] data = Base64.base64ToByteArray(bin);
        res.bin = data;
    }
    List<Object> tokens = (List<Object>) map.get(TOKENS_KEY);
    if (tokens == null) {
        return res;
    }
    int tokenStart = 0;
    int tokenEnd = 0;
    parent.clearAttributes();
    for (Object ot : tokens) {
        // assume a one-character separator between tokens; explicit offset attributes override this below
        tokenStart = tokenEnd + 1;
        Map<String, Object> tok = (Map<String, Object>) ot;
        boolean hasOffsetStart = false;
        boolean hasOffsetEnd = false;
        int len = -1;
        for (Entry<String, Object> e : tok.entrySet()) {
            String key = e.getKey();
            if (key.equals(TOKEN_KEY)) {
                CharTermAttribute catt = parent.addAttribute(CharTermAttribute.class);
                String str = String.valueOf(e.getValue());
                catt.append(str);
                len = str.length();
            } else if (key.equals(OFFSET_START_KEY)) {
                Object obj = e.getValue();
                hasOffsetStart = true;
                if (obj instanceof Number) {
                    tokenStart = ((Number) obj).intValue();
                } else {
                    try {
                        tokenStart = Integer.parseInt(String.valueOf(obj));
                    } catch (NumberFormatException nfe) {
                        LOG.warn("Invalid " + OFFSET_START_KEY + " attribute, skipped: '" + obj + "'");
                        hasOffsetStart = false;
                    }
                }
            } else if (key.equals(OFFSET_END_KEY)) {
                hasOffsetEnd = true;
                Object obj = e.getValue();
                if (obj instanceof Number) {
                    tokenEnd = ((Number) obj).intValue();
                } else {
                    try {
                        tokenEnd = Integer.parseInt(String.valueOf(obj));
                    } catch (NumberFormatException nfe) {
                        LOG.warn("Invalid " + OFFSET_END_KEY + " attribute, skipped: '" + obj + "'");
                        hasOffsetEnd = false;
                    }
                }
            } else if (key.equals(POSINCR_KEY)) {
                Object obj = e.getValue();
                int posIncr = 1;
                if (obj instanceof Number) {
                    posIncr = ((Number) obj).intValue();
                } else {
                    try {
                        posIncr = Integer.parseInt(String.valueOf(obj));
                    } catch (NumberFormatException nfe) {
                        LOG.warn("Invalid " + POSINCR_KEY + " attribute, skipped: '" + obj + "'");
                    }
                }
                PositionIncrementAttribute patt = parent.addAttribute(PositionIncrementAttribute.class);
                patt.setPositionIncrement(posIncr);
            } else if (key.equals(PAYLOAD_KEY)) {
                String str = String.valueOf(e.getValue());
                if (str.length() > 0) {
                    byte[] data = Base64.base64ToByteArray(str);
                    PayloadAttribute p = parent.addAttribute(PayloadAttribute.class);
                    if (data != null && data.length > 0) {
                        p.setPayload(new BytesRef(data));
                    }
                }
            } else if (key.equals(FLAGS_KEY)) {
                try {
                    int f = Integer.parseInt(String.valueOf(e.getValue()), 16);
                    FlagsAttribute flags = parent.addAttribute(FlagsAttribute.class);
                    flags.setFlags(f);
                } catch (NumberFormatException nfe) {
                    LOG.warn("Invalid " + FLAGS_KEY + " attribute, skipped: '" + e.getValue() + "'");
                }
            } else if (key.equals(TYPE_KEY)) {
                TypeAttribute tattr = parent.addAttribute(TypeAttribute.class);
                tattr.setType(String.valueOf(e.getValue()));
            } else {
                LOG.warn("Unknown attribute, skipped: " + e.getKey() + "=" + e.getValue());
            }
        }
        // handle offset attr
        OffsetAttribute offset = parent.addAttribute(OffsetAttribute.class);
        if (!hasOffsetEnd && len > -1) {
            tokenEnd = tokenStart + len;
        }
        offset.setOffset(tokenStart, tokenEnd);
        if (!hasOffsetStart) {
            tokenStart = tokenEnd + 1;
        }
        // capture state and add to result
        State state = parent.captureState();
        res.states.add(state.clone());
        // reset for reuse
        parent.clearAttributes();
    }
    return res;
}
Also used: PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute), FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute), ParseResult (org.apache.solr.schema.PreAnalyzedField.ParseResult), IOException (java.io.IOException), PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute), CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute), TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute), State (org.apache.lucene.util.AttributeSource.State), OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute), LinkedList (java.util.LinkedList), List (java.util.List), LinkedHashMap (java.util.LinkedHashMap), Map (java.util.Map), TreeMap (java.util.TreeMap), BytesRef (org.apache.lucene.util.BytesRef)
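
Each iteration of parse() captures the populated attributes into an AttributeSource.State and stores a clone in res.states; a consumer can later replay those states with restoreState(). The following is a minimal illustration of that replay pattern, not Solr's actual PreAnalyzedTokenizer. It assumes the usual imports (java.util.Iterator, java.util.List, org.apache.lucene.analysis.TokenStream, org.apache.lucene.util.AttributeSource) and that the stream shares attribute implementations with the source that captured the states:

final class StateReplayTokenStream extends TokenStream {

    private final List<AttributeSource.State> states;
    private Iterator<AttributeSource.State> iterator;

    StateReplayTokenStream(List<AttributeSource.State> states) {
        this.states = states;
    }

    @Override
    public void reset() throws IOException {
        super.reset();
        iterator = states.iterator();
    }

    @Override
    public boolean incrementToken() {
        if (iterator != null && iterator.hasNext()) {
            clearAttributes();
            // Re-apply the attribute values captured by parse() above.
            restoreState(iterator.next());
            return true;
        }
        return false;
    }
}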

Example 9 with TypeAttribute

Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the lucene-solr project by Apache.

From the class SpellingQueryConverter, method analyze:

protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
    TokenStream stream = analyzer.tokenStream("", text);
    // TODO: support custom attributes
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        Token token = new Token();
        token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
        token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset());
        // overwrite any flags already set with the supplied flagsAttValue
        token.setFlags(flagsAttValue);
        token.setType(typeAtt.type());
        token.setPayload(payloadAtt.getPayload());
        token.setPositionIncrement(posIncAtt.getPositionIncrement());
        result.add(token);
    }
    stream.end();
    stream.close();
}
Also used: TokenStream (org.apache.lucene.analysis.TokenStream), PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute), CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute), TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute), OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute), Token (org.apache.lucene.analysis.Token), PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)
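
The token type copied into each Token comes from whatever tokenizer the analyzer chain uses. A standalone sketch (not from the lucene-solr sources) that prints the types StandardAnalyzer assigns, assuming the usual Lucene imports:

public static void printTokenTypes() throws IOException {
    try (Analyzer analyzer = new StandardAnalyzer()) {
        TokenStream ts = analyzer.tokenStream("", "wifi 4.2 rocks");
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            // StandardTokenizer assigns types such as <ALPHANUM> and <NUM>.
            System.out.println(termAtt + " -> " + typeAtt.type());
        }
        ts.end();
        ts.close();
    }
}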

Example 10 with TypeAttribute

Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the lucene-solr project by Apache.

From the class SpellCheckComponent, method getTokens:

private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<>();
    assert analyzer != null;
    try (TokenStream ts = analyzer.tokenStream("", q)) {
        ts.reset();
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        while (ts.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            token.setType(typeAtt.type());
            token.setFlags(flagsAtt.getFlags());
            token.setPayload(payloadAtt.getPayload());
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
            result.add(token);
        }
        ts.end();
        return result;
    }
}
Also used: TokenStream (org.apache.lucene.analysis.TokenStream), FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute), PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute), CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute), TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute), ArrayList (java.util.ArrayList), OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute), Token (org.apache.lucene.analysis.Token), PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)
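
One detail worth noting across these examples: the numeric-stream tests call getAttribute(), while the analysis helpers call addAttribute(). A short sketch of the difference, where stream stands for any TokenStream:

// addAttribute() registers the attribute if the stream lacks it and returns the instance;
// a freshly added TypeAttribute reports the default type "word".
TypeAttribute added = stream.addAttribute(TypeAttribute.class);

// getAttribute() only returns an attribute the stream has already declared;
// it throws IllegalArgumentException if the attribute is missing.
TypeAttribute existing = stream.getAttribute(TypeAttribute.class);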

Aggregations

TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute): 17 uses
CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 13 uses
PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 11 uses
OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 10 uses
PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute): 10 uses
TokenStream (org.apache.lucene.analysis.TokenStream): 8 uses
FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute): 7 uses
BytesRef (org.apache.lucene.util.BytesRef): 6 uses
IOException (java.io.IOException): 5 uses
TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute): 4 uses
ArrayList (java.util.ArrayList): 3 uses
Token (org.apache.lucene.analysis.Token): 3 uses
StringReader (java.io.StringReader): 2 uses
LinkedHashMap (java.util.LinkedHashMap): 2 uses
LinkedList (java.util.LinkedList): 2 uses
Map (java.util.Map): 2 uses
TreeMap (java.util.TreeMap): 2 uses
Attribute (org.apache.lucene.util.Attribute): 2 uses
State (org.apache.lucene.util.AttributeSource.State): 2 uses
LegacyNumericTokenStream (org.apache.solr.legacy.LegacyNumericTokenStream): 2 uses