Search in sources :

Example 1 with AttributeReflector

use of org.apache.lucene.util.AttributeReflector in project lucene-solr by apache.

the class AnalysisRequestHandlerBase method convertTokensToNamedLists.

/**
   * Converts the list of Tokens to a list of NamedLists representing the tokens.
   *
   * @param tokenList  Tokens to convert
   * @param context The analysis context
   *
   * @return List of NamedLists containing the relevant information taken from the tokens
   */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList, AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<>();
    final FieldType fieldType = context.getFieldType();
    final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);
    // sort the tokens by absolute position
    ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {

        @Override
        public int compare(AttributeSource a, AttributeSource b) {
            return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(), b.getAttribute(TokenTrackingAttribute.class).getPositions());
        }

        private int arrayCompare(int[] a, int[] b) {
            int p = 0;
            final int stop = Math.min(a.length, b.length);
            while (p < stop) {
                int diff = a[p] - b[p];
                if (diff != 0)
                    return diff;
                p++;
            }
            // One is a prefix of the other, or, they are equal:
            return a.length - b.length;
        }
    });
    for (int i = 0; i < tokens.length; i++) {
        AttributeSource token = tokens[i];
        final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
        final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
        BytesRef rawBytes = termAtt.getBytesRef();
        final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
        tokenNamedList.add("text", text);
        if (token.hasAttribute(CharTermAttribute.class)) {
            final String rawText = token.getAttribute(CharTermAttribute.class).toString();
            if (!rawText.equals(text)) {
                tokenNamedList.add("raw_text", rawText);
            }
        }
        tokenNamedList.add("raw_bytes", rawBytes.toString());
        if (context.getTermsToMatch().contains(rawBytes)) {
            tokenNamedList.add("match", true);
        }
        token.reflectWith(new AttributeReflector() {

            @Override
            public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                // leave out position and bytes term
                if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
                    return;
                if (CharTermAttribute.class.isAssignableFrom(attClass))
                    return;
                if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
                    return;
                String k = attClass.getName() + '#' + key;
                // map keys for "standard attributes":
                if (ATTRIBUTE_MAPPING.containsKey(k)) {
                    k = ATTRIBUTE_MAPPING.get(k);
                }
                if (value instanceof BytesRef) {
                    final BytesRef p = (BytesRef) value;
                    value = p.toString();
                }
                tokenNamedList.add(k, value);
            }
        });
        tokensNamedLists.add(tokenNamedList);
    }
    return tokensNamedLists;
}
Also used : AttributeSource(org.apache.lucene.util.AttributeSource) NamedList(org.apache.solr.common.util.NamedList) AttributeReflector(org.apache.lucene.util.AttributeReflector) ArrayList(java.util.ArrayList) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) FieldType(org.apache.solr.schema.FieldType) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

ArrayList (java.util.ArrayList)1 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)1 PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)1 TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute)1 AttributeReflector (org.apache.lucene.util.AttributeReflector)1 AttributeSource (org.apache.lucene.util.AttributeSource)1 BytesRef (org.apache.lucene.util.BytesRef)1 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)1 NamedList (org.apache.solr.common.util.NamedList)1 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)1 FieldType (org.apache.solr.schema.FieldType)1