use of org.apache.lucene.util.AttributeReflector in project lucene-solr by apache.
the class AnalysisRequestHandlerBase method convertTokensToNamedLists.
/**
* Converts the list of Tokens to a list of NamedLists representing the tokens.
*
* @param tokenList Tokens to convert
* @param context The analysis context
*
* @return List of NamedLists containing the relevant information taken from the tokens
*/
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList, AnalysisContext context) {
final List<NamedList> tokensNamedLists = new ArrayList<>();
final FieldType fieldType = context.getFieldType();
final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);
// sort the tokens by absolute position
ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {
@Override
public int compare(AttributeSource a, AttributeSource b) {
return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(), b.getAttribute(TokenTrackingAttribute.class).getPositions());
}
private int arrayCompare(int[] a, int[] b) {
int p = 0;
final int stop = Math.min(a.length, b.length);
while (p < stop) {
int diff = a[p] - b[p];
if (diff != 0)
return diff;
p++;
}
// One is a prefix of the other, or, they are equal:
return a.length - b.length;
}
});
for (int i = 0; i < tokens.length; i++) {
AttributeSource token = tokens[i];
final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
BytesRef rawBytes = termAtt.getBytesRef();
final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
tokenNamedList.add("text", text);
if (token.hasAttribute(CharTermAttribute.class)) {
final String rawText = token.getAttribute(CharTermAttribute.class).toString();
if (!rawText.equals(text)) {
tokenNamedList.add("raw_text", rawText);
}
}
tokenNamedList.add("raw_bytes", rawBytes.toString());
if (context.getTermsToMatch().contains(rawBytes)) {
tokenNamedList.add("match", true);
}
token.reflectWith(new AttributeReflector() {
@Override
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
// leave out position and bytes term
if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
return;
if (CharTermAttribute.class.isAssignableFrom(attClass))
return;
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
return;
String k = attClass.getName() + '#' + key;
// map keys for "standard attributes":
if (ATTRIBUTE_MAPPING.containsKey(k)) {
k = ATTRIBUTE_MAPPING.get(k);
}
if (value instanceof BytesRef) {
final BytesRef p = (BytesRef) value;
value = p.toString();
}
tokenNamedList.add(k, value);
}
});
tokensNamedLists.add(tokenNamedList);
}
return tokensNamedLists;
}
Aggregations