Example use of com.twitter.common.text.token.attribute.TokenGroupAttribute in the Twitter commons project.
Shown below: the method createFromTokenGroupsIn of the class TokenizedCharSequence.
/**
 * Builds one {@link TokenizedCharSequence} for every token produced by {@code stream}.
 *
 * <p>For each outer token, a builder is seeded with the outer token's term and then
 * filled from the token group stream obtained from the stream's
 * {@link TokenGroupAttribute}. Offsets of the grouped tokens are stored relative to the
 * outer token's offset.
 *
 * <p>This method does not reset either stream before iterating.
 *
 * @param stream token stream exposing a {@link TokenGroupAttribute}
 * @return the sequences built, in the order the outer tokens were produced; empty if
 *     {@code stream} yields no tokens
 */
public static final List<TokenizedCharSequence> createFromTokenGroupsIn(TwitterTokenStream stream) {
  TokenGroupAttribute outerGroupAttr = stream.getAttribute(TokenGroupAttribute.class);
  List<TokenizedCharSequence> result = Lists.newArrayList();

  while (stream.incrementToken()) {
    Builder sequenceBuilder = new Builder(stream.term());
    TwitterTokenStream members = outerGroupAttr.getTokenGroupStream();

    // Each of these attributes is optional on the group stream; null means "not present".
    PartOfSpeechAttribute partOfSpeech = members.hasAttribute(PartOfSpeechAttribute.class)
        ? members.getAttribute(PartOfSpeechAttribute.class)
        : null;
    PositionIncrementAttribute increment = members.hasAttribute(PositionIncrementAttribute.class)
        ? members.getAttribute(PositionIncrementAttribute.class)
        : null;
    TokenGroupAttributeImpl nestedGroupAttr = members.hasAttribute(TokenGroupAttribute.class)
        ? (TokenGroupAttributeImpl) members.getAttribute(TokenGroupAttribute.class)
        : null;

    while (members.incrementToken()) {
      // Rebase the grouped token's offset onto the start of the outer token.
      int relativeOffset = members.offset() - stream.offset();
      int tokenLength = members.length();
      sequenceBuilder.addToken(
          relativeOffset,
          tokenLength,
          members.type(),
          partOfSpeech == null ? Token.DEFAULT_PART_OF_SPEECH : partOfSpeech.getPOS(),
          increment == null ? 1 : increment.getPositionIncrement(),
          nestedSequenceFor(nestedGroupAttr));
    }

    result.add(sequenceBuilder.build());
  }
  return result;
}

/**
 * Returns the sequence to attach to the current grouped token, or {@code null} when the
 * attribute is absent or reports itself empty. If the attribute has no sequence, one is
 * created from its token group stream.
 */
private static TokenizedCharSequence nestedSequenceFor(TokenGroupAttributeImpl nestedGroupAttr) {
  if (nestedGroupAttr == null || nestedGroupAttr.isEmpty()) {
    return null;
  }
  if (nestedGroupAttr.getSequence() == null) {
    return createFrom(nestedGroupAttr.getTokenGroupStream());
  }
  return nestedGroupAttr.getSequence();
}
Aggregations