use of opennlp.tools.util.Sequence in project stanbol by apache.
the class OpenNlpPosTaggingEngine method posTag.
/**
 * POS tags the parsed tokens with the given tagger and adds the resulting
 * tags, together with their probabilities, to each token as
 * {@code POS_ANNOTATION} values.
 * <p>
 * The top-K tag sequences are requested from the tagger for the whole token
 * list. For every token the tags proposed by those sequences are collected
 * in sequence order; collection stops as soon as a later sequence proposes
 * the same tag as the first (best) sequence, because such entries are only
 * copies of the best match and not real alternatives.
 *
 * @param tokenList the tokens to tag; the span of each token is the text
 *        passed to the tagger
 * @param posTagger the tagger used to compute the top-K tag sequences
 * @param posModel the tag set handed to {@code getPosTag(..)} to resolve a
 *        string tag to a {@link PosTag}
 * @param adhocTags the already created ad-hoc tags, handed to
 *        {@code getPosTag(..)} (presumably used for string tags that are
 *        not part of the posModel - confirm against getPosTag)
 * @param language the language, handed to {@code getPosTag(..)}
 */
private void posTag(List<Token> tokenList, POSTagger posTagger, TagSet<PosTag> posModel, Map<String, PosTag> adhocTags, String language) {
    final int tokenCount = tokenList.size();
    String[] tokenTexts = new String[tokenCount];
    int textIndex = 0;
    for (Token token : tokenList) {
        tokenTexts[textIndex++] = token.getSpan();
    }
    //get the topK POS tag sequences for the whole token list
    Sequence[] posSequences = posTagger.topKSequences(tokenTexts);
    final int sequenceCount = posSequences.length;
    //Sequence#getProbs() creates and fills a new array covering all tokens
    //on every call. Fetch it once per sequence instead of once per
    //(token, sequence) pair to avoid quadratic copying on long sentences.
    double[][] sequenceProbs = new double[sequenceCount][];
    for (int j = 0; j < sequenceCount; j++) {
        sequenceProbs[j] = posSequences[j].getProbs();
    }
    //extract the POS tags and probabilities for the current token from the
    //posSequences.
    //NOTE: A Sequence always includes POS tags for all Tokens. If fewer
    //      than posSequences.length alternatives are available it repeats
    //      the best match for all following entries.
    //      We do not want such copies.
    //Both arrays are reused for every token; only the first j entries are
    //handed over as valid for the current token.
    PosTag[] actPos = new PosTag[sequenceCount];
    double[] actProp = new double[sequenceCount];
    for (int i = 0; i < tokenCount; i++) {
        Token token = tokenList.get(i);
        boolean done = false;
        int j = 0;
        while (j < sequenceCount && !done) {
            String p = posSequences[j].getOutcomes().get(i);
            //a later sequence repeating the best tag marks the end of the
            //real alternatives for this token
            done = j > 0 && p.equals(actPos[0].getTag());
            if (!done) {
                actPos[j] = getPosTag(posModel, adhocTags, p, language);
                actProp[j] = sequenceProbs[j][i];
                j++;
            }
        }
        //create the POS values
        token.addAnnotations(POS_ANNOTATION, Value.values(actPos, actProp, j));
    }
}