Use of org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor in the project dkpro-lab by dkpro.
The method process of the class ExamplePosAnnotator.
/**
 * Builds one classifier instance per token for every sentence in the CAS and then either
 * writes the instances as training data or applies the predicted labels to the tokens.
 *
 * <p>In training mode the outcome of each instance is the value of the token's existing
 * POS annotation, and each sentence's instances are passed to the data writer. In
 * classification mode the labels returned by {@code classify} are assigned, in order, to
 * the tokens of the sentence; a token without a POS annotation gets a new one spanning the
 * token's offsets.
 *
 * <p>Newly created POS annotations are added to the CAS indexes only once, after all
 * sentences have been processed.
 *
 * @param jCas the CAS whose sentences and tokens are processed
 * @throws AnalysisEngineProcessException declared by the overridden method; presumably
 *         raised by the feature extractors, the data writer or the classifier (not
 *         visible from this method)
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    // POS annotations created during classification, waiting to be indexed
    Collection<TOP> pendingIndexAdditions = new ArrayList<TOP>();
    // generate a list of training instances for each sentence in the document
    for (Sentence sentence : select(jCas, Sentence.class)) {
        List<Instance<String>> instances = new ArrayList<Instance<String>>();
        List<Token> tokens = selectCovered(jCas, Token.class, sentence);
        // for each token, extract all feature values and the label
        for (Token token : tokens) {
            Instance<String> instance = new Instance<String>();
            // extract all features that require only the token annotation
            for (SimpleFeatureExtractor extractor : this.tokenFeatureExtractors) {
                instance.addAll(extractor.extract(jCas, token));
            }
            // extract all features that require the token and sentence annotations
            for (ContextExtractor<Token> extractor : this.contextFeatureExtractors) {
                instance.addAll(extractor.extractWithin(jCas, token, sentence));
            }
            // set the instance label from the token's part of speech
            if (this.isTraining()) {
                instance.setOutcome(token.getPos().getPosValue());
            }
            // add the instance to the list
            instances.add(instance);
        }
        if (this.isTraining()) {
            // for training, write instances to the data writer
            this.dataWriter.write(instances);
        } else {
            // for classification, set the labels as the token POS labels;
            // labels are paired with tokens by position
            Iterator<Token> tokensIter = tokens.iterator();
            List<String> labels = classify(instances);
            for (String label : labels) {
                Token t = tokensIter.next();
                POS pos = t.getPos();
                if (pos == null) {
                    pos = new POS(jCas, t.getBegin(), t.getEnd());
                    pendingIndexAdditions.add(pos);
                    t.setPos(pos);
                }
                pos.setPosValue(label);
            }
        }
    }
    // Index the new annotations once, after the sentence loop. Doing this inside the loop
    // would call addToIndexes() again on annotations created for earlier sentences and
    // would change the CAS indexes while the selected sentences are still being iterated.
    for (TOP fs : pendingIndexAdditions) {
        fs.addToIndexes();
    }
}
Aggregations