use of org.apache.uima.cas.text.AnnotationFS in project lucene-solr by apache.
the class SampleEntityAnnotator method process.
/**
 * Adds one entity annotation for every token annotation whose POS tag equals
 * {@code NP} or {@code NPS}.
 *
 * <p>Each created annotation spans the same begin/end offsets as its token, stores
 * the token's covered text in the {@code ENTITY_FEATURE} feature and a label in the
 * {@code NAME_FEATURE} feature, and is then added to the CAS indexes.
 *
 * @param jcas the JCas whose token annotations are scanned and to which the new
 *     annotations are added
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  Type entityType = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
  Feature entityFeat = entityType.getFeatureByBaseName(ENTITY_FEATURE);
  Feature nameFeat = entityType.getFeatureByBaseName(NAME_FEATURE);

  for (Annotation token : jcas.getAnnotationIndex(TokenAnnotation.type)) {
    String posTag = ((TokenAnnotation) token).getPosTag();
    boolean tagMatches = NP.equals(posTag) || NPS.equals(posTag);
    if (!tagMatches) {
      // Only tokens tagged NP / NPS produce an entity annotation.
      continue;
    }

    AnnotationFS entity =
        jcas.getCas().createAnnotation(entityType, token.getBegin(), token.getEnd());
    entity.setStringValue(entityFeat, token.getCoveredText());

    // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
    // The only special case handled here is the literal text "Apache".
    String label = token.getCoveredText().equals("Apache") ? "ORGANIZATION" : "OTHER";
    entity.setStringValue(nameFeat, label);

    jcas.addFsToIndexes(entity);
  }
}
use of org.apache.uima.cas.text.AnnotationFS in project lucene-solr by apache.
the class SampleWSTokenizerAnnotator method process.
/**
 * Annotates the document text with sentence and token annotations.
 *
 * <p>The text is cut into sentences at every match of the {@code lineEnd} regular
 * expression and into tokens at every match of the {@code WHITESPACE} regular
 * expression. Each non-empty segment becomes one annotation whose begin/end offsets
 * are the segment's real character positions in the document text.
 *
 * <p>Earlier revisions derived offsets by summing the lengths of the pieces returned
 * by {@code String.split}. That ignored the characters consumed by the delimiters, so
 * every annotation after the first drifted to the left. They also passed the sentence
 * length (not {@code begin + length}) as the sentence end offset. Both are fixed here
 * by taking the offsets directly from the delimiter matches.
 *
 * <p>Empty segments (for example between two consecutive delimiters) are not
 * annotated, and nothing is annotated when the document text is {@code null}.
 *
 * @param jCas the JCas providing the document text and receiving the annotations
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
  Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);

  String documentText = jCas.getDocumentText();
  if (documentText == null) {
    // Nothing to split; avoids a NullPointerException on the text.
    return;
  }

  annotateSegments(jCas, sentenceType, documentText, lineEnd);
  annotateSegments(jCas, tokenType, documentText, WHITESPACE);
}

/**
 * Creates one annotation of the given type for every non-empty stretch of
 * {@code text} lying between matches of {@code delimiterRegex}, and adds it to the
 * indexes of {@code jCas}.
 */
private void annotateSegments(JCas jCas, Type type, String text, String delimiterRegex) {
  // Fully qualified so this block does not depend on additional imports.
  java.util.regex.Matcher delimiter =
      java.util.regex.Pattern.compile(delimiterRegex).matcher(text);

  int begin = 0;
  while (delimiter.find()) {
    addSegment(jCas, type, begin, delimiter.start());
    // The next segment starts right after the delimiter that was just matched.
    begin = delimiter.end();
  }
  // Trailing segment after the last delimiter (or the whole text if none matched).
  addSegment(jCas, type, begin, text.length());
}

/** Adds an annotation spanning {@code [begin, end)} unless the span is empty. */
private void addSegment(JCas jCas, Type type, int begin, int end) {
  if (end <= begin) {
    return;
  }
  AnnotationFS annotation = jCas.getCas().createAnnotation(type, begin, end);
  jCas.addFsToIndexes(annotation);
}
use of org.apache.uima.cas.text.AnnotationFS in project lucene-solr by apache.
the class UIMAAnnotationsTokenizer method incrementToken.
/**
 * Advances the token stream to the next annotation supplied by {@code iterator}.
 *
 * <p>The iterator is initialized on first use. When an annotation is available, the
 * attributes are cleared, the annotation's covered text is appended to the term
 * attribute and its corrected begin/end offsets are stored in the offset attribute.
 *
 * @return {@code true} if a token was produced, {@code false} once the iterator has
 *     no more annotations
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
  if (iterator == null) {
    initializeIterator();
  }
  if (!iterator.hasNext()) {
    return false;
  }

  clearAttributes();
  AnnotationFS annotation = iterator.next();
  termAttr.append(annotation.getCoveredText());

  int startOffset = correctOffset(annotation.getBegin());
  int endOffset = correctOffset(annotation.getEnd());
  offsetAttr.setOffset(startOffset, endOffset);
  return true;
}
use of org.apache.uima.cas.text.AnnotationFS in project lucene-solr by apache.
the class UIMATypeAwareAnnotationsTokenizer method incrementToken.
/**
 * Advances the token stream to the next annotation supplied by {@code iterator},
 * also exposing a type value for the token.
 *
 * <p>The iterator is initialized on first use. When an annotation is available, the
 * attributes are cleared, the annotation's covered text is appended to the term
 * attribute, its corrected begin/end offsets are stored in the offset attribute, and
 * the string value that {@code featurePath} yields for the annotation is stored in
 * the type attribute.
 *
 * @return {@code true} if a token was produced, {@code false} once the iterator has
 *     no more annotations
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
  if (iterator == null) {
    initializeIterator();
  }
  if (!iterator.hasNext()) {
    return false;
  }

  clearAttributes();
  AnnotationFS annotation = iterator.next();
  termAttr.append(annotation.getCoveredText());

  int startOffset = correctOffset(annotation.getBegin());
  int endOffset = correctOffset(annotation.getEnd());
  offsetAttr.setOffset(startOffset, endOffset);

  String tokenType = featurePath.getValueAsString(annotation);
  typeAttr.setType(tokenType);
  return true;
}
use of org.apache.uima.cas.text.AnnotationFS in project deeplearning4j by deeplearning4j.
the class PoStagger method process.
/**
 * Performs pos-tagging on the given tcas object.
 *
 * <p>For every sentence annotation, the covered token annotations are collected, their
 * covered texts are passed to {@code posTagger}, and each returned tag is written to
 * the token's {@code posFeature}. When {@code probabilityFeature} is configured
 * (non-null), the per-token probability reported by the tagger is written to that
 * feature as well. At {@code Level.FINER} the tagged sentence is logged in the form
 * {@code "token\TAG token\TAG ..."}.
 *
 * @param tcas the CAS whose sentence and token annotations are processed
 */
@Override
public synchronized void process(CAS tcas) {
  final AnnotationComboIterator comboIterator =
      new AnnotationComboIterator(tcas, this.sentenceType, this.tokenType);

  for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {
    // Collect the tokens of the current sentence together with their text.
    final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<>();
    final List<String> sentenceTokenList = new LinkedList<>();
    for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {
      sentenceTokenAnnotationList.add(tokenAnnotation);
      sentenceTokenList.add(tokenAnnotation.getCoveredText());
    }

    final List<String> posTags = this.posTagger.tag(sentenceTokenList);

    // Probabilities are only fetched when there is a feature to store them in.
    double[] posProbabilities = null;
    if (this.probabilityFeature != null) {
      posProbabilities = this.posTagger.probs();
    }

    // Walk tags and tokens in lock-step; stops at the shorter of the two sequences.
    final Iterator<String> posTagIterator = posTags.iterator();
    final Iterator<AnnotationFS> sentenceTokenIterator = sentenceTokenAnnotationList.iterator();
    int index = 0;
    while (posTagIterator.hasNext() && sentenceTokenIterator.hasNext()) {
      final String posTag = posTagIterator.next();
      final AnnotationFS tokenAnnotation = sentenceTokenIterator.next();
      tokenAnnotation.setStringValue(this.posFeature, posTag);
      if (posProbabilities != null) {
        // Fix: the probability belongs in probabilityFeature. Writing it to posFeature
        // targeted the feature that was just given the string tag.
        tokenAnnotation.setDoubleValue(this.probabilityFeature, posProbabilities[index]);
      }
      index++;
    }

    // log tokens with pos
    if (this.logger.isLoggable(Level.FINER)) {
      final StringBuilder sentenceWithPos = new StringBuilder();
      sentenceWithPos.append("\"");
      for (final AnnotationFS token : sentenceTokenAnnotationList) {
        sentenceWithPos.append(token.getCoveredText());
        sentenceWithPos.append('\\');
        sentenceWithPos.append(token.getStringValue(this.posFeature));
        sentenceWithPos.append(' ');
      }
      // delete last whitespace; not 0 because it contains already the " char
      if (sentenceWithPos.length() > 1) {
        sentenceWithPos.setLength(sentenceWithPos.length() - 1);
      }
      sentenceWithPos.append("\"");
      this.logger.log(Level.FINER, sentenceWithPos.toString());
    }
  }
}
Aggregations