use of org.apache.uima.cas.Type in project lucene-solr by apache.
the class SampleEntityAnnotator method process.
/**
 * Adds an entity annotation over every token whose part-of-speech tag equals the
 * {@code NP} or {@code NPS} constant.
 *
 * <p>Each new annotation spans the same offsets as its token. The entity feature
 * is set to the token's covered text, and the name feature to
 * {@code "ORGANIZATION"} when that text is exactly {@code "Apache"}, otherwise
 * to {@code "OTHER"}.
 *
 * @param jcas the CAS view whose token annotations are inspected and to whose
 *             indexes the new entity annotations are added
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  final Type entityType = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
  final Feature entityFeature = entityType.getFeatureByBaseName(ENTITY_FEATURE);
  final Feature nameFeature = entityType.getFeatureByBaseName(NAME_FEATURE);

  for (Annotation token : jcas.getAnnotationIndex(TokenAnnotation.type)) {
    final String posTag = ((TokenAnnotation) token).getPosTag();
    if (!NP.equals(posTag) && !NPS.equals(posTag)) {
      // Only tokens carrying one of the two accepted tags become entities.
      continue;
    }

    final AnnotationFS entity =
        jcas.getCas().createAnnotation(entityType, token.getBegin(), token.getEnd());
    final String coveredText = token.getCoveredText();
    entity.setStringValue(entityFeature, coveredText);

    // "OTHER" is only a placeholder; realistic names would be "PERSON",
    // "COUNTRY", "E-MAIL", and so on.
    final String entityName = coveredText.equals("Apache") ? "ORGANIZATION" : "OTHER";
    entity.setStringValue(nameFeature, entityName);

    jcas.addFsToIndexes(entity);
  }
}
use of org.apache.uima.cas.Type in project lucene-solr by apache.
the class SamplePoSTagger method process.
/**
 * Sets the part-of-speech feature on every annotation of the configured type.
 *
 * <p>For each annotation, the covered text is passed to {@code extractPoS} and
 * the result is stored in the feature looked up by {@code FEATURE_NAME}.
 *
 * @param jcas the CAS view whose annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  final Type tokenType = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
  final Feature posFeature = tokenType.getFeatureByBaseName(FEATURE_NAME);

  for (Annotation token : jcas.getAnnotationIndex(tokenType)) {
    token.setStringValue(posFeature, extractPoS(token.getCoveredText()));
  }
}
use of org.apache.uima.cas.Type in project lucene-solr by apache.
the class SampleWSTokenizerAnnotator method process.
/**
 * Adds sentence and token annotations to the CAS by splitting the document text.
 *
 * <p>Sentences are the non-empty stretches of text between matches of the
 * {@code lineEnd} regular expression; tokens are the non-empty stretches between
 * matches of {@code WHITESPACE}. Every annotation carries absolute
 * {@code [begin, end)} offsets into the document text, so delimiter characters
 * are accounted for and the covered text of an annotation is exactly the piece
 * it was created for.
 *
 * <p>If the CAS view has no document text, nothing is annotated.
 *
 * @param jCas the CAS view to read the document text from and to add the
 *             annotations to
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  String documentText = jCas.getDocumentText();
  if (documentText == null) {
    // No text in this view: there is nothing to split.
    return;
  }
  Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
  Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);

  annotateSpans(jCas, documentText, sentenceType, lineEnd);
  annotateSpans(jCas, documentText, tokenType, WHITESPACE);
}

/**
 * Annotates every non-empty stretch of {@code text} that lies between two
 * consecutive matches of {@code delimiterRegex} (or between a match and either
 * end of the text).
 */
private void annotateSpans(JCas jCas, String text, Type type, String delimiterRegex) {
  // Offsets are taken from the delimiter matches themselves, so they stay
  // correct regardless of how many characters each delimiter consumes.
  java.util.regex.Matcher delimiter =
      java.util.regex.Pattern.compile(delimiterRegex).matcher(text);
  int begin = 0;
  while (delimiter.find()) {
    addSpan(jCas, type, begin, delimiter.start());
    begin = delimiter.end();
  }
  // Trailing piece after the last delimiter (or the whole text if none matched).
  addSpan(jCas, type, begin, text.length());
}

/** Creates and indexes an annotation for {@code [begin, end)}; empty spans are skipped. */
private void addSpan(JCas jCas, Type type, int begin, int end) {
  if (end > begin) {
    AnnotationFS annotation = jCas.getCas().createAnnotation(type, begin, end);
    jCas.addFsToIndexes(annotation);
  }
}
use of org.apache.uima.cas.Type in project lucene-solr by apache.
the class UIMAAnnotationsTokenizer method initializeIterator.
@Override
protected void initializeIterator() throws IOException {
try {
analyzeInput();
} catch (AnalysisEngineProcessException | ResourceInitializationException e) {
throw new IOException(e);
}
finalOffset = correctOffset(cas.getDocumentText().length());
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
iterator = cas.getAnnotationIndex(tokenType).iterator();
}
use of org.apache.uima.cas.Type in project lucene-solr by apache.
the class UIMATypeAwareAnnotationsTokenizer method initializeIterator.
@Override
protected void initializeIterator() throws IOException {
try {
analyzeInput();
} catch (AnalysisEngineProcessException | ResourceInitializationException e) {
throw new IOException(e);
}
featurePath = cas.createFeaturePath();
try {
featurePath.initialize(typeAttributeFeaturePath);
} catch (CASException e) {
featurePath = null;
throw new IOException(e);
}
finalOffset = correctOffset(cas.getDocumentText().length());
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
iterator = cas.getAnnotationIndex(tokenType).iterator();
}
Aggregations