use of org.apache.uima.cas.Feature in project webanno by webanno.
the class WebannoTsv2Reader method createSpanAnnotation.
/**
 * Reads the annotation columns of the current token from {@code lineTk} and creates, reuses or
 * extends the corresponding span annotations in the CAS.
 * <p>
 * For every layer in {@code aLayers}, one column is consumed per feature. If the layer is a key
 * of {@code aRelationayers}, one additional column holding the relation target numbers is
 * consumed right after the layer's last feature column. Each column may hold several values
 * separated by {@code |}; the 1-based position within the column identifies the (stacked)
 * annotation the value belongs to.
 * <ul>
 * <li>Values without a {@code B-}/{@code I-}/{@code O-} prefix (and different from {@code _} and
 * {@code O}) produce an annotation covering exactly this token.</li>
 * <li>{@code B-} values start, or set a further feature on, a multi-token annotation.</li>
 * <li>{@code I-} values move the end of the annotation at that position to the end of this
 * token.</li>
 * <li>{@code O-_}, {@code B-_} and {@code I-_} only advance the position counter.</li>
 * </ul>
 *
 * @param aJcas the JCas in which annotations are created and indexed
 * @param aTokenStart begin offset of the current token; annotations created here span
 *        {@code aTokenStart} to {@code aTokenStart + aToken.length()}
 * @param aLayers the span layers and, per layer, the features whose columns are read
 * @param aRelationayers layers that carry a trailing relation-target column (only the keys are
 *        consulted here)
 * @param aAnnotations per layer: position index to the multi-token annotation at that position
 * @param aBeginEndAnno per layer: position index to the last marker seen ({@code "B-"},
 *        {@code "I-"} or {@code "E-"})
 * @param aTokenAnnotations per layer: token number column to the annotations created at that
 *        token
 * @param aRelationTargets per layer: token number column to the relation target entries read
 *        for that token
 * @param lineTk tokenizer positioned at the first annotation column of the current line
 * @param aToken text of the current token
 * @param aTokenNumberColumn value of the token number column; used as key into
 *        {@code aTokenAnnotations} and {@code aRelationTargets}
 */
private void createSpanAnnotation(JCas aJcas, int aTokenStart, Map<Type, Set<Feature>> aLayers,
        Map<Type, Type> aRelationayers, Map<Type, Map<Integer, AnnotationFS>> aAnnotations,
        Map<Type, Map<Integer, String>> aBeginEndAnno,
        Map<Type, Map<String, List<AnnotationFS>>> aTokenAnnotations,
        Map<Type, Map<String, List<String>>> aRelationTargets, StringTokenizer lineTk,
        String aToken, String aTokenNumberColumn) {
    for (Type layer : aLayers.keySet()) {
        int lastIndex = 1;
        // if a layer is bound to a single token but has multiple features, the
        // annotation is created once and the feature values are set on that same annotation
        Map<Integer, AnnotationFS> singleTokenMultiFeature = new HashMap<>();
        // The relation target column must only be read once all feature columns
        // of the layer have been obtained, so count the remaining features down
        int numberOfFeaturesPerLayer = aLayers.get(layer).size();
        for (Feature feature : aLayers.get(layer)) {
            numberOfFeaturesPerLayer--;
            // 1-based position of the current value within the column
            int index = 1;
            String multipleAnnotations = lineTk.nextToken();
            String relationTargetNumbers = null;
            if (aRelationayers.containsKey(layer) && numberOfFeaturesPerLayer == 0) {
                relationTargetNumbers = lineTk.nextToken();
            }
            // position of the next unread entry in relationTargets
            int i = 0;
            String[] relationTargets = null;
            if (relationTargetNumbers != null) {
                relationTargets = relationTargetNumbers.split("\\|");
            }
            for (String annotation : multipleAnnotations.split("\\|")) {
                // If annotation is not on multiple spans
                if (!(annotation.startsWith("B-") || annotation.startsWith("I-") || annotation.startsWith("O-")) && !(annotation.equals("_") || annotation.equals("O"))) {
                    AnnotationFS newAnnotation;
                    // create the annotation only once per position, reuse it for further features
                    if (singleTokenMultiFeature.get(index) == null) {
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                        singleTokenMultiFeature.put(index, newAnnotation);
                    } else {
                        newAnnotation = singleTokenMultiFeature.get(index);
                    }
                    // a value starting with the layer name is stored as a null feature value
                    if (annotation.startsWith(layer.getName())) {
                        annotation = null;
                    }
                    newAnnotation.setFeatureValueFromString(feature, annotation);
                    aJcas.addFsToIndexes(newAnnotation);
                    // NOTE(review): the key below is built by string concatenation, i.e.
                    // start + "-" + start + length (e.g. "5-53"), not start + "-" + end.
                    // It has to match however indexedTokens is populated - confirm before changing.
                    // Set the POS to the token
                    if (layer.getName().equals(POS.class.getName())) {
                        indexedTokens.get(aTokenStart + "-" + aTokenStart + aToken.length()).setPos((POS) newAnnotation);
                    }
                    // Set the Lemma to the token
                    if (layer.getName().equals(Lemma.class.getName())) {
                        indexedTokens.get(aTokenStart + "-" + aTokenStart + aToken.length()).setLemma((Lemma) newAnnotation);
                    }
                    if (aRelationayers.containsKey(layer) && numberOfFeaturesPerLayer == 0) {
                        Map<String, List<String>> targets = aRelationTargets.get(layer);
                        if (targets == null) {
                            List<String> governors = new ArrayList<>();
                            governors.add(relationTargets[i]);
                            targets = new HashMap<>();
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        } else {
                            List<String> governors = targets.get(aTokenNumberColumn);
                            if (governors == null) {
                                governors = new ArrayList<>();
                            }
                            governors.add(relationTargets[i]);
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        }
                    }
                    Map<String, List<AnnotationFS>> tokenAnnotations = aTokenAnnotations.get(layer);
                    if (tokenAnnotations == null) {
                        tokenAnnotations = new HashMap<>();
                    }
                    List<AnnotationFS> relAnnos = tokenAnnotations.get(aTokenNumberColumn);
                    if (relAnnos == null) {
                        relAnnos = new ArrayList<>();
                    }
                    relAnnos.add(newAnnotation);
                    tokenAnnotations.put(aTokenNumberColumn, relAnnos);
                    aTokenAnnotations.put(layer, tokenAnnotations);
                    index++;
                } else if (annotation.equals("O-_") || annotation.equals("B-_") || annotation.equals("I-_")) {
                    // O-_ (and B-_/I-_) is a position marker only
                    index++;
                } else if (annotation.startsWith("B-")) {
                    boolean isNewAnnotation = true;
                    Map<Integer, AnnotationFS> indexedAnnos = aAnnotations.get(layer);
                    Map<Integer, String> indexedBeginEndAnnos = aBeginEndAnno.get(layer);
                    AnnotationFS newAnnotation;
                    if (indexedAnnos == null) {
                        // first multi-token annotation of this layer
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                        indexedAnnos = new LinkedHashMap<>();
                        indexedBeginEndAnnos = new LinkedHashMap<>();
                    } else if (indexedAnnos.get(index) == null) {
                        // nothing at this position yet
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else if (indexedAnnos.get(index) != null && indexedBeginEndAnnos.get(index).equals("E-")) {
                        // the previous annotation at this position has been closed
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else if (indexedBeginEndAnnos.get(index).equals("I-")) {
                        // a B- following an I- starts a new annotation
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else {
                        // marker is still "B-": same annotation, further feature column
                        newAnnotation = indexedAnnos.get(index);
                        isNewAnnotation = false;
                    }
                    // remove prefixes such as B-/I- before setting the feature value
                    annotation = (annotation.substring(2));
                    if (annotation.startsWith(layer.getName())) {
                        annotation = null;
                    }
                    newAnnotation.setFeatureValueFromString(feature, annotation);
                    aJcas.addFsToIndexes(newAnnotation);
                    indexedAnnos.put(index, newAnnotation);
                    indexedBeginEndAnnos.put(index, "B-");
                    aAnnotations.put(layer, indexedAnnos);
                    // relationTargets is only populated once the layer's last feature column
                    // has been read; guarding on that (as the single-token branch does)
                    // avoids dereferencing a null array for layers with several features.
                    if (aRelationayers.containsKey(layer) && numberOfFeaturesPerLayer == 0) {
                        Map<String, List<String>> targets = aRelationTargets.get(layer);
                        if (targets == null) {
                            List<String> governors = new ArrayList<>();
                            governors.add(relationTargets[i]);
                            targets = new HashMap<>();
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        } else {
                            List<String> governors = targets.get(aTokenNumberColumn);
                            if (governors == null) {
                                governors = new ArrayList<>();
                            }
                            governors.add(relationTargets[i]);
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        }
                    }
                    Map<String, List<AnnotationFS>> tokenAnnotations = aTokenAnnotations.get(layer);
                    // register the annotation for this token only when it was created here
                    if (isNewAnnotation) {
                        if (tokenAnnotations == null) {
                            tokenAnnotations = new HashMap<>();
                        }
                        List<AnnotationFS> relAnnos = tokenAnnotations.get(aTokenNumberColumn);
                        if (relAnnos == null) {
                            relAnnos = new ArrayList<>();
                        }
                        relAnnos.add(newAnnotation);
                        tokenAnnotations.put(aTokenNumberColumn, relAnnos);
                        aTokenAnnotations.put(layer, tokenAnnotations);
                    }
                    aBeginEndAnno.put(layer, indexedBeginEndAnnos);
                    index++;
                } else if (annotation.startsWith("I-")) {
                    // continuation: mark the position as "I-" and stretch the annotation
                    // recorded at this position to the end of the current token
                    Map<Integer, String> indexedBeginEndAnnos = aBeginEndAnno.get(layer);
                    indexedBeginEndAnnos.put(index, "I-");
                    aBeginEndAnno.put(layer, indexedBeginEndAnnos);
                    Map<Integer, AnnotationFS> indexedAnnos = aAnnotations.get(layer);
                    AnnotationFS newAnnotation = indexedAnnos.get(index);
                    ((Annotation) newAnnotation).setEnd(aTokenStart + aToken.length());
                    index++;
                } else {
                    // any other value: drop the multi-token bookkeeping of this layer
                    aAnnotations.put(layer, null);
                    index++;
                }
            }
            lastIndex = index - 1;
        }
        // tokens annotated as B-X B-X, no B-I means it is end by itself:
        // every position still marked "B-" after this token is turned into "E-"
        for (int i = 1; i <= lastIndex; i++) {
            if (aBeginEndAnno.get(layer) != null && aBeginEndAnno.get(layer).get(i) != null && aBeginEndAnno.get(layer).get(i).equals("B-")) {
                aBeginEndAnno.get(layer).put(i, "E-");
            }
        }
    }
}
use of org.apache.uima.cas.Feature in project webanno by webanno.
the class TypeSystemAnalysis method isSlotFeature.
/**
 * Heuristically decides whether a feature looks like a slot (link-with-role) feature.
 * <p>
 * The feature qualifies when it is multi-valued, its range has a component type (the link
 * type), that link type is a direct child of the type system's top type, and the link type
 * declares exactly two features of which at least one has the string range and at least one
 * has a non-primitive range.
 *
 * @param aTS the type system the feature belongs to
 * @param aFeature the feature to inspect
 * @return {@code true} if all of the above conditions hold, {@code false} otherwise
 */
private boolean isSlotFeature(TypeSystem aTS, Feature aFeature) {
    // Only multi-valued features can be slots
    if (!FSUtil.isMultiValuedFeature(aTS, aFeature)) {
        return false;
    }

    // The element type of the multi-valued range is the candidate link type
    Type linkType = aFeature.getRange().getComponentType();
    if (linkType == null) {
        return false;
    }

    // The link type has to sit directly below TOP
    boolean derivedDirectlyFromTop = aTS.getTopType().equals(aTS.getParent(linkType));
    if (!derivedDirectlyFromTop) {
        return false;
    }

    // A link-with-role type carries exactly two features
    if (linkType.getFeatures().size() != 2) {
        return false;
    }

    // One of them must be string-valued (the role) ...
    boolean hasStringFeature = linkType.getFeatures().stream()
            .anyMatch(f -> f.getRange().getName().equals(CAS.TYPE_NAME_STRING));
    if (!hasStringFeature) {
        return false;
    }

    // ... and one must point to a non-primitive value (the link target).
    return linkType.getFeatures().stream().anyMatch(f -> !f.getRange().isPrimitive());
}
use of org.apache.uima.cas.Feature in project webanno by webanno.
the class RemoveDanglingRelationsRepair method repair.
/**
 * Removes relation annotations whose source or target is no longer indexed in the CAS.
 * <p>
 * An annotation is treated as a relation when its type declares both the
 * {@link WebAnnoConst#FEAT_REL_SOURCE} and {@link WebAnnoConst#FEAT_REL_TARGET} features.
 * Affected relations are collected first and removed from the indexes only after the
 * iteration over the annotation index has finished.
 *
 * @param aProject the project the CAS belongs to (not used by this repair)
 * @param aCas the CAS to repair
 * @param aMessages receives one INFO message with the number of removed relations, if any
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    Set<FeatureStructure> nonIndexed = getNonIndexedFSes(aCas);
    Set<FeatureStructure> toDelete = new LinkedHashSet<>();
    for (AnnotationFS fs : aCas.getAnnotationIndex()) {
        Type t = fs.getType();
        Feature sourceFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_SOURCE);
        Feature targetFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_TARGET);
        // Is this a relation?
        if (sourceFeat == null || targetFeat == null) {
            continue;
        }
        FeatureStructure source = fs.getFeatureValue(sourceFeat);
        FeatureStructure target = fs.getFeatureValue(targetFeat);
        // Does it point to deleted spans?
        if (nonIndexed.contains(source) || nonIndexed.contains(target)) {
            toDelete.add(fs);
        }
    }
    // Delete those relations that pointed to deleted spans
    if (!toDelete.isEmpty()) {
        toDelete.forEach(aCas::removeFsFromIndexes);
        // Report how many relations were actually removed, not how many
        // non-indexed feature structures exist in the CAS.
        aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed [%d] dangling relations.", toDelete.size()));
    }
}
use of org.apache.uima.cas.Feature in project dkpro-lab by dkpro.
the class SimpleExecutionEngine method run.
/**
 * Executes a {@link UimaTask}: creates a collection reader and an analysis engine from the
 * task's descriptors, passes every document delivered by the reader through the engine and
 * reports the reader's progress to the task context after each document.
 * <p>
 * The life cycle manager of the task context is notified in this order: {@code initialize},
 * {@code begin}, then {@code complete} on success or {@code fail} on any error, and finally
 * {@code destroy} in all cases.
 *
 * @param aConfiguration the task to run; must be an instance of {@link UimaTask}
 * @return the id of the task context created for this run
 * @throws ExecutionException if {@code aConfiguration} is not a {@link UimaTask}, or wrapping
 *         any {@link Throwable} other than a {@link LifeCycleException} raised during the run
 * @throws LifeCycleException rethrown as-is when raised during the run
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
if (!(aConfiguration instanceof UimaTask)) {
throw new ExecutionException("This engine can only execute [" + UimaTask.class.getName() + "]");
}
UimaTask configuration = (UimaTask) aConfiguration;
// Create persistence service for injection into analysis components
TaskContext ctx = contextFactory.createContext(aConfiguration);
try {
ResourceManager resMgr = newDefaultResourceManager();
// Make sure the descriptor is fully resolved. It will be modified and
// thus should not be modified again afterwards by UIMA.
AnalysisEngineDescription analysisDesc = configuration.getAnalysisEngineDescription(ctx);
analysisDesc.resolveImports(resMgr);
// Give the engine a default name if the descriptor does not carry one
if (analysisDesc.getMetaData().getName() == null) {
analysisDesc.getMetaData().setName("Analysis for " + aConfiguration.getType());
}
// Scan components that accept the service and bind it to them
bindResource(analysisDesc, TaskContext.class, TaskContextProvider.class, TaskContextProvider.PARAM_FACTORY_NAME, contextFactory.getId(), TaskContextProvider.PARAM_CONTEXT_ID, ctx.getId());
// Set up UIMA context & logging
Logger logger = new UimaLoggingAdapter(ctx);
UimaContextAdmin uimaCtx = newUimaContext(logger, resMgr, newConfigurationManager());
// Set up reader
CollectionReaderDescription readerDesc = configuration.getCollectionReaderDescription(ctx);
// Give the reader a default name if the descriptor does not carry one
if (readerDesc.getMetaData().getName() == null) {
readerDesc.getMetaData().setName("Reader for " + aConfiguration.getType());
}
// The reader is created with the UIMA context and resource manager set up above
Map<String, Object> addReaderParam = new HashMap<String, Object>();
addReaderParam.put(Resource.PARAM_UIMA_CONTEXT, uimaCtx);
addReaderParam.put(Resource.PARAM_RESOURCE_MANAGER, resMgr);
CollectionReader reader = produceCollectionReader(readerDesc, resMgr, addReaderParam);
// Set up analysis engine
AnalysisEngine engine;
if (analysisDesc.isPrimitive()) {
engine = new PrimitiveAnalysisEngine_impl();
} else {
engine = new AggregateAnalysisEngine_impl();
}
Map<String, Object> addEngineParam = new HashMap<String, Object>();
// NOTE(review): the next two lines write to addReaderParam (again), not to
// addEngineParam, so addEngineParam is still empty when passed to engine.initialize
// and the engine does not receive uimaCtx/resMgr. This looks like a copy/paste slip;
// confirm whether sharing the reader's UIMA context with the engine is actually wanted
// before changing it.
addReaderParam.put(Resource.PARAM_UIMA_CONTEXT, uimaCtx);
addReaderParam.put(Resource.PARAM_RESOURCE_MANAGER, resMgr);
engine.initialize(analysisDesc, addEngineParam);
// Now the setup is complete
ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
// Start recording
ctx.getLifeCycleManager().begin(ctx, aConfiguration);
// Run the experiment
// Apply the engine to all documents provided by the reader
// The CAS is created from the combined metadata of reader and engine
List<ResourceMetaData> metaData = new ArrayList<ResourceMetaData>();
metaData.add(reader.getMetaData());
metaData.add(engine.getMetaData());
CAS cas = CasCreationUtils.createCas(metaData);
while (reader.hasNext()) {
reader.getNext(cas);
engine.process(cas);
// Fetch the document title (if the document annotation type has such a feature)
// before the CAS is reset, so it can still be used in the progress message below
String documentTitle = "";
Feature documentTitleFeature = cas.getDocumentAnnotation().getType().getFeatureByBaseName("documentTitle");
if (documentTitleFeature != null) {
documentTitle = cas.getDocumentAnnotation().getFeatureValueAsString(documentTitleFeature);
}
// The single CAS instance is reused for the next document
cas.reset();
Progress[] progresses = reader.getProgress();
if (progresses != null) {
for (Progress p : progresses) {
ctx.message("Progress " + readerDesc.getImplementationName() + " " + p.getCompleted() + "/" + p.getTotal() + " " + p.getUnit() + " " + "(" + documentTitle + ")");
}
}
}
// Shut down engine and reader
// (only reached on the success path; on an exception these calls are skipped)
engine.collectionProcessComplete();
reader.close();
engine.destroy();
reader.destroy();
// End recording
ctx.getLifeCycleManager().complete(ctx, aConfiguration);
return ctx.getId();
} catch (LifeCycleException e) {
// Life cycle errors are reported to the life cycle manager and rethrown unchanged
ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
throw e;
} catch (Throwable e) {
// Everything else is reported and wrapped, keeping the original as cause
ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
throw new ExecutionException(e);
} finally {
// ctx has already been dereferenced inside the try block, so this null check
// is purely defensive
if (ctx != null) {
ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
}
}
}
Aggregations