Search in sources :

Example 81 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class WebannoTsv2Reader method createSpanAnnotation.

/**
 * Reads the annotation columns of one token line and creates or extends the
 * corresponding span annotations in the CAS.
 * <p>
 * For every layer in {@code aLayers} and every feature of that layer, one column
 * is consumed from {@code lineTk}. When the layer is a key of
 * {@code aRelationayers} and the feature is the last one of the layer, one
 * additional column (the relation targets) is consumed as well. Each column is
 * split on {@code |} into stacked values, which are numbered from 1 via a local
 * {@code index}. Each value is handled as follows:
 * <ul>
 * <li>no {@code B-}/{@code I-}/{@code O-} prefix and not {@code _} or {@code O}:
 * an annotation covering exactly this token is created (once per index, then
 * reused for further features) and the feature value is set;</li>
 * <li>{@code O-_}, {@code B-_}, {@code I-_}: only the index is advanced;</li>
 * <li>{@code B-...}: an annotation is created or the one stored for the index is
 * reused, the feature is set, and the marker {@code "B-"} is recorded;</li>
 * <li>{@code I-...}: the annotation stored for the index gets its end moved to
 * the end of this token and the marker {@code "I-"} is recorded;</li>
 * <li>anything else: the layer's entry in {@code aAnnotations} is set to
 * {@code null}.</li>
 * </ul>
 * After all features of a layer are processed, every marker still equal to
 * {@code "B-"} for indices {@code 1..lastIndex} is changed to {@code "E-"}.
 *
 * @param aJcas the JCas in which annotations are created and indexed
 * @param aTokenStart begin offset used for annotations created for this token
 * @param aLayers layer type to its features; one column is read per feature
 * @param aRelationayers only its key set is consulted here, to decide whether a
 *        relation target column follows the last feature column of a layer
 * @param aAnnotations per layer, stacking index to the annotation last stored
 *        for it by the {@code B-} branch; updated in place
 * @param aBeginEndAnno per layer, stacking index to the last marker
 *        ({@code "B-"}, {@code "I-"} or {@code "E-"}); updated in place
 * @param aTokenAnnotations per layer, token number column to the annotations
 *        created at this token; updated in place
 * @param aRelationTargets per layer, token number column to the relation target
 *        strings read for this token; updated in place
 * @param lineTk tokenizer over the columns of the current line, positioned at
 *        the first column to consume
 * @param aToken the token text; its length determines the end offset
 * @param aTokenNumberColumn key under which this token's entries are stored in
 *        {@code aTokenAnnotations} and {@code aRelationTargets}
 */
private void createSpanAnnotation(JCas aJcas, int aTokenStart, Map<Type, Set<Feature>> aLayers, Map<Type, Type> aRelationayers, Map<Type, Map<Integer, AnnotationFS>> aAnnotations, Map<Type, Map<Integer, String>> aBeginEndAnno, Map<Type, Map<String, List<AnnotationFS>>> aTokenAnnotations, Map<Type, Map<String, List<String>>> aRelationTargets, StringTokenizer lineTk, String aToken, String aTokenNumberColumn) {
    for (Type layer : aLayers.keySet()) {
        // Highest stacking index seen; overwritten after every feature column, so
        // after the loop it reflects the last feature column of this layer.
        int lastIndex = 1;
        // If a layer is bound to a single token but has multiple features, the
        // annotation is created once (keyed by stacking index) and the further
        // feature values are set on that same annotation.
        Map<Integer, AnnotationFS> singleTokenMultiFeature = new HashMap<>();
        // The relation target column is read only once all feature columns of
        // the layer have been obtained, i.e. when this counter reaches zero.
        int numberOfFeaturesPerLayer = aLayers.get(layer).size();
        for (Feature feature : aLayers.get(layer)) {
            numberOfFeaturesPerLayer--;
            // 1-based position of the current value within the "|"-separated column
            int index = 1;
            String multipleAnnotations = lineTk.nextToken();
            String relationTargetNumbers = null;
            if (aRelationayers.containsKey(layer) && numberOfFeaturesPerLayer == 0) {
                relationTargetNumbers = lineTk.nextToken();
            }
            // Cursor into relationTargets; advanced each time a target is consumed
            int i = 0;
            String[] relationTargets = null;
            if (relationTargetNumbers != null) {
                relationTargets = relationTargetNumbers.split("\\|");
            }
            for (String annotation : multipleAnnotations.split("\\|")) {
                // If annotation is not on multiple spans (no B-/I-/O- prefix) and
                // is not one of the "no value" markers "_" / "O"
                if (!(annotation.startsWith("B-") || annotation.startsWith("I-") || annotation.startsWith("O-")) && !(annotation.equals("_") || annotation.equals("O"))) {
                    AnnotationFS newAnnotation;
                    // Create the annotation only once per stacking index
                    if (singleTokenMultiFeature.get(index) == null) {
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                        singleTokenMultiFeature.put(index, newAnnotation);
                    } else {
                        newAnnotation = singleTokenMultiFeature.get(index);
                    }
                    // A value starting with the layer name is treated as "no
                    // feature value" and replaced by null
                    if (annotation.startsWith(layer.getName())) {
                        annotation = null;
                    }
                    newAnnotation.setFeatureValueFromString(feature, annotation);
                    aJcas.addFsToIndexes(newAnnotation);
                    // Set the POS to the token
                    // NOTE(review): the key expression concatenates left to right as
                    // text, yielding <start>"-"<start><length> rather than
                    // <start>"-"<end>; confirm it matches how indexedTokens is filled.
                    if (layer.getName().equals(POS.class.getName())) {
                        indexedTokens.get(aTokenStart + "-" + aTokenStart + aToken.length()).setPos((POS) newAnnotation);
                    }
                    // Set the Lemma to the token (same key expression as for POS)
                    if (layer.getName().equals(Lemma.class.getName())) {
                        indexedTokens.get(aTokenStart + "-" + aTokenStart + aToken.length()).setLemma((Lemma) newAnnotation);
                    }
                    // On the last feature of a relation layer, record the next
                    // relation target for this token
                    if (aRelationayers.containsKey(layer) && numberOfFeaturesPerLayer == 0) {
                        Map<String, List<String>> targets = aRelationTargets.get(layer);
                        if (targets == null) {
                            List<String> governors = new ArrayList<>();
                            governors.add(relationTargets[i]);
                            targets = new HashMap<>();
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        } else {
                            List<String> governors = targets.get(aTokenNumberColumn);
                            if (governors == null) {
                                governors = new ArrayList<>();
                            }
                            governors.add(relationTargets[i]);
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        }
                    }
                    // Register the annotation under this token's number. This runs
                    // for every feature column, so a reused annotation is appended
                    // again for each further feature.
                    Map<String, List<AnnotationFS>> tokenAnnotations = aTokenAnnotations.get(layer);
                    if (tokenAnnotations == null) {
                        tokenAnnotations = new HashMap<>();
                    }
                    List<AnnotationFS> relAnnos = tokenAnnotations.get(aTokenNumberColumn);
                    if (relAnnos == null) {
                        relAnnos = new ArrayList<>();
                    }
                    relAnnos.add(newAnnotation);
                    tokenAnnotations.put(aTokenNumberColumn, relAnnos);
                    aTokenAnnotations.put(layer, tokenAnnotations);
                    index++;
                } else // O-_, B-_ and I-_ are position markers: only advance the index
                if (annotation.equals("O-_") || annotation.equals("B-_") || annotation.equals("I-_")) {
                    index++;
                } else if (annotation.startsWith("B-")) {
                    boolean isNewAnnotation = true;
                    Map<Integer, AnnotationFS> indexedAnnos = aAnnotations.get(layer);
                    Map<Integer, String> indexedBeginEndAnnos = aBeginEndAnno.get(layer);
                    AnnotationFS newAnnotation;
                    if (indexedAnnos == null) {
                        // Nothing stored for this layer yet: start fresh maps
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                        indexedAnnos = new LinkedHashMap<>();
                        indexedBeginEndAnnos = new LinkedHashMap<>();
                    } else if (indexedAnnos.get(index) == null) {
                        // Nothing stored at this stacking index yet
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else if (indexedAnnos.get(index) != null && indexedBeginEndAnnos.get(index).equals("E-")) {
                        // The annotation stored at this index was marked as ended
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else // last marker at this index was I-: start a new annotation
                    if (indexedBeginEndAnnos.get(index).equals("I-")) {
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else {
                        // Last marker is still B- (set by an earlier feature column
                        // of this token): reuse that annotation
                        newAnnotation = indexedAnnos.get(index);
                        isNewAnnotation = false;
                    }
                    // Remove the two-character B- prefix before setting the
                    // feature value
                    annotation = (annotation.substring(2));
                    if (annotation.startsWith(layer.getName())) {
                        annotation = null;
                    }
                    newAnnotation.setFeatureValueFromString(feature, annotation);
                    aJcas.addFsToIndexes(newAnnotation);
                    indexedAnnos.put(index, newAnnotation);
                    indexedBeginEndAnnos.put(index, "B-");
                    aAnnotations.put(layer, indexedAnnos);
                    // NOTE(review): unlike the single-token branch above, this check
                    // does not require numberOfFeaturesPerLayer == 0, yet
                    // relationTargets is only non-null on the last feature; a relation
                    // layer with several features would appear to hit a null array
                    // here - confirm.
                    if (aRelationayers.containsKey(layer)) {
                        Map<String, List<String>> targets = aRelationTargets.get(layer);
                        if (targets == null) {
                            List<String> governors = new ArrayList<>();
                            governors.add(relationTargets[i]);
                            targets = new HashMap<>();
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        } else {
                            List<String> governors = targets.get(aTokenNumberColumn);
                            if (governors == null) {
                                governors = new ArrayList<>();
                            }
                            governors.add(relationTargets[i]);
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        }
                    }
                    // Only a freshly created annotation is registered under this
                    // token's number
                    Map<String, List<AnnotationFS>> tokenAnnotations = aTokenAnnotations.get(layer);
                    if (isNewAnnotation) {
                        if (tokenAnnotations == null) {
                            tokenAnnotations = new HashMap<>();
                        }
                        List<AnnotationFS> relAnnos = tokenAnnotations.get(aTokenNumberColumn);
                        if (relAnnos == null) {
                            relAnnos = new ArrayList<>();
                        }
                        relAnnos.add(newAnnotation);
                        tokenAnnotations.put(aTokenNumberColumn, relAnnos);
                        aTokenAnnotations.put(layer, tokenAnnotations);
                    }
                    aBeginEndAnno.put(layer, indexedBeginEndAnnos);
                    index++;
                } else if (annotation.startsWith("I-")) {
                    // Continuation: record the I- marker and extend the stored
                    // annotation to the end of this token.
                    // NOTE(review): both per-layer maps and the entry at this index
                    // are dereferenced without null checks, so this relies on a
                    // preceding B- value having populated them.
                    Map<Integer, String> indexedBeginEndAnnos = aBeginEndAnno.get(layer);
                    indexedBeginEndAnnos.put(index, "I-");
                    aBeginEndAnno.put(layer, indexedBeginEndAnnos);
                    Map<Integer, AnnotationFS> indexedAnnos = aAnnotations.get(layer);
                    AnnotationFS newAnnotation = indexedAnnos.get(index);
                    ((Annotation) newAnnotation).setEnd(aTokenStart + aToken.length());
                    index++;
                } else {
                    // Remaining values (e.g. "_" or "O"): drop the stored
                    // annotations of this layer
                    aAnnotations.put(layer, null);
                    index++;
                }
            }
            lastIndex = index - 1;
        }
        // A marker still at B- after this token (no I- followed within the token's
        // columns) is switched to E-, so the next B- at that index starts a new
        // annotation instead of reusing this one.
        for (int i = 1; i <= lastIndex; i++) {
            if (aBeginEndAnno.get(layer) != null && aBeginEndAnno.get(layer).get(i) != null && aBeginEndAnno.get(layer).get(i).equals("B-")) {
                aBeginEndAnno.get(layer).put(i, "E-");
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) Feature(org.apache.uima.cas.Feature) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 82 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class TypeSystemAnalysis method isSlotFeature.

/**
 * Decides whether a feature has the shape of a slot (link) feature.
 * <p>
 * All of the following must hold: the feature is multi-valued according to
 * {@code FSUtil.isMultiValuedFeature}; its range has a component type (the link
 * type); the parent of the link type equals the type system's top type; the link
 * type has exactly two features; at least one of them has the range
 * {@code CAS.TYPE_NAME_STRING} (the role) and at least one has a non-primitive
 * range (the link target).
 *
 * @param aTS the type system used for the multi-value and parent-type checks
 * @param aFeature the feature to inspect
 * @return {@code true} if every check passes, {@code false} otherwise
 */
private boolean isSlotFeature(TypeSystem aTS, Feature aFeature) {
    // A slot feature holds several links, so it has to be multi-valued
    boolean multiValued = FSUtil.isMultiValuedFeature(aTS, aFeature);
    if (!multiValued) {
        return false;
    }

    // The element type of the multi-valued range is the link type
    Type candidateLinkType = aFeature.getRange().getComponentType();
    if (candidateLinkType == null) {
        return false;
    }

    // The link type must have the top type as its parent
    Type parentOfLinkType = aTS.getParent(candidateLinkType);
    if (!aTS.getTopType().equals(parentOfLinkType)) {
        return false;
    }

    // The link type must have exactly two features (link-with-role)
    List<Feature> linkTypeFeatures = candidateLinkType.getFeatures();
    if (linkTypeFeatures.size() != 2) {
        return false;
    }

    // One feature has to carry the role as a string ...
    boolean hasRoleFeature = linkTypeFeatures.stream()
            .anyMatch(candidate -> candidate.getRange().getName().equals(CAS.TYPE_NAME_STRING));
    if (!hasRoleFeature) {
        return false;
    }

    // ... and one has to point to a non-primitive value (the link target).
    // If that is also the case, this looks like a slot feature.
    return linkTypeFeatures.stream()
            .anyMatch(candidate -> !candidate.getRange().isPrimitive());
}
Also used : FEAT_REL_SOURCE(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_SOURCE) FEAT_REL_TARGET(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_TARGET) TypeDescription(org.apache.uima.resource.metadata.TypeDescription) LoggerFactory(org.slf4j.LoggerFactory) CAS(org.apache.uima.cas.CAS) Feature(org.apache.uima.cas.Feature) HashMap(java.util.HashMap) LinkMode(de.tudarmstadt.ukp.clarin.webanno.model.LinkMode) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) FSUtil(org.apache.uima.fit.util.FSUtil) ArrayList(java.util.ArrayList) Type(org.apache.uima.cas.Type) HashSet(java.util.HashSet) ArrayListValuedHashMap(org.apache.commons.collections4.multimap.ArrayListValuedHashMap) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) Arrays.asList(java.util.Arrays.asList) ListValuedMap(org.apache.commons.collections4.ListValuedMap) Map(java.util.Map) TypeSystem(org.apache.uima.cas.TypeSystem) StringUtils.trimToNull(org.apache.commons.lang3.StringUtils.trimToNull) Logger(org.slf4j.Logger) MultiValueMode(de.tudarmstadt.ukp.clarin.webanno.model.MultiValueMode) WebAnnoConst(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst) Set(java.util.Set) RELATION_TYPE(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.RELATION_TYPE) SPAN_TYPE(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.SPAN_TYPE) Collectors(java.util.stream.Collectors) CasCreationUtils(org.apache.uima.util.CasCreationUtils) List(java.util.List) FeatureDescription(org.apache.uima.resource.metadata.FeatureDescription) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) StringUtils.removeEnd(org.apache.commons.lang3.StringUtils.removeEnd) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) CHAIN_TYPE(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.CHAIN_TYPE) Optional(java.util.Optional) Type(org.apache.uima.cas.Type) Feature(org.apache.uima.cas.Feature) 
AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)

Example 83 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class RemoveDanglingRelationsRepair method repair.

/**
 * Removes relation annotations whose source or target is contained in the set
 * returned by {@code getNonIndexedFSes(aCas)}.
 * <p>
 * An annotation counts as a relation when its type has both a
 * {@code WebAnnoConst.FEAT_REL_SOURCE} and a {@code WebAnnoConst.FEAT_REL_TARGET}
 * feature. Matching relations are first collected and only afterwards removed
 * from the indexes, so the annotation index is not modified while it is being
 * iterated. If at least one relation was removed, a single INFO message with the
 * number of removed relations is appended to {@code aMessages}.
 *
 * @param aProject not used by this repair
 * @param aCas the CAS to scan and repair
 * @param aMessages receives the summary message when relations were removed
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    Set<FeatureStructure> nonIndexed = getNonIndexedFSes(aCas);
    Set<FeatureStructure> toDelete = new LinkedHashSet<>();
    for (AnnotationFS fs : aCas.getAnnotationIndex()) {
        Type t = fs.getType();
        Feature sourceFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_SOURCE);
        Feature targetFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_TARGET);
        // Only types with both a source and a target feature are relations
        if (sourceFeat == null || targetFeat == null) {
            continue;
        }
        FeatureStructure source = fs.getFeatureValue(sourceFeat);
        FeatureStructure target = fs.getFeatureValue(targetFeat);
        // Does it point to deleted spans?
        if (nonIndexed.contains(source) || nonIndexed.contains(target)) {
            toDelete.add(fs);
        }
    }
    // Delete those relations that pointed to deleted spans
    if (!toDelete.isEmpty()) {
        toDelete.forEach(aCas::removeFsFromIndexes);
        // Report how many relations were removed (toDelete), not how many
        // non-indexed feature structures exist in the CAS.
        aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed [%d] dangling relations.", toDelete.size()));
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) LinkedHashSet(java.util.LinkedHashSet) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) Feature(org.apache.uima.cas.Feature)

Example 84 with Feature

use of org.apache.uima.cas.Feature in project dkpro-lab by dkpro.

the class SimpleExecutionEngine method run.

/**
 * Executes a {@link UimaTask}: builds a collection reader and an analysis engine
 * from the task's descriptors, feeds every document of the reader through the
 * engine using a single reused CAS, and reports reader progress to the task
 * context after each document.
 * <p>
 * Life-cycle callbacks are issued on the context's life-cycle manager in this
 * order: {@code initialize}, {@code begin}, then {@code complete} on success or
 * {@code fail} on any error, and finally {@code destroy} in all cases. The reader
 * and the engine are closed/destroyed only on the success path.
 *
 * @param aConfiguration the task to run; must be a {@link UimaTask}
 * @return the id of the task context created for this run
 * @throws ExecutionException if the task is not a {@link UimaTask}, or wrapping
 *         any throwable other than a {@link LifeCycleException} raised while
 *         running
 * @throws LifeCycleException rethrown unchanged after the life-cycle manager was
 *         notified of the failure
 */
@Override
public String run(Task aConfiguration) throws ExecutionException, LifeCycleException {
    if (!(aConfiguration instanceof UimaTask)) {
        throw new ExecutionException("This engine can only execute [" + UimaTask.class.getName() + "]");
    }
    UimaTask uimaTask = (UimaTask) aConfiguration;

    // Context (persistence service) that gets injected into analysis components
    TaskContext taskContext = contextFactory.createContext(aConfiguration);
    try {
        ResourceManager resourceManager = newDefaultResourceManager();

        // Resolve the engine descriptor completely up front. It is modified below
        // and should not be modified again afterwards by UIMA.
        AnalysisEngineDescription engineDescription = uimaTask.getAnalysisEngineDescription(taskContext);
        engineDescription.resolveImports(resourceManager);
        if (engineDescription.getMetaData().getName() == null) {
            engineDescription.getMetaData().setName("Analysis for " + aConfiguration.getType());
        }

        // Bind the task context to every component that accepts it
        bindResource(engineDescription, TaskContext.class, TaskContextProvider.class, TaskContextProvider.PARAM_FACTORY_NAME, contextFactory.getId(), TaskContextProvider.PARAM_CONTEXT_ID, taskContext.getId());

        // UIMA context with a logger that forwards to the task context
        Logger uimaLogger = new UimaLoggingAdapter(taskContext);
        UimaContextAdmin uimaContext = newUimaContext(uimaLogger, resourceManager, newConfigurationManager());

        // Reader
        CollectionReaderDescription readerDescription = uimaTask.getCollectionReaderDescription(taskContext);
        if (readerDescription.getMetaData().getName() == null) {
            readerDescription.getMetaData().setName("Reader for " + aConfiguration.getType());
        }
        Map<String, Object> readerParams = new HashMap<String, Object>();
        readerParams.put(Resource.PARAM_UIMA_CONTEXT, uimaContext);
        readerParams.put(Resource.PARAM_RESOURCE_MANAGER, resourceManager);
        CollectionReader collectionReader = produceCollectionReader(readerDescription, resourceManager, readerParams);

        // Analysis engine: primitive or aggregate, depending on the descriptor
        AnalysisEngine analysisEngine = engineDescription.isPrimitive()
                ? new PrimitiveAnalysisEngine_impl()
                : new AggregateAnalysisEngine_impl();
        Map<String, Object> engineParams = new HashMap<String, Object>();
        // NOTE(review): the two entries below go into the reader's map, so the
        // engine is initialized with an empty parameter map. This looks like a
        // copy-paste slip, but it is kept as is: handing the reader's UIMA context
        // to the engine may change how its configuration is set up - confirm
        // before changing.
        readerParams.put(Resource.PARAM_UIMA_CONTEXT, uimaContext);
        readerParams.put(Resource.PARAM_RESOURCE_MANAGER, resourceManager);
        analysisEngine.initialize(engineDescription, engineParams);

        // Setup is complete; start recording
        taskContext.getLifeCycleManager().initialize(taskContext, aConfiguration);
        taskContext.getLifeCycleManager().begin(taskContext, aConfiguration);

        // One CAS, built from the metadata of reader and engine, is reused for
        // all documents
        List<ResourceMetaData> casMetaData = new ArrayList<ResourceMetaData>();
        casMetaData.add(collectionReader.getMetaData());
        casMetaData.add(analysisEngine.getMetaData());
        CAS sharedCas = CasCreationUtils.createCas(casMetaData);

        // Apply the engine to all documents provided by the reader
        while (collectionReader.hasNext()) {
            collectionReader.getNext(sharedCas);
            analysisEngine.process(sharedCas);

            // The title has to be read before the CAS is reset
            Feature titleFeature = sharedCas.getDocumentAnnotation().getType().getFeatureByBaseName("documentTitle");
            String documentTitle = titleFeature != null
                    ? sharedCas.getDocumentAnnotation().getFeatureValueAsString(titleFeature)
                    : "";
            sharedCas.reset();

            Progress[] readerProgress = collectionReader.getProgress();
            if (readerProgress == null) {
                continue;
            }
            for (Progress step : readerProgress) {
                taskContext.message("Progress " + readerDescription.getImplementationName() + " " + step.getCompleted() + "/" + step.getTotal() + " " + step.getUnit() + " " + "(" + documentTitle + ")");
            }
        }

        // Shut down engine and reader
        analysisEngine.collectionProcessComplete();
        collectionReader.close();
        analysisEngine.destroy();
        collectionReader.destroy();

        // End recording
        taskContext.getLifeCycleManager().complete(taskContext, aConfiguration);
        return taskContext.getId();
    } catch (LifeCycleException lifeCycleFailure) {
        taskContext.getLifeCycleManager().fail(taskContext, aConfiguration, lifeCycleFailure);
        throw lifeCycleFailure;
    } catch (Throwable failure) {
        taskContext.getLifeCycleManager().fail(taskContext, aConfiguration, failure);
        throw new ExecutionException(failure);
    } finally {
        if (taskContext != null) {
            taskContext.getLifeCycleManager().destroy(taskContext, aConfiguration);
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LifeCycleException(org.dkpro.lab.engine.LifeCycleException) Logger(org.apache.uima.util.Logger) Feature(org.apache.uima.cas.Feature) PrimitiveAnalysisEngine_impl(org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl) UimaTask(org.dkpro.lab.uima.task.UimaTask) ExecutionException(org.dkpro.lab.engine.ExecutionException) UimaLoggingAdapter(org.dkpro.lab.uima.task.impl.UimaLoggingAdapter) Progress(org.apache.uima.util.Progress) TaskContext(org.dkpro.lab.engine.TaskContext) UIMAFramework.produceCollectionReader(org.apache.uima.UIMAFramework.produceCollectionReader) CollectionReader(org.apache.uima.collection.CollectionReader) ResourceManager(org.apache.uima.resource.ResourceManager) UIMAFramework.newDefaultResourceManager(org.apache.uima.UIMAFramework.newDefaultResourceManager) AggregateAnalysisEngine_impl(org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) CAS(org.apache.uima.cas.CAS) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) UimaContextAdmin(org.apache.uima.UimaContextAdmin) ResourceMetaData(org.apache.uima.resource.metadata.ResourceMetaData) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Aggregations

Feature (org.apache.uima.cas.Feature)84 Type (org.apache.uima.cas.Type)62 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)50 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)48 ArrayList (java.util.ArrayList)23 FeatureStructure (org.apache.uima.cas.FeatureStructure)18 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)18 JCas (org.apache.uima.jcas.JCas)18 List (java.util.List)15 Test (org.junit.Test)14 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)13 WebAnnoCasUtil.setFeature (de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.setFeature)12 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)12 CAS (org.apache.uima.cas.CAS)10 HashSet (java.util.HashSet)8 LinkedHashMap (java.util.LinkedHashMap)8 Map (java.util.Map)8 HashMap (java.util.HashMap)7 TypeSystem (org.apache.uima.cas.TypeSystem)7 AnnotationException (de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException)6