Search in sources :

Example 1 with LowLevelCAS

use of org.apache.uima.cas.impl.LowLevelCAS in project webanno by webanno.

the class WebannoTsv2Writer method setRelationFeatureAnnos.

/**
 * Records the value of one feature of every relation annotation of {@code aType} on the first
 * token covered by the relation's dependent.
 *
 * <p>The map key is the low-level CAS reference of that token. When several relations end on
 * the same token, their values are concatenated with {@code "|"}. A relation whose feature has
 * no string value is recorded as the type name followed by {@code "_"}. For types listed in
 * {@code multipleSpans} the value is prefixed with {@code "B-"}; because only the first covered
 * token is ever written, no other prefix can occur.
 *
 * @param aCas        the CAS holding the relation annotations and tokens
 * @param aRelAnnoMap output map from token reference to the accumulated feature values
 * @param aType       the relation type whose annotations are processed
 * @param aFeature    the feature whose string value is recorded
 * @throws CASRuntimeException if accessing the CAS fails
 * @throws CASException        if the JCas view of {@code aCas} cannot be obtained
 */
private void setRelationFeatureAnnos(CAS aCas, Map<Integer, String> aRelAnnoMap, Type aType, Feature aFeature) throws CASRuntimeException, CASException {
    LowLevelCAS llCas = aCas.getLowLevelCAS();
    // Locate the feature that points to the dependent end of the relation.
    Feature dependent = null;
    for (Feature feature : aType.getFeatures()) {
        if (feature.getShortName().equals(DEPENDENT)) {
            dependent = feature;
        }
    }
    for (AnnotationFS relation : CasUtil.select(aCas, aType)) {
        // relation annotation will be from Governor to Dependent
        // Entry done on Dependent side
        AnnotationFS dependentAnno = (AnnotationFS) relation.getFeatureValue(dependent);
        for (Token token : selectCovered(aCas.getJCas(), Token.class, dependentAnno.getBegin(), dependentAnno.getEnd())) {
            if (dependentAnno.getBegin() <= token.getBegin() && dependentAnno.getEnd() >= token.getEnd()) {
                String annotation = relation.getFeatureValueAsString(aFeature);
                if (annotation == null) {
                    annotation = aType.getName() + "_";
                }
                if (multipleSpans.contains(aType.getName())) {
                    // Only the first covered token is written, so it is always a "begin" token.
                    annotation = "B-" + annotation;
                }
                int tokenRef = llCas.ll_getFSRef(token);
                // Insert the value, or append it to what is already stored for this token.
                aRelAnnoMap.merge(tokenRef, annotation, (existing, added) -> existing + "|" + added);
            }
            // we just need an arc to the first token.
            break;
        }
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) LowLevelCAS(org.apache.uima.cas.impl.LowLevelCAS) Feature(org.apache.uima.cas.Feature)

Example 2 with LowLevelCAS

use of org.apache.uima.cas.impl.LowLevelCAS in project webanno by webanno.

the class WebannoTsv2Writer method convertToTsv.

/**
 * Writes the annotations of the given JCas to the output stream as tab-separated text.
 *
 * <p>The output starts with a single header line that lists every exported span type
 * ({@code " # typeName"}) followed by its features ({@code " | featureName"}), then every
 * relation type with its features and an {@code " | AttachTo=..."} entry. After the header, each
 * sentence is written as an {@code #id=} line, a {@code #text=} line and one line per token
 * containing the token id, the token text, one column per span feature, and for every relation
 * type one column per relation feature plus one column with the governor token ids. Sentences
 * are separated by an empty line.
 *
 * <p>Token, Sentence, DocumentMetaData, TagsetDescription and CoreferenceLink annotations are
 * not exported as annotation types, and types that carry a {@code FIRST} or {@code NEXT}
 * feature are skipped (coreference annotation not supported).
 *
 * @param aJCas     the JCas to export
 * @param aOs       the stream the text is written to; it is not closed by this method
 * @param aEncoding the character encoding used for writing
 * @throws IOException                     if writing to {@code aOs} fails
 * @throws ResourceInitializationException declared for callers; not thrown directly here
 * @throws CASRuntimeException             if accessing the CAS fails
 * @throws CASException                    if a CAS view cannot be obtained
 */
private void convertToTsv(JCas aJCas, OutputStream aOs, String aEncoding) throws IOException, ResourceInitializationException, CASRuntimeException, CASException {
    LowLevelCAS llCas = aJCas.getLowLevelCas();
    tokenIds = new HashMap<>();
    setTokenId(aJCas, tokenIds);
    tokenPositions = new TreeMap<>();
    setTokenPosition(aJCas, tokenPositions);
    // NOTE(review): this map is filled but not read in this method; the call is kept because
    // its other effects, if any, are not visible here.
    Map<Integer, Integer> tokensPerSentence = new TreeMap<>();
    setTokenSentenceAddress(aJCas, tokensPerSentence);

    // list of annotation types
    Set<Type> allTypes = new LinkedHashSet<>();
    for (Annotation a : select(aJCas, Annotation.class)) {
        if (!(a instanceof Token || a instanceof Sentence || a instanceof DocumentMetaData || a instanceof TagsetDescription || a instanceof CoreferenceLink)) {
            allTypes.add(a.getType());
        }
    }

    // get all arc types: a type is a relation type when it has a governor feature
    Set<Type> relationTypes = new LinkedHashSet<>();
    for (Type type : allTypes) {
        for (Feature feature : type.getFeatures()) {
            if (feature.getShortName().equals(GOVERNOR)) {
                relationTypes.add(type);
                break;
            }
        }
    }
    allTypes.removeAll(relationTypes);

    // relation annotations: remember which type each relation type attaches to
    Map<Type, String> relationTypesMap = new HashMap<>();
    for (Type type : relationTypes) {
        if (type.getName().equals(Dependency.class.getName())) {
            relationTypesMap.put(type, POS.class.getName());
            continue;
        }
        for (AnnotationFS anno : CasUtil.select(aJCas.getCas(), type)) {
            for (Feature feature : type.getFeatures()) {
                if (feature.getShortName().equals(GOVERNOR)) {
                    relationTypesMap.put(type, anno.getFeatureValue(feature).getType().getName());
                }
            }
        }
    }

    // header: all span annotations first
    Map<Feature, Type> spanFeatures = new LinkedHashMap<>();
    for (Type type : allTypes) {
        // coreference annotation not supported
        if (type.getFeatures().isEmpty() || hasChainFeature(type)) {
            continue;
        }
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            if (isBuiltInAnnotationFeature(feature)) {
                continue;
            }
            spanFeatures.put(feature, type);
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
    }

    // header: then all relation annotations
    for (Type type : relationTypes) {
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            if (isBuiltInAnnotationFeature(feature) || isRelationEndpointFeature(feature)) {
                continue;
            }
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
        // Add the attach type for the relation annotation
        IOUtils.write(" | AttachTo=" + relationTypesMap.get(type), aOs, aEncoding);
    }
    IOUtils.write("\n", aOs, aEncoding);

    // span feature values per token
    Map<Feature, Map<Integer, String>> allAnnos = new HashMap<>();
    for (Type type : allTypes) {
        // coreference annotation not supported
        if (hasChainFeature(type)) {
            continue;
        }
        for (Feature feature : type.getFeatures()) {
            if (isBuiltInAnnotationFeature(feature)) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new TreeMap<>();
            setTokenAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            allAnnos.put(feature, tokenAnnoMap);
        }
    }

    // get tokens where dependents are drawn to
    Map<Feature, Map<Integer, String>> relAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        for (Feature feature : type.getFeatures()) {
            if (isBuiltInAnnotationFeature(feature) || isRelationEndpointFeature(feature)) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new HashMap<>();
            setRelationFeatureAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            relAnnos.put(feature, tokenAnnoMap);
        }
    }

    // get tokens where dependents are drawn from - the governor
    Map<Type, Map<Integer, String>> governorAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        Map<Integer, String> govAnnoMap = new HashMap<>();
        setRelationGovernorPos(aJCas.getCas(), govAnnoMap, type);
        governorAnnos.put(type, govAnnoMap);
    }

    int sentId = 1;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        IOUtils.write("#id=" + sentId++ + "\n", aOs, aEncoding);
        IOUtils.write("#text=" + sentence.getCoveredText().replace("\n", "") + "\n", aOs, aEncoding);
        for (Token token : selectCovered(Token.class, sentence)) {
            // All per-token maps are keyed by the token's low-level reference.
            int tokenRef = llCas.ll_getFSRef(token);
            IOUtils.write(tokenIds.get(tokenRef) + "\t" + token.getCoveredText() + "\t", aOs, aEncoding);

            // all span annotations on this token
            for (Map.Entry<Feature, Type> span : spanFeatures.entrySet()) {
                String annos = allAnnos.get(span.getKey()).get(tokenRef);
                if (annos != null) {
                    IOUtils.write(annos + "\t", aOs, aEncoding);
                } else if (multipleSpans.contains(span.getValue().getName())) {
                    IOUtils.write("O\t", aOs, aEncoding);
                } else {
                    IOUtils.write("_\t", aOs, aEncoding);
                }
            }

            // all relation annotations ending on this token
            for (Type type : relationTypes) {
                for (Feature feature : type.getFeatures()) {
                    if (isBuiltInAnnotationFeature(feature) || isRelationEndpointFeature(feature)) {
                        continue;
                    }
                    String annos = relAnnos.get(feature).get(tokenRef);
                    IOUtils.write(annos == null ? "_\t" : annos + "\t", aOs, aEncoding);
                }
                // the governor positions
                String govPos = governorAnnos.get(type).get(tokenRef);
                IOUtils.write(govPos == null ? "_\t" : govPos + "\t", aOs, aEncoding);
            }
            IOUtils.write("\n", aOs, aEncoding);
        }
        IOUtils.write("\n", aOs, aEncoding);
    }
}

/** Returns true for the sofa, begin and end features that every annotation carries. */
private boolean isBuiltInAnnotationFeature(Feature aFeature) {
    String name = aFeature.toString();
    return name.equals("uima.cas.AnnotationBase:sofa") || name.equals("uima.tcas.Annotation:begin") || name.equals("uima.tcas.Annotation:end");
}

/** Returns true when the feature is the governor or the dependent end of a relation. */
private boolean isRelationEndpointFeature(Feature aFeature) {
    String shortName = aFeature.getShortName();
    return shortName.equals(GOVERNOR) || shortName.equals(DEPENDENT);
}

/** Returns true when the type has a FIRST or NEXT feature, which marks unsupported coreference types. */
private boolean hasChainFeature(Type aType) {
    for (Feature feature : aType.getFeatures()) {
        String shortName = feature.getShortName();
        if (shortName.equals(FIRST) || shortName.equals(NEXT)) {
            return true;
        }
    }
    return false;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Feature(org.apache.uima.cas.Feature) TagsetDescription(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription) LinkedHashMap(java.util.LinkedHashMap) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) CoreferenceLink(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) LowLevelCAS(org.apache.uima.cas.impl.LowLevelCAS) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) TreeMap(java.util.TreeMap) Annotation(org.apache.uima.jcas.tcas.Annotation) Type(org.apache.uima.cas.Type) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) NavigableMap(java.util.NavigableMap) TreeMap(java.util.TreeMap)

Example 3 with LowLevelCAS

use of org.apache.uima.cas.impl.LowLevelCAS in project webanno by webanno.

the class WebannoTsv2Writer method setTokenId.

/**
 * Assigns every token an id of the form {@code "<sentence number>-<token number>"}, both
 * counted from 1, and stores it under the token's low-level CAS reference.
 *
 * @param aJCas         the JCas whose sentences and tokens are numbered
 * @param aTokenAddress output map from token reference to the generated id
 */
private void setTokenId(JCas aJCas, Map<Integer, String> aTokenAddress) {
    LowLevelCAS lowLevel = aJCas.getLowLevelCas();
    int sentenceNo = 0;
    for (Sentence currentSentence : select(aJCas, Sentence.class)) {
        sentenceNo++;
        // Token numbering restarts with every sentence.
        int tokenNo = 0;
        for (Token currentToken : selectCovered(Token.class, currentSentence)) {
            int tokenRef = lowLevel.ll_getFSRef(currentToken);
            tokenNo++;
            String id = sentenceNo + "-" + tokenNo;
            aTokenAddress.put(tokenRef, id);
        }
    }
}
Also used : Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) LowLevelCAS(org.apache.uima.cas.impl.LowLevelCAS) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 4 with LowLevelCAS

use of org.apache.uima.cas.impl.LowLevelCAS in project webanno by webanno.

the class WebannoTsv2Writer method setRelationGovernorPos.

/**
 * Records, for every relation annotation of {@code aType}, the token id of its governor on the
 * first token covered by its dependent.
 *
 * <p>The map key is the low-level CAS reference of the dependent's first covered token. The
 * governor's token id is looked up in {@code tokenIds} via the {@code tokenPositions} entry at
 * or before the governor's begin offset. Ids of several relations ending on the same token are
 * joined with {@code "|"}.
 *
 * @param aCas                 the CAS holding the relation annotations and tokens
 * @param aRelationGovernorMap output map from token reference to governor token id(s)
 * @param aType                the relation type whose annotations are processed
 * @throws CASRuntimeException if accessing the CAS fails
 * @throws CASException        if the JCas view of {@code aCas} cannot be obtained
 */
private void setRelationGovernorPos(CAS aCas, Map<Integer, String> aRelationGovernorMap, Type aType) throws CASRuntimeException, CASException {
    // Find the two features that hold the ends of the relation.
    Feature governorFeature = null;
    Feature dependentFeature = null;
    for (Feature candidate : aType.getFeatures()) {
        String shortName = candidate.getShortName();
        if (shortName.equals(GOVERNOR)) {
            governorFeature = candidate;
        }
        if (shortName.equals(DEPENDENT)) {
            dependentFeature = candidate;
        }
    }
    LowLevelCAS lowLevel = aCas.getLowLevelCAS();
    for (AnnotationFS relation : CasUtil.select(aCas, aType)) {
        // The arc runs from governor to dependent; the entry is made on the dependent side.
        AnnotationFS dependentAnno = (AnnotationFS) relation.getFeatureValue(dependentFeature);
        for (Token token : selectCovered(aCas.getJCas(), Token.class, dependentAnno.getBegin(), dependentAnno.getEnd())) {
            boolean covered = dependentAnno.getBegin() <= token.getBegin() && dependentAnno.getEnd() >= token.getEnd();
            if (covered) {
                int tokenRef = lowLevel.ll_getFSRef(token);
                String existing = aRelationGovernorMap.get(tokenRef);
                AnnotationFS governorAnno = (AnnotationFS) relation.getFeatureValue(governorFeature);
                String governorId = tokenIds.get(tokenPositions.floorEntry(governorAnno.getBegin()).getValue());
                // Start a new entry, or append to the ids already recorded for this token.
                aRelationGovernorMap.put(tokenRef, existing == null ? governorId : existing + "|" + governorId);
            }
            // Only the first covered token receives the arc.
            break;
        }
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Feature(org.apache.uima.cas.Feature) LowLevelCAS(org.apache.uima.cas.impl.LowLevelCAS)

Example 5 with LowLevelCAS

use of org.apache.uima.cas.impl.LowLevelCAS in project webanno by webanno.

the class CasDoctorUtils method collectReachable.

/**
 * Passes every indexed feature structure of the CAS to {@code collect} and returns the set that
 * was filled in the process.
 *
 * <p>The returned set is ordered by the low-level CAS reference of its elements.
 * NOTE(review): {@code collect} presumably also adds feature structures reachable from each
 * indexed one; confirm against its implementation.
 *
 * @param aCas the CAS whose index repository is traversed
 * @return the collected feature structures, ordered by low-level reference
 */
public static Set<FeatureStructure> collectReachable(CAS aCas) {
    LowLevelCAS lowLevel = aCas.getLowLevelCAS();
    Comparator<FeatureStructure> byReference = (left, right) -> Integer.compare(lowLevel.ll_getFSRef(left), lowLevel.ll_getFSRef(right));
    Set<FeatureStructure> reachable = new TreeSet<>(byReference);
    // Iterate over everything indexed under the top type, i.e. all indexed feature structures.
    FSIterator<FeatureStructure> indexed = aCas.getIndexRepository().getAllIndexedFS(aCas.getTypeSystem().getTopType());
    while (indexed.hasNext()) {
        collect(reachable, indexed.next());
    }
    return reachable;
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) TreeSet(java.util.TreeSet) LowLevelCAS(org.apache.uima.cas.impl.LowLevelCAS)

Aggregations

LowLevelCAS (org.apache.uima.cas.impl.LowLevelCAS): 8; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 5; AnnotationFS (org.apache.uima.cas.text.AnnotationFS): 4; Feature (org.apache.uima.cas.Feature): 3; FeatureStructure (org.apache.uima.cas.FeatureStructure): 3; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 2; TreeMap (java.util.TreeMap): 2; TreeSet (java.util.TreeSet): 2; CoreferenceLink (de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink): 1; POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS): 1; DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 1; TagsetDescription (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription): 1; Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency): 1; HashMap (java.util.HashMap): 1; LinkedHashMap (java.util.LinkedHashMap): 1; LinkedHashSet (java.util.LinkedHashSet): 1; Map (java.util.Map): 1; NavigableMap (java.util.NavigableMap): 1; Type (org.apache.uima.cas.Type): 1; TypeSystem (org.apache.uima.cas.TypeSystem): 1