Search in sources :

Example 46 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class CustomTypesTest method testProfType.

/**
 * Builds a CAS from the test type system, creates two {@code de.tud.Prof} annotations where one
 * references the other through the {@code boss} feature, and prints the {@code fullName} and
 * {@code boss} feature values of the second annotation.
 */
@Test
public void testProfType() throws Exception {
    TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescription("desc.types.TestTypeSystemDescriptor");
    CAS cas = CasCreationUtils.createCas(typeSystemDescription, null, null);
    cas.setDocumentText("I listen to lectures by Prof. Gurevych sometimes.");

    Type profType = cas.getTypeSystem().getType("de.tud.Prof");
    Feature nameFeature = profType.getFeatureByBaseName("fullName");
    Feature bossFeature = profType.getFeatureByBaseName("boss");

    // The boss: a zero-width annotation at the start of the document.
    AnnotationFS boss = cas.createAnnotation(profType, 0, 0);
    boss.setStringValue(nameFeature, "Hans Juergen Proeml");
    cas.addFsToIndexes(boss);

    // The subordinate, pointing at the boss through the "boss" feature.
    AnnotationFS subordinate = cas.createAnnotation(profType, 24, 38);
    subordinate.setStringValue(nameFeature, "Iryna Gurevych");
    subordinate.setFeatureValue(bossFeature, boss);
    cas.addFsToIndexes(subordinate);

    String[] featureNames = { "fullName", "boss" };
    for (String featureName : featureNames) {
        Feature current = subordinate.getType().getFeatureByBaseName(featureName);
        if (current.getRange().isPrimitive()) {
            // Primitive-ranged features are printed via their string representation.
            String primitiveValue = subordinate.getFeatureValueAsString(current);
            System.out.println(primitiveValue);
            continue;
        }
        // Non-primitive features hold another feature structure; print its name and type.
        FeatureStructure referenced = subordinate.getFeatureValue(current);
        System.out.printf("%s (%s)%n", referenced.getFeatureValueAsString(nameFeature), referenced.getType());
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TypeSystem(org.apache.uima.cas.TypeSystem) Type(org.apache.uima.cas.Type) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) CAS(org.apache.uima.cas.CAS) Feature(org.apache.uima.cas.Feature) Test(org.junit.Test)

Example 47 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebannoTsv3Reader method addAnnotations.

/**
 * Importing span annotations including slot annotations.
 *
 * <p>
 * For every type in {@code annotationsPerPostion} and every unit recorded for that type, the
 * per-feature column values are read (one value per feature of {@code allLayers.get(type)}),
 * split into stacked annotations (unescaped {@code |}) and multiple slot values (unescaped
 * {@code ;}), and written to the annotation objects supplied in
 * {@code aAnnosPerTypePerUnit}. Depending on the feature, a value is handled as a slot role,
 * a slot target, a chain relation ({@code REF_REL}), a chain link ({@code REF_LINK}), a
 * dependency feature, or a plain feature value. A value whose reference number is already
 * registered in the per-type multi-token map extends that earlier annotation instead of
 * filling a new one.
 *
 * <p>
 * At the end of each type, the map of annotations per unit and reference number is stored in
 * {@code annosPerRef}.
 *
 * @param aJCas
 *            the JCas in which link feature structures are created and to whose indexes
 *            annotations are added
 * @param aAnnosPerTypePerUnit
 *            for each type and unit, the list of annotation objects to fill; the list is
 *            indexed by the position of the stacked annotation within the column value
 */
private void addAnnotations(JCas aJCas, Map<Type, Map<AnnotationUnit, List<AnnotationFS>>> aAnnosPerTypePerUnit) {
    for (Type type : annotationsPerPostion.keySet()) {
        // unit -> (reference number -> annotation); also used to detect multi-token annotations
        Map<AnnotationUnit, Map<Integer, AnnotationFS>> multiTokUnits = new HashMap<>();
        // running reference number for the annotations of this type
        int ref = 1;
        // to see if it is on multiple token
        AnnotationFS prevAnnoFs = null;
        for (AnnotationUnit unit : annotationsPerPostion.get(type).keySet()) {
            int end = unit.end;
            List<AnnotationFS> annos = aAnnosPerTypePerUnit.get(type).get(unit);
            // index of the current feature's value within the values recorded for this unit
            int j = 0;
            // the most recently seen slot-role feature; passed to addSlotAnnotations below
            Feature linkeF = null;
            // link feature structures created so far for each slot-host annotation of this unit
            Map<AnnotationFS, List<FeatureStructure>> linkFSesPerSlotAnno = new HashMap<>();
            // layers without any feature are delegated to a dedicated helper
            if (allLayers.get(type).size() == 0) {
                ref = addAnnotationWithNoFeature(aJCas, type, unit, annos, multiTokUnits, end, ref);
                continue;
            }
            for (Feature feat : allLayers.get(type)) {
                String anno = annotationsPerPostion.get(type).get(unit).get(j);
                // "_" is skipped: nothing is written for this feature at this unit
                if (!anno.equals("_")) {
                    // index of the current stacked annotation within annos
                    int i = 0;
                    // if it is a slot annotation (multiple slots per
                    // single annotation
                    // (Target1<--role1--Base--role2-->Target2)
                    int slot = 0;
                    boolean targetAdd = false;
                    // split on "|" unless it is preceded by a backslash
                    String stackedAnnoRegex = "(?<!\\\\)" + Pattern.quote("|");
                    String[] stackedAnnos = anno.split(stackedAnnoRegex);
                    for (String mAnnos : stackedAnnos) {
                        // split on ";" unless it is preceded by a backslash
                        String multipleSlotAnno = "(?<!\\\\)" + Pattern.quote(";");
                        for (String mAnno : mAnnos.split(multipleSlotAnno)) {
                            String depRef = "";
                            String multSpliter = "(?<!\\\\)" + Pattern.quote("[");
                            // is this slot target ambiguous?
                            boolean ambigTarget = false;
                            // A value of the form "value[x]" carries a bracketed suffix. The text
                            // between the brackets is kept in depRef; if it contains no "_" it is
                            // parsed as the reference number, otherwise ref is left unchanged.
                            if (mAnno.split(multSpliter).length > 1) {
                                ambigTarget = true;
                                depRef = mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1);
                                ref = depRef.contains("_") ? ref : Integer.valueOf(mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1));
                                mAnno = mAnno.substring(0, mAnno.indexOf("["));
                            }
                            // "*" is turned into a null feature value
                            if (mAnno.equals("*")) {
                                mAnno = null;
                            }
                            // Look for an earlier annotation registered under the same reference
                            // number; if one exists, this unit continues that annotation.
                            boolean isMultitoken = false;
                            if (!multiTokUnits.isEmpty() && prevAnnoFs != null && prevAnnoFs.getBegin() != unit.begin) {
                                contAnno: for (AnnotationUnit u : multiTokUnits.keySet()) {
                                    for (Integer r : multiTokUnits.get(u).keySet()) {
                                        if (ref == r) {
                                            isMultitoken = true;
                                            prevAnnoFs = multiTokUnits.get(u).get(r);
                                            break contAnno;
                                        }
                                    }
                                }
                            }
                            if (isMultitoken) {
                                // extend the earlier annotation so that it ends at this unit's end
                                Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                                prevAnnoFs.setIntValue(endF, end);
                                mAnno = getEscapeChars(mAnno);
                                prevAnnoFs.setFeatureValueFromString(feat, mAnno);
                                if (feat.getShortName().equals(REF_LINK)) {
                                    // since REF_REL does not start with BIO,
                                    // update it: replace the entry in annos with the
                                    // extended annotation
                                    annos.set(i, prevAnnoFs);
                                }
                                setAnnoRefPerUnit(unit, type, ref, prevAnnoFs);
                            } else {
                                if (roleLinks.containsKey(feat)) {
                                    // slot role: create a link FS, set its "role" feature and
                                    // remember it for the host annotation
                                    linkeF = feat;
                                    FeatureStructure link = aJCas.getCas().createFS(slotLinkTypes.get(feat));
                                    Feature roleFeat = link.getType().getFeatureByBaseName("role");
                                    mAnno = getEscapeChars(mAnno);
                                    link.setStringValue(roleFeat, mAnno);
                                    linkFSesPerSlotAnno.putIfAbsent(annos.get(i), new ArrayList<>());
                                    linkFSesPerSlotAnno.get(annos.get(i)).add(link);
                                } else if (roleTargets.containsKey(feat)) {
                                    // slot target: fill the target of the slot-th link created
                                    // above for this host annotation
                                    FeatureStructure link = linkFSesPerSlotAnno.get(annos.get(i)).get(slot);
                                    int customTypeNumber = 0;
                                    // with more than two "-"-separated parts, the last part is
                                    // a number used as key into layerMaps and is cut off
                                    if (mAnno.split("-").length > 2) {
                                        customTypeNumber = Integer.valueOf(mAnno.substring(mAnno.lastIndexOf("-") + 1));
                                        mAnno = mAnno.substring(0, mAnno.lastIndexOf("-"));
                                    }
                                    AnnotationUnit targetUnit = token2Units.get(mAnno);
                                    Type tType = null;
                                    if (customTypeNumber == 0) {
                                        tType = roleTargets.get(feat);
                                    } else {
                                        tType = layerMaps.get(customTypeNumber);
                                    }
                                    AnnotationFS targetFs;
                                    // with a bracketed reference the target is looked up by ref,
                                    // otherwise the first annotation registered at the target unit
                                    if (ambigTarget) {
                                        targetFs = annosPerRef.get(tType).get(targetUnit).get(ref);
                                    } else {
                                        targetFs = annosPerRef.get(tType).get(targetUnit).entrySet().iterator().next().getValue();
                                    }
                                    link.setFeatureValue(feat, targetFs);
                                    addSlotAnnotations(linkFSesPerSlotAnno, linkeF);
                                    targetAdd = true;
                                    slot++;
                                } else if (feat.getShortName().equals(REF_REL)) {
                                    // chain relation: the value has the form
                                    // "<relation>-><chainNo>-<linkNo>"
                                    int chainNo = Integer.valueOf(mAnno.split("->")[1].split("-")[0]);
                                    int LinkNo = Integer.valueOf(mAnno.split("->")[1].split("-")[1]);
                                    chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
                                    // this chain link has already been recorded; skip it
                                    if (chainAnnosPerTyep.get(type).get(chainNo) != null && chainAnnosPerTyep.get(type).get(chainNo).get(LinkNo) != null) {
                                        continue;
                                    }
                                    String refRel = mAnno.split("->")[0];
                                    refRel = getEscapeChars(refRel);
                                    if (refRel.equals("*")) {
                                        refRel = null;
                                    }
                                    annos.get(i).setFeatureValueFromString(feat, refRel);
                                    chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
                                    chainAnnosPerTyep.get(type).putIfAbsent(chainNo, new TreeMap<>());
                                    chainAnnosPerTyep.get(type).get(chainNo).put(LinkNo, annos.get(i));
                                } else if (feat.getShortName().equals(REF_LINK)) {
                                    // chain link: set the value and index the annotation
                                    mAnno = getEscapeChars(mAnno);
                                    annos.get(i).setFeatureValueFromString(feat, mAnno);
                                    aJCas.addFsToIndexes(annos.get(i));
                                } else if (depFeatures.get(type) != null && depFeatures.get(type).equals(feat)) {
                                    // dependency feature: depRef, if present, has the form
                                    // "<g>_<d>"; g selects the governor annotation and d the
                                    // dependent annotation by reference number (0 = first one)
                                    int g = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[0]);
                                    int d = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[1]);
                                    Type depType = depTypess.get(type);
                                    AnnotationUnit govUnit = token2Units.get(mAnno);
                                    // the last value recorded for this unit is used as the key
                                    // for looking up the dependent unit
                                    int l = annotationsPerPostion.get(type).get(unit).size();
                                    String thisUnit = annotationsPerPostion.get(type).get(unit).get(l - 1);
                                    AnnotationUnit depUnit = token2Units.get(thisUnit);
                                    AnnotationFS govFs;
                                    AnnotationFS depFs;
                                    // a POS-attached relation is resolved against the tokens
                                    if (depType.getName().equals(POS.class.getName())) {
                                        depType = aJCas.getCas().getTypeSystem().getType(Token.class.getName());
                                        govFs = units2Tokens.get(govUnit);
                                        depFs = units2Tokens.get(unit);
                                    } else // in WebAnno world :)(!
                                    if (depType.getName().equals(Token.class.getName())) {
                                        govFs = units2Tokens.get(govUnit);
                                        depFs = units2Tokens.get(unit);
                                    } else if (g == 0 && d == 0) {
                                        govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
                                        depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
                                    } else if (g == 0) {
                                        govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
                                        depFs = annosPerRef.get(depType).get(depUnit).get(d);
                                    } else {
                                        // NOTE(review): when g != 0 the dependent is always the
                                        // first annotation at depUnit; d is not consulted here
                                        // - confirm this is intended
                                        govFs = annosPerRef.get(depType).get(govUnit).get(g);
                                        depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
                                    }
                                    annos.get(i).setFeatureValue(feat, depFs);
                                    annos.get(i).setFeatureValue(type.getFeatureByBaseName(GOVERNOR), govFs);
                                    // move the relation's begin or end to the dependent's offsets
                                    if (depFs.getBegin() <= annos.get(i).getBegin()) {
                                        Feature beginF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_BEGIN);
                                        annos.get(i).setIntValue(beginF, depFs.getBegin());
                                    } else {
                                        Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                                        annos.get(i).setIntValue(endF, depFs.getEnd());
                                    }
                                    aJCas.addFsToIndexes(annos.get(i));
                                } else {
                                    // plain feature value: register the annotation under its
                                    // reference number, set the value and index the annotation
                                    mAnno = getEscapeChars(mAnno);
                                    multiTokUnits.putIfAbsent(unit, new HashMap<>());
                                    multiTokUnits.get(unit).put(ref, annos.get(i));
                                    prevAnnoFs = annos.get(i);
                                    annos.get(i).setFeatureValueFromString(feat, mAnno);
                                    aJCas.addFsToIndexes(annos.get(i));
                                    setAnnoRefPerUnit(unit, type, ref, annos.get(i));
                                }
                            }
                            // with stacked annotations, the reference number advances per value
                            if (stackedAnnos.length > 1) {
                                ref++;
                            }
                        }
                        // attach the annotation to the token of this unit for the
                        // token-attached layers (POS, lemma, stem, morphological features)
                        if (type.getName().equals(POS.class.getName())) {
                            units2Tokens.get(unit).setPos((POS) annos.get(i));
                        }
                        if (type.getName().equals(Lemma.class.getName())) {
                            units2Tokens.get(unit).setLemma((Lemma) annos.get(i));
                        }
                        if (type.getName().equals(Stem.class.getName())) {
                            units2Tokens.get(unit).setStem((Stem) annos.get(i));
                        }
                        if (type.getName().equals(MorphologicalFeatures.class.getName())) {
                            units2Tokens.get(unit).setMorph((MorphologicalFeatures) annos.get(i));
                        }
                        i++;
                    }
                    // once slot targets have been attached, start with a fresh link map
                    if (targetAdd) {
                        linkFSesPerSlotAnno = new HashMap<>();
                    }
                } else {
                    // no value here: forget the previous annotation
                    prevAnnoFs = null;
                }
                j++;
            }
            // advance the reference number if the last feature of this unit left an annotation
            if (prevAnnoFs != null) {
                ref++;
            }
        }
        annosPerRef.put(type, multiTokUnits);
    }
}
Also used : MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Feature(org.apache.uima.cas.Feature) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 48 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebannoTsv3Reader method addSlotAnnotations.

/**
 * Attaches the collected slot links to their base annotations.
 *
 * <p>
 * For each base annotation, the links are copied into a newly created {@code ArrayFS}, the
 * array is set as the value of the link feature, and the base annotation is added to the
 * indexes of its CAS.
 *
 * @param linkFSesPerAnno
 *            the slot link feature structures, grouped by the base annotation that hosts them
 * @param aLinkeF
 *            the feature of the base annotation that receives the array of links
 */
private void addSlotAnnotations(Map<AnnotationFS, List<FeatureStructure>> linkFSesPerAnno, Feature aLinkeF) {
    for (Map.Entry<AnnotationFS, List<FeatureStructure>> entry : linkFSesPerAnno.entrySet()) {
        AnnotationFS host = entry.getKey();
        List<FeatureStructure> links = entry.getValue();
        int linkCount = links.size();

        ArrayFS linkArray = host.getCAS().createArrayFS(linkCount);
        FeatureStructure[] linksAsArray = links.toArray(new FeatureStructure[linkCount]);
        linkArray.copyFromArray(linksAsArray, 0, 0, linkCount);

        host.setFeatureValue(aLinkeF, linkArray);
        host.getCAS().addFsToIndexes(host);
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) ArrayFS(org.apache.uima.cas.ArrayFS)

Example 49 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebannoTsv3Writer method setChainAnnotation.

/**
 * Walks every chain of every chain layer (except the token layer) and records each link of the
 * chain via {@code addChinFeatureAnno}, numbering chains and links starting from 1. The
 * collected annotations are stored in {@code annotationsPerPostion} under the name of the
 * link type whenever at least one entry exists.
 *
 * @param aJCas
 *            the JCas whose chain feature structures are read
 */
private void setChainAnnotation(JCas aJCas) {
    for (String l : chainLayers) {
        if (l.equals(Token.class.getName())) {
            continue;
        }
        Type chainType = getType(aJCas.getCas(), l + CHAIN);
        Feature firstLinkFeature = chainType.getFeatureByBaseName(FIRST);
        int chainNo = 1;
        for (FeatureStructure chainFs : selectFS(aJCas.getCas(), chainType)) {
            AnnotationFS firstLink = (AnnotationFS) chainFs.getFeatureValue(firstLinkFeature);
            AnnotationUnit unit = getUnit(firstLink.getBegin(), firstLink.getEnd(), firstLink.getCoveredText());
            Type lType = firstLink.getType();
            // the layer that actually carries the annotations is the link type
            String linkLayerName = lType.getName();

            Map<AnnotationUnit, List<List<String>>> annotationsPertype = annotationsPerPostion.get(linkLayerName);
            if (annotationsPertype == null) {
                annotationsPertype = new HashMap<>();
            }

            Feature nextLinkFeature = firstLink.getType().getFeatureByBaseName(NEXT);
            int linkNo = 1;
            for (AnnotationFS current = firstLink; current != null; linkNo++) {
                AnnotationFS following = (AnnotationFS) current.getFeatureValue(nextLinkFeature);
                addChinFeatureAnno(annotationsPertype, lType, current, unit, linkNo, chainNo);
                if (following != null) {
                    // move the unit on to the position of the next link
                    unit = getUnit(following.getBegin(), following.getEnd(), following.getCoveredText());
                }
                current = following;
            }

            if (!annotationsPertype.isEmpty()) {
                annotationsPerPostion.put(linkLayerName, annotationsPertype);
            }
            chainNo++;
        }
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) ArrayList(java.util.ArrayList) List(java.util.List) Feature(org.apache.uima.cas.Feature)

Example 50 with FeatureStructure

use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testSimpleCrossSenenceSlotFeature.

/**
 * Creates a link host on the first token with two slots: one pointing to a span on the second
 * token and one pointing to a span on the token at index 6 of the two-sentence document, then
 * writes the CAS and compares the output against the reference.
 */
@Test
public void testSimpleCrossSenenceSlotFeature() throws Exception {
    JCas jcas = makeJCasTwoSentences();
    CAS cas = jcas.getCas();

    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));
    Token hostToken = allTokens.get(0);
    Token firstTargetToken = allTokens.get(1);
    Token secondTargetToken = allTokens.get(6);

    Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    AnnotationFS firstTarget = cas.createAnnotation(spanType, firstTargetToken.getBegin(), firstTargetToken.getEnd());
    cas.addFsToIndexes(firstTarget);

    AnnotationFS secondTarget = cas.createAnnotation(spanType, secondTargetToken.getBegin(), secondTargetToken.getEnd());
    cas.addFsToIndexes(secondTarget);

    FeatureStructure firstLink = makeLinkFS(jcas, "p1", firstTarget);
    FeatureStructure secondLink = makeLinkFS(jcas, "p2", secondTarget);
    makeLinkHostFS(jcas, hostToken.getBegin(), hostToken.getEnd(), firstLink, secondLink);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"));
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Test(org.junit.Test)

Aggregations

FeatureStructure (org.apache.uima.cas.FeatureStructure)60 Type (org.apache.uima.cas.Type)38 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)37 ArrayList (java.util.ArrayList)29 JCas (org.apache.uima.jcas.JCas)20 Feature (org.apache.uima.cas.Feature)17 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)16 CAS (org.apache.uima.cas.CAS)16 Test (org.junit.Test)16 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)13 List (java.util.List)12 ArrayFS (org.apache.uima.cas.ArrayFS)8 Arrays.asList (java.util.Arrays.asList)6 LinkedHashMap (java.util.LinkedHashMap)6 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)6 AnnotationLayer (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer)5 AnnotatorState (de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState)4 LogMessage (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage)4 TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)4 Map (java.util.Map)4