Search in sources :

Example 6 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class WebannoTsv3Reader method addAnnotationWithNoFeature.

/**
 * Registers the annotations of one unit for a layer whose column carries no feature values.
 * <p>
 * The first stored value for {@code aUnit} is read from {@code annotationsPerPostion}. A value of
 * {@code "_"} leaves everything untouched. Otherwise the value is split on unescaped {@code |}
 * and each part on unescaped {@code ;}. An entry ending in {@code ]} carries a reference id
 * between its brackets (bracket content containing {@code "_"} is read as {@code 0}). When that
 * id is already known in {@code aMultiTokUnits}, the stored annotation gets its end offset set to
 * {@code aEnd}; otherwise the annotation taken from {@code aAnnos} is added to the CAS indexes
 * and remembered under the id. In both cases {@code setAnnoRefPerUnit} is called.
 *
 * @param aJCas
 *            the JCas whose indexes receive newly seen annotations
 * @param aType
 *            the layer type being processed
 * @param aUnit
 *            the annotation unit the column value belongs to
 * @param aAnnos
 *            candidate annotations, indexed by the position of the {@code |}-separated part
 * @param aMultiTokUnits
 *            annotations seen so far, per unit and reference id; updated in place
 * @param aEnd
 *            end offset written to an already known annotation
 * @param aRef
 *            the reference id to start from when an entry does not carry its own
 * @return the reference id after processing (incremented once per processed entry)
 */
private int addAnnotationWithNoFeature(JCas aJCas, Type aType, AnnotationUnit aUnit, List<AnnotationFS> aAnnos, Map<AnnotationUnit, Map<Integer, AnnotationFS>> aMultiTokUnits, int aEnd, int aRef) {
    String cell = annotationsPerPostion.get(aType).get(aUnit).get(0);
    if (cell.equals("_")) {
        return aRef;
    }
    // Separators only count when they are not preceded by a backslash
    String stackSeparator = "(?<!\\\\)" + Pattern.quote("|");
    String slotSeparator = "(?<!\\\\)" + Pattern.quote(";");
    int stackIndex = 0;
    for (String stacked : cell.split(stackSeparator)) {
        for (String entry : stacked.split(slotSeparator)) {
            if (entry.endsWith("]")) {
                int open = entry.indexOf("[");
                String refText = entry.substring(open + 1, entry.length() - 1);
                aRef = refText.contains("_") ? 0 : Integer.parseInt(refText);
                // Strip the reference suffix from the entry
                entry = entry.substring(0, open);
            }
            // Scan every unit; a later unit holding the same id overrides an earlier hit
            boolean alreadyKnown = false;
            AnnotationFS known = null;
            for (Map<Integer, AnnotationFS> annosByRef : aMultiTokUnits.values()) {
                for (Map.Entry<Integer, AnnotationFS> candidate : annosByRef.entrySet()) {
                    if (candidate.getKey() == aRef) {
                        alreadyKnown = true;
                        known = candidate.getValue();
                        break;
                    }
                }
            }
            if (alreadyKnown) {
                Feature endFeature = aType.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                known.setIntValue(endFeature, aEnd);
                setAnnoRefPerUnit(aUnit, aType, aRef, known);
            } else {
                aMultiTokUnits.putIfAbsent(aUnit, new HashMap<>());
                Map<Integer, AnnotationFS> refsOfUnit = aMultiTokUnits.get(aUnit);
                AnnotationFS fresh = aAnnos.get(stackIndex);
                refsOfUnit.put(aRef, fresh);
                aJCas.addFsToIndexes(fresh);
                setAnnoRefPerUnit(aUnit, aType, aRef, fresh);
            }
            aRef++;
        }
        stackIndex++;
    }
    return aRef;
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) Feature(org.apache.uima.cas.Feature)

Example 7 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class WebannoTsv3Reader method setAnnosPerTypePerUnit.

/**
 * Stores the column values of one TSV line per layer type and annotation unit, so they can be
 * accessed later while the document is read for the first time. <br>
 * Layers are visited in the iteration order of {@code allLayers}. A layer without features
 * consumes exactly one column; any other layer consumes one column per feature and additionally
 * gets {@code lines[0]} (the line number) appended as its last stored value.
 *
 * @param lines
 *            TSV lines exported from WebAnno
 * @param unit
 *            the annotation unit (Token or sub-tokens)
 * @param ind
 *            index of the annotation, from the TAB separated annotations in the TSV lines
 */
private void setAnnosPerTypePerUnit(String[] lines, AnnotationUnit unit, int ind) {
    int column = ind;
    for (Type layer : allLayers.keySet()) {
        annotationsPerPostion.putIfAbsent(layer, new LinkedHashMap<>());
        int featureCount = allLayers.get(layer).size();
        // A feature-less layer still occupies a single column
        int columnsToRead = featureCount == 0 ? 1 : featureCount;
        for (int read = 0; read < columnsToRead; read++) {
            annotationsPerPostion.get(layer).put(unit, annotationsPerPostion.get(layer).getOrDefault(unit, new ArrayList<>()));
            annotationsPerPostion.get(layer).get(unit).add(lines[column]);
            column++;
        }
        if (featureCount > 0) {
            // Add at the last position the line number
            // It will be used to get Annotation unit
            annotationsPerPostion.get(layer).get(unit).add(lines[0]);
        }
    }
}
Also used : Type(org.apache.uima.cas.Type) ArrayList(java.util.ArrayList) Feature(org.apache.uima.cas.Feature)

Example 8 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class WebannoTsv3Writer method setRelationAnnotation.

/**
 * Collects the column values of every relation layer listed in {@code relationLayers}, except
 * the Token layer, and stores the result in {@code annotationsPerPostion} when anything was
 * collected.
 * <p>
 * For each relation the governor and dependent annotations are resolved to annotation units and,
 * where the unit is flagged in {@code ambigUnits}, to reference ids taken from
 * {@code annotaionRefPerType}. The actual column text is produced by
 * {@code setRelationAnnoPerFeature}.
 *
 * @param aJCas
 *            the JCas holding the relation annotations
 */
private void setRelationAnnotation(JCas aJCas) {
    for (String layerName : relationLayers) {
        if (layerName.equals(Token.class.getName())) {
            continue;
        }
        // Continue with what was already collected for this layer, if anything
        Map<AnnotationUnit, List<List<String>>> valuesPerUnit = annotationsPerPostion.get(layerName);
        if (valuesPerUnit == null) {
            valuesPerUnit = new HashMap<>();
        }
        Type relationType = getType(aJCas.getCas(), layerName);
        Feature dependentFeature = null;
        Feature governorFeature = null;
        for (Feature candidate : relationType.getFeatures()) {
            String shortName = candidate.getShortName();
            if (shortName.equals(DEPENDENT)) {
                dependentFeature = candidate;
            }
            if (shortName.equals(GOVERNOR)) {
                governorFeature = candidate;
            }
        }
        for (AnnotationFS relation : CasUtil.select(aJCas.getCas(), relationType)) {
            AnnotationFS dependent = (AnnotationFS) relation.getFeatureValue(dependentFeature);
            AnnotationFS governor = (AnnotationFS) relation.getFeatureValue(governorFeature);
            Type govType = governor.getType();

            // Prefer the first unit; fall back to the full unit when the first one is not registered
            AnnotationUnit govUnit = getFirstUnit(getUnit(governor.getBegin(), governor.getEnd(), governor.getCoveredText()));
            if (ambigUnits.get(govType.getName()).get(govUnit) == null) {
                govUnit = getUnit(governor.getBegin(), governor.getEnd(), governor.getCoveredText());
            }
            // The dependent is looked up under the governor's type as well
            AnnotationUnit depUnit = getFirstUnit(getUnit(dependent.getBegin(), dependent.getEnd(), dependent.getCoveredText()));
            if (ambigUnits.get(govType.getName()).get(depUnit) == null) {
                depUnit = getUnit(dependent.getBegin(), dependent.getEnd(), dependent.getCoveredText());
            }

            // Dependency relations take their references from the POS layer
            if (relationType.getName().equals(Dependency.class.getName())) {
                govType = aJCas.getCas().getTypeSystem().getType(POS.class.getName());
            }

            // The WebAnno world do not ever process Token as an annotation
            boolean mayCarryRef = !govType.getName().equals(Token.class.getName());
            int govRef = 0;
            if (mayCarryRef && ambigUnits.get(govType.getName()).get(govUnit).equals(true)) {
                govRef = annotaionRefPerType.get(govType).get(governor);
            }
            int depRef = 0;
            if (mayCarryRef && ambigUnits.get(govType.getName()).get(depUnit).equals(true)) {
                depRef = annotaionRefPerType.get(govType).get(dependent);
            }
            setRelationAnnoPerFeature(valuesPerUnit, relationType, relation, depUnit, govUnit, govRef, depRef, govType);
        }
        if (!valuesPerUnit.isEmpty()) {
            annotationsPerPostion.put(layerName, valuesPerUnit);
        }
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) ArrayList(java.util.ArrayList) List(java.util.List) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Feature(org.apache.uima.cas.Feature)

Example 9 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class WebannoTsv3Writer method setChainAnnoPerFeature.

/**
 * Builds the column values of one chain link and records them for the given unit.
 * <p>
 * The sofa/begin/end features and the GOVERNOR, DEPENDENT, FIRST and NEXT features are skipped.
 * For every remaining feature the escaped value (or {@code *} when it is null) is suffixed with
 * {@code ->chainNo-linkNo} for the {@code REF_REL} feature and with {@code [chainNo]} for any
 * other feature. If no feature contributed a value, a single {@code *[chainNo]} placeholder is
 * recorded. The unit is always marked as ambiguous for this layer.
 *
 * @param aAnnotationsPertype
 *            values collected so far per unit; the new value list is appended for {@code aUnit}
 * @param aType
 *            the chain link type whose features are written
 * @param aFs
 *            the annotation the feature values are read from
 * @param aUnit
 *            the annotation unit the values belong to
 * @param aLinkNo
 *            number of the link inside its chain
 * @param achainNo
 *            number of the chain
 * @param aMultiUnit
 *            not evaluated; the same suffix is written for single- and multi-unit links
 * @param aFirst
 *            not evaluated by this method
 */
private void setChainAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType, AnnotationFS aFs, AnnotationUnit aUnit, int aLinkNo, int achainNo, boolean aMultiUnit, boolean aFirst) {
    List<String> annoPerFeatures = new ArrayList<>();
    for (Feature feature : aType.getFeatures()) {
        if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end") || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT) || feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
            continue;
        }
        String annotation = aFs.getFeatureValueAsString(feature);
        if (annotation == null) {
            annotation = "*";
        } else {
            annotation = replaceEscapeChars(annotation);
        }
        if (feature.getShortName().equals(REF_REL)) {
            annotation = annotation + "->" + achainNo + "-" + aLinkNo;
        } else {
            // Single- and multi-unit links are written with the same chain suffix
            annotation = annotation + "[" + achainNo + "]";
        }
        // Ensure the layer's feature-name set exists before adding to it, matching
        // setSpanAnnoPerFeature; a missing entry would otherwise cause a NullPointerException
        featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());
        featurePerLayer.get(aType.getName()).add(feature.getShortName());
        annoPerFeatures.add(annotation);
    }
    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    ambigUnits.putIfAbsent(aType.getName(), new HashMap<>());
    // coref are always ambig
    ambigUnits.get(aType.getName()).put(aUnit, true);
    // A layer without any written feature still needs a placeholder column value
    if (annoPerFeatures.isEmpty()) {
        annoPerFeatures.add("*" + "[" + achainNo + "]");
    }
    aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
}
Also used : ArrayList(java.util.ArrayList) Feature(org.apache.uima.cas.Feature)

Example 10 with Feature

use of org.apache.uima.cas.Feature in project webanno by webanno.

the class WebannoTsv3Writer method setSpanAnnoPerFeature.

/**
 * Builds the column values of one span annotation and records them for the given unit.
 * <p>
 * The sofa/begin/end features and the GOVERNOR, DEPENDENT, FIRST and NEXT features are skipped.
 * A slot feature contributes two values: the {@code ;}-joined roles and the {@code ;}-joined
 * targets (line number, optional {@code -typeNumber}, optional {@code [ref]}), or {@code _} twice
 * when the slot is unset. Any other feature contributes its escaped value (or {@code *} when it
 * is null), followed by {@code [ref]} when the reference id is positive. If no value was
 * produced at all, a {@code *} placeholder is recorded.
 *
 * @param aAnnotationsPertype
 *            values collected so far per unit; the new value list is appended for {@code aUnit}
 * @param aType
 *            the span layer type whose features are written
 * @param aFs
 *            the annotation the feature values are read from
 * @param aUnit
 *            the annotation unit the values belong to
 * @param aIsMultiToken
 *            passed through to {@code setAnnoFeature}
 * @param aIsFirst
 *            passed through to {@code setAnnoFeature}
 */
private void setSpanAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType, AnnotationFS aFs, AnnotationUnit aUnit, boolean aIsMultiToken, boolean aIsFirst) {
    List<String> columnValues = new ArrayList<>();
    featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());

    // A reference id is only written for units flagged as ambiguous
    int ref = getRefId(aType, aFs, aUnit);
    if (ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)) != null && ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)).equals(false)) {
        ref = 0;
    }
    if (ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)) == null && ambigUnits.get(aType.getName()).get(aUnit).equals(false)) {
        ref = 0;
    }

    for (Feature feature : aType.getFeatures()) {
        String description = feature.toString();
        String shortName = feature.getShortName();
        boolean isBuiltIn = description.equals("uima.cas.AnnotationBase:sofa") || description.equals("uima.tcas.Annotation:begin") || description.equals("uima.tcas.Annotation:end");
        boolean isLinkage = shortName.equals(GOVERNOR) || shortName.equals(DEPENDENT) || shortName.equals(FIRST) || shortName.equals(NEXT);
        if (isBuiltIn || isLinkage) {
            continue;
        }

        boolean isSlotFeature = slotFeatures != null && slotFeatures.contains(feature.getName());
        if (!isSlotFeature) {
            String value = aFs.getFeatureValueAsString(feature);
            // Escape special character
            value = value == null ? "*" : replaceEscapeChars(value);
            value = value + (ref > 0 ? "[" + ref + "]" : "");
            // only add BIO markers to multiple annotations
            setAnnoFeature(aIsMultiToken, aIsFirst, columnValues, value);
            featurePerLayer.get(aType.getName()).add(shortName);
            continue;
        }

        if (aFs.getFeatureValue(feature) == null) {
            // An unset slot is written as an empty role and an empty target
            columnValues.add("_");
            columnValues.add("_");
        } else {
            ArrayFS links = (ArrayFS) aFs.getFeatureValue(feature);
            StringBuilder roles = new StringBuilder();
            StringBuilder targets = new StringBuilder();
            for (FeatureStructure link : links.toArray()) {
                String role = link.getStringValue(link.getType().getFeatureByBaseName("role"));
                AnnotationFS target = (AnnotationFS) link.getFeatureValue(link.getType().getFeatureByBaseName("target"));
                Type targetType = target.getType();
                AnnotationUnit targetUnit = getFirstUnit(target);
                // NOTE(review): ref is reassigned here, so features handled after this slot (and
                // the final placeholder) see the last target's reference id - confirm intended
                ref = getRefId(targetType, target, targetUnit);
                // Check if the target is ambiguous or not
                if (ambigUnits.get(targetType.getName()).get(targetUnit).equals(false)) {
                    ref = 0;
                }
                // Escape special character
                role = role == null ? "*" : replaceEscapeChars(role);

                // Every link after the first non-empty role is preceded by a separator
                if (roles.length() >= 1) {
                    roles.append(";");
                    targets.append(";");
                }
                roles.append(role);
                // record the actual target type column number if slot target is
                // uima.tcas.Annotation
                int targetTypeNumber = 0;
                if (slotFeatureTypes.get(feature).getName().equals(CAS.TYPE_NAME_ANNOTATION)) {
                    targetTypeNumber = layerMaps.get(targetType);
                }
                targets.append(unitsLineNumber.get(targetUnit)).append(targetTypeNumber == 0 ? "" : "-" + targetTypeNumber).append(ref > 0 ? "[" + ref + "]" : "");
            }
            columnValues.add(roles.length() == 0 ? "_" : roles.toString());
            columnValues.add(targets.length() == 0 ? "_" : targets.toString());
        }
        featurePerLayer.get(aType.getName()).add(ROLE + feature.getName() + "_" + slotLinkTypes.get(feature.getName()));
        featurePerLayer.get(aType.getName()).add(slotFeatureTypes.get(feature).getName());
    }

    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    // If the layer do not have a feature at all, add dummy * as a place holder
    if (columnValues.isEmpty()) {
        setAnnoFeature(aIsMultiToken, aIsFirst, columnValues, "*" + (ref > 0 ? "[" + ref + "]" : ""));
    }
    aAnnotationsPertype.get(aUnit).add(columnValues);
}
Also used : ArrayList(java.util.ArrayList) Feature(org.apache.uima.cas.Feature) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) ArrayFS(org.apache.uima.cas.ArrayFS)

Aggregations

Feature (org.apache.uima.cas.Feature)84 Type (org.apache.uima.cas.Type)62 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)50 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)48 ArrayList (java.util.ArrayList)23 FeatureStructure (org.apache.uima.cas.FeatureStructure)18 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)18 JCas (org.apache.uima.jcas.JCas)18 List (java.util.List)15 Test (org.junit.Test)14 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)13 WebAnnoCasUtil.setFeature (de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.setFeature)12 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)12 CAS (org.apache.uima.cas.CAS)10 HashSet (java.util.HashSet)8 LinkedHashMap (java.util.LinkedHashMap)8 Map (java.util.Map)8 HashMap (java.util.HashMap)7 TypeSystem (org.apache.uima.cas.TypeSystem)7 AnnotationException (de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException)6