Search in sources :

Example 1 with Stem

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.

the class Tsv3XDeserializer method getOrCreateSpanAnnotation.

/**
 * Looks up the span annotation belonging to the given column at the given unit and creates
 * it if it does not exist yet.
 * <p>
 * The lookup proceeds in three steps: (1) an annotation of the column's type already
 * registered on the unit at the given stacking index, (2) an annotation registered on the
 * document under the disambiguation ID, whose end is then moved to the end of this unit, and
 * (3) a freshly created annotation covering exactly this unit.
 *
 * @param aCol
 *            the column whose UIMA type determines the annotation type
 * @param aUnit
 *            the unit the annotation is attached to
 * @param aStackingIndex
 *            index used to look up the annotation among those already registered on the unit
 * @param aDisambiguationInfo
 *            the disambiguation ID as a decimal string, or {@code null} if there is none
 * @return the existing or newly created annotation
 */
private AnnotationFS getOrCreateSpanAnnotation(TsvColumn aCol, TsvUnit aUnit, int aStackingIndex, String aDisambiguationInfo) {
    int disambiguationId = -1;
    if (aDisambiguationInfo != null) {
        disambiguationId = Integer.valueOf(aDisambiguationInfo);
    }
    boolean hasDisambiguationId = disambiguationId != -1;

    // Step 1: the annotation may already have been seen in this unit via another column.
    AnnotationFS result = aUnit.getUimaAnnotation(aCol.uimaType, aStackingIndex);

    // Step 2: otherwise it may have been seen in a previous unit under the same ID.
    if (result == null && hasDisambiguationId) {
        result = aUnit.getDocument().getDisambiguatedAnnotation(disambiguationId);
        if (result != null) {
            aUnit.addUimaAnnotation(result);
            // AnnotationFS offers no setEnd(), so the end offset is written through the
            // generic feature setter in order to stretch the annotation over this unit.
            setFeature(result, CAS.FEATURE_BASE_NAME_END, aUnit.getEnd());
        }
    }

    // Step 3: nothing found, so a new annotation spanning this unit is created.
    if (result == null) {
        CAS cas = aUnit.getDocument().getJCas().getCas();
        result = cas.createAnnotation(aCol.uimaType, aUnit.getBegin(), aUnit.getEnd());
        aUnit.addUimaAnnotation(result);

        // Slot features of the new annotation start out as empty lists.
        for (TsvColumn sibling : aUnit.getDocument().getSchema().getColumns(aCol.uimaType)) {
            if (!SLOT_TARGET.equals(sibling.featureType)) {
                continue;
            }
            setFeature(result, sibling.uimaFeature.getShortName(), emptyList());
        }

        // DKPro Core token-attached layers are additionally referenced from the token itself.
        String typeName = aCol.uimaType.getName();
        if (Lemma.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setLemma((Lemma) result);
        } else if (Stem.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setStem((Stem) result);
        } else if (MorphologicalFeatures.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setMorph((MorphologicalFeatures) result);
        } else if (POS.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setPos((POS) result);
        }
    }

    // Register the annotation under its disambiguation ID on the document; the lookup in
    // step 2 reads from this registry when a later unit carries the same ID.
    if (hasDisambiguationId) {
        aUnit.getDocument().addDisambiguationId(result, disambiguationId);
    }
    return result;
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem)

Example 2 with Stem

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testTokenAttachedAnnotationsWithValues.

/**
 * Attaches a lemma, morphological features, a POS tag and a stem (each with a value) to the
 * first token of a one-sentence document and checks the written output for those four span
 * layers.
 */
@Test
public void testTokenAttachedAnnotationsWithValues() throws Exception {
    JCas jcas = makeJCasOneSentence();
    Token firstToken = new ArrayList<Token>(select(jcas, Token.class)).get(0);

    // Lemma
    Lemma lemma = new Lemma(jcas, firstToken.getBegin(), firstToken.getEnd());
    lemma.setValue("lemma1");
    lemma.addToIndexes();
    firstToken.setLemma(lemma);

    // Morphological features
    MorphologicalFeatures morph = new MorphologicalFeatures(jcas, firstToken.getBegin(), firstToken.getEnd());
    morph.setValue("morph");
    morph.setTense("tense1");
    morph.addToIndexes();
    firstToken.setMorph(morph);

    // Part of speech
    POS pos = new POS(jcas, firstToken.getBegin(), firstToken.getEnd());
    pos.setPosValue("pos1");
    pos.addToIndexes();
    firstToken.setPos(pos);

    // Stem
    Stem stem = new Stem(jcas, firstToken.getBegin(), firstToken.getEnd());
    stem.setValue("stem1");
    stem.addToIndexes();
    firstToken.setStem(stem);

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(MorphologicalFeatures.class, POS.class, Lemma.class, Stem.class));
}
Also used : MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem) Test(org.junit.Test)

Example 3 with Stem

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.

the class MergeCas method reMergeCas.

/**
 * Using the {@code DiffResult}, determine the annotations to be deleted from the initial merge
 * CAS. The merge CAS is the entry of {@code aJCases} stored under the key
 * {@code WebAnnoConst#CURATION_USER}.
 * <p>
 * Any similar annotations stacked in a {@code CasDiff2.Position} will be assumed a difference
 * <p>
 * Any two annotations with different values will be assumed a difference
 * <p>
 * Any non-stacked empty/null annotations are assumed agreement
 * <p>
 * Any non-stacked annotations with similar values for each of the features are assumed
 * agreement
 * <p>
 * Any two link mode / slotable annotations which agree on the base features are assumed
 * agreement
 * <p>
 * Annotations on which the users do not agree are removed from the indexes of the merge CAS
 * (and detached from their token if they are POS, stem, lemma or morphological features).
 * For agreed annotations, every link-mode feature is replaced by an array holding only the
 * links on which the users agree.
 *
 * @param aDiff
 *            the {@code CasDiff2.DiffResult}
 * @param aJCases
 *            a map of {@code JCas}es for each user and the merge CAS
 * @return the actual merge {@code JCas}
 */
public static JCas reMergeCas(DiffResult aDiff, Map<String, JCas> aJCases) {
    Set<FeatureStructure> slotFeaturesToReset = new HashSet<>();
    Set<FeatureStructure> annotationsToDelete = new HashSet<>();
    Set<String> users = aJCases.keySet();
    JCas mergeCas = aJCases.get(WebAnnoConst.CURATION_USER);
    for (Position position : aDiff.getPositions()) {
        Map<String, List<FeatureStructure>> annosPerUser = new HashMap<>();
        ConfigurationSet cfgs = aDiff.getConfigurtionSet(position);
        if (cfgs.getConfigurations(WebAnnoConst.CURATION_USER).isEmpty()) {
            // The merge CAS has no annotation at this position, so there is nothing to
            // delete or reset here.
            continue;
        }
        AnnotationFS mergeAnno = (AnnotationFS) cfgs.getConfigurations(WebAnnoConst.CURATION_USER).get(0).getFs(WebAnnoConst.CURATION_USER, aJCases);
        // Get annotations per user in this position
        getAllAnnosOnPosition(aJCases, annosPerUser, users, mergeAnno);
        for (FeatureStructure mergeFs : annosPerUser.get(WebAnnoConst.CURATION_USER)) {
            if (aJCases.size() != annosPerUser.size()) {
                // Incomplete: not every CAS contributed an entry for this position
                annotationsToDelete.add(mergeFs);
            } else if (isAgree(mergeFs, annosPerUser)) {
                // Agreed and not stacked
                Type t = mergeFs.getType();
                Feature sourceFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_SOURCE);
                Feature targetFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_TARGET);
                // Is this a relation?
                if (sourceFeat != null && targetFeat != null) {
                    AnnotationFS source = (AnnotationFS) mergeFs.getFeatureValue(sourceFeat);
                    AnnotationFS target = (AnnotationFS) mergeFs.getFeatureValue(targetFeat);
                    // All span annotations on the source position
                    Map<String, List<FeatureStructure>> sourceAnnosPerUser = new HashMap<>();
                    // All span annotations on the target position
                    Map<String, List<FeatureStructure>> targetAnnosPerUser = new HashMap<>();
                    getAllAnnosOnPosition(aJCases, sourceAnnosPerUser, users, source);
                    getAllAnnosOnPosition(aJCases, targetAnnosPerUser, users, target);
                    // A relation is only kept if both of its end points are agreed upon
                    if (isAgree(source, sourceAnnosPerUser) && isAgree(target, targetAnnosPerUser)) {
                        slotFeaturesToReset.add(mergeFs);
                    } else {
                        annotationsToDelete.add(mergeFs);
                    }
                } else {
                    slotFeaturesToReset.add(mergeFs);
                }
            } else {
                // Disagreement or stacked annotations
                annotationsToDelete.add(mergeFs);
            }
        }
    }
    // Remove annotations that do not agree or are stacked
    for (FeatureStructure fs : annotationsToDelete) {
        if (slotFeaturesToReset.contains(fs)) {
            continue;
        }
        // POS, stem, lemma and morphological features are also referenced from the token,
        // so the reference has to be cleared along with the removal from the indexes.
        clearTokenAttachment(mergeCas, (AnnotationFS) fs);
        mergeCas.removeFsFromIndexes(fs);
    }
    // For slot-bearing annotations, keep only the links the users agree on
    for (FeatureStructure baseFs : slotFeaturesToReset) {
        for (Feature roleFeature : baseFs.getType().getFeatures()) {
            if (!isLinkMode(baseFs, roleFeature)) {
                continue;
            }
            ArrayFS roleFss = (ArrayFS) WebAnnoCasUtil.getFeatureFS(baseFs, roleFeature.getShortName());
            if (roleFss == null) {
                continue;
            }
            Map<String, ArrayFS> roleAnnosPerUser = new HashMap<>();
            setAllRoleAnnosOnPosition(aJCases, roleAnnosPerUser, users, baseFs, roleFeature);
            // Start from all links and drop those without agreement
            List<FeatureStructure> linkFSes = new LinkedList<>(Arrays.asList(roleFss.toArray()));
            for (FeatureStructure roleFs : roleFss.toArray()) {
                if (!isRoleAgree(roleFs, roleAnnosPerUser)) {
                    // Do not agree on some role features
                    linkFSes.remove(roleFs);
                    continue;
                }
                for (Feature targetFeature : roleFs.getType().getFeatures()) {
                    if (isBasicFeature(targetFeature)) {
                        continue;
                    }
                    if (!targetFeature.getShortName().equals("target")) {
                        continue;
                    }
                    AnnotationFS targetFs = (AnnotationFS) roleFs.getFeatureValue(targetFeature);
                    if (targetFs == null) {
                        continue;
                    }
                    Map<String, List<FeatureStructure>> targetAnnosPerUser = new HashMap<>();
                    getAllAnnosOnPosition(aJCases, targetAnnosPerUser, users, targetFs);
                    // Do not agree on targets
                    if (!isAgree(targetFs, targetAnnosPerUser)) {
                        linkFSes.remove(roleFs);
                    }
                }
            }
            ArrayFS array = baseFs.getCAS().createArrayFS(linkFSes.size());
            array.copyFromArray(linkFSes.toArray(new FeatureStructure[linkFSes.size()]), 0, 0, linkFSes.size());
            baseFs.setFeatureValue(roleFeature, array);
        }
    }
    return mergeCas;
}

/**
 * Clears the reference from a token to the given annotation if the annotation is a POS, stem,
 * lemma or morphological features annotation. The token is the first one covered by the span
 * of the annotation; if no token is covered, nothing is done.
 *
 * @param aMergeCas
 *            the CAS in which to look up the token
 * @param aAnnotation
 *            the annotation that is about to be removed
 */
private static void clearTokenAttachment(JCas aMergeCas, AnnotationFS aAnnotation) {
    String typeName = aAnnotation.getType().getName();
    boolean isPos = typeName.equals(POS.class.getName());
    boolean isStem = typeName.equals(Stem.class.getName());
    boolean isLemma = typeName.equals(Lemma.class.getName());
    boolean isMorph = typeName.equals(MorphologicalFeatures.class.getName());
    if (!(isPos || isStem || isLemma || isMorph)) {
        return;
    }
    List<Token> covered = JCasUtil.selectCovered(aMergeCas, Token.class, aAnnotation.getBegin(), aAnnotation.getEnd());
    if (covered.isEmpty()) {
        // No token under the annotation span, hence no reference to clear
        return;
    }
    Token token = covered.get(0);
    if (isPos) {
        token.setPos(null);
    }
    if (isStem) {
        token.setStem(null);
    }
    if (isLemma) {
        token.setLemma(null);
    }
    if (isMorph) {
        token.setMorph(null);
    }
}
Also used : HashMap(java.util.HashMap) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Feature(org.apache.uima.cas.Feature) WebAnnoCasUtil.setFeature(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.setFeature) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) LinkedList(java.util.LinkedList) List(java.util.List) HashSet(java.util.HashSet) MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.Position) LinkedList(java.util.LinkedList) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ConfigurationSet) Type(org.apache.uima.cas.Type) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) ArrayFS(org.apache.uima.cas.ArrayFS) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with Stem

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.

the class WebannoTsv3Reader method addAnnotations.

/**
 * Importing span annotations including slot annotations.
 * <p>
 * For every type and unit recorded in {@code annotationsPerPostion}, the raw column value of
 * each feature of the layer is read and written to the corresponding annotation taken from
 * {@code aAnnosPerTypePerUnit}. A column value may contain several stacked annotations
 * (separated by an unescaped {@code |}), several slots (separated by an unescaped {@code ;})
 * and a reference suffix in square brackets. The value {@code _} means that the column is
 * empty and {@code *} is turned into a {@code null} feature value.
 * <p>
 * After all units of a type have been processed, the annotations collected per unit and
 * reference number are stored in {@code annosPerRef} under that type.
 *
 * @param aJCas
 *            the JCas used to create link feature structures and to add annotations to the
 *            indexes
 * @param aAnnosPerTypePerUnit
 *            the annotations per type and unit whose features are to be filled in
 */
private void addAnnotations(JCas aJCas, Map<Type, Map<AnnotationUnit, List<AnnotationFS>>> aAnnosPerTypePerUnit) {
    for (Type type : annotationsPerPostion.keySet()) {
        // Annotations of this type, keyed by unit and then by reference number
        Map<AnnotationUnit, Map<Integer, AnnotationFS>> multiTokUnits = new HashMap<>();
        // Current reference number; replaced by the number from a "[N]" suffix if one is given
        int ref = 1;
        // Annotation most recently handled by the plain-feature branch (or the annotation it
        // continues); used to see if the current value belongs to an annotation spanning
        // multiple tokens
        AnnotationFS prevAnnoFs = null;
        for (AnnotationUnit unit : annotationsPerPostion.get(type).keySet()) {
            int end = unit.end;
            List<AnnotationFS> annos = aAnnosPerTypePerUnit.get(type).get(unit);
            // Index of the current feature's value in the unit's list of column values
            int j = 0;
            // Most recently seen slot role feature; passed to addSlotAnnotations below
            Feature linkeF = null;
            Map<AnnotationFS, List<FeatureStructure>> linkFSesPerSlotAnno = new HashMap<>();
            // Layers without any features are handled by a dedicated helper
            if (allLayers.get(type).size() == 0) {
                ref = addAnnotationWithNoFeature(aJCas, type, unit, annos, multiTokUnits, end, ref);
                continue;
            }
            for (Feature feat : allLayers.get(type)) {
                String anno = annotationsPerPostion.get(type).get(unit).get(j);
                // "_" marks a column without a value for this unit
                if (!anno.equals("_")) {
                    // Index of the current stacked annotation within annos
                    int i = 0;
                    // if it is a slot annotation (multiple slots per
                    // single annotation
                    // (Target1<--role1--Base--role2-->Target2)
                    int slot = 0;
                    boolean targetAdd = false;
                    // Stacked annotations are separated by a '|' that is not preceded by a
                    // backslash
                    String stackedAnnoRegex = "(?<!\\\\)" + Pattern.quote("|");
                    String[] stackedAnnos = anno.split(stackedAnnoRegex);
                    for (String mAnnos : stackedAnnos) {
                        // Multiple slots are separated by a ';' that is not preceded by a
                        // backslash
                        String multipleSlotAnno = "(?<!\\\\)" + Pattern.quote(";");
                        for (String mAnno : mAnnos.split(multipleSlotAnno)) {
                            String depRef = "";
                            String multSpliter = "(?<!\\\\)" + Pattern.quote("[");
                            // is this slot target ambiguous?
                            boolean ambigTarget = false;
                            if (mAnno.split(multSpliter).length > 1) {
                                ambigTarget = true;
                                // The reference is the text between the first '[' and the last
                                // character. It becomes the new ref unless it contains '_',
                                // in which case it is only kept in depRef for the dependency
                                // branch below.
                                depRef = mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1);
                                ref = depRef.contains("_") ? ref : Integer.valueOf(mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1));
                                mAnno = mAnno.substring(0, mAnno.indexOf("["));
                            }
                            // "*" stands for an annotation without a feature value
                            if (mAnno.equals("*")) {
                                mAnno = null;
                            }
                            // Check whether an annotation with the same reference number has
                            // been registered before; if so, the current unit continues it
                            boolean isMultitoken = false;
                            if (!multiTokUnits.isEmpty() && prevAnnoFs != null && prevAnnoFs.getBegin() != unit.begin) {
                                contAnno: for (AnnotationUnit u : multiTokUnits.keySet()) {
                                    for (Integer r : multiTokUnits.get(u).keySet()) {
                                        if (ref == r) {
                                            isMultitoken = true;
                                            prevAnnoFs = multiTokUnits.get(u).get(r);
                                            break contAnno;
                                        }
                                    }
                                }
                            }
                            if (isMultitoken) {
                                // Extend the earlier annotation up to the end of this unit and
                                // (re)set the feature value on it
                                Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                                prevAnnoFs.setIntValue(endF, end);
                                mAnno = getEscapeChars(mAnno);
                                prevAnnoFs.setFeatureValueFromString(feat, mAnno);
                                if (feat.getShortName().equals(REF_LINK)) {
                                    // since REF_REL do not start with BIO,
                                    // update it it...
                                    annos.set(i, prevAnnoFs);
                                }
                                setAnnoRefPerUnit(unit, type, ref, prevAnnoFs);
                            } else {
                                if (roleLinks.containsKey(feat)) {
                                    // Slot role: create a link FS carrying the role label and
                                    // remember it for the owning annotation
                                    linkeF = feat;
                                    FeatureStructure link = aJCas.getCas().createFS(slotLinkTypes.get(feat));
                                    Feature roleFeat = link.getType().getFeatureByBaseName("role");
                                    mAnno = getEscapeChars(mAnno);
                                    link.setStringValue(roleFeat, mAnno);
                                    linkFSesPerSlotAnno.putIfAbsent(annos.get(i), new ArrayList<>());
                                    linkFSesPerSlotAnno.get(annos.get(i)).add(link);
                                } else if (roleTargets.containsKey(feat)) {
                                    // Slot target: fetch the link created for the role at the
                                    // same slot position and attach the target annotation
                                    FeatureStructure link = linkFSesPerSlotAnno.get(annos.get(i)).get(slot);
                                    // If the value has more than two '-'-separated parts, the
                                    // last part is a number that selects the target type from
                                    // layerMaps; the rest is the key into token2Units
                                    int customTypeNumber = 0;
                                    if (mAnno.split("-").length > 2) {
                                        customTypeNumber = Integer.valueOf(mAnno.substring(mAnno.lastIndexOf("-") + 1));
                                        mAnno = mAnno.substring(0, mAnno.lastIndexOf("-"));
                                    }
                                    AnnotationUnit targetUnit = token2Units.get(mAnno);
                                    Type tType = null;
                                    if (customTypeNumber == 0) {
                                        tType = roleTargets.get(feat);
                                    } else {
                                        tType = layerMaps.get(customTypeNumber);
                                    }
                                    // With a reference suffix the target is looked up by ref,
                                    // otherwise the first annotation on the target unit is used
                                    AnnotationFS targetFs;
                                    if (ambigTarget) {
                                        targetFs = annosPerRef.get(tType).get(targetUnit).get(ref);
                                    } else {
                                        targetFs = annosPerRef.get(tType).get(targetUnit).entrySet().iterator().next().getValue();
                                    }
                                    link.setFeatureValue(feat, targetFs);
                                    addSlotAnnotations(linkFSesPerSlotAnno, linkeF);
                                    targetAdd = true;
                                    slot++;
                                } else if (feat.getShortName().equals(REF_REL)) {
                                    // The part after "->" holds the chain number and the link
                                    // number separated by '-'; the part before it is the
                                    // feature value
                                    int chainNo = Integer.valueOf(mAnno.split("->")[1].split("-")[0]);
                                    int LinkNo = Integer.valueOf(mAnno.split("->")[1].split("-")[1]);
                                    chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
                                    // Skip links that have already been registered for this chain
                                    if (chainAnnosPerTyep.get(type).get(chainNo) != null && chainAnnosPerTyep.get(type).get(chainNo).get(LinkNo) != null) {
                                        continue;
                                    }
                                    String refRel = mAnno.split("->")[0];
                                    refRel = getEscapeChars(refRel);
                                    if (refRel.equals("*")) {
                                        refRel = null;
                                    }
                                    annos.get(i).setFeatureValueFromString(feat, refRel);
                                    chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
                                    chainAnnosPerTyep.get(type).putIfAbsent(chainNo, new TreeMap<>());
                                    chainAnnosPerTyep.get(type).get(chainNo).put(LinkNo, annos.get(i));
                                } else if (feat.getShortName().equals(REF_LINK)) {
                                    mAnno = getEscapeChars(mAnno);
                                    annos.get(i).setFeatureValueFromString(feat, mAnno);
                                    aJCas.addFsToIndexes(annos.get(i));
                                } else if (depFeatures.get(type) != null && depFeatures.get(type).equals(feat)) {
                                    // Relation feature: depRef (if present) has the form
                                    // "<g>_<d>", the reference numbers used to look up the
                                    // governor and the dependent annotation
                                    int g = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[0]);
                                    int d = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[1]);
                                    Type depType = depTypess.get(type);
                                    AnnotationUnit govUnit = token2Units.get(mAnno);
                                    // The last column value of this unit is the key of the
                                    // dependent unit in token2Units
                                    int l = annotationsPerPostion.get(type).get(unit).size();
                                    String thisUnit = annotationsPerPostion.get(type).get(unit).get(l - 1);
                                    AnnotationUnit depUnit = token2Units.get(thisUnit);
                                    AnnotationFS govFs;
                                    AnnotationFS depFs;
                                    // For POS (and Token) as attach type, the end points are
                                    // the tokens themselves
                                    if (depType.getName().equals(POS.class.getName())) {
                                        depType = aJCas.getCas().getTypeSystem().getType(Token.class.getName());
                                        govFs = units2Tokens.get(govUnit);
                                        depFs = units2Tokens.get(unit);
                                    } else // in WebAnno world :)(!
                                    if (depType.getName().equals(Token.class.getName())) {
                                        govFs = units2Tokens.get(govUnit);
                                        depFs = units2Tokens.get(unit);
                                    } else if (g == 0 && d == 0) {
                                        govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
                                        depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
                                    } else if (g == 0) {
                                        govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
                                        depFs = annosPerRef.get(depType).get(depUnit).get(d);
                                    } else {
                                        // NOTE(review): d is not consulted in this branch even
                                        // when it is non-zero; the first annotation on the
                                        // dependent unit is used - confirm this is intended
                                        govFs = annosPerRef.get(depType).get(govUnit).get(g);
                                        depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
                                    }
                                    annos.get(i).setFeatureValue(feat, depFs);
                                    annos.get(i).setFeatureValue(type.getFeatureByBaseName(GOVERNOR), govFs);
                                    // Adjust either the begin or the end of the relation
                                    // annotation to the offsets of the dependent
                                    if (depFs.getBegin() <= annos.get(i).getBegin()) {
                                        Feature beginF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_BEGIN);
                                        annos.get(i).setIntValue(beginF, depFs.getBegin());
                                    } else {
                                        Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                                        annos.get(i).setIntValue(endF, depFs.getEnd());
                                    }
                                    aJCas.addFsToIndexes(annos.get(i));
                                } else {
                                    // Plain feature: set the value, index the annotation and
                                    // register it under the current reference number
                                    mAnno = getEscapeChars(mAnno);
                                    multiTokUnits.putIfAbsent(unit, new HashMap<>());
                                    multiTokUnits.get(unit).put(ref, annos.get(i));
                                    prevAnnoFs = annos.get(i);
                                    annos.get(i).setFeatureValueFromString(feat, mAnno);
                                    aJCas.addFsToIndexes(annos.get(i));
                                    setAnnoRefPerUnit(unit, type, ref, annos.get(i));
                                }
                            }
                            // With stacked annotations, every value gets its own reference
                            // number
                            if (stackedAnnos.length > 1) {
                                ref++;
                            }
                        }
                        // Token-attached DKPro Core layers are also set on the token of this
                        // unit
                        if (type.getName().equals(POS.class.getName())) {
                            units2Tokens.get(unit).setPos((POS) annos.get(i));
                        }
                        if (type.getName().equals(Lemma.class.getName())) {
                            units2Tokens.get(unit).setLemma((Lemma) annos.get(i));
                        }
                        if (type.getName().equals(Stem.class.getName())) {
                            units2Tokens.get(unit).setStem((Stem) annos.get(i));
                        }
                        if (type.getName().equals(MorphologicalFeatures.class.getName())) {
                            units2Tokens.get(unit).setMorph((MorphologicalFeatures) annos.get(i));
                        }
                        i++;
                    }
                    // Once slot targets have been attached, start with a fresh link map
                    if (targetAdd) {
                        linkFSesPerSlotAnno = new HashMap<>();
                    }
                } else {
                    // An empty column interrupts any multi-token continuation
                    prevAnnoFs = null;
                }
                j++;
            }
            if (prevAnnoFs != null) {
                ref++;
            }
        }
        // Make the annotations of this type available for later lookups by unit and reference
        annosPerRef.put(type, multiTokUnits);
    }
}
Also used : MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Feature(org.apache.uima.cas.Feature) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 5 with Stem

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.

the class RemoveZeroSizeTokensAndSentencesRepair method repair.

/**
 * Removes sentences and tokens whose begin offset is not smaller than their end offset from
 * the CAS indexes. For such tokens, the attached lemma, POS and stem (if any) are removed as
 * well. Every removal is reported as an INFO message in {@code aMessages}.
 * <p>
 * NOTE(review): morphological features attached to a removed token are not removed here -
 * confirm whether that is intended.
 *
 * @param aProject
 *            the project the CAS belongs to (not used by this repair)
 * @param aCas
 *            the CAS to repair
 * @param aMessages
 *            receives one message per removed annotation, or an ERROR message if the JCas
 *            cannot be obtained
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    try {
        // Iterate over snapshot copies: removing feature structures from the indexes while
        // iterating directly over the index-backed collection can invalidate the iterator.
        for (Sentence s : new java.util.ArrayList<Sentence>(select(aCas.getJCas(), Sentence.class))) {
            if (s.getBegin() >= s.getEnd()) {
                s.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed sentence with illegal span: %s", s));
            }
        }
        for (Token t : new java.util.ArrayList<Token>(select(aCas.getJCas(), Token.class))) {
            if (t.getBegin() >= t.getEnd()) {
                Lemma lemma = t.getLemma();
                if (lemma != null) {
                    lemma.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed lemma attached to token with illegal span: %s", t));
                }
                POS pos = t.getPos();
                if (pos != null) {
                    pos.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed POS attached to token with illegal span: %s", t));
                }
                Stem stem = t.getStem();
                if (stem != null) {
                    stem.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed stem attached to token with illegal span: %s", t));
                }
                t.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed token with illegal span: %s", t));
            }
        }
    } catch (CASException e) {
        log.error("Unable to access JCas", e);
        // The format string needs a placeholder, otherwise the exception message is dropped
        aMessages.add(new LogMessage(this, LogLevel.ERROR, "Unable to access JCas: %s", e.getMessage()));
    }
}
Also used : LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CASException(org.apache.uima.cas.CASException) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem)

Aggregations

POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)5 Lemma (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma)5 Stem (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem)5 MorphologicalFeatures (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures)4 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)3 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Feature (org.apache.uima.cas.Feature)2 FeatureStructure (org.apache.uima.cas.FeatureStructure)2 Type (org.apache.uima.cas.Type)2 JCas (org.apache.uima.jcas.JCas)2 WebAnnoCasUtil.setFeature (de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.setFeature)1 ConfigurationSet (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ConfigurationSet)1 Position (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.Position)1 LogMessage (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage)1 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)1 TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)1