Search in sources :

Example 26 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class WebannoTsv3Reader method addAnnotations.

/**
 * Importing span annotations including slot annotations.
 *
 * <p>For every layer type in {@code annotationsPerPostion} and every annotation unit of that
 * type, this walks over the layer's features (one column value per feature, indexed by
 * {@code j}) and writes the parsed values onto the annotations supplied in
 * {@code aAnnosPerTypePerUnit}. A column value of {@code "_"} means "no annotation" for that
 * feature. Other values are split on unescaped {@code |} (stacked annotations), then on
 * unescaped {@code ;} (multiple slots), and an optional {@code [..]} suffix carries a
 * disambiguation reference.
 *
 * <p>Depending on the feature, a value is treated as a slot role, a slot target, a chain
 * relation ({@code REF_REL}), a chain link ({@code REF_LINK}), a relation/dependency feature,
 * or a plain feature value. When the current reference number matches an annotation already
 * recorded in {@code multiTokUnits}, that annotation is extended to the current unit's end
 * instead of being treated as a new one.
 *
 * <p>At the end of each type, the collected {@code multiTokUnits} map is stored in
 * {@code annosPerRef} under that type.
 *
 * @param aJCas
 *            the JCas used to create link feature structures and to index annotations.
 * @param aAnnosPerTypePerUnit
 *            the annotations per type and per unit; the list is indexed by the position of the
 *            stacked annotation within the column value.
 */
private void addAnnotations(JCas aJCas, Map<Type, Map<AnnotationUnit, List<AnnotationFS>>> aAnnosPerTypePerUnit) {
    for (Type type : annotationsPerPostion.keySet()) {
        // unit -> (reference number -> annotation) for annotations written by the plain-feature
        // branch; used below to recognise continuations of multi-token annotations
        Map<AnnotationUnit, Map<Integer, AnnotationFS>> multiTokUnits = new HashMap<>();
        // running reference number; may be overwritten by an explicit "[n]" suffix
        int ref = 1;
        // to see if it is on multiple token
        AnnotationFS prevAnnoFs = null;
        for (AnnotationUnit unit : annotationsPerPostion.get(type).keySet()) {
            int end = unit.end;
            List<AnnotationFS> annos = aAnnosPerTypePerUnit.get(type).get(unit);
            // index of the current feature column within this unit's values
            int j = 0;
            // last feature seen that is a slot role feature (set in the roleLinks branch)
            Feature linkeF = null;
            Map<AnnotationFS, List<FeatureStructure>> linkFSesPerSlotAnno = new HashMap<>();
            // layers without any feature are handled by a dedicated helper
            if (allLayers.get(type).size() == 0) {
                ref = addAnnotationWithNoFeature(aJCas, type, unit, annos, multiTokUnits, end, ref);
                continue;
            }
            for (Feature feat : allLayers.get(type)) {
                String anno = annotationsPerPostion.get(type).get(unit).get(j);
                if (!anno.equals("_")) {
                    // index into annos: advanced once per stacked ("|"-separated) annotation
                    int i = 0;
                    // if it is a slot annotation (multiple slots per
                    // single annotation
                    // (Target1<--role1--Base--role2-->Target2)
                    int slot = 0;
                    boolean targetAdd = false;
                    // split on "|" unless it is preceded by a backslash (escaped)
                    String stackedAnnoRegex = "(?<!\\\\)" + Pattern.quote("|");
                    String[] stackedAnnos = anno.split(stackedAnnoRegex);
                    for (String mAnnos : stackedAnnos) {
                        // split on ";" unless it is preceded by a backslash (escaped)
                        String multipleSlotAnno = "(?<!\\\\)" + Pattern.quote(";");
                        for (String mAnno : mAnnos.split(multipleSlotAnno)) {
                            String depRef = "";
                            String multSpliter = "(?<!\\\\)" + Pattern.quote("[");
                            // is this slot target ambiguous?
                            boolean ambigTarget = false;
                            if (mAnno.split(multSpliter).length > 1) {
                                ambigTarget = true;
                                // text between the first "[" and the last character (expected "]")
                                depRef = mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1);
                                // a reference containing "_" is a "g_d" pair used by the relation
                                // branch below; otherwise it is a plain reference number
                                ref = depRef.contains("_") ? ref : Integer.valueOf(mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1));
                                mAnno = mAnno.substring(0, mAnno.indexOf("["));
                            }
                            // "*" stands for an unset (null) feature value
                            if (mAnno.equals("*")) {
                                mAnno = null;
                            }
                            // look for an earlier annotation recorded under the same reference
                            // number; if found, the current unit continues that annotation
                            boolean isMultitoken = false;
                            if (!multiTokUnits.isEmpty() && prevAnnoFs != null && prevAnnoFs.getBegin() != unit.begin) {
                                contAnno: for (AnnotationUnit u : multiTokUnits.keySet()) {
                                    for (Integer r : multiTokUnits.get(u).keySet()) {
                                        if (ref == r) {
                                            isMultitoken = true;
                                            prevAnnoFs = multiTokUnits.get(u).get(r);
                                            break contAnno;
                                        }
                                    }
                                }
                            }
                            if (isMultitoken) {
                                // extend the existing annotation to cover this unit
                                Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                                prevAnnoFs.setIntValue(endF, end);
                                mAnno = getEscapeChars(mAnno);
                                prevAnnoFs.setFeatureValueFromString(feat, mAnno);
                                if (feat.getShortName().equals(REF_LINK)) {
                                    // since REF_REL do not start with BIO,
                                    // update it it...
                                    annos.set(i, prevAnnoFs);
                                }
                                setAnnoRefPerUnit(unit, type, ref, prevAnnoFs);
                            } else {
                                if (roleLinks.containsKey(feat)) {
                                    // slot role: create a link FS and remember it per owning annotation
                                    linkeF = feat;
                                    FeatureStructure link = aJCas.getCas().createFS(slotLinkTypes.get(feat));
                                    Feature roleFeat = link.getType().getFeatureByBaseName("role");
                                    mAnno = getEscapeChars(mAnno);
                                    link.setStringValue(roleFeat, mAnno);
                                    linkFSesPerSlotAnno.putIfAbsent(annos.get(i), new ArrayList<>());
                                    linkFSesPerSlotAnno.get(annos.get(i)).add(link);
                                } else if (roleTargets.containsKey(feat)) {
                                    // slot target: resolve the target annotation and attach it to
                                    // the link created for the same slot position
                                    FeatureStructure link = linkFSesPerSlotAnno.get(annos.get(i)).get(slot);
                                    int customTypeNumber = 0;
                                    // a value with more than two "-"-separated parts carries a
                                    // trailing layer number after the last "-"
                                    if (mAnno.split("-").length > 2) {
                                        customTypeNumber = Integer.valueOf(mAnno.substring(mAnno.lastIndexOf("-") + 1));
                                        mAnno = mAnno.substring(0, mAnno.lastIndexOf("-"));
                                    }
                                    AnnotationUnit targetUnit = token2Units.get(mAnno);
                                    Type tType = null;
                                    if (customTypeNumber == 0) {
                                        tType = roleTargets.get(feat);
                                    } else {
                                        tType = layerMaps.get(customTypeNumber);
                                    }
                                    AnnotationFS targetFs;
                                    if (ambigTarget) {
                                        targetFs = annosPerRef.get(tType).get(targetUnit).get(ref);
                                    } else {
                                        // unambiguous: take the first annotation recorded on the unit
                                        targetFs = annosPerRef.get(tType).get(targetUnit).entrySet().iterator().next().getValue();
                                    }
                                    link.setFeatureValue(feat, targetFs);
                                    addSlotAnnotations(linkFSesPerSlotAnno, linkeF);
                                    targetAdd = true;
                                    slot++;
                                } else if (feat.getShortName().equals(REF_REL)) {
                                    // value has the form "<relation>-><chainNo>-<linkNo>"
                                    int chainNo = Integer.valueOf(mAnno.split("->")[1].split("-")[0]);
                                    int LinkNo = Integer.valueOf(mAnno.split("->")[1].split("-")[1]);
                                    chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
                                    // skip links that were already registered for this chain
                                    if (chainAnnosPerTyep.get(type).get(chainNo) != null && chainAnnosPerTyep.get(type).get(chainNo).get(LinkNo) != null) {
                                        continue;
                                    }
                                    String refRel = mAnno.split("->")[0];
                                    refRel = getEscapeChars(refRel);
                                    if (refRel.equals("*")) {
                                        refRel = null;
                                    }
                                    annos.get(i).setFeatureValueFromString(feat, refRel);
                                    // NOTE(review): repeats the putIfAbsent a few lines above; looks redundant
                                    chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
                                    chainAnnosPerTyep.get(type).putIfAbsent(chainNo, new TreeMap<>());
                                    chainAnnosPerTyep.get(type).get(chainNo).put(LinkNo, annos.get(i));
                                } else if (feat.getShortName().equals(REF_LINK)) {
                                    mAnno = getEscapeChars(mAnno);
                                    annos.get(i).setFeatureValueFromString(feat, mAnno);
                                    aJCas.addFsToIndexes(annos.get(i));
                                } else if (depFeatures.get(type) != null && depFeatures.get(type).equals(feat)) {
                                    // relation feature: depRef (if present) is "g_d", the reference
                                    // numbers of the governor and dependent annotations; 0 = unset
                                    int g = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[0]);
                                    int d = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[1]);
                                    Type depType = depTypess.get(type);
                                    AnnotationUnit govUnit = token2Units.get(mAnno);
                                    // the last column value of this unit identifies the unit itself
                                    int l = annotationsPerPostion.get(type).get(unit).size();
                                    String thisUnit = annotationsPerPostion.get(type).get(unit).get(l - 1);
                                    AnnotationUnit depUnit = token2Units.get(thisUnit);
                                    AnnotationFS govFs;
                                    AnnotationFS depFs;
                                    // NOTE(review): the POS and Token branches look up the dependent via
                                    // "unit" while the remaining branches use "depUnit" - confirm intended
                                    if (depType.getName().equals(POS.class.getName())) {
                                        // relations attached to POS are resolved against tokens instead
                                        depType = aJCas.getCas().getTypeSystem().getType(Token.class.getName());
                                        govFs = units2Tokens.get(govUnit);
                                        depFs = units2Tokens.get(unit);
                                    } else // in WebAnno world :)(!
                                    if (depType.getName().equals(Token.class.getName())) {
                                        govFs = units2Tokens.get(govUnit);
                                        depFs = units2Tokens.get(unit);
                                    } else if (g == 0 && d == 0) {
                                        govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
                                        depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
                                    } else if (g == 0) {
                                        govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
                                        depFs = annosPerRef.get(depType).get(depUnit).get(d);
                                    } else {
                                        // NOTE(review): reached for every g != 0, including g != 0 && d != 0,
                                        // in which case d is ignored - confirm this is intended
                                        govFs = annosPerRef.get(depType).get(govUnit).get(g);
                                        depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
                                    }
                                    annos.get(i).setFeatureValue(feat, depFs);
                                    annos.get(i).setFeatureValue(type.getFeatureByBaseName(GOVERNOR), govFs);
                                    // adjust one boundary of the relation annotation to the dependent:
                                    // its begin if the dependent starts at or before it, else its end
                                    if (depFs.getBegin() <= annos.get(i).getBegin()) {
                                        Feature beginF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_BEGIN);
                                        annos.get(i).setIntValue(beginF, depFs.getBegin());
                                    } else {
                                        Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                                        annos.get(i).setIntValue(endF, depFs.getEnd());
                                    }
                                    aJCas.addFsToIndexes(annos.get(i));
                                } else {
                                    // plain feature value: set it, index the annotation and record
                                    // it under the current reference number for later continuation
                                    mAnno = getEscapeChars(mAnno);
                                    multiTokUnits.putIfAbsent(unit, new HashMap<>());
                                    multiTokUnits.get(unit).put(ref, annos.get(i));
                                    prevAnnoFs = annos.get(i);
                                    annos.get(i).setFeatureValueFromString(feat, mAnno);
                                    aJCas.addFsToIndexes(annos.get(i));
                                    setAnnoRefPerUnit(unit, type, ref, annos.get(i));
                                }
                            }
                            // stacked annotations each get their own reference number
                            if (stackedAnnos.length > 1) {
                                ref++;
                            }
                        }
                        // link the well-known token-attached layers back to the token of this unit
                        if (type.getName().equals(POS.class.getName())) {
                            units2Tokens.get(unit).setPos((POS) annos.get(i));
                        }
                        if (type.getName().equals(Lemma.class.getName())) {
                            units2Tokens.get(unit).setLemma((Lemma) annos.get(i));
                        }
                        if (type.getName().equals(Stem.class.getName())) {
                            units2Tokens.get(unit).setStem((Stem) annos.get(i));
                        }
                        if (type.getName().equals(MorphologicalFeatures.class.getName())) {
                            units2Tokens.get(unit).setMorph((MorphologicalFeatures) annos.get(i));
                        }
                        i++;
                    }
                    // once slot targets were attached, start with fresh link bookkeeping
                    if (targetAdd) {
                        linkFSesPerSlotAnno = new HashMap<>();
                    }
                } else {
                    // no annotation in this column: nothing to continue from
                    prevAnnoFs = null;
                }
                j++;
            }
            // advance the reference number if the unit ended with an annotation in progress
            if (prevAnnoFs != null) {
                ref++;
            }
        }
        annosPerRef.put(type, multiTokUnits);
    }
}
Also used : MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Feature(org.apache.uima.cas.Feature) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 27 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class ConllUWriter method convert.

/**
 * Writes every sentence of the given JCas to {@code aOut} as tab-separated rows, one row per
 * token, followed by an empty line after each sentence.
 *
 * <p>Columns per token row: id, covered text, lemma, cpos, pos, feats, head, deprel, deps,
 * misc. Columns for which nothing is available (or whose writing is switched off through the
 * corresponding {@code write*} flag) contain {@code UNUSED}. If a {@link SurfaceForm} begins at
 * the same offset as a token, an additional range row ({@code first-last}) carrying the surface
 * form value is written before that token's row.
 *
 * @param aJCas
 *            the JCas to read sentences, tokens, dependencies and surface forms from.
 * @param aOut
 *            the writer receiving the rows.
 */
private void convert(JCas aJCas, PrintWriter aOut) {
    Map<SurfaceForm, Collection<Token>> tokensBySurfaceForm = indexCovered(aJCas, SurfaceForm.class, Token.class);
    // surface forms keyed by their begin offset, to find the one starting at a given token
    Int2ObjectMap<SurfaceForm> surfaceFormByBegin = new Int2ObjectOpenHashMap<>();
    for (SurfaceForm surfaceForm : select(aJCas, SurfaceForm.class)) {
        surfaceFormByBegin.put(surfaceForm.getBegin(), surfaceForm);
    }

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        // insertion-ordered, so rows come out in token order
        HashMap<Token, Row> rowsByToken = new LinkedHashMap<>();

        // One row per token, numbered from 1 within the sentence
        List<Token> sentenceTokens = selectCovered(Token.class, sentence);
        int tokenCount = sentenceTokens.size();
        for (int idx = 0; idx < tokenCount; idx++) {
            Token current = sentenceTokens.get(idx);
            boolean hasNext = idx + 1 < tokenCount;
            Row row = new Row();
            row.id = idx + 1;
            row.token = current;
            row.noSpaceAfter = hasNext && current.getEnd() == sentenceTokens.get(idx + 1).getBegin();
            rowsByToken.put(current, row);
        }

        // Attach each dependency to the row of its dependent: blank/basic flavor becomes the
        // primary relation, everything else is collected as an additional dependency
        for (Dependency rel : selectCovered(Dependency.class, sentence)) {
            String flavor = FSUtil.getFeature(rel, "flavor", String.class);
            Row dependentRow = rowsByToken.get(rel.getDependent());
            if (StringUtils.isBlank(flavor) || DependencyFlavor.BASIC.equals(flavor)) {
                dependentRow.deprel = rel;
            } else {
                dependentRow.deps.add(rel);
            }
        }

        // Emit the rows
        for (Row row : rowsByToken.values()) {
            Token token = row.token;

            String lemma = (writeLemma && token.getLemma() != null) ? token.getLemma().getValue() : UNUSED;

            String pos = UNUSED;
            String cpos = UNUSED;
            POS posAnno = writePos ? token.getPos() : null;
            if (posAnno != null) {
                pos = posAnno.getPosValue();
                String mapped = dkpro2ud.get(posAnno.getClass());
                // fall back to the fine-grained tag when no mapping exists for the POS class
                cpos = StringUtils.isBlank(mapped) ? pos : mapped;
            }

            int headId = UNUSED_INT;
            String deprel = UNUSED;
            String deps = UNUSED;
            if (writeDependency) {
                if (row.deprel != null) {
                    deprel = row.deprel.getDependencyType();
                    int governorId = rowsByToken.get(row.deprel.getGovernor()).id;
                    // ROOT dependencies may be modeled as a loop; those get head 0
                    headId = (governorId == row.id) ? 0 : governorId;
                }
                StringBuilder extraDeps = new StringBuilder();
                for (Dependency extra : row.deps) {
                    int governorId = rowsByToken.get(extra.getGovernor()).id;
                    if (extraDeps.length() > 0) {
                        extraDeps.append('|');
                    }
                    // a self-loop is written with governor 0, as for the primary relation
                    extraDeps.append(governorId == row.id ? 0 : governorId);
                    extraDeps.append(':');
                    extraDeps.append(extra.getDependencyType());
                }
                if (extraDeps.length() > 0) {
                    deps = extraDeps.toString();
                }
            }

            String head = (headId != UNUSED_INT) ? Integer.toString(headId) : UNUSED;
            String feats = (writeMorph && token.getMorph() != null) ? token.getMorph().getValue() : UNUSED;
            String misc = row.noSpaceAfter ? "SpaceAfter=No" : UNUSED;

            // A surface form starting at this token produces a range row before the token row
            SurfaceForm surfaceForm = surfaceFormByBegin.get(token.getBegin());
            if (surfaceForm != null) {
                @SuppressWarnings({ "unchecked", "rawtypes" }) List<Token> covered = (List) tokensBySurfaceForm.get(surfaceForm);
                int firstId = rowsByToken.get(covered.get(0)).id;
                int lastId = rowsByToken.get(covered.get(covered.size() - 1)).id;
                aOut.printf("%d-%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", firstId, lastId, surfaceForm.getValue(), UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED);
            }
            aOut.printf("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", row.id, token.getCoveredText(), lemma, cpos, pos, feats, head, deprel, deps, misc);
        }
        aOut.println();
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) LinkedHashMap(java.util.LinkedHashMap) SurfaceForm(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.SurfaceForm) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 28 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class TcfReader method convertDependencies.

/**
 * Converts the dependencies of the TCF dependency parsing layer into {@link Dependency}
 * annotations in the given JCas.
 *
 * <p>For every dependency, the first dependent token and the first governor token are looked up
 * in {@code aTokens} by their TCF token ID. Tokens that have no POS yet receive a placeholder
 * POS with value {@code "--"} (the governor only when the dependency is not a {@code ROOT}
 * dependency). A dependency without governor tokens uses the dependent token as its governor.
 * For {@code ROOT} dependencies the governor token is also used as the dependent. The created
 * annotation spans its dependent token.
 *
 * <p>The {@code ROOT} check is null-safe, so a dependency whose function is absent is treated
 * as a regular (non-root) dependency instead of raising a {@link NullPointerException}.
 *
 * @param aJCas
 *            the JCas to add the annotations to.
 * @param aCorpusData
 *            the TCF document; nothing is done if it has no dependency parsing layer.
 * @param aTokens
 *            the already converted tokens, keyed by TCF token ID.
 */
private void convertDependencies(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
    if (aCorpusData.getDependencyParsingLayer() == null) {
        // No layer to read from.
        return;
    }
    for (int i = 0; i < aCorpusData.getDependencyParsingLayer().size(); i++) {
        DependencyParse dependencyParse = aCorpusData.getDependencyParsingLayer().getParse(i);
        for (eu.clarin.weblicht.wlfxb.tc.api.Dependency dependency : dependencyParse.getDependencies()) {
            eu.clarin.weblicht.wlfxb.tc.api.Token[] governorTokens = aCorpusData.getDependencyParsingLayer().getGovernorTokens(dependency);
            eu.clarin.weblicht.wlfxb.tc.api.Token[] dependentTokens = aCorpusData.getDependencyParsingLayer().getDependentTokens(dependency);
            // Constant-first comparison: stays false (instead of throwing) when the function is null
            boolean isRoot = "ROOT".equals(dependency.getFunction());

            Token dependentToken = aTokens.get(dependentTokens[0].getID());
            if (dependentToken.getPos() == null) {
                addPlaceholderPos(aJCas, dependentToken);
            }

            if (governorTokens != null) {
                Token governorCandidate = aTokens.get(governorTokens[0].getID());
                // The governor of a ROOT dependency is deliberately left without a POS
                if (governorCandidate.getPos() == null && !isRoot) {
                    addPlaceholderPos(aJCas, governorCandidate);
                }
            } else {
                // No governor given: the dependent acts as its own governor
                governorTokens = dependentTokens;
            }

            Token governorToken = aTokens.get(governorTokens[0].getID());
            Dependency outDependency = new Dependency(aJCas);
            outDependency.setDependencyType(dependency.getFunction());
            outDependency.setGovernor(governorToken);
            outDependency.setDependent(isRoot ? governorToken : dependentToken);
            outDependency.setBegin(outDependency.getDependent().getBegin());
            outDependency.setEnd(outDependency.getDependent().getEnd());
            outDependency.addToIndexes();
        }
    }
}

/**
 * Creates a POS annotation with value {@code "--"} spanning the given token, adds it to the
 * indexes and sets it as the token's POS. An INFO message is logged for each placeholder.
 */
private void addPlaceholderPos(JCas aJCas, Token aToken) {
    getUimaContext().getLogger().log(Level.INFO, "There is no pos for this token, added is -- as a pos");
    POS placeholder = new POS(aJCas);
    placeholder.setBegin(aToken.getBegin());
    placeholder.setEnd(aToken.getEnd());
    placeholder.setPosValue("--");
    placeholder.addToIndexes();
    aToken.setPos(placeholder);
}
Also used : DependencyParse(eu.clarin.weblicht.wlfxb.tc.api.DependencyParse) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)

Example 29 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class WebannoTsv2Reader method createSpanAnnotation.

/**
 * Creates or extends span annotations for one token line, consuming one column from
 * {@code lineTk} per feature of every layer in {@code aLayers} (plus one extra column with the
 * relation target numbers after the last feature of a layer that is a key of
 * {@code aRelationayers}).
 *
 * <p>Each column value may hold several {@code |}-separated annotations. Values without a
 * {@code B-}/{@code I-}/{@code O-} prefix (and different from {@code "_"} and {@code "O"}) are
 * treated as single-token annotations; {@code B-} starts (or continues filling the features of)
 * a multi-token annotation; {@code I-} extends the annotation at the same stacking position to
 * the end of the current token; {@code O-_}, {@code B-_} and {@code I-_} only advance the
 * stacking position.
 *
 * @param aJcas
 *            the JCas in which annotations are created and indexed.
 * @param aTokenStart
 *            begin offset of the current token.
 * @param aLayers
 *            the span layers and their features, in column order.
 * @param aRelationayers
 *            layers for which a relation target column follows the last feature column.
 * @param aAnnotations
 *            per layer, the annotation currently associated with each stacking position.
 * @param aBeginEndAnno
 *            per layer, the last marker ({@code "B-"}, {@code "I-"} or {@code "E-"}) recorded
 *            for each stacking position.
 * @param aTokenAnnotations
 *            per layer, the annotations created on each token, keyed by token number column.
 * @param aRelationTargets
 *            per layer, the relation target numbers collected per token number column.
 * @param lineTk
 *            tokenizer positioned at the first annotation column of the current line.
 * @param aToken
 *            text of the current token; its length determines the annotation end offset.
 * @param aTokenNumberColumn
 *            value of the token number column of the current line.
 */
private void createSpanAnnotation(JCas aJcas, int aTokenStart, Map<Type, Set<Feature>> aLayers, Map<Type, Type> aRelationayers, Map<Type, Map<Integer, AnnotationFS>> aAnnotations, Map<Type, Map<Integer, String>> aBeginEndAnno, Map<Type, Map<String, List<AnnotationFS>>> aTokenAnnotations, Map<Type, Map<String, List<String>>> aRelationTargets, StringTokenizer lineTk, String aToken, String aTokenNumberColumn) {
    for (Type layer : aLayers.keySet()) {
        // highest stacking position seen in the last feature column of this layer
        int lastIndex = 1;
        // if a layer is bound to a single token but has multiple feature
        // annotation is created once and feature values be appended
        Map<Integer, AnnotationFS> singleTokenMultiFeature = new HashMap<>();
        // The relation line number should be read once all feature columns
        // are obtained
        int numberOfFeaturesPerLayer = aLayers.get(layer).size();
        for (Feature feature : aLayers.get(layer)) {
            // counts down; reaches 0 on the last feature of the layer
            numberOfFeaturesPerLayer--;
            // 1-based stacking position of the annotation within the column value
            int index = 1;
            String multipleAnnotations = lineTk.nextToken();
            String relationTargetNumbers = null;
            if (aRelationayers.containsKey(layer) && numberOfFeaturesPerLayer == 0) {
                relationTargetNumbers = lineTk.nextToken();
            }
            // index into relationTargets, advanced for each annotation that consumes a target
            int i = 0;
            String[] relationTargets = null;
            if (relationTargetNumbers != null) {
                relationTargets = relationTargetNumbers.split("\\|");
            }
            for (String annotation : multipleAnnotations.split("\\|")) {
                // If annotation is not on multiple spans
                if (!(annotation.startsWith("B-") || annotation.startsWith("I-") || annotation.startsWith("O-")) && !(annotation.equals("_") || annotation.equals("O"))) {
                    AnnotationFS newAnnotation;
                    // create the annotation only once per stacking position; later feature
                    // columns reuse it
                    if (singleTokenMultiFeature.get(index) == null) {
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                        singleTokenMultiFeature.put(index, newAnnotation);
                    } else {
                        newAnnotation = singleTokenMultiFeature.get(index);
                    }
                    // a value starting with the layer name stands for an unset feature value -
                    // make it null
                    if (annotation.startsWith(layer.getName())) {
                        annotation = null;
                    }
                    newAnnotation.setFeatureValueFromString(feature, annotation);
                    aJcas.addFsToIndexes(newAnnotation);
                    // Set the POS to the token
                    // NOTE(review): the key is built by string concatenation, so the last two
                    // operands are appended as text (start, "-", start, length) rather than
                    // summed - confirm this matches how indexedTokens is populated
                    if (layer.getName().equals(POS.class.getName())) {
                        indexedTokens.get(aTokenStart + "-" + aTokenStart + aToken.length()).setPos((POS) newAnnotation);
                    }
                    // Set the Lemma to the token
                    if (layer.getName().equals(Lemma.class.getName())) {
                        indexedTokens.get(aTokenStart + "-" + aTokenStart + aToken.length()).setLemma((Lemma) newAnnotation);
                    }
                    // on the last feature of a relation-bearing layer, record the governor
                    // number for this annotation under the current token number
                    if (aRelationayers.containsKey(layer) && numberOfFeaturesPerLayer == 0) {
                        Map<String, List<String>> targets = aRelationTargets.get(layer);
                        if (targets == null) {
                            List<String> governors = new ArrayList<>();
                            governors.add(relationTargets[i]);
                            targets = new HashMap<>();
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        } else {
                            List<String> governors = targets.get(aTokenNumberColumn);
                            if (governors == null) {
                                governors = new ArrayList<>();
                            }
                            governors.add(relationTargets[i]);
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        }
                    }
                    // remember the annotation under the current token number
                    Map<String, List<AnnotationFS>> tokenAnnotations = aTokenAnnotations.get(layer);
                    if (tokenAnnotations == null) {
                        tokenAnnotations = new HashMap<>();
                    }
                    List<AnnotationFS> relAnnos = tokenAnnotations.get(aTokenNumberColumn);
                    if (relAnnos == null) {
                        relAnnos = new ArrayList<>();
                    }
                    relAnnos.add(newAnnotation);
                    tokenAnnotations.put(aTokenNumberColumn, relAnnos);
                    aTokenAnnotations.put(layer, tokenAnnotations);
                    index++;
                } else // O-_ is a position marker
                if (annotation.equals("O-_") || annotation.equals("B-_") || annotation.equals("I-_")) {
                    index++;
                } else if (annotation.startsWith("B-")) {
                    boolean isNewAnnotation = true;
                    Map<Integer, AnnotationFS> indexedAnnos = aAnnotations.get(layer);
                    Map<Integer, String> indexedBeginEndAnnos = aBeginEndAnno.get(layer);
                    AnnotationFS newAnnotation;
                    // a new annotation is created unless the position already holds one whose
                    // last recorded marker is "B-" (i.e. a further feature column of the same
                    // annotation on the same token)
                    if (indexedAnnos == null) {
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                        indexedAnnos = new LinkedHashMap<>();
                        indexedBeginEndAnnos = new LinkedHashMap<>();
                    } else if (indexedAnnos.get(index) == null) {
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else if (indexedAnnos.get(index) != null && indexedBeginEndAnnos.get(index).equals("E-")) {
                        // previous annotation at this position was closed
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else // a B- following an I- starts a new annotation
                    if (indexedBeginEndAnnos.get(index).equals("I-")) {
                        newAnnotation = aJcas.getCas().createAnnotation(layer, aTokenStart, aTokenStart + aToken.length());
                    } else {
                        newAnnotation = indexedAnnos.get(index);
                        isNewAnnotation = false;
                    }
                    // remove prefixes such as B-/I- before creating the
                    // annotation
                    annotation = (annotation.substring(2));
                    if (annotation.startsWith(layer.getName())) {
                        annotation = null;
                    }
                    newAnnotation.setFeatureValueFromString(feature, annotation);
                    aJcas.addFsToIndexes(newAnnotation);
                    indexedAnnos.put(index, newAnnotation);
                    indexedBeginEndAnnos.put(index, "B-");
                    aAnnotations.put(layer, indexedAnnos);
                    // NOTE(review): unlike the single-token branch, this does not check
                    // numberOfFeaturesPerLayer == 0, yet relationTargets is only non-null on
                    // the last feature column - confirm relation layers here have one feature
                    if (aRelationayers.containsKey(layer)) {
                        Map<String, List<String>> targets = aRelationTargets.get(layer);
                        if (targets == null) {
                            List<String> governors = new ArrayList<>();
                            governors.add(relationTargets[i]);
                            targets = new HashMap<>();
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        } else {
                            List<String> governors = targets.get(aTokenNumberColumn);
                            if (governors == null) {
                                governors = new ArrayList<>();
                            }
                            governors.add(relationTargets[i]);
                            targets.put(aTokenNumberColumn, governors);
                            i++;
                            aRelationTargets.put(layer, targets);
                        }
                    }
                    Map<String, List<AnnotationFS>> tokenAnnotations = aTokenAnnotations.get(layer);
                    // only newly created annotations are registered for the token
                    if (isNewAnnotation) {
                        if (tokenAnnotations == null) {
                            tokenAnnotations = new HashMap<>();
                        }
                        List<AnnotationFS> relAnnos = tokenAnnotations.get(aTokenNumberColumn);
                        if (relAnnos == null) {
                            relAnnos = new ArrayList<>();
                        }
                        relAnnos.add(newAnnotation);
                        tokenAnnotations.put(aTokenNumberColumn, relAnnos);
                        aTokenAnnotations.put(layer, tokenAnnotations);
                    }
                    aBeginEndAnno.put(layer, indexedBeginEndAnnos);
                    index++;
                } else if (annotation.startsWith("I-")) {
                    // continuation: mark the position as "I-" and stretch the annotation at
                    // this position to the end of the current token
                    // NOTE(review): assumes a preceding B- populated both maps for this layer
                    // and position; otherwise the lookups below yield null
                    Map<Integer, String> indexedBeginEndAnnos = aBeginEndAnno.get(layer);
                    indexedBeginEndAnnos.put(index, "I-");
                    aBeginEndAnno.put(layer, indexedBeginEndAnnos);
                    Map<Integer, AnnotationFS> indexedAnnos = aAnnotations.get(layer);
                    AnnotationFS newAnnotation = indexedAnnos.get(index);
                    ((Annotation) newAnnotation).setEnd(aTokenStart + aToken.length());
                    index++;
                } else {
                    // "_", "O" or any other "O-" value: drop the layer's open annotations
                    aAnnotations.put(layer, null);
                    index++;
                }
            }
            lastIndex = index - 1;
        }
        // tokens annotated as B-X B-X, no B-I means it is end by itself
        for (int i = 1; i <= lastIndex; i++) {
            if (aBeginEndAnno.get(layer) != null && aBeginEndAnno.get(layer).get(i) != null && aBeginEndAnno.get(layer).get(i).equals("B-")) {
                aBeginEndAnno.get(layer).put(i, "E-");
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) Feature(org.apache.uima.cas.Feature) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 30 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class WebAnnoTsv3WriterTestBase method testDependencyWithValues.

/**
 * Round-trips a document holding two POS-tagged tokens joined by a single
 * {@link Dependency} relation through the TSV3 writer and compares the output
 * with the expected reference via {@code writeAndAssertEquals}.
 *
 * <p>The first token acts as governor and the second as dependent. POS is
 * exported as the span layer and Dependency as the relation layer.
 *
 * @throws Exception if building the CAS or writing/comparing the output fails
 */
@Test
public void testDependencyWithValues() throws Exception {
    JCas jcas = makeJCasOneSentence();

    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token governor = tokenList.get(0);
    Token dependent = tokenList.get(1);

    // Tag the governor token.
    POS governorPos = new POS(jcas, governor.getBegin(), governor.getEnd());
    governorPos.setPosValue("POS1");
    governorPos.addToIndexes();
    governor.setPos(governorPos);

    // Tag the dependent token.
    POS dependentPos = new POS(jcas, dependent.getBegin(), dependent.getEnd());
    dependentPos.setPosValue("POS2");
    dependentPos.addToIndexes();
    dependent.setPos(dependentPos);

    // Link both tokens with a dependency relation.
    Dependency relation = new Dependency(jcas);
    relation.setGovernor(governor);
    relation.setDependent(dependent);

    // DKPro Core conventions: the relation's offsets are those of its dependent.
    // (The WebAnno legacy convention would instead span from the smaller begin
    // to the larger end of governor and dependent.)
    Token relationAnchor = relation.getDependent();
    relation.setBegin(relationAnchor.getBegin());
    relation.setEnd(relation.getDependent().getEnd());
    relation.addToIndexes();

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(POS.class),
            WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList(Dependency.class));
}
Also used : POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Test(org.junit.Test)

Aggregations

POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS): 35; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 21; ArrayList (java.util.ArrayList): 15; JCas (org.apache.uima.jcas.JCas): 14; Test (org.junit.Test): 12; Lemma (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma): 11; Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency): 9; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 8; List (java.util.List): 8; Type (org.apache.uima.cas.Type): 8; AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature): 7; MorphologicalFeatures (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures): 7; LinkedHashMap (java.util.LinkedHashMap): 7; Feature (org.apache.uima.cas.Feature): 7; AnnotationFS (org.apache.uima.cas.text.AnnotationFS): 7; Stem (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem): 5; HashMap (java.util.HashMap): 5; Evaluator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator): 3; PossibleValue (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue): 3; ValuesGenerator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator): 3