Search in sources :

Example 11 with AnnotationUnit

use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.

the class WebannoTsv3Writer method process.

/**
 * Writes the annotations of one CAS to the output stream obtained from
 * {@code getOutputStream(aJCas, filenameSuffix)}.
 *
 * <p>The per-document state is rebuilt first (reset, link maps, token/sentence
 * addresses, ambiguity, span, chain and relation annotations), then the header
 * is written, followed by one line per {@link AnnotationUnit}. A unit that is
 * registered in {@code sentenceUnits} is preceded by one {@code #Text=} line
 * per physical line of the sentence text.
 *
 * @param aJCas the CAS to export
 * @throws AnalysisEngineProcessException wrapping any exception raised while
 *         collecting or writing the annotations
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    try (OutputStream docOS = getOutputStream(aJCas, filenameSuffix)) {
        resetVariables();
        setSlotLinkTypes();
        setLinkMaps(aJCas);
        setTokenSentenceAddress(aJCas);
        setAmbiguity(aJCas);
        setSpanAnnotation(aJCas);
        setChainAnnotation(aJCas);
        setRelationAnnotation(aJCas);
        writeHeader(docOS);
        for (AnnotationUnit unit : units) {
            if (sentenceUnits.containsKey(unit)) {
                String[] sentWithNl = sentenceUnits.get(unit).split("\n");
                IOUtils.write(LF + "#Text=" + escapeSpecial(sentWithNl[0]) + LF, docOS, encoding);
                // GITHUB ISSUE 318: New line in sentence should be exported as is,
                // so every further physical line gets its own #Text= line.
                for (int i = 1; i < sentWithNl.length; i++) {
                    IOUtils.write("#Text=" + escapeSpecial(sentWithNl[i]) + LF, docOS, encoding);
                }
            }
            // Token and sub-token units share the same "<line>\t<begin>-<end>\t<text>\t" prefix.
            IOUtils.write(unitsLineNumber.get(unit) + TAB + unit.begin + "-" + unit.end + TAB + unit.token + TAB, docOS, encoding);
            for (String type : featurePerLayer.keySet()) {
                List<List<String>> annos = annotationsPerPostion.getOrDefault(type, new HashMap<>()).getOrDefault(unit, new ArrayList<>());
                // Stacked annotations are joined column-wise with "|". The merge
                // works on a copy so the collected annotation lists stay untouched.
                List<String> merged = null;
                for (List<String> annofs : annos) {
                    if (merged == null) {
                        merged = new ArrayList<>(annofs);
                    } else {
                        for (int i = 0; i < annofs.size(); i++) {
                            merged.set(i, merged.get(i) + "|" + annofs.get(i));
                        }
                    }
                }
                if (merged != null) {
                    for (String anno : merged) {
                        IOUtils.write(anno + TAB, docOS, encoding);
                    }
                } else {
                    // No annotation of this layer on this unit: one "_" placeholder
                    // per feature, and a single one for a layer without features.
                    int placeholders = Math.max(1, featurePerLayer.get(type).size());
                    for (int i = 0; i < placeholders; i++) {
                        IOUtils.write("_" + TAB, docOS, encoding);
                    }
                }
            }
            IOUtils.write(LF, docOS, encoding);
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) List(java.util.List) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) IOException(java.io.IOException)

Example 12 with AnnotationUnit

use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.

the class WebannoTsv3Writer method setTokenSentenceAddress.

/**
 * Registers one {@link AnnotationUnit} per token and assigns it the address
 * {@code <sentence number>-<token number>}, both counted from 1.
 *
 * <p>The first token unit of each sentence is additionally mapped to the
 * covered text of that sentence in {@code sentenceUnits}.
 *
 * @param aJCas the CAS whose sentences and tokens are indexed
 */
private void setTokenSentenceAddress(JCas aJCas) {
    int sentenceIndex = 0;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        sentenceIndex++;
        int tokenIndex = 0;
        for (Token token : selectCovered(Token.class, sentence)) {
            tokenIndex++;
            AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(), false, token.getCoveredText());
            units.add(tokenUnit);
            // Only the sentence-initial token carries the sentence text.
            if (tokenIndex == 1) {
                sentenceUnits.put(tokenUnit, sentence.getCoveredText());
            }
            unitsLineNumber.put(tokenUnit, sentenceIndex + "-" + tokenIndex);
        }
    }
}
Also used : AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 13 with AnnotationUnit

use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.

the class WebannoTsv3Writer method setAmbiguity.

/**
 * Records, per layer and unit, whether the annotations on that unit are
 * ambiguous: {@code true} for every sub-unit of a multi-token annotation and
 * for a unit that has already been seen for the same layer (stacked
 * annotation), {@code false} for a unit seen for the first time.
 *
 * <p>The layers visited are the configured span layers plus the token layer.
 * They are collected in a local, de-duplicated copy: adding the token layer to
 * the {@code spanLayers} field itself would grow that list on every call and
 * make the token layer be visited twice, flagging every token as stacked.
 *
 * @param aJCas the CAS whose span and token annotations are inspected
 */
private void setAmbiguity(JCas aJCas) {
    // Insertion-ordered copy; leaves the spanLayers field unmodified.
    Set<String> spanAndTokenLayers = new LinkedHashSet<>(spanLayers);
    spanAndTokenLayers.add(Token.class.getName());
    for (String l : spanAndTokenLayers) {
        Type type = getType(aJCas.getCas(), l);
        String typeName = type.getName();
        ambigUnits.putIfAbsent(typeName, new HashMap<>());
        for (AnnotationFS fs : CasUtil.select(aJCas.getCas(), type)) {
            AnnotationUnit unit = getFirstUnit(fs);
            if (isMultipleTokenAnnotation(fs.getBegin(), fs.getEnd())) {
                // multiple token anno: every covered sub-unit is ambiguous
                SubTokenAnno sta = new SubTokenAnno();
                sta.setBegin(fs.getBegin());
                sta.setEnd(fs.getEnd());
                sta.setText(fs.getCoveredText());
                Set<AnnotationUnit> sus = new LinkedHashSet<>();
                for (AnnotationUnit newUnit : getSubUnits(sta, sus)) {
                    ambigUnits.get(typeName).put(newUnit, true);
                }
            } else if (ambigUnits.get(typeName).get(unit) != null) {
                // stacked anno: the unit was already registered for this layer
                ambigUnits.get(typeName).put(unit, true);
            } else {
                // single or first occurrence of stacked anno
                ambigUnits.get(typeName).put(unit, false);
            }
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)

Example 14 with AnnotationUnit

use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.

the class WebannoTsv3Writer method setSpanAnnotation.

/**
 * Collects the span annotations of every configured span layer, keyed by the
 * {@link AnnotationUnit} they are written on.
 *
 * <p>First pass: every feature of a span layer whose name is listed in
 * {@code slotFeatures} is mapped to a target type; targets are taken from
 * {@code slotTargets} in encounter order. Second pass: each annotation is
 * stored either on the unit that matches it exactly, or on the units returned
 * by {@code getSubUnits} when no such unit is registered. The token layer is
 * skipped in the second pass.
 *
 * @param aJCas the CAS to read the span annotations from
 */
private void setSpanAnnotation(JCas aJCas) {
    // store slot targets for each slot features
    int targetIndex = 0;
    for (String layerName : spanLayers) {
        Type layerType = getType(aJCas.getCas(), layerName);
        for (Feature feature : layerType.getFeatures()) {
            if (slotFeatures == null || !slotFeatures.contains(feature.getName())) {
                continue;
            }
            slotFeatureTypes.put(feature, getType(aJCas.getCas(), slotTargets.get(targetIndex)));
            targetIndex++;
        }
    }

    for (String layerName : spanLayers) {
        if (layerName.equals(Token.class.getName())) {
            continue;
        }
        // Reuse what is already collected for this layer, otherwise start empty.
        Map<AnnotationUnit, List<List<String>>> perUnit = annotationsPerPostion.get(layerName);
        if (perUnit == null) {
            perUnit = new HashMap<>();
        }
        Type layerType = getType(aJCas.getCas(), layerName);
        for (AnnotationFS fs : CasUtil.select(aJCas.getCas(), layerType)) {
            AnnotationUnit exactUnit = new AnnotationUnit(fs.getBegin(), fs.getEnd(), false, fs.getCoveredText());
            if (units.contains(exactUnit)) {
                // annotation is per Token
                setSpanAnnoPerFeature(perUnit, layerType, fs, exactUnit, false, false);
                continue;
            }
            // Annotation is on sub-token or multiple tokens
            SubTokenAnno subTokenAnno = new SubTokenAnno();
            subTokenAnno.setBegin(fs.getBegin());
            subTokenAnno.setEnd(fs.getEnd());
            subTokenAnno.setText(fs.getCoveredText());
            boolean multiToken = isMultiToken(fs);
            Set<AnnotationUnit> collected = new LinkedHashSet<>();
            boolean first = true;
            for (AnnotationUnit subUnit : getSubUnits(subTokenAnno, collected)) {
                setSpanAnnoPerFeature(perUnit, layerType, fs, subUnit, multiToken, first);
                first = false;
            }
        }
        // Layers without any annotation are not registered.
        if (!perUnit.isEmpty()) {
            annotationsPerPostion.put(layerName, perUnit);
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Feature(org.apache.uima.cas.Feature) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) ArrayList(java.util.ArrayList) List(java.util.List)

Example 15 with AnnotationUnit

use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.

the class WebannoTsv3Writer method setChainAnnotation.

/**
 * Collects the chain annotations of every configured chain layer.
 *
 * <p>For each chain feature structure of type {@code <layer> + CHAIN}, the
 * links are followed from the {@code FIRST} feature through the {@code NEXT}
 * feature. Every link is passed to {@code addChinFeatureAnno} with its unit,
 * its 1-based position in the chain and the 1-based chain number. The
 * annotations are stored under the type name of the chain's first link.
 *
 * <p>A chain without a first link is skipped and does not consume a chain
 * number; dereferencing it would otherwise raise a
 * {@link NullPointerException}.
 *
 * @param aJCas the CAS to read the chains from
 */
private void setChainAnnotation(JCas aJCas) {
    for (String l : chainLayers) {
        if (l.equals(Token.class.getName())) {
            continue;
        }
        Type type = getType(aJCas.getCas(), l + CHAIN);
        Feature chainFirst = type.getFeatureByBaseName(FIRST);
        int chainNo = 1;
        for (FeatureStructure chainFs : selectFS(aJCas.getCas(), type)) {
            AnnotationFS linkFs = (AnnotationFS) chainFs.getFeatureValue(chainFirst);
            if (linkFs == null) {
                // Empty chain: nothing to export.
                continue;
            }
            Type lType = linkFs.getType();
            // this is the layer with annotations
            String linkLayer = lType.getName();
            Map<AnnotationUnit, List<List<String>>> annotationsPertype = annotationsPerPostion.get(linkLayer);
            if (annotationsPertype == null) {
                annotationsPertype = new HashMap<>();
            }
            Feature linkNext = lType.getFeatureByBaseName(NEXT);
            int linkNo = 1;
            while (linkFs != null) {
                AnnotationUnit unit = getUnit(linkFs.getBegin(), linkFs.getEnd(), linkFs.getCoveredText());
                AnnotationFS nextLinkFs = (AnnotationFS) linkFs.getFeatureValue(linkNext);
                addChinFeatureAnno(annotationsPertype, lType, linkFs, unit, linkNo, chainNo);
                linkFs = nextLinkFs;
                linkNo++;
            }
            if (!annotationsPertype.isEmpty()) {
                annotationsPerPostion.put(linkLayer, annotationsPertype);
            }
            chainNo++;
        }
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) AnnotationUnit(de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) ArrayList(java.util.ArrayList) List(java.util.List) Feature(org.apache.uima.cas.Feature)

Aggregations

AnnotationUnit (de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit)16 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)9 ArrayList (java.util.ArrayList)8 Type (org.apache.uima.cas.Type)8 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)6 List (java.util.List)6 Feature (org.apache.uima.cas.Feature)6 LinkedHashSet (java.util.LinkedHashSet)5 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)5 HashMap (java.util.HashMap)4 LinkedHashMap (java.util.LinkedHashMap)4 FeatureStructure (org.apache.uima.cas.FeatureStructure)3 IOException (java.io.IOException)2 Map (java.util.Map)2 TreeMap (java.util.TreeMap)2 MorphologicalFeatures (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures)1 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)1 Lemma (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma)1 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)1 Stem (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem)1