Search in sources :

Example 1 with Dependency

use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.

the class Tsv3XCasDocumentBuilder method scanUnitForActiveColumns.

/**
 * Activates every schema column for which the given unit carries at least one annotation
 * and, for relation reference columns, records a target type hint on the column.
 *
 * @param aUnit
 *            the unit whose annotations are inspected; columns are activated on the document
 *            this unit belongs to.
 * @throws IllegalStateException
 *             if the first annotation of a relation reference column has no value for the
 *             {@code FEAT_REL_SOURCE} feature.
 */
private static void scanUnitForActiveColumns(TsvUnit aUnit) {
    for (TsvColumn column : aUnit.getDocument().getSchema().getColumns()) {
        List<AnnotationFS> columnAnnotations = aUnit.getAnnotationsForColumn(column);
        if (columnAnnotations.isEmpty()) {
            // Nothing in this unit uses the column.
            continue;
        }

        // Placeholder columns are never activated, but they still take part in the
        // relation handling below.
        if (!PLACEHOLDER.equals(column.featureType)) {
            aUnit.getDocument().activateColumn(column);
        }

        boolean isRelationReference = RELATION.equals(column.layerType)
                && RELATION_REF.equals(column.featureType);
        if (!isRelationReference) {
            continue;
        }

        // The target type hint is not taken from a type system declaration; it is derived
        // from the endpoint of the first actual annotation found for the column.
        AnnotationFS firstRelation = columnAnnotations.get(0);
        FeatureStructure endpoint = FSUtil.getFeature(firstRelation, FEAT_REL_SOURCE, FeatureStructure.class);
        if (endpoint == null) {
            throw new IllegalStateException("Relation does not have its source feature (" + FEAT_REL_SOURCE + ") set: " + firstRelation);
        }

        if (column.uimaType.getName().equals(Dependency.class.getName())) {
            // COMPATIBILITY NOTE:
            // WebAnnoTsv3Writer hard-changes the target type for DKPro Core Dependency
            // annotations from Token to POS. The reason is not really clear - probably
            // because Dependency relations in the WebAnno UI attach to POS (Tokens are
            // not visible as annotations in the UI).
            column.setTargetTypeHint(aUnit.getDocument().getJCas().getTypeSystem().getType(POS.class.getName()));
        } else {
            column.setTargetTypeHint(endpoint.getType());
        }
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)

Example 2 with Dependency

use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.

the class Conll2009Reader method convert.

/**
 * Reads sentences from the given reader and adds the corresponding text and annotations
 * (tokens, and optionally lemmas, POS tags, morphological features, dependencies and
 * semantic predicates/arguments) to the given JCas.
 *
 * @param aJCas
 *            the JCas that receives the document text and annotations.
 * @param aReader
 *            the source the sentences are read from via {@code readSentence}.
 * @throws IOException
 *             if configuring the POS mapping provider fails (the original exception is
 *             wrapped), or if it is thrown while reading.
 */
public void convert(JCas aJCas, BufferedReader aReader) throws IOException {
    if (readPos) {
        try {
            posMappingProvider.configure(aJCas.getCas());
        } catch (AnalysisEngineProcessException e) {
            throw new IOException(e);
        }
    }

    JCasBuilder doc = new JCasBuilder(aJCas);
    for (List<String[]> rows = readSentence(aReader); rows != null; rows = readSentence(aReader)) {
        if (rows.isEmpty()) {
            // An empty sentence block yields no text and no annotations.
            continue;
        }

        final int sentenceBegin = doc.getPosition();
        int sentenceEnd = sentenceBegin;

        // Pass 1: tokens together with their lemma, POS, morphology and predicate.
        Map<Integer, Token> tokensById = new HashMap<>();
        List<SemPred> predicates = new ArrayList<>();
        for (Iterator<String[]> rowIterator = rows.iterator(); rowIterator.hasNext();) {
            String[] fields = rowIterator.next();

            Token token = doc.add(fields[FORM], Token.class);
            tokensById.put(Integer.valueOf(fields[ID]), token);
            // Tokens are separated by a single space, with none after the last one.
            if (rowIterator.hasNext()) {
                doc.add(" ");
            }

            final int tokenBegin = token.getBegin();
            final int tokenEnd = token.getEnd();

            if (!UNUSED.equals(fields[LEMMA]) && readLemma) {
                Lemma lemma = new Lemma(aJCas, tokenBegin, tokenEnd);
                lemma.setValue(fields[LEMMA]);
                lemma.addToIndexes();
                token.setLemma(lemma);
            }

            if (!UNUSED.equals(fields[POS]) && readPos) {
                Type posTagType = posMappingProvider.getTagType(fields[POS]);
                POS posAnnotation = (POS) aJCas.getCas().createAnnotation(posTagType, tokenBegin, tokenEnd);
                posAnnotation.setPosValue(fields[POS].intern());
                // No coarse-grained POS value is assigned here: WebAnno did not yet
                // backport that feature from DKPro Core 1.9.0.
                posAnnotation.addToIndexes();
                token.setPos(posAnnotation);
            }

            if (!UNUSED.equals(fields[FEAT]) && readMorph) {
                MorphologicalFeatures morphology = new MorphologicalFeatures(aJCas, tokenBegin, tokenEnd);
                morphology.setValue(fields[FEAT]);
                morphology.addToIndexes();
            }

            if (!UNUSED.equals(fields[PRED]) && readSemanticPredicate) {
                SemPred predicate = new SemPred(aJCas, tokenBegin, tokenEnd);
                predicate.setCategory(fields[PRED]);
                predicate.addToIndexes();
                predicates.add(predicate);
            }

            sentenceEnd = tokenEnd;
        }

        // Pass 2: dependency relations between the tokens collected above.
        if (readDependency) {
            for (String[] fields : rows) {
                if (UNUSED.equals(fields[DEPREL])) {
                    continue;
                }
                int dependentId = Integer.parseInt(fields[ID]);
                int headId = Integer.parseInt(fields[HEAD]);
                // A head of 0 marks the root, which is modelled as a loop onto itself.
                // The ROOT type is not used because WebAnno cannot deal with elevated types.
                int governorId = (headId == 0) ? dependentId : headId;

                Dependency relation = new Dependency(aJCas);
                relation.setGovernor(tokensById.get(governorId));
                relation.setDependent(tokensById.get(dependentId));
                relation.setDependencyType(fields[DEPREL]);
                relation.setBegin(relation.getDependent().getBegin());
                relation.setEnd(relation.getDependent().getEnd());
                // Set via FSUtil because the DKPro Core 1.7.0 JCas classes are still in use.
                FSUtil.setFeature(relation, "flavor", DependencyFlavor.BASIC);
                relation.addToIndexes();
            }
        }

        // Pass 3: semantic arguments, one predicate (i.e. one APRED column) at a time.
        if (readSemanticPredicate) {
            int predicateIndex = 0;
            while (predicateIndex < predicates.size()) {
                List<SemArgLink> links = new ArrayList<>();
                for (String[] fields : rows) {
                    String role = fields[APRED + predicateIndex];
                    if (UNUSED.equals(role)) {
                        continue;
                    }
                    Token argumentToken = tokensById.get(Integer.valueOf(fields[ID]));
                    SemArg argument = new SemArg(aJCas, argumentToken.getBegin(), argumentToken.getEnd());
                    argument.addToIndexes();

                    SemArgLink link = new SemArgLink(aJCas);
                    link.setRole(role);
                    link.setTarget(argument);
                    links.add(link);
                }
                predicates.get(predicateIndex).setArguments(FSCollectionFactory.createFSArray(aJCas, links));
                predicateIndex++;
            }
        }

        Sentence sentence = new Sentence(aJCas, sentenceBegin, sentenceEnd);
        sentence.addToIndexes();

        // One sentence per line in the document text.
        doc.add("\n");
    }
    doc.close();
}
Also used : MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SemArgLink(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) IOException(java.io.IOException) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) Type(org.apache.uima.cas.Type) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) JCasBuilder(org.apache.uima.fit.factory.JCasBuilder) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) SemPred(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) SemArg(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg)

Example 3 with Dependency

use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.

the class Conll2009Writer method convert.

/**
 * Writes every sentence of the given JCas as tab-separated rows (one row per token,
 * followed by an empty line per sentence) in CoNLL 2009 format.
 *
 * @param aJCas
 *            the JCas whose sentences, tokens and attached annotations are written.
 * @param aOut
 *            the writer receiving the output.
 * @throws IllegalStateException
 *             if a token is the dependent of more than one basic dependency.
 */
private void convert(JCas aJCas, PrintWriter aOut) {
    Map<Token, Collection<SemPred>> predIdx = indexCovered(aJCas, Token.class, SemPred.class);
    Map<SemArg, Collection<Token>> argIdx = indexCovered(aJCas, SemArg.class, Token.class);

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        // Insertion-ordered so that rows are written in token order.
        Map<Token, Row> rowsByToken = new LinkedHashMap<>();

        List<Token> sentenceTokens = selectCovered(Token.class, sentence);
        List<MorphologicalFeatures> morphology = selectCovered(MorphologicalFeatures.class, sentence);
        // FEATS are only included if there is exactly one annotation per token.
        boolean useFeats = sentenceTokens.size() == morphology.size();
        List<SemPred> preds = selectCovered(SemPred.class, sentence);

        int position = 0;
        for (Token token : sentenceTokens) {
            Row row = new Row();
            row.id = position + 1;
            row.token = token;
            row.args = new SemArgLink[preds.size()];
            if (useFeats) {
                row.feats = morphology.get(position);
            }
            // When a token carries several semantic predicates only the first is kept.
            Collection<SemPred> tokenPreds = predIdx.get(token);
            if (tokenPreds != null && !tokenPreds.isEmpty()) {
                row.pred = tokenPreds.iterator().next();
            }
            rowsByToken.put(token, row);
            position++;
        }

        // Dependencies: only those without a flavor or with the BASIC flavor are used.
        for (Dependency dependency : selectCovered(Dependency.class, sentence)) {
            String flavor = FSUtil.getFeature(dependency, "flavor", String.class);
            if (flavor != null && !DependencyFlavor.BASIC.equals(flavor)) {
                continue;
            }
            Row dependentRow = rowsByToken.get(dependency.getDependent());
            if (dependentRow.deprel != null) {
                throw new IllegalStateException("Illegal basic dependency structure - token [" + dependentRow.token.getCoveredText() + "] is dependent of more than one dependency.");
            }
            dependentRow.deprel = dependency;
        }

        // Semantic arguments: column p of a row holds the link to predicate p.
        int predicateIndex = 0;
        for (SemPred predicate : preds) {
            FSArray links = predicate.getArguments();
            for (SemArgLink link : select(links, SemArgLink.class)) {
                for (Token covered : argIdx.get(link.getTarget())) {
                    rowsByToken.get(covered).args[predicateIndex] = link;
                }
            }
            predicateIndex++;
        }

        // Emit one line per token.
        for (Row row : rowsByToken.values()) {
            String form = row.token.getCoveredText();

            String lemma = (writeLemma && row.token.getLemma() != null)
                    ? row.token.getLemma().getValue()
                    : UNUSED;
            String pos = (writePos && row.token.getPos() != null)
                    ? row.token.getPos().getPosValue()
                    : UNUSED;
            String feat = (writeMorph && row.feats != null)
                    ? row.feats.getValue()
                    : UNUSED;

            String deprel = UNUSED;
            int headId = UNUSED_INT;
            if (writeDependency && row.deprel != null) {
                deprel = row.deprel.getDependencyType();
                headId = rowsByToken.get(row.deprel.getGovernor()).id;
                // A dependency whose governor is the token itself is a root modelled as
                // a loop; it is written with head 0.
                if (headId == row.id) {
                    headId = 0;
                }
            }
            String head = (headId == UNUSED_INT) ? UNUSED : Integer.toString(headId);

            String fillpred = UNUSED;
            String pred = UNUSED;
            StringBuilder apreds = new StringBuilder();
            if (writeSemanticPredicate) {
                if (row.pred != null) {
                    fillpred = "Y";
                    pred = row.pred.getCategory();
                }
                for (SemArgLink link : row.args) {
                    if (apreds.length() > 0) {
                        apreds.append('\t');
                    }
                    apreds.append(link == null ? UNUSED : link.getRole());
                }
            }

            // The predicted columns (PLEMMA, PPOS, PFEAT, PHEAD, PDEPREL) repeat the
            // values of their gold counterparts.
            aOut.printf("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", row.id, form, lemma, lemma, pos, pos, feat, feat, head, head, deprel, deprel, fillpred, pred, apreds);
        }
        aOut.println();
    }
}
Also used : FSArray(org.apache.uima.jcas.cas.FSArray) JCasFileWriter_ImplBase(de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase) SemArgLink(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink) HashMap(java.util.HashMap) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) FSUtil(org.apache.uima.fit.util.FSUtil) LinkedHashMap(java.util.LinkedHashMap) TypeCapability(org.apache.uima.fit.descriptor.TypeCapability) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) SemPred(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) ConfigurationParameter(org.apache.uima.fit.descriptor.ConfigurationParameter) DependencyFlavor(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor) Map(java.util.Map) OutputStreamWriter(java.io.OutputStreamWriter) SemArg(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg) PrintWriter(java.io.PrintWriter) JCas(org.apache.uima.jcas.JCas) ResourceMetaData(org.apache.uima.fit.descriptor.ResourceMetaData) MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) JCasUtil.indexCovered(org.apache.uima.fit.util.JCasUtil.indexCovered) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) ComponentParameters(de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters) IOUtils.closeQuietly(org.apache.commons.io.IOUtils.closeQuietly) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) List(java.util.List) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) JCasUtil.select(org.apache.uima.fit.util.JCasUtil.select) JCasUtil.selectCovered(org.apache.uima.fit.util.JCasUtil.selectCovered) SemArgLink(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) LinkedHashMap(java.util.LinkedHashMap) 
FSArray(org.apache.uima.jcas.cas.FSArray) SemPred(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Collection(java.util.Collection) SemArg(de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg)

Example 4 with Dependency

use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.

the class TcfWriter method writeDependency.

/**
 * Writes the dependency annotations of the given JCas into a dependency parsing layer of
 * the given text corpus, one parse per sentence that contains at least one dependency.
 * Does nothing (apart from a debug log message) if the JCas holds no dependencies.
 *
 * @param aJCas
 *            the JCas the dependencies, sentences and tagset descriptions are read from.
 * @param aTextCorpus
 *            the text corpus in which the dependency parsing layer is created.
 * @param aTokensBeginPositionMap
 *            corpus tokens keyed by the begin offset of the corresponding JCas token; used
 *            to look up the dependent and governor of each dependency.
 */
private void writeDependency(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) {
    if (!JCasUtil.exists(aJCas, Dependency.class)) {
        // Do nothing if there are no dependencies in the CAS
        getLogger().debug("Layer [" + TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName() + "]: empty");
        return;
    }

    // Fall back to "tiger" unless a tagset description for the Dependency layer exists.
    String tagSetName = "tiger";
    for (TagsetDescription tagSet : select(aJCas, TagsetDescription.class)) {
        // Constant-first comparison: a description without a layer value is skipped
        // instead of causing a NullPointerException.
        if (Dependency.class.getName().equals(tagSet.getLayer())) {
            tagSetName = tagSet.getName();
            break;
        }
    }

    DependencyParsingLayer dependencyParsingLayer = aTextCorpus.createDependencyParsingLayer(tagSetName, false, true);
    getLogger().debug("Layer [" + TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName() + "]: created");

    for (Sentence s : select(aJCas, Sentence.class)) {
        List<eu.clarin.weblicht.wlfxb.tc.api.Dependency> deps = new ArrayList<>();
        for (Dependency d : selectCovered(Dependency.class, s)) {
            eu.clarin.weblicht.wlfxb.tc.api.Dependency dependency = dependencyParsingLayer.createDependency(d.getDependencyType(), aTokensBeginPositionMap.get(d.getDependent().getBegin()), aTokensBeginPositionMap.get(d.getGovernor().getBegin()));
            deps.add(dependency);
        }
        // Sentences without dependencies do not produce a parse.
        if (!deps.isEmpty()) {
            dependencyParsingLayer.addParse(deps);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) TagsetDescription(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription) DependencyParsingLayer(eu.clarin.weblicht.wlfxb.tc.api.DependencyParsingLayer) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 5 with Dependency

use of de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency in project webanno by webanno.

the class WebannoTsv1Reader method createDependency.

/**
 * Adds dependency annotations to the CAS.
 * <p>
 * For every index {@code i} from 1 to {@code tokens.size()} that has an entry in
 * {@code dependencyFunction}, one {@link Dependency} is created with the token stored under
 * {@code "t_" + i} as dependent. Its span runs from the smaller begin offset to the larger
 * end offset of the dependent and governor tokens.
 *
 * @param aJCas
 *            the JCas in which the annotations are created.
 * @param tokens
 *            only its size is used, as the upper bound of the token indices.
 * @param dependencyFunction
 *            dependency type per token index; indices without an entry are skipped.
 * @param dependencyDependent
 *            per token index, the index of the token used as governor; {@code 0} marks a
 *            root, for which the token is its own governor.
 * @param tokensStored
 *            tokens keyed by {@code "t_"} followed by the token index.
 */
private void createDependency(JCas aJCas, Map<Integer, String> tokens, Map<Integer, String> dependencyFunction, Map<Integer, Integer> dependencyDependent, Map<String, Token> tokensStored) {
    final int tokenCount = tokens.size();
    for (int index = 1; index <= tokenCount; index++) {
        String dependencyType = dependencyFunction.get(index);
        if (dependencyType == null) {
            continue;
        }

        Dependency outDependency = new Dependency(aJCas);
        outDependency.setDependencyType(dependencyType);

        int governorIndex = dependencyDependent.get(index);
        Token dependentToken = tokensStored.get("t_" + index);
        // A root (governor index 0) is attached to itself.
        Token governorToken = (governorIndex == 0)
                ? dependentToken
                : tokensStored.get("t_" + governorIndex);

        // The annotation covers both endpoints regardless of the arc direction: for
        // spans (20, 26) and (30, 36) the dependency gets (20, 36) either way.
        outDependency.setBegin(Math.min(dependentToken.getBegin(), governorToken.getBegin()));
        outDependency.setEnd(Math.max(dependentToken.getEnd(), governorToken.getEnd()));
        outDependency.setDependent(dependentToken);
        outDependency.setGovernor(governorToken);
        outDependency.addToIndexes();
    }
}
Also used : Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)

Aggregations

Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)14 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)10 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)6 ArrayList (java.util.ArrayList)6 JCas (org.apache.uima.jcas.JCas)5 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)4 Test (org.junit.Test)3 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)2 AnnotationLayer (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer)2 TagSet (de.tudarmstadt.ukp.clarin.webanno.model.TagSet)2 TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)2 MorphologicalFeatures (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures)2 SemArg (de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg)2 SemArgLink (de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink)2 SemPred (de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred)2 Collection (java.util.Collection)2 HashMap (java.util.HashMap)2 LinkedHashMap (java.util.LinkedHashMap)2 List (java.util.List)2 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)2