Search in sources :

Example 1 with CoreferenceLink

use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.

the class TcfReader method storeReferencesAndTargetsInMap.

/**
 * Creates one {@link CoreferenceLink} for every reference of the given TCF entity, adds it to the
 * CAS indexes and registers it in {@code aReferencesMap}.
 *
 * @param aReferencesMap
 *            receives the created links, keyed by the value of {@code ll_getFSRef(link)}
 * @param entity
 *            the TCF referenced entity whose references are converted
 * @param aCorpusData
 *            the TCF corpus; its references layer resolves each reference to its tokens
 * @param aTokens
 *            handed to {@code getOffsets} together with the TCF token ids of a reference
 * @param aJcas
 *            the CAS in which the links are created
 */
private void storeReferencesAndTargetsInMap(Map<Integer, CoreferenceLink> aReferencesMap, eu.clarin.weblicht.wlfxb.tc.api.ReferencedEntity entity, TextCorpus aCorpusData, Map<String, Token> aTokens, JCas aJcas) {
    for (Reference reference : entity.getReferences()) {
        // Collect the ids of the TCF tokens covered by this reference.
        StringBuilder sbTokens = new StringBuilder();
        for (eu.clarin.weblicht.wlfxb.tc.api.Token token : aCorpusData.getReferencesLayer().getTokens(reference)) {
            sbTokens.append(token.getID()).append(" ");
        }
        String[] referenceTokens = sbTokens.toString().split(" ");

        // Resolve the span once; element 0 is used as begin and element 1 as end.
        // NOTE(review): assumes getOffsets returns int[] and has no side effects - confirm.
        int[] offsets = getOffsets(referenceTokens, aTokens);

        CoreferenceLink link = new CoreferenceLink(aJcas);
        link.setBegin(offsets[0]);
        link.setEnd(offsets[1]);
        // A reference without an explicit type is stored with the type "nam".
        String referencesType = reference.getType() == null ? "nam" : reference.getType();
        link.setReferenceType(referencesType);
        // The relation is optional and only copied when the reference carries one.
        if (reference.getRelation() != null) {
            link.setReferenceRelation(reference.getRelation());
        }
        link.addToIndexes();
        aReferencesMap.put(aJcas.getCasImpl().ll_getFSRef(link), link);
    }
}
Also used : Reference(eu.clarin.weblicht.wlfxb.tc.api.Reference) CoreferenceLink(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink)

Example 2 with CoreferenceLink

use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.

the class WebannoTsv2Writer method convertToTsv.

/**
 * Writes the annotations of {@code aJCas} to {@code aOs} as tab-separated text.
 * <p>
 * The output consists of, in this order:
 * <ol>
 * <li>one header line: {@code " # <type name>"} followed by {@code " | <feature>"} for each
 * written feature of every span type, then the same for every relation type, each relation type
 * being closed by {@code " | AttachTo=<type name>"};</li>
 * <li>for every sentence a {@code #id=} line, a {@code #text=} line (line feeds removed), one
 * row per token and an empty line.</li>
 * </ol>
 * A token row holds the token id, the covered text, one column per span feature and, for every
 * relation type, its feature columns followed by the governor column. A missing value is written
 * as {@code _}, or as {@code O} for span types listed in {@code multipleSpans}.
 * <p>
 * Types that have a {@code FIRST} or {@code NEXT} feature (coreference chains) are not written.
 *
 * @param aJCas
 *            the CAS to serialize
 * @param aOs
 *            the stream written to; it is not closed by this method
 * @param aEncoding
 *            the character encoding passed to every {@code IOUtils.write} call
 * @throws IOException
 *             if writing to {@code aOs} fails
 */
private void convertToTsv(JCas aJCas, OutputStream aOs, String aEncoding) throws IOException, ResourceInitializationException, CASRuntimeException, CASException {
    LowLevelCAS llCas = aJCas.getLowLevelCas();
    tokenIds = new HashMap<>();
    setTokenId(aJCas, tokenIds);
    tokenPositions = new TreeMap<>();
    setTokenPosition(aJCas, tokenPositions);
    Map<Integer, Integer> getTokensPerSentence = new TreeMap<>();
    setTokenSentenceAddress(aJCas, getTokensPerSentence);

    // All annotation types present in the CAS, except the structural ones and coreference links.
    Set<Type> allTypes = new LinkedHashSet<>();
    for (Annotation a : select(aJCas, Annotation.class)) {
        if (!(a instanceof Token || a instanceof Sentence || a instanceof DocumentMetaData || a instanceof TagsetDescription || a instanceof CoreferenceLink)) {
            allTypes.add(a.getType());
        }
    }

    // A type with a GOVERNOR feature is a relation (arc) type; everything else is a span type.
    Set<Type> relationTypes = new LinkedHashSet<>();
    for (Type type : allTypes) {
        for (Feature feature : type.getFeatures()) {
            if (feature.getShortName().equals(GOVERNOR)) {
                relationTypes.add(type);
                break;
            }
        }
    }
    allTypes.removeAll(relationTypes);

    // For every relation type, the name of the type its governor points to ("AttachTo").
    Map<Type, String> relationTypesMap = new HashMap<>();
    for (Type type : relationTypes) {
        if (type.getName().equals(Dependency.class.getName())) {
            relationTypesMap.put(type, POS.class.getName());
            continue;
        }
        for (AnnotationFS anno : CasUtil.select(aJCas.getCas(), type)) {
            for (Feature feature : type.getFeatures()) {
                if (feature.getShortName().equals(GOVERNOR)) {
                    relationTypesMap.put(type, anno.getFeatureValue(feature).getType().getName());
                }
            }
        }
    }

    // Header, part 1: span types and their features.
    Map<Feature, Type> spanFeatures = new LinkedHashMap<>();
    for (Type type : allTypes) {
        if (type.getFeatures().size() == 0 || hasChainFeature(type)) {
            continue;
        }
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            if (isSofaOrOffsetFeature(feature)) {
                continue;
            }
            spanFeatures.put(feature, type);
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
    }

    // Header, part 2: relation types, their features and the type they attach to.
    for (Type type : relationTypes) {
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            if (isSofaOrOffsetFeature(feature) || isRelationEndFeature(feature)) {
                continue;
            }
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
        // Add the attach type for the relation annotation
        IOUtils.write(" | AttachTo=" + relationTypesMap.get(type), aOs, aEncoding);
    }
    IOUtils.write("\n", aOs, aEncoding);

    // Span feature values, keyed by feature and then by the integer token key used below.
    Map<Feature, Map<Integer, String>> allAnnos = new HashMap<>();
    for (Type type : allTypes) {
        if (hasChainFeature(type)) {
            continue;
        }
        for (Feature feature : type.getFeatures()) {
            if (isSofaOrOffsetFeature(feature)) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new TreeMap<>();
            setTokenAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            allAnnos.put(feature, tokenAnnoMap);
        }
    }

    // Relation feature values (tokens where dependents are drawn to).
    Map<Feature, Map<Integer, String>> relAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        for (Feature feature : type.getFeatures()) {
            if (isSofaOrOffsetFeature(feature) || isRelationEndFeature(feature)) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new HashMap<>();
            setRelationFeatureAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            relAnnos.put(feature, tokenAnnoMap);
        }
    }

    // Governor positions (tokens where dependents are drawn from).
    Map<Type, Map<Integer, String>> governorAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        Map<Integer, String> govAnnoMap = new HashMap<>();
        setRelationGovernorPos(aJCas.getCas(), govAnnoMap, type);
        governorAnnos.put(type, govAnnoMap);
    }

    int sentId = 1;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        IOUtils.write("#id=" + sentId++ + "\n", aOs, aEncoding);
        IOUtils.write("#text=" + sentence.getCoveredText().replace("\n", "") + "\n", aOs, aEncoding);
        for (Token token : selectCovered(Token.class, sentence)) {
            // The same token key is needed for every column of the row, so resolve it once.
            int tokenRef = llCas.ll_getFSRef(token);
            IOUtils.write(tokenIds.get(tokenRef) + "\t" + token.getCoveredText() + "\t", aOs, aEncoding);

            // all span annotations on this token
            for (Map.Entry<Feature, Type> spanFeature : spanFeatures.entrySet()) {
                String annos = allAnnos.get(spanFeature.getKey()).get(tokenRef);
                if (annos != null) {
                    IOUtils.write(annos + "\t", aOs, aEncoding);
                } else if (multipleSpans.contains(spanFeature.getValue().getName())) {
                    IOUtils.write("O\t", aOs, aEncoding);
                } else {
                    IOUtils.write("_\t", aOs, aEncoding);
                }
            }

            // all relation annotations on this token
            for (Type type : relationTypes) {
                for (Feature feature : type.getFeatures()) {
                    if (isSofaOrOffsetFeature(feature) || isRelationEndFeature(feature)) {
                        continue;
                    }
                    String annos = relAnnos.get(feature).get(tokenRef);
                    IOUtils.write((annos == null ? "_" : annos) + "\t", aOs, aEncoding);
                }
                // the governor positions
                String govPos = governorAnnos.get(type).get(tokenRef);
                IOUtils.write((govPos == null ? "_" : govPos) + "\t", aOs, aEncoding);
            }
            IOUtils.write("\n", aOs, aEncoding);
        }
        IOUtils.write("\n", aOs, aEncoding);
    }
}

/** Tells whether the feature is the built-in sofa, begin or end feature, which gets no column. */
private boolean isSofaOrOffsetFeature(Feature aFeature) {
    String name = aFeature.toString();
    return name.equals("uima.cas.AnnotationBase:sofa") || name.equals("uima.tcas.Annotation:begin") || name.equals("uima.tcas.Annotation:end");
}

/** Tells whether the feature is the GOVERNOR or DEPENDENT end point of a relation. */
private boolean isRelationEndFeature(Feature aFeature) {
    String shortName = aFeature.getShortName();
    return shortName.equals(GOVERNOR) || shortName.equals(DEPENDENT);
}

/** Tells whether the type has a FIRST or NEXT feature, i.e. is an unsupported coreference type. */
private boolean hasChainFeature(Type aType) {
    for (Feature feature : aType.getFeatures()) {
        if (feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
            return true;
        }
    }
    return false;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Feature(org.apache.uima.cas.Feature) TagsetDescription(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription) LinkedHashMap(java.util.LinkedHashMap) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) CoreferenceLink(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) LowLevelCAS(org.apache.uima.cas.impl.LowLevelCAS) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) TreeMap(java.util.TreeMap) Annotation(org.apache.uima.jcas.tcas.Annotation) Type(org.apache.uima.cas.Type) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) NavigableMap(java.util.NavigableMap) TreeMap(java.util.TreeMap)

Example 3 with CoreferenceLink

use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.

the class TcfReader method convertCoreference.

/**
 * Converts the TCF references layer into {@link CoreferenceChain} and {@link CoreferenceLink}
 * annotations.
 * <p>
 * TCF uses the <b>rel</b> and <b>target</b> attributes to build chains, for example:
 *
 * <pre>
 * {@literal <entity>}
 * {@literal   <reference ID="rc_0" tokenIDs="t_0" mintokIDs="t_0" type="nam"/>}
 * {@literal   <reference ID="rc_1" tokenIDs="t_6" mintokIDs="t_6" type="pro.per3"
 *              rel="anaphoric" target="rc_0"/>}
 * {@literal </entity>}
 * </pre>
 *
 * For every referenced entity the conversion runs in two steps: first all references are turned
 * into links and collected in a sorted map, then the links are strung together, in key order,
 * into one chain. The first link becomes the head of the chain; a link without a relation of its
 * own takes over the relation of its successor.
 *
 * @param aJCas
 *            the CAS that receives the chains
 * @param aCorpusData
 *            the TCF corpus; nothing is done when it has no references layer
 * @param aTokens
 *            forwarded unchanged to {@code storeReferencesAndTargetsInMap}
 */
private void convertCoreference(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
    // No layer to read from.
    if (aCorpusData.getReferencesLayer() == null) {
        return;
    }

    for (int entityIndex = 0; entityIndex < aCorpusData.getReferencesLayer().size(); entityIndex++) {
        eu.clarin.weblicht.wlfxb.tc.api.ReferencedEntity referencedEntity = aCorpusData.getReferencesLayer().getReferencedEntity(entityIndex);

        // Step 1: one link per reference, sorted by the integer key they were stored under.
        Map<Integer, CoreferenceLink> linksByKey = new TreeMap<>();
        storeReferencesAndTargetsInMap(linksByKey, referencedEntity, aCorpusData, aTokens, aJCas);

        // Step 2: string the links together.
        CoreferenceChain chain = new CoreferenceChain(aJCas);
        CoreferenceLink tail = null;
        for (CoreferenceLink current : linksByKey.values()) {
            if (chain.getFirst() != null) {
                tail.setNext(current);
                if (tail.getReferenceRelation() == null) {
                    tail.setReferenceRelation(current.getReferenceRelation());
                }
                tail = tail.getNext();
                tail.addToIndexes();
            } else {
                // The chain is indexed as soon as it has a head.
                chain.setFirst(current);
                tail = chain.getFirst();
                chain.addToIndexes();
            }
        }
    }
}
Also used : CoreferenceChain(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceChain) CoreferenceLink(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink) TreeMap(java.util.TreeMap)

Example 4 with CoreferenceLink

use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.

the class TcfWriter method writeCoreference.

/**
 * Writes the coreference chains of the CAS into a new TCF references layer.
 * <p>
 * Every {@link CoreferenceLink} becomes a TCF reference over the tokens it covers. Consecutive
 * links of a chain are connected by a relation labelled with the relation of the earlier link,
 * and the references of one chain are registered together as one referent. A link whose relation
 * is {@code REL_TYPE_EXPLETIVE} gets a relation without target and is left out of both the
 * linking and the referent.
 *
 * @param aJCas
 *            the CAS to read chains from; nothing is created when it holds no chain
 * @param aTextCorpus
 *            the TCF corpus in which the references layer is created
 * @param aTokensBeginPositionMap
 *            TCF tokens, looked up by the begin offset of the corresponding CAS token
 */
private void writeCoreference(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) {
    String layerName = TextCorpusLayerTag.REFERENCES.getXmlName();

    // Do nothing if there are no coreference chains in the CAS
    if (!JCasUtil.exists(aJCas, CoreferenceChain.class)) {
        getLogger().debug("Layer [" + layerName + "]: empty");
        return;
    }

    // Use the name of the first tagset declared for the link layer, "TueBaDz" otherwise.
    String tagSetName = "TueBaDz";
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(CoreferenceLink.class.getName())) {
            tagSetName = description.getName();
            break;
        }
    }

    ReferencesLayer coreferencesLayer = aTextCorpus.createReferencesLayer(null, tagSetName, null);
    getLogger().debug("Layer [" + layerName + "]: created");

    for (CoreferenceChain chain : select(aJCas, CoreferenceChain.class)) {
        List<Reference> chainReferences = new ArrayList<>();
        CoreferenceLink previousLink = null;
        Reference previousReference = null;

        for (CoreferenceLink currentLink : chain.links()) {
            // TCF tokens covered by the current link
            List<eu.clarin.weblicht.wlfxb.tc.api.Token> coveredTokens = new ArrayList<>();
            for (Token casToken : selectCovered(Token.class, currentLink)) {
                coveredTokens.add(aTokensBeginPositionMap.get(casToken.getBegin()));
            }

            Reference currentReference = coreferencesLayer.createReference(currentLink.getReferenceType(), coveredTokens, null);

            if (REL_TYPE_EXPLETIVE.equals(currentLink.getReferenceRelation())) {
                // If the relation is expletive, then there can be no next element in the
                // chain, so the reference is neither linked nor remembered.
                coreferencesLayer.addRelation(currentReference, REL_TYPE_EXPLETIVE);
            } else {
                // Relation between the previous and the current reference
                if (previousLink != null) {
                    coreferencesLayer.addRelation(previousReference, previousLink.getReferenceRelation(), currentReference);
                }
                previousLink = currentLink;
                previousReference = currentReference;
                chainReferences.add(currentReference);
            }
        }
        coreferencesLayer.addReferent(chainReferences);
    }
}
Also used : CoreferenceChain(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceChain) Reference(eu.clarin.weblicht.wlfxb.tc.api.Reference) ArrayList(java.util.ArrayList) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TagsetDescription(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription) CoreferenceLink(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink) ReferencesLayer(eu.clarin.weblicht.wlfxb.tc.api.ReferencesLayer)

Example 5 with CoreferenceLink

use of de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink in project webanno by webanno.

the class WebAnnoSemanticGraphReader method convertToCas.

/**
 * Reads text/hypothesis pairs from a stream into the CAS.
 * <p>
 * Every non-blank input line is split at the first separator, which is either
 * {@code "\t>\t"} or {@code "\tX\t"}. The part before the separator is the text, the part after
 * it the hypothesis. For each line the method creates
 * <ul>
 * <li>one {@link Token} per whitespace-separated word of both parts and one token for the
 * separator symbol ({@code >} or {@code X});</li>
 * <li>a {@link CoreferenceChain} with two {@link CoreferenceLink}s: the first one spans the text
 * (type {@code "text"}, relation {@code "entails"} if the line contains {@code "\t>\t"},
 * {@code "do not entails"} otherwise), the second one spans the hypothesis (type
 * {@code "hypothesis"});</li>
 * <li>one {@link Sentence} covering the whole line.</li>
 * </ul>
 * The document text is rebuilt from the tokens, separated by single blanks, one line per pair.
 * Blank lines are skipped. A line without hypothesis yields a zero-width hypothesis link.
 *
 * @param aJCas
 *            the CAS that receives the annotations and the document text
 * @param aIs
 *            the stream to read from; it is not closed by this method
 * @param aEncoding
 *            the character encoding of the stream
 * @throws IOException
 *             if the stream cannot be read
 */
public void convertToCas(JCas aJCas, InputStream aIs, String aEncoding) throws IOException {
    StringBuilder text = new StringBuilder();
    LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
    int tokenBeginPosition = 0;
    while (lineIterator.hasNext()) {
        String line = lineIterator.next();
        // A blank line holds no pair; converting it would create links ending at offset -1.
        if (line.trim().isEmpty()) {
            continue;
        }
        String[] contents = line.split("\t>\t|\tX\t");
        String premise = contents[0];
        // The hypothesis is the part AFTER the separator; it may be missing on a malformed line.
        String hypothesis = contents.length > 1 ? contents[1] : "";

        int sentenceBegin = tokenBeginPosition;

        // First link: the text.
        int chainBegin = tokenBeginPosition;
        tokenBeginPosition = appendWordTokens(aJCas, premise, tokenBeginPosition, text);
        CoreferenceChain chain = new CoreferenceChain(aJCas);
        // Every token is followed by one blank, hence "- 1"; never end before the begin.
        CoreferenceLink link = new CoreferenceLink(aJCas, chainBegin, Math.max(chainBegin, tokenBeginPosition - 1));
        link.setReferenceType("text");
        link.addToIndexes();
        chain.setFirst(link);

        // The separator becomes a one-character token of its own.
        boolean entails = line.contains("\t>\t");
        link.setReferenceRelation(entails ? "entails" : "do not entails");
        Token separatorToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + 1);
        separatorToken.addToIndexes();
        tokenBeginPosition = separatorToken.getEnd() + 1;
        text.append(entails ? "> " : "X ");

        // Second link: the hypothesis.
        chainBegin = tokenBeginPosition;
        tokenBeginPosition = appendWordTokens(aJCas, hypothesis, tokenBeginPosition, text);
        CoreferenceLink nextLink = new CoreferenceLink(aJCas, chainBegin, Math.max(chainBegin, tokenBeginPosition - 1));
        nextLink.setReferenceType("hypothesis");
        nextLink.addToIndexes();
        link.setNext(nextLink);
        chain.addToIndexes();

        text.append("\n");
        Sentence outSentence = new Sentence(aJCas);
        outSentence.setBegin(sentenceBegin);
        outSentence.setEnd(tokenBeginPosition);
        outSentence.addToIndexes();
        // Step over the line break that was just appended.
        tokenBeginPosition = tokenBeginPosition + 1;
    }
    aJCas.setDocumentText(text.toString());
}

/**
 * Adds one token per whitespace-separated word of {@code aSegment}, starting at offset
 * {@code aBegin}, appends each word plus one blank to {@code aText} and returns the offset at
 * which the next token would begin.
 */
private int appendWordTokens(JCas aJCas, String aSegment, int aBegin, StringBuilder aText) {
    int position = aBegin;
    StringTokenizer words = new StringTokenizer(aSegment);
    while (words.hasMoreTokens()) {
        String word = words.nextToken();
        Token outToken = new Token(aJCas, position, position + word.length());
        outToken.addToIndexes();
        position = outToken.getEnd() + 1;
        aText.append(word).append(" ");
    }
    return position;
}
Also used : StringTokenizer(java.util.StringTokenizer) CoreferenceChain(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceChain) CoreferenceLink(de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) LineIterator(org.apache.commons.io.LineIterator) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Aggregations

CoreferenceLink (de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink)5 CoreferenceChain (de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceChain)3 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)3 TagsetDescription (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription)2 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)2 Reference (eu.clarin.weblicht.wlfxb.tc.api.Reference)2 TreeMap (java.util.TreeMap)2 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)1 DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)1 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)1 ReferencesLayer (eu.clarin.weblicht.wlfxb.tc.api.ReferencesLayer)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedHashSet (java.util.LinkedHashSet)1 Map (java.util.Map)1 NavigableMap (java.util.NavigableMap)1 StringTokenizer (java.util.StringTokenizer)1 LineIterator (org.apache.commons.io.LineIterator)1 Feature (org.apache.uima.cas.Feature)1