Search in sources :

Example 1 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XCasDocumentBuilder method scanUnitForActiveColumns.

/**
 * Activates every non-placeholder schema column for which the given unit carries at least one
 * annotation. For relation reference columns, additionally records a target type hint on the
 * column, derived from the first annotation found for that column.
 *
 * @param aUnit
 *            the unit whose annotations are inspected.
 * @throws IllegalStateException
 *             if the inspected relation annotation has no value for its source feature.
 */
private static void scanUnitForActiveColumns(TsvUnit aUnit) {
    for (TsvColumn column : aUnit.getDocument().getSchema().getColumns()) {
        List<AnnotationFS> columnAnnotations = aUnit.getAnnotationsForColumn(column);
        if (columnAnnotations.isEmpty()) {
            // Nothing on this unit for the column - it neither gets activated nor hinted.
            continue;
        }

        // Placeholder columns are never activated by this scan.
        if (!PLACEHOLDER.equals(column.featureType)) {
            aUnit.getDocument().activateColumn(column);
        }

        boolean relationReference = RELATION.equals(column.layerType)
                && RELATION_REF.equals(column.featureType);
        if (!relationReference) {
            continue;
        }

        // The type hint is taken from the endpoint of the first actual annotation.
        AnnotationFS firstRelation = columnAnnotations.get(0);
        FeatureStructure endpoint = FSUtil.getFeature(firstRelation, FEAT_REL_SOURCE,
                FeatureStructure.class);
        if (endpoint == null) {
            throw new IllegalStateException("Relation does not have its source feature (" + FEAT_REL_SOURCE + ") set: " + firstRelation);
        }

        boolean dkproDependency = column.uimaType.getName().equals(Dependency.class.getName());
        if (!dkproDependency) {
            column.setTargetTypeHint(endpoint.getType());
        } else {
            // COMPATIBILITY NOTE:
            // WebAnnoTsv3Writer hard-changes the target type for DKPro Core
            // Dependency annotations from Token to POS - the reason is not really
            // clear. Probably because the Dependency relations in the WebAnno UI
            // attach to POS (Tokens are not visible as annotations in the UI).
            column.setTargetTypeHint(aUnit.getDocument().getJCas().getTypeSystem().getType(POS.class.getName()));
        }
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)

Example 2 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XCasDocumentBuilder method scanUnitForAmbiguousSlotReferences.

/**
 * Registers disambiguation IDs for the targets of ambiguous slot links found on the given unit.
 * <p>
 * If a slot feature has the target type Annotation, then any kind of annotation can be used as
 * slot filler. In this case, the targets are ambiguous and require a disambiguation ID.
 *
 * @param aUnit
 *            the unit whose slot-bearing annotations are inspected.
 * @throws IllegalStateException
 *             if a slot link has no target.
 */
private static void scanUnitForAmbiguousSlotReferences(TsvUnit aUnit) {
    for (TsvColumn column : aUnit.getDocument().getSchema().getColumns()) {
        // Only span-layer slot-target columns whose target type hint is the generic
        // Annotation type are of interest here.
        boolean ambiguousSlotColumn = SPAN.equals(column.layerType)
                && SLOT_TARGET.equals(column.featureType)
                && CAS.TYPE_NAME_ANNOTATION.equals(column.getTargetTypeHint().getName());
        if (!ambiguousSlotColumn) {
            continue;
        }

        for (AnnotationFS slotHost : aUnit.getAnnotationsForColumn(column)) {
            FeatureStructure[] slotLinks = getFeature(slotHost, column.uimaFeature,
                    FeatureStructure[].class);
            for (FeatureStructure slotLink : slotLinks) {
                AnnotationFS filler = getFeature(slotLink, TsvSchema.FEAT_SLOT_TARGET,
                        AnnotationFS.class);
                if (filler == null) {
                    throw new IllegalStateException("Slot link has no target: " + slotLink);
                }
                aUnit.getDocument().addDisambiguationId(filler);
            }
        }
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)

Example 3 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XCasDocumentBuilder method of.

/**
 * Builds a {@link TsvDocument} (format header "WebAnno TSV" version "3.2") from the annotations
 * found in the given JCas, restricted to the types described by the given schema.
 * <p>
 * The method proceeds in these steps: create sentences and tokens, collect chains, index the
 * tokens by begin and end offset, attach every annotation of every schema type to token or
 * sub-token units, scan the units for active columns and ambiguous slot references, and finally
 * activate placeholder columns for active types that have no other active column.
 *
 * @param aSchema
 *            the schema providing the chain head types, the UIMA types and the columns.
 * @param aJCas
 *            the JCas from which sentences, tokens and annotations are read.
 * @return the populated document.
 */
public static TsvDocument of(TsvSchema aSchema, JCas aJCas) {
    TsvFormatHeader format = new TsvFormatHeader("WebAnno TSV", "3.2");
    TsvDocument doc = new TsvDocument(format, aSchema, aJCas);
    // Fill document with all the sentences and tokens
    for (Sentence uimaSentence : select(aJCas, Sentence.class)) {
        TsvSentence sentence = doc.createSentence(uimaSentence);
        for (Token uimaToken : selectCovered(Token.class, uimaSentence)) {
            sentence.createToken(uimaToken);
        }
    }
    // Scan for chains: starting at each chain head, follow the "first" feature and then the
    // "next" features until no further link is found. Heads without any link are skipped.
    for (Type headType : aSchema.getChainHeadTypes()) {
        for (FeatureStructure chainHead : CasUtil.selectFS(aJCas.getCas(), headType)) {
            List<AnnotationFS> elements = new ArrayList<>();
            AnnotationFS link = getFeature(chainHead, CHAIN_FIRST_FEAT, AnnotationFS.class);
            while (link != null) {
                elements.add(link);
                link = getFeature(link, CHAIN_NEXT_FEAT, AnnotationFS.class);
            }
            if (!elements.isEmpty()) {
                // The element type is the range of the head type's "first" feature.
                Type elementType = headType.getFeatureByBaseName(CHAIN_FIRST_FEAT).getRange();
                doc.createChain(headType, elementType, elements);
            }
        }
    }
    // Build indexes over the token start and end positions such that we can quickly locate
    // tokens based on their offsets.
    NavigableMap<Integer, TsvToken> tokenBeginIndex = new TreeMap<>();
    NavigableMap<Integer, TsvToken> tokenEndIndex = new TreeMap<>();
    // Flat list of all tokens in document order; used further below to walk from a begin
    // token to an end token.
    List<TsvToken> tokens = new ArrayList<>();
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            tokenBeginIndex.put(token.getBegin(), token);
            tokenEndIndex.put(token.getEnd(), token);
            tokens.add(token);
        }
    }
    // Scan all annotations of the types defined in the schema and attach them to token units
    // or, where they do not align with token boundaries, to newly created sub-token units.
    for (Type type : aSchema.getUimaTypes()) {
        LayerType layerType = aSchema.getLayerType(type);
        boolean addDisambiguationIdIfStacked = SPAN.equals(layerType);
        for (AnnotationFS annotation : CasUtil.select(aJCas.getCas(), type)) {
            doc.activateType(annotation.getType());
            // Get the relevant begin and end offsets for the current annotation
            int begin = annotation.getBegin();
            int end = annotation.getEnd();
            // For relations, the offsets are taken from the annotation referenced by the
            // relation's target feature instead of from the relation itself, to be sure.
            // NOTE(review): targetFS is dereferenced without a null check - confirm that
            // relations always have their target feature set.
            if (RELATION.equals(layerType)) {
                AnnotationFS targetFS = getFeature(annotation, FEAT_REL_TARGET, AnnotationFS.class);
                begin = targetFS.getBegin();
                end = targetFS.getEnd();
            }
            // Locate the last token starting at or before the annotation begin and the first
            // token ending at or after the annotation end.
            // NOTE(review): floorEntry/ceilingEntry return null when no such key exists (e.g.
            // an annotation starting before the first token or ending after the last one),
            // which would cause a NullPointerException here - confirm this cannot happen.
            TsvToken beginToken = tokenBeginIndex.floorEntry(begin).getValue();
            TsvToken endToken = tokenEndIndex.ceilingEntry(end).getValue();
            // For zero-width annotations, the begin token must match the end token, so we
            // trust the tokenEndIndex here and override the
            // value obtained from the tokenBeginIndex.
            if (begin == end) {
                beginToken = endToken;
            }
            boolean singleToken = beginToken == endToken;
            boolean zeroWitdh = begin == end;
            boolean multiTokenCapable = SPAN.equals(layerType) || CHAIN.equals(layerType);
            // Annotation exactly matches token boundaries - it does not matter whether the
            // begin and end tokens are the same (single-token annotation) or differ
            // (multi-token annotation); it is handled the same way
            // in either case.
            if (beginToken.getBegin() == begin && endToken.getEnd() == end) {
                doc.mapFS2Unit(annotation, beginToken);
                beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                if (multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            } else if (zeroWitdh) {
                // Zero-width annotation not aligned with the token boundaries: represent it
                // as a sub-token unit on the (begin == end) token.
                TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                doc.mapFS2Unit(annotation, t);
                t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
            } else {
                // Annotation starts inside the begin token: create a sub-token unit on the
                // begin token covering the part of that token which overlaps with
                // the annotation.
                if (beginToken.getBegin() < begin) {
                    TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                    doc.mapFS2Unit(annotation, t);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                } else // If not the sub-token is ID-defining, then the begin token is ID-defining
                {
                    beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    doc.mapFS2Unit(annotation, beginToken);
                }
                // Annotation ends inside the end token: create a sub-token unit on the end
                // token. This sub-token is only mapped as the annotation's unit when the
                // begin and end tokens differ, hence the check whether singleToken is true.
                if (endToken.getEnd() > end) {
                    TsvSubToken t = endToken.createSubToken(max(endToken.getBegin(), begin), end);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    if (!singleToken) {
                        doc.mapFS2Unit(annotation, t);
                    }
                } else if (!singleToken && multiTokenCapable) {
                    // Annotation ends exactly at the end of a different end token.
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            }
            // Annotation spans multiple tokens: add it to every token strictly between the
            // begin token and
            // the end token
            // NOTE(review): tokens.indexOf is a linear search performed per annotation, and
            // i.next() throws NoSuchElementException if endToken is never reached.
            if (multiTokenCapable && !singleToken) {
                ListIterator<TsvToken> i = tokens.listIterator(tokens.indexOf(beginToken));
                TsvToken t;
                while ((t = i.next()) != endToken) {
                    if (t != beginToken) {
                        t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    }
                }
            }
            // Multi-token span annotations must get a disambiguation ID
            if (SPAN.equals(layerType) && !singleToken) {
                doc.addDisambiguationId(annotation);
            }
        }
    }
    // Scan all created units to see which columns actually contains values
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            scanUnitForActiveColumns(token);
            scanUnitForAmbiguousSlotReferences(token);
            for (TsvSubToken subToken : token.getSubTokens()) {
                scanUnitForActiveColumns(subToken);
                scanUnitForAmbiguousSlotReferences(subToken);
            }
        }
    }
    // Activate the placeholder columns for any active types for which no other columns are
    // active.
    Set<Type> activeTypesNeedingPlaceholders = new HashSet<>(doc.getActiveTypes());
    for (TsvColumn col : doc.getActiveColumns()) {
        activeTypesNeedingPlaceholders.remove(col.uimaType);
    }
    for (TsvColumn col : doc.getSchema().getColumns()) {
        if (PLACEHOLDER.equals(col.featureType) && activeTypesNeedingPlaceholders.contains(col.uimaType)) {
            doc.activateColumn(col);
        }
    }
    return doc;
}
Also used : TsvFormatHeader(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader) ArrayList(java.util.ArrayList) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence) TreeMap(java.util.TreeMap) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence) HashSet(java.util.HashSet)

Example 4 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XCasSchemaAnalyzer method analyze.

/**
 * Derives a {@link TsvSchema} from the given type system.
 * <p>
 * Columns are generated for the direct subtypes of the UIMA Annotation type, depending on the
 * layer type the schema reports for each of them. Afterwards, the chain head types belonging
 * to the discovered chain link types are registered with the schema.
 *
 * @param aTypeSystem
 *            the type system to analyze.
 * @return the schema describing the columns and chain head types.
 */
public static TsvSchema analyze(TypeSystem aTypeSystem) {
    TsvSchema result = new TsvSchema();
    Set<Type> linkTypesOfChains = new HashSet<>();
    String tokenTypeName = Token.class.getName();
    String sentenceTypeName = Sentence.class.getName();

    // Consider only direct subtypes of the UIMA Annotation type. Currently, WebAnno only
    // supports such layers.
    Type annotationRoot = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION);
    Type documentAnnotationRoot = aTypeSystem.getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationRoot)) {
        // Document annotations (and their subtypes) never become columns.
        if (aTypeSystem.subsumes(documentAnnotationRoot, candidate)) {
            continue;
        }

        // Tokens and sentences are not represented as columns either.
        String candidateName = candidate.getName();
        if (candidateName.equals(tokenTypeName) || candidateName.equals(sentenceTypeName)) {
            continue;
        }

        switch (result.getLayerType(candidate)) {
            case RELATION: {
                Feature sourceFeature = candidate.getFeatureByBaseName(FEAT_REL_SOURCE);
                result.addColumn(new TsvColumn(candidate, RELATION, sourceFeature, RELATION_REF));
                generateColumns(aTypeSystem, result, RELATION, candidate);
                break;
            }
            case CHAIN: {
                Feature elementTypeFeature = candidate.getFeatureByBaseName(COREFERENCE_TYPE_FEATURE);
                result.addColumn(new TsvColumn(candidate, CHAIN, elementTypeFeature, CHAIN_ELEMENT_TYPE));
                Feature linkTypeFeature = candidate.getFeatureByBaseName(COREFERENCE_RELATION_FEATURE);
                result.addColumn(new TsvColumn(candidate, CHAIN, linkTypeFeature, CHAIN_LINK_TYPE));
                // Remember the link type so that its chain head type can be found below.
                linkTypesOfChains.add(candidate);
                break;
            }
            case SPAN: {
                result.addColumn(new TsvColumn(candidate, SPAN));
                generateColumns(aTypeSystem, result, SPAN, candidate);
                break;
            }
            case INCOMPATIBLE:
                // Do not generate a column definition for incompatible types.
                break;
        }
    }

    // Scan again for the chain head types: these are direct subtypes of the annotation base
    // type whose "first" feature points to one of the chain link types collected above.
    Type annotationBaseRoot = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION_BASE);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationBaseRoot)) {
        Feature firstLinkFeature = candidate.getFeatureByBaseName(CHAIN_FIRST_FEAT);
        if (firstLinkFeature == null) {
            continue;
        }
        if (linkTypesOfChains.contains(firstLinkFeature.getRange())) {
            result.addChainHeadType(candidate);
        }
    }
    return result;
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Feature(org.apache.uima.cas.Feature) HashSet(java.util.HashSet)

Example 5 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XDeserializer method read.

/**
 * Reads a TSV document from the given reader into the given JCas.
 * <p>
 * The format header and the schema are read first, followed by the blank line after the schema
 * declaration and the content. Afterwards, a chain head is created and indexed for every
 * non-empty chain and the chain links are connected to each other. Finally, all deferred
 * actions collected during reading are executed.
 *
 * @param aIn
 *            the reader providing the TSV data.
 * @param aJCas
 *            the JCas into which the document is read.
 * @throws IOException
 *             if reading from the underlying reader fails.
 */
public void read(LineNumberReader aIn, JCas aJCas) throws IOException {
    // Start with a fresh list of deferred actions for this read.
    deferredActions.set(new ArrayList<>());

    TsvFormatHeader header = readFormat(aIn);
    TsvSchema tsvSchema = readSchema(aIn, aJCas);

    // Read the extra blank line after the schema declaration
    String separatorLine = aIn.readLine();
    assert isEmpty(separatorLine);

    // All columns declared in the schema (and their types) are considered active.
    TsvDocument document = new TsvDocument(header, tsvSchema, aJCas);
    for (TsvColumn declaredColumn : tsvSchema.getColumns()) {
        document.activateColumn(declaredColumn);
        document.activateType(declaredColumn.uimaType);
    }

    readContent(aIn, document);

    // Complete the addition of the chains
    CAS targetCas = aJCas.getCas();
    for (TsvChain chain : document.getChains()) {
        if (!chain.getElements().isEmpty()) {
            Iterator<AnnotationFS> remainingLinks = chain.getElements().iterator();
            AnnotationFS firstLink = remainingLinks.next();

            // Create the chain head, point it to the first link and index it
            FeatureStructure chainHead = targetCas.createFS(chain.getHeadType());
            setFeature(chainHead, CHAIN_FIRST_FEAT, firstLink);
            targetCas.addFsToIndexes(chainHead);

            // Connect the links to each other
            for (AnnotationFS previous = firstLink; remainingLinks.hasNext();) {
                AnnotationFS current = remainingLinks.next();
                setFeature(previous, CHAIN_NEXT_FEAT, current);
                previous = current;
            }
        }
    }

    // Run deferred actions
    for (Runnable deferred : deferredActions.get()) {
        deferred.run();
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TsvFormatHeader(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) CAS(org.apache.uima.cas.CAS) TsvChain(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvChain) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema)

Aggregations

TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)16 Type (org.apache.uima.cas.Type)9 LayerType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType)8 TsvSchema (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema)8 TsvDocument (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument)7 FeatureType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType)5 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)5 JCas (org.apache.uima.jcas.JCas)5 Test (org.junit.Test)5 FeatureStructure (org.apache.uima.cas.FeatureStructure)4 TsvFormatHeader (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader)2 TsvToken (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken)2 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)2 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 Feature (org.apache.uima.cas.Feature)2 TsvChain (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvChain)1 TsvSentence (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence)1