Search in sources :

Example 6 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XDeserializer method parseColumnDeclaration.

/**
 * Interprets one column declaration from the TSV header and builds the corresponding
 * {@link TsvColumn}.
 *
 * <p>The declaration is classified, in this order, as a slot role, a relation reference, a
 * chain element type, a chain link type, a slot target or a primitive feature column.
 *
 * @param aJCas the CAS whose type system is used to resolve type names
 * @param aLayerType the kind of layer the column belongs to
 * @param aUimaType the UIMA type of the layer the column belongs to
 * @param aIndex the index handed on to the created column
 * @param aColDecl the raw column declaration
 * @param aPrevCol the previously parsed column; may be {@code null}. A slot target column is
 *        only accepted directly after a slot role column.
 * @return the column described by the declaration
 * @throws IOException if the declaration is malformed, refers to a type or feature that is
 *         not available, or if a slot target column does not follow a slot role column
 */
private TsvColumn parseColumnDeclaration(JCas aJCas, LayerType aLayerType, Type aUimaType, int aIndex, String aColDecl, TsvColumn aPrevCol) throws IOException {
    TypeSystem ts = aJCas.getTypeSystem();
    TsvColumn column;
    if (SPAN.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_ROLE)) {
        // SLOT_ROLE - starts with "ROLE_"
        String[] subFields = splitPreserveAllTokens(aColDecl, '_');
        // The feature is read from the second field and the target type from the third one.
        // Report a truncated declaration as a regular parse error instead of letting an
        // ArrayIndexOutOfBoundsException escape.
        if (subFields.length < 3) {
            throw new IOException("Malformed slot role column declaration [" + aColDecl + "]");
        }
        String featureName = substringAfter(subFields[1], ":");
        Feature feat = aUimaType.getFeatureByBaseName(featureName);
        if (feat == null) {
            throw new IOException("CAS type [" + aUimaType.getName() + "] does not have a feature called [" + featureName + "]");
        }
        column = new TsvColumn(aIndex, aUimaType, aLayerType, featureName, SLOT_ROLE);
        String typeName = subFields[2];
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    } else if (RELATION.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_BASE_TYPE)) {
        // RELATION_REF - starts with the base type prefix
        column = new TsvColumn(aIndex, aUimaType, aLayerType, FEAT_REL_SOURCE, RELATION_REF);
        String typeName = substringAfter(aColDecl, HEADER_PREFIX_BASE_TYPE);
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    } else if (CHAIN.equals(aLayerType) && COREFERENCE_TYPE_FEATURE.equals(aColDecl)) {
        // CHAIN_ELEMENT_TYPE - "referenceType"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_TYPE_FEATURE, CHAIN_ELEMENT_TYPE);
    } else if (CHAIN.equals(aLayerType) && COREFERENCE_RELATION_FEATURE.equals(aColDecl)) {
        // CHAIN_LINK_TYPE - "referenceRelation"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_RELATION_FEATURE, CHAIN_LINK_TYPE);
    } else if ((SPAN.equals(aLayerType) && aColDecl.contains(".")) || ts.getType(aColDecl) != null) {
        // SLOT_TARGET - name of the link target type
        // NOTE(review): the parentheses only spell out the precedence the expression already
        // had ("&&" binds tighter than "||"). As written, a declaration naming an existing
        // type takes this branch for any layer type - confirm that this is intended.

        // The branch may have been entered only because the declaration contains a dot, so
        // make sure that the type really exists in the type system.
        Type type = ts.getType(aColDecl);
        if (type == null) {
            throw new IOException("CAS type system does not contain a type named [" + aColDecl + "]");
        }
        // The feature name is not part of the declaration; it is taken from the preceding
        // slot role column.
        if (aPrevCol == null || !SLOT_ROLE.equals(aPrevCol.featureType)) {
            throw new IOException("Slot target column declaration must follow slot role column declaration");
        }
        column = new TsvColumn(aIndex, aUimaType, aLayerType, aPrevCol.uimaFeature.getShortName(), SLOT_TARGET);
        column.setTargetTypeHint(type);
    } else if (aUimaType.getFeatureByBaseName(aColDecl) != null) {
        // PRIMITIVE - feature name
        column = new TsvColumn(aIndex, aUimaType, aLayerType, aColDecl, PRIMITIVE);
    } else {
        throw new IOException("Type [" + aUimaType.getName() + "] does not contain a feature called [" + aColDecl + "]");
    }
    return column;
}
Also used : TypeSystem(org.apache.uima.cas.TypeSystem) Type(org.apache.uima.cas.Type) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) IOException(java.io.IOException) FSUtil.setFeature(org.apache.uima.fit.util.FSUtil.setFeature) FSUtil.getFeature(org.apache.uima.fit.util.FSUtil.getFeature) Feature(org.apache.uima.cas.Feature)

Example 7 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XDeserializer method getOrCreateSpanAnnotation.

/**
 * Returns the span annotation belonging to the given column at the given unit, creating it
 * if it has not been seen before.
 *
 * <p>The lookup order is: an annotation already registered on the unit for the column's type
 * and stacking index; then, if a disambiguation ID was given, the annotation registered
 * under that ID in the document (its end offset is moved to the end of this unit); finally a
 * newly created annotation covering the unit.
 *
 * @param aCol the column whose UIMA type determines the annotation type
 * @param aUnit the unit the annotation is attached to
 * @param aStackingIndex the stacking index used for the lookup on the unit
 * @param aDisambiguationInfo the disambiguation ID as text, or {@code null} if there is none
 * @return the existing or newly created annotation
 */
private AnnotationFS getOrCreateSpanAnnotation(TsvColumn aCol, TsvUnit aUnit, int aStackingIndex, String aDisambiguationInfo) {
    // -1 stands for "no disambiguation ID"
    final int disambiguationId;
    if (aDisambiguationInfo == null) {
        disambiguationId = -1;
    } else {
        disambiguationId = Integer.parseInt(aDisambiguationInfo);
    }
    final boolean hasDisambiguationId = disambiguationId != -1;

    // Has the same annotation already been seen in this unit, but in another column?
    AnnotationFS result = aUnit.getUimaAnnotation(aCol.uimaType, aStackingIndex);

    // Otherwise it may have been seen already in a previous unit
    if (result == null && hasDisambiguationId) {
        result = aUnit.getDocument().getDisambiguatedAnnotation(disambiguationId);
        if (result != null) {
            aUnit.addUimaAnnotation(result);
            // Extend the existing annotation up to the end of this unit. AnnotationFS offers
            // no setEnd() method, so the end feature is written directly.
            setFeature(result, CAS.FEATURE_BASE_NAME_END, aUnit.getEnd());
        }
    }

    // Nothing found anywhere - create a fresh annotation covering this unit
    if (result == null) {
        result = aUnit.getDocument().getJCas().getCas().createAnnotation(aCol.uimaType, aUnit.getBegin(), aUnit.getEnd());
        aUnit.addUimaAnnotation(result);

        // Slot features of the new annotation start out as empty lists
        for (TsvColumn candidate : aUnit.getDocument().getSchema().getColumns(aCol.uimaType)) {
            if (SLOT_TARGET.equals(candidate.featureType)) {
                setFeature(result, candidate.uimaFeature.getShortName(), emptyList());
            }
        }

        // DKPro Core annotations attached to a token are also set on that token
        final String typeName = aCol.uimaType.getName();
        if (Lemma.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setLemma((Lemma) result);
        } else if (Stem.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setStem((Stem) result);
        } else if (MorphologicalFeatures.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setMorph((MorphologicalFeatures) result);
        } else if (POS.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setPos((POS) result);
        }
    }

    // Register the annotation under its disambiguation ID so that it can be looked up again
    // from the document when a later unit refers to the same ID.
    if (hasDisambiguationId) {
        aUnit.getDocument().addDisambiguationId(result, disambiguationId);
    }
    return result;
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) MorphologicalFeatures(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem)

Example 8 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XSerializerTest method testSingleSubTokenWithValue.

/**
 * A named entity with a value that covers only part of the first token (offsets 1-3) is
 * expected to be rendered as a sub-token line "1-1.1" below the token line.
 */
@Test
public void testSingleSubTokenWithValue() throws Exception {
    // Fixture: one sentence with a named entity inside the first token
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 1, 3, "PER");

    // Schema consisting of the named entity "value" column only
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvColumn valueColumn = new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(valueColumn);

    // Build the TSV model from the CAS
    TsvDocument tsvDocument = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);

    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t1-3\thi\tPER\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actual = tsvDocument.getSentences().get(0).toString();
    assertEquals(expected, actual);
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Example 9 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XSerializerTest method testSingleZeroWidthTokenWithoutValue.

/**
 * A zero-width named entity without a value at offset 0 is expected to be rendered as a
 * sub-token line "1-1.1" with empty text and "*" in the value column.
 */
@Test
public void testSingleZeroWidthTokenWithoutValue() throws Exception {
    // Fixture: one sentence with a zero-width, value-less named entity at the very start
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 0, 0, null);

    // Schema consisting of the named entity "value" column only
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvColumn valueColumn = new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(valueColumn);

    // Build the TSV model from the CAS
    TsvDocument tsvDocument = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);

    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t0-0\t\t*\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actual = tsvDocument.getSentences().get(0).toString();
    assertEquals(expected, actual);
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Example 10 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XSerializerTest method testStackedSingleTokenWithValue.

/**
 * Two named entities stacked on the first token are expected to be rendered in one cell,
 * separated by "|" and each carrying its own disambiguation ID.
 */
@Test
public void testStackedSingleTokenWithValue() throws Exception {
    // Fixture: one sentence with two named entities covering the same first token
    JCas jcas = makeJCasOneSentence("This is a test .");
    NamedEntity person = addNamedEntity(jcas, 0, 4, "PER");
    NamedEntity organization = addNamedEntity(jcas, 0, 4, "ORG");

    // Schema consisting of the named entity "value" column only
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvColumn valueColumn = new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(valueColumn);

    // Build the TSV model from the CAS and register both entities on the first token
    TsvDocument tsvDocument = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);
    tsvDocument.getSentences().get(0).getTokens().get(0).addUimaAnnotation(person, true);
    tsvDocument.getSentences().get(0).getTokens().get(0).addUimaAnnotation(organization, true);

    // The token line on its own ...
    String actualToken = tsvDocument.getSentences().get(0).getTokens().get(0).toString();
    assertEquals("1-1\t0-4\tThis\tPER[1]|ORG[2]\t", actualToken);

    // ... and the complete sentence
    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\tPER[1]|ORG[2]\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actualSentence = tsvDocument.getSentences().get(0).toString();
    assertEquals(expected, actualSentence);
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Aggregations

TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)16 Type (org.apache.uima.cas.Type)9 LayerType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType)8 TsvSchema (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema)8 TsvDocument (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument)7 FeatureType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType)5 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)5 JCas (org.apache.uima.jcas.JCas)5 Test (org.junit.Test)5 FeatureStructure (org.apache.uima.cas.FeatureStructure)4 TsvFormatHeader (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader)2 TsvToken (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken)2 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)2 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 Feature (org.apache.uima.cas.Feature)2 TsvChain (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvChain)1 TsvSentence (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence)1