Search in sources:

Example 1 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XCasSchemaAnalyzer method analyze.

/**
 * Builds a {@link TsvSchema} by inspecting the given type system.
 * <p>
 * First pass: every direct subtype of the UIMA annotation type is examined, skipping types
 * subsumed by the document annotation type as well as the {@code Token} and {@code Sentence}
 * types. Columns are registered according to the layer type the schema reports for each
 * remaining type; types classified as chains are remembered for the second pass.
 * <p>
 * Second pass: the direct subtypes of the annotation base type are checked, and any type
 * whose {@code CHAIN_FIRST_FEAT} feature has one of the remembered chain types as its range
 * is registered as a chain head type.
 *
 * @param aTypeSystem
 *            the type system to analyze.
 * @return the schema populated with the discovered columns and chain head types.
 */
public static TsvSchema analyze(TypeSystem aTypeSystem) {
    TsvSchema schema = new TsvSchema();
    Set<Type> chainLinkTypes = new HashSet<>();

    // Only direct subtypes of the UIMA Annotation type are looked at, because WebAnno
    // currently supports only such layers.
    Type annotationType = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION);
    Type documentAnnotationType = aTypeSystem.getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
    for (Type layer : aTypeSystem.getDirectSubtypes(annotationType)) {
        // Document-level annotations do not get columns
        if (aTypeSystem.subsumes(documentAnnotationType, layer)) {
            continue;
        }

        // Neither do the token and sentence types
        String layerName = layer.getName();
        boolean isTokenOrSentence = layerName.equals(Token.class.getName())
                || layerName.equals(Sentence.class.getName());
        if (isTokenOrSentence) {
            continue;
        }

        switch (schema.getLayerType(layer)) {
            case RELATION: {
                Feature sourceFeature = layer.getFeatureByBaseName(FEAT_REL_SOURCE);
                schema.addColumn(new TsvColumn(layer, RELATION, sourceFeature, RELATION_REF));
                generateColumns(aTypeSystem, schema, RELATION, layer);
                break;
            }
            case CHAIN: {
                Feature elementTypeFeature = layer.getFeatureByBaseName(COREFERENCE_TYPE_FEATURE);
                schema.addColumn(
                        new TsvColumn(layer, CHAIN, elementTypeFeature, CHAIN_ELEMENT_TYPE));
                Feature linkTypeFeature = layer
                        .getFeatureByBaseName(COREFERENCE_RELATION_FEATURE);
                schema.addColumn(new TsvColumn(layer, CHAIN, linkTypeFeature, CHAIN_LINK_TYPE));
                // Remembered so that the matching chain head can be found below
                chainLinkTypes.add(layer);
                break;
            }
            case SPAN: {
                schema.addColumn(new TsvColumn(layer, SPAN));
                generateColumns(aTypeSystem, schema, SPAN, layer);
                break;
            }
            case INCOMPATIBLE:
                // Incompatible types get no column definition.
                break;
        }
    }

    // Second scan: locate the chain head types belonging to the chain types found above
    Type annotationBaseType = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION_BASE);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationBaseType)) {
        Feature firstFeature = candidate.getFeatureByBaseName(CHAIN_FIRST_FEAT);
        if (firstFeature == null) {
            continue;
        }
        if (!chainLinkTypes.contains(firstFeature.getRange())) {
            continue;
        }
        schema.addChainHeadType(candidate);
    }

    return schema;
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Feature(org.apache.uima.cas.Feature) HashSet(java.util.HashSet)

Example 2 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XDeserializer method read.

/**
 * Reads a TSV document from the given reader into the given JCas.
 * <p>
 * The format header and the schema are read first, followed by the blank line that
 * separates the schema declaration from the content. All columns of the schema and their
 * UIMA types are activated on a new {@link TsvDocument} before the content is read.
 * Afterwards, for every non-empty chain a head feature structure is created, pointed at the
 * first element and added to the indexes, and the elements are linked to one another in
 * iteration order. Finally, all deferred actions are run.
 *
 * @param aIn
 *            the reader supplying the TSV data.
 * @param aJCas
 *            the JCas to read into.
 * @throws IOException
 *             if reading from {@code aIn} fails.
 */
public void read(LineNumberReader aIn, JCas aJCas) throws IOException {
    deferredActions.set(new ArrayList<>());

    TsvFormatHeader format = readFormat(aIn);
    TsvSchema schema = readSchema(aIn, aJCas);

    // The schema declaration is followed by one extra blank line - consume it
    String separatorLine = aIn.readLine();
    assert isEmpty(separatorLine);

    TsvDocument doc = new TsvDocument(format, schema, aJCas);
    for (TsvColumn col : schema.getColumns()) {
        doc.activateColumn(col);
        doc.activateType(col.uimaType);
    }

    readContent(aIn, doc);

    // Finish setting up the chains: create the heads and wire up the links
    CAS cas = aJCas.getCas();
    for (TsvChain chain : doc.getChains()) {
        if (!chain.getElements().isEmpty()) {
            Iterator<AnnotationFS> elements = chain.getElements().iterator();
            AnnotationFS current = elements.next();

            // The chain head refers to the first element of the chain
            FeatureStructure chainHead = cas.createFS(chain.getHeadType());
            setFeature(chainHead, CHAIN_FIRST_FEAT, current);
            cas.addFsToIndexes(chainHead);

            // Every element refers to its successor
            while (elements.hasNext()) {
                AnnotationFS successor = elements.next();
                setFeature(current, CHAIN_NEXT_FEAT, successor);
                current = successor;
            }
        }
    }

    // Now execute whatever has been deferred
    for (Runnable deferred : deferredActions.get()) {
        deferred.run();
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) TsvFormatHeader(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) CAS(org.apache.uima.cas.CAS) TsvChain(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvChain) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema)

Example 3 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XSerializerTest method testSingleSubTokenWithValue.

/**
 * Checks the TSV rendering of a sentence in which a named entity with the value "PER" was
 * added at offsets 1 to 3. The expected output lists it as an extra row {@code 1-1.1}
 * following the row of the first token.
 */
@Test
public void testSingleSubTokenWithValue() throws Exception {
    // Test document with a single named entity
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 1, 3, "PER");

    // TSV schema exposing only the "value" feature of the named entity layer
    TsvSchema schema = new TsvSchema();
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvColumn valueColumn = new TsvColumn(neType, LayerType.SPAN, "value",
            FeatureType.PRIMITIVE);
    schema.addColumn(valueColumn);

    // Build the TSV model from the test document
    TsvDocument doc = Tsv3XCasDocumentBuilder.of(schema, jcas);

    String expectedSentence = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t1-3\thi\tPER\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actualSentence = doc.getSentences().get(0).toString();
    assertEquals(expectedSentence, actualSentence);
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Example 4 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XSerializerTest method testSingleZeroWidthTokenWithoutValue.

/**
 * Checks the TSV rendering of a sentence in which a named entity with a {@code null} value
 * was added at offsets 0 to 0. The expected output lists it as an extra row {@code 1-1.1}
 * with empty text and {@code *} in the value column.
 */
@Test
public void testSingleZeroWidthTokenWithoutValue() throws Exception {
    // Test document with a single named entity that has no value
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 0, 0, null);

    // TSV schema exposing only the "value" feature of the named entity layer
    TsvSchema schema = new TsvSchema();
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvColumn valueColumn = new TsvColumn(neType, LayerType.SPAN, "value",
            FeatureType.PRIMITIVE);
    schema.addColumn(valueColumn);

    // Build the TSV model from the test document
    TsvDocument doc = Tsv3XCasDocumentBuilder.of(schema, jcas);

    String expectedSentence = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t0-0\t\t*\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actualSentence = doc.getSentences().get(0).toString();
    assertEquals(expectedSentence, actualSentence);
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Example 5 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XSerializerTest method testStackedSingleTokenWithValue.

/**
 * Checks the TSV rendering of two named entities ("PER" and "ORG") that were both added at
 * offsets 0 to 4. Both annotations are attached to the first token explicitly, and the
 * expected output shows them in one cell as {@code PER[1]|ORG[2]}, both for the token alone
 * and for the whole sentence.
 */
@Test
public void testStackedSingleTokenWithValue() throws Exception {
    // Test document with two named entities at the same offsets
    JCas jcas = makeJCasOneSentence("This is a test .");
    NamedEntity perEntity = addNamedEntity(jcas, 0, 4, "PER");
    NamedEntity orgEntity = addNamedEntity(jcas, 0, 4, "ORG");

    // TSV schema exposing only the "value" feature of the named entity layer
    TsvSchema schema = new TsvSchema();
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvColumn valueColumn = new TsvColumn(neType, LayerType.SPAN, "value",
            FeatureType.PRIMITIVE);
    schema.addColumn(valueColumn);

    // Build the TSV model and attach both annotations to the first token
    TsvDocument doc = Tsv3XCasDocumentBuilder.of(schema, jcas);
    doc.getSentences().get(0).getTokens().get(0).addUimaAnnotation(perEntity, true);
    doc.getSentences().get(0).getTokens().get(0).addUimaAnnotation(orgEntity, true);

    // The first token on its own
    String expectedToken = "1-1\t0-4\tThis\tPER[1]|ORG[2]\t";
    assertEquals(expectedToken, doc.getSentences().get(0).getTokens().get(0).toString());

    // The complete sentence
    String expectedSentence = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\tPER[1]|ORG[2]\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    assertEquals(expectedSentence, doc.getSentences().get(0).toString());
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Aggregations

TsvSchema (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema): 9; TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn): 8; LayerType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType): 6; TsvDocument (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument): 6; Type (org.apache.uima.cas.Type): 6; FeatureType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType): 5; JCas (org.apache.uima.jcas.JCas): 5; Test (org.junit.Test): 5; IOException (java.io.IOException): 2; Tsv3XSerializer (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.Tsv3XSerializer): 1; TsvChain (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvChain): 1; TsvFormatHeader (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader): 1; NamedEntity (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity): 1; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 1; Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency): 1; OutputStreamWriter (java.io.OutputStreamWriter): 1; PrintWriter (java.io.PrintWriter): 1; ArrayList (java.util.ArrayList): 1; HashSet (java.util.HashSet): 1; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 1