Search in sources :

Example 11 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XDeserializer method readSchema.

/**
 * Reads the layer declarations at the current position of the reader and builds the
 * corresponding schema.
 * <p>
 * Lines are consumed until a blank line (as determined by {@code isBlank}) or a line that does
 * not start with one of the span/relation/chain layer prefixes is encountered. Note that the
 * line which terminates the header has already been read from {@code aIn} when this method
 * returns.
 *
 * @param aIn
 *            the reader from which the header lines are read.
 * @param aJCas
 *            the CAS whose type system is used to resolve the type names found in the header.
 * @return the schema holding one column per declared column (or a single placeholder column
 *         for a layer which declares no columns).
 * @throws IOException
 *             if reading fails, if a layer declaration does not contain a type name, or if the
 *             declared type is not part of the type system of the CAS.
 */
private TsvSchema readSchema(LineNumberReader aIn, JCas aJCas) throws IOException {
    TsvSchema schema = new TsvSchema();
    // Column indices are assigned consecutively across all layers
    int columnIndex = 0;
    // Read the header line by line until it ends
    for (String line = aIn.readLine(); !isBlank(line); line = aIn.readLine()) {
        LayerType layerType;
        // Determine layer type
        if (startsWith(line, HEADER_PREFIX_SPAN_LAYER)) {
            layerType = SPAN;
        } else if (startsWith(line, HEADER_PREFIX_RELATION_LAYER)) {
            layerType = RELATION;
        } else if (startsWith(line, HEADER_PREFIX_CHAIN_LAYER)) {
            layerType = CHAIN;
        } else {
            // End of header
            break;
        }
        // Split up layer declaration
        String rest = substringAfter(line, HEADER_LAYER_PREFIX_SEPARATOR);
        String[] fields = split(rest, HEADER_FIELD_SEPARATOR);
        // A declaration consisting only of the layer prefix yields no fields at all. Report
        // this as a malformed file instead of failing with an array index error below.
        if (fields == null || fields.length == 0) {
            throw new IOException("Layer declaration in line " + aIn.getLineNumber()
                    + " does not specify a type name: [" + line + "]");
        }
        // Get the type name and the corresponding UIMA type from the type system of the
        // target CAS
        String typeName = fields[0];
        Type uimaType = aJCas.getTypeSystem().getType(typeName);
        if (uimaType == null) {
            throw new IOException("CAS type system does not contain a type named [" + typeName + "]");
        }
        // Parse the column declarations starting at the second field (the first is the
        // type name). Each column gets to see the column declared before it in the same layer.
        TsvColumn prevColumn = null;
        for (int i = 1; i < fields.length; i++) {
            String colDecl = fields[i];
            TsvColumn col = parseColumnDeclaration(aJCas, layerType, uimaType, columnIndex, colDecl, prevColumn);
            schema.addColumn(col);
            columnIndex++;
            prevColumn = col;
        }
        // If there is no second field, then add a placeholder column
        if (fields.length == 1) {
            schema.addColumn(new TsvColumn(columnIndex, uimaType, layerType));
            columnIndex++;
        }
    }
    return schema;
}
Also used : Type(org.apache.uima.cas.Type) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) IOException(java.io.IOException)

Example 12 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XDeserializer method parseAnnotations.

/**
 * Parses the annotation cells of a single unit line.
 * <p>
 * For every header column of the document schema, the matching cell is looked up in
 * {@code aFields} (the cell of a column is located three positions after the column index).
 * Cells equal to {@code NULL_COLUMN} are skipped. All other cells are split into their
 * stacked values and each value is handed to {@code parseAnnotation} together with its
 * position in the stack.
 *
 * @param aDoc
 *            the document being built; also provides the schema.
 * @param aSentence
 *            the sentence the unit belongs to.
 * @param aUnit
 *            the unit to which the line belongs.
 * @param aFields
 *            the cells of the line.
 */
private void parseAnnotations(TsvDocument aDoc, TsvSentence aSentence, TsvUnit aUnit, String[] aFields) {
    for (TsvColumn column : aDoc.getSchema().getHeaderColumns(aDoc.getSchema().getColumns())) {
        String cell = aFields[column.index + 3];
        if (!NULL_COLUMN.equals(cell)) {
            // A cell may carry several stacked values; process them in order
            String[] stacked = STACK_SEP_PATTERN.split(cell);
            for (int stackIndex = 0; stackIndex < stacked.length; stackIndex++) {
                parseAnnotation(aDoc, aSentence, aUnit, column, stackIndex, stacked[stackIndex]);
            }
        }
    }
}
Also used : TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)

Example 13 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XSchemaAnalyzerTest method testAnalyze.

/**
 * Smoke test: analyzes the type system of a newly created JCas and prints every column of the
 * resulting schema to standard output. There are no assertions; the test only fails if an
 * exception is thrown.
 */
@Test
public void testAnalyze() throws Exception {
    TsvSchema analyzedSchema = Tsv3XCasSchemaAnalyzer.analyze(JCasFactory.createJCas().getTypeSystem());
    for (TsvColumn column : analyzedSchema.getColumns()) {
        System.out.println(column);
    }
}
Also used : TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Example 14 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XSerializerTest method testRelation.

/**
 * Checks the TSV rendering of a single dependency relation whose governor is the first token
 * and whose dependent is the second token of a one-sentence document.
 */
@Test
public void testRelation() throws Exception {
    // Document with one sentence and a relation from token 1 (governor) to token 2 (dependent)
    JCas jcas = makeJCasOneSentence("This is a test .");
    List<Token> tokenList = new ArrayList<>(select(jcas, Token.class));
    Token governor = tokenList.get(0);
    Token dependent = tokenList.get(1);

    Dependency relation = new Dependency(jcas);
    relation.setGovernor(governor);
    relation.setDependent(dependent);
    relation.setDependencyType("dep");
    // The relation is anchored on the span of its dependent
    relation.setBegin(dependent.getBegin());
    relation.setEnd(dependent.getEnd());
    relation.addToIndexes();

    // Schema with the two columns of the dependency layer
    Type relationType = jcas.getCasType(Dependency.type);
    TsvSchema schema = new TsvSchema();
    schema.addColumn(new TsvColumn(relationType, LayerType.RELATION, "DependencyType", FeatureType.PRIMITIVE));
    schema.addColumn(new TsvColumn(relationType, LayerType.RELATION, "Governor", FeatureType.RELATION_REF));

    // Build the TSV model and attach the relation to the second token
    TsvDocument tsvDoc = Tsv3XCasDocumentBuilder.of(schema, jcas);
    tsvDoc.getSentences().get(0).getTokens().get(1).addUimaAnnotation(relation, false);

    // The first two tokens rendered individually
    String expectedTokens = join(asList(
            "1-1\t0-4\tThis\t_\t_\t",
            "1-2\t5-7\tis\tdep\t1-1\t"), "\n");
    String actualTokens = join(asList(tsvDoc.getToken(0, 0), tsvDoc.getToken(0, 1)), "\n");
    assertEquals(expectedTokens, actualTokens);

    // The complete sentence rendered as a whole
    String expectedSentence = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t_\t\n"
            + "1-2\t5-7\tis\tdep\t1-1\t\n"
            + "1-3\t8-9\ta\t_\t_\t\n"
            + "1-4\t10-14\ttest\t_\t_\t\n"
            + "1-5\t15-16\t.\t_\t_\t\n";
    assertEquals(expectedSentence, tsvDoc.getSentences().get(0).toString());
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) ArrayList(java.util.ArrayList) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Test(org.junit.Test)

Example 15 with TsvColumn

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.

the class Tsv3XSerializer method write.

/**
 * Writes the layer/column declarations for the given active columns.
 * <p>
 * The header columns are obtained from the schema. Whenever the UIMA type changes from one
 * column to the next, a new declaration line is started with the prefix matching the layer
 * type, followed by the type name. After that, each column contributes its own field to the
 * current line, depending on its feature type. The output is terminated by a line break after
 * the last declaration (if any) plus one additional line break.
 *
 * @param aOut
 *            the writer receiving the output.
 * @param aSchema
 *            the schema from which the header columns are obtained.
 * @param aActiveColumns
 *            the columns which are passed to the schema to determine the header columns.
 */
public void write(PrintWriter aOut, TsvSchema aSchema, Set<TsvColumn> aActiveColumns) {
    Type currentType = null;
    List<TsvColumn> headerColumns = aSchema.getHeaderColumns(aActiveColumns);
    for (TsvColumn col : headerColumns) {
        // Start a new declaration line whenever the UIMA type changes
        if (currentType == null || !currentType.equals(col.uimaType)) {
            if (currentType != null) {
                // Terminate the declaration line of the previous type
                aOut.print(LINE_BREAK);
            }
            currentType = col.uimaType;
            switch(col.layerType) {
                case SPAN:
                    aOut.print(HEADER_PREFIX_SPAN_LAYER);
                    break;
                case RELATION:
                    aOut.print(HEADER_PREFIX_RELATION_LAYER);
                    break;
                case CHAIN:
                    aOut.print(HEADER_PREFIX_CHAIN_LAYER);
                    break;
            }
            aOut.print(col.uimaType.getName());
        }
        if (RELATION_REF.equals(col.featureType)) {
            aOut.print(HEADER_FIELD_SEPARATOR);
            aOut.print(HEADER_PREFIX_BASE_TYPE);
            // The type name is data, not a format string, so it is written with print()
            // rather than printf().
            if (col.getTargetTypeHint() != null) {
                // COMPATIBILITY NOTE:
                // WebAnnoTsv3Writer obtains the type of a relation target column not from
                // the type system definition but rather by looking at target used by the
                // first actual annotation. This assumes that relations are always only on
                // a single type.
                aOut.print(col.getTargetTypeHint().getName());
            } else {
                aOut.print(col.uimaFeature.getRange().getName());
            }
        } else if (SLOT_TARGET.equals(col.featureType)) {
            aOut.print(HEADER_FIELD_SEPARATOR);
            if (col.getTargetTypeHint() != null) {
                // COMPATIBILITY NOTE:
                // WebAnnoTsv3Writer obtains the type of a slot target column not from
                // the type system definition but rather by looking at target used by the
                // first actual annotation.
                // Write the type name explicitly instead of relying on the string
                // representation of the type object.
                aOut.print(col.getTargetTypeHint().getName());
            } else {
                aOut.print(col.uimaFeature.getRange().getName());
            }
        } else if (SLOT_ROLE.equals(col.featureType)) {
            aOut.print(HEADER_FIELD_SEPARATOR);
            aOut.print(HEADER_PREFIX_ROLE);
            aOut.printf("%s_%s", col.uimaFeature.getName(), col.uimaFeature.getRange().getComponentType().getName());
        } else {
            // COMPATIBILITY NOTE:
            // Yes, this pipe symbol needs to be written
            aOut.print("|");
            // Columns without a feature contribute only the separator
            if (col.uimaFeature != null) {
                aOut.print(col.uimaFeature.getShortName());
            }
        }
    }
    // Add line-break to terminate the final column definition
    if (!headerColumns.isEmpty()) {
        aOut.print(LINE_BREAK);
    }
    // COMPATIBILITY NOTE:
    // This is really just to make the output match exactly TSV3
    aOut.print(LINE_BREAK);
}
Also used : Type(org.apache.uima.cas.Type) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)

Aggregations

TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)16 Type (org.apache.uima.cas.Type)9 LayerType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType)8 TsvSchema (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema)8 TsvDocument (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument)7 FeatureType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType)5 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)5 JCas (org.apache.uima.jcas.JCas)5 Test (org.junit.Test)5 FeatureStructure (org.apache.uima.cas.FeatureStructure)4 TsvFormatHeader (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader)2 TsvToken (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken)2 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)2 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 Feature (org.apache.uima.cas.Feature)2 TsvChain (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvChain)1 TsvSentence (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence)1