Search in sources:

Example 6 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XDeserializer method readSchema.

/**
 * Reads the layer declarations that make up the file header and builds the column schema from
 * them.
 * <p>
 * Lines are consumed from {@code aIn} until {@code isBlank(line)} holds or until a line is
 * encountered that does not start with one of the span/relation/chain layer prefixes. Note that
 * the line which terminates the header has already been consumed from the reader when this
 * method returns.
 *
 * @param aIn
 *            the reader positioned at the first header line to parse.
 * @param aJCas
 *            the target CAS; its type system must contain every type named in the header.
 * @return the schema holding one column per declared feature, or one placeholder column for a
 *         layer that declares no features.
 * @throws IOException
 *             if reading fails, if a layer declaration does not name a type, or if the named
 *             type is not part of the type system of the target CAS.
 */
private TsvSchema readSchema(LineNumberReader aIn, JCas aJCas) throws IOException {
    TsvSchema schema = new TsvSchema();
    // Running index across all layers - columns are numbered consecutively over the whole header
    int columnIndex = 0;
    // Process the header one line at a time
    for (String line = aIn.readLine(); !isBlank(line); line = aIn.readLine()) {
        LayerType layerType;
        // Determine layer type
        if (startsWith(line, HEADER_PREFIX_SPAN_LAYER)) {
            layerType = SPAN;
        } else if (startsWith(line, HEADER_PREFIX_RELATION_LAYER)) {
            layerType = RELATION;
        } else if (startsWith(line, HEADER_PREFIX_CHAIN_LAYER)) {
            layerType = CHAIN;
        } else {
            // End of header
            break;
        }
        // Split up layer declaration
        String rest = substringAfter(line, HEADER_LAYER_PREFIX_SEPARATOR);
        String[] fields = split(rest, HEADER_FIELD_SEPARATOR);
        // A declaration without any field has no type name. Report it as malformed input
        // instead of failing with an ArrayIndexOutOfBoundsException on fields[0] below.
        if (fields == null || fields.length == 0) {
            throw new IOException("Layer declaration in line " + aIn.getLineNumber() + " does not specify a type name: [" + line + "]");
        }
        // Get the type name and the corresponding UIMA type from the type system of the
        // target CAS
        String typeName = fields[0];
        Type uimaType = aJCas.getTypeSystem().getType(typeName);
        if (uimaType == null) {
            throw new IOException("CAS type system does not contain a type named [" + typeName + "]");
        }
        // Parse the column declarations starting at the second field (the first is the
        // type name). Each declaration is handed the previously parsed column of the same layer.
        TsvColumn prevColumn = null;
        for (int i = 1; i < fields.length; i++) {
            String colDecl = fields[i];
            TsvColumn col = parseColumnDeclaration(aJCas, layerType, uimaType, columnIndex, colDecl, prevColumn);
            schema.addColumn(col);
            columnIndex++;
            prevColumn = col;
        }
        // If there is no second field, then add a placeholder column
        if (fields.length == 1) {
            schema.addColumn(new TsvColumn(columnIndex, uimaType, layerType));
            columnIndex++;
        }
    }
    return schema;
}
Also used : Type(org.apache.uima.cas.Type) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) IOException(java.io.IOException)

Example 7 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class WebannoTsv3XWriter method process.

/**
 * Derives a TSV schema from the type system of the given CAS, builds the TSV document model
 * for the CAS and serializes it to the output stream obtained via
 * {@code getOutputStream(aJCas, filenameSuffix)} using the configured {@code encoding}.
 *
 * @param aJCas
 *            the CAS to write.
 * @throws AnalysisEngineProcessException
 *             if the output cannot be opened or an error occurs while writing to it.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    TsvSchema schema = Tsv3XCasSchemaAnalyzer.analyze(aJCas.getTypeSystem());
    TsvDocument doc = Tsv3XCasDocumentBuilder.of(schema, aJCas);
    try (PrintWriter docOS = new PrintWriter(new OutputStreamWriter(getOutputStream(aJCas, filenameSuffix), encoding))) {
        new Tsv3XSerializer().write(docOS, doc);
        // PrintWriter does not propagate IOExceptions from the underlying stream; it only
        // records that an error happened. checkError() flushes the writer and reports that
        // state, so a failed write is surfaced here instead of being silently dropped.
        if (docOS.checkError()) {
            throw new IOException("An error occurred while writing the TSV output");
        }
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) OutputStreamWriter(java.io.OutputStreamWriter) Tsv3XSerializer(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.Tsv3XSerializer) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) PrintWriter(java.io.PrintWriter)

Example 8 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XSchemaAnalyzerTest method testAnalyze.

/**
 * Smoke test: analyzes the type system of a freshly created JCas and prints every column of
 * the resulting schema to standard output. The test makes no assertions; it only fails if one
 * of the calls throws.
 */
@Test
public void testAnalyze() throws Exception {
    final TsvSchema analyzed = Tsv3XCasSchemaAnalyzer.analyze(JCasFactory.createJCas().getTypeSystem());
    for (final TsvColumn column : analyzed.getColumns()) {
        System.out.println(column);
    }
}
Also used : TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Test(org.junit.Test)

Example 9 with TsvSchema

use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.

the class Tsv3XSerializerTest method testRelation.

/**
 * Checks the TSV rendering of a single dependency relation: a one-sentence document receives a
 * {@code Dependency} from the first token (governor) to the second token (dependent), and both
 * the two affected token lines and the whole sentence are compared against the expected text.
 */
@Test
public void testRelation() throws Exception {
    // Build the document and pick the two tokens that take part in the relation
    JCas cas = makeJCasOneSentence("This is a test .");
    List<Token> tokens = new ArrayList<>(select(cas, Token.class));
    Token governor = tokens.get(0);
    Token dependent = tokens.get(1);

    // The relation is anchored on the offsets of its dependent
    Dependency dep = new Dependency(cas);
    dep.setGovernor(governor);
    dep.setDependent(dependent);
    dep.setDependencyType("dep");
    dep.setBegin(dependent.getBegin());
    dep.setEnd(dependent.getEnd());
    dep.addToIndexes();

    // Schema with two relation columns: the dependency label and the governor reference
    Type dependencyType = cas.getCasType(Dependency.type);
    TsvSchema schema = new TsvSchema();
    schema.addColumn(new TsvColumn(dependencyType, LayerType.RELATION, "DependencyType", FeatureType.PRIMITIVE));
    schema.addColumn(new TsvColumn(dependencyType, LayerType.RELATION, "Governor", FeatureType.RELATION_REF));

    // Build the TSV model and attach the relation to the second token of the first sentence
    TsvDocument doc = Tsv3XCasDocumentBuilder.of(schema, cas);
    doc.getSentences().get(0).getTokens().get(1).addUimaAnnotation(dep, false);

    // First verify the two tokens involved in the relation ...
    String expectedTokens = join(asList("1-1\t0-4\tThis\t_\t_\t", "1-2\t5-7\tis\tdep\t1-1\t"), "\n");
    String actualTokens = join(asList(doc.getToken(0, 0), doc.getToken(0, 1)), "\n");
    assertEquals(expectedTokens, actualTokens);

    // ... then the rendering of the complete sentence
    String expectedSentence = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t_\t\n"
            + "1-2\t5-7\tis\tdep\t1-1\t\n"
            + "1-3\t8-9\ta\t_\t_\t\n"
            + "1-4\t10-14\ttest\t_\t_\t\n"
            + "1-5\t15-16\t.\t_\t_\t\n";
    assertEquals(expectedSentence, doc.getSentences().get(0).toString());
}
Also used : LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) FeatureType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) ArrayList(java.util.ArrayList) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) JCas(org.apache.uima.jcas.JCas) TsvSchema(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Test(org.junit.Test)

Aggregations

TsvSchema (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema)9 TsvColumn (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn)8 LayerType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType)6 TsvDocument (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument)6 Type (org.apache.uima.cas.Type)6 FeatureType (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.FeatureType)5 JCas (org.apache.uima.jcas.JCas)5 Test (org.junit.Test)5 IOException (java.io.IOException)2 Tsv3XSerializer (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.Tsv3XSerializer)1 TsvChain (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvChain)1 TsvFormatHeader (de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader)1 NamedEntity (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity)1 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)1 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)1 OutputStreamWriter (java.io.OutputStreamWriter)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)1