use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XDeserializer method readSchema.
/**
 * Reads the layer declarations from the header and builds the corresponding schema.
 * <p>
 * Lines are consumed from {@code aIn} until a blank line or the end of the input is reached, or
 * until a line is found that does not start with one of the known layer prefixes. Note that such
 * a non-matching line has already been consumed from the reader when the method returns.
 * <p>
 * Every field following the type name in a layer declaration becomes one column. A layer that is
 * declared with a type name only contributes a single placeholder column. Column indexes are
 * assigned consecutively across all layers.
 *
 * @param aIn
 *            the reader providing the header lines.
 * @param aJCas
 *            the CAS whose type system is used to resolve the declared type names.
 * @return the schema built from the layer declarations.
 * @throws IOException
 *             if reading fails, if a layer declaration does not contain a type name, or if the
 *             declared type is not part of the type system of the target CAS.
 */
private TsvSchema readSchema(LineNumberReader aIn, JCas aJCas) throws IOException {
    TsvSchema schema = new TsvSchema();
    int columnIndex = 0;

    // Process the header line by line
    for (String line = aIn.readLine(); !isBlank(line); line = aIn.readLine()) {
        LayerType layerType;

        // Determine layer type
        if (startsWith(line, HEADER_PREFIX_SPAN_LAYER)) {
            layerType = SPAN;
        } else if (startsWith(line, HEADER_PREFIX_RELATION_LAYER)) {
            layerType = RELATION;
        } else if (startsWith(line, HEADER_PREFIX_CHAIN_LAYER)) {
            layerType = CHAIN;
        } else {
            // End of header
            break;
        }

        // Split up layer declaration
        String rest = substringAfter(line, HEADER_LAYER_PREFIX_SEPARATOR);
        String[] fields = split(rest, HEADER_FIELD_SEPARATOR);

        // A declaration without any field has no type name. Report this as malformed input
        // instead of failing with an ArrayIndexOutOfBoundsException on fields[0] below.
        if (fields == null || fields.length == 0) {
            throw new IOException("Layer declaration in line " + aIn.getLineNumber()
                    + " does not contain a type name: [" + line + "]");
        }

        // Get the type name and the corresponding UIMA type from the type system of the
        // target CAS
        String typeName = fields[0];
        Type uimaType = aJCas.getTypeSystem().getType(typeName);
        if (uimaType == null) {
            throw new IOException("CAS type system does not contain a type named [" + typeName + "]");
        }

        // Parse the column declarations starting at the second field (the first is the
        // type name). Each column is handed the previously parsed column of the same layer.
        TsvColumn prevColumn = null;
        for (int i = 1; i < fields.length; i++) {
            String colDecl = fields[i];
            TsvColumn col = parseColumnDeclaration(aJCas, layerType, uimaType, columnIndex, colDecl, prevColumn);
            schema.addColumn(col);
            columnIndex++;
            prevColumn = col;
        }

        // If there is no second field, then add a placeholder column
        if (fields.length == 1) {
            schema.addColumn(new TsvColumn(columnIndex, uimaType, layerType));
            columnIndex++;
        }
    }

    return schema;
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XDeserializer method parseAnnotations.
/**
 * Parses the annotation values of one unit for every header column of the document schema.
 * <p>
 * For each header column, the field at position {@code col.index + 3} of {@code aFields} is
 * looked up. Fields equal to {@code NULL_COLUMN} are skipped. All other fields are split using
 * {@code STACK_SEP_PATTERN} and every resulting value is passed to {@code parseAnnotation}
 * together with its zero-based position among the split values.
 *
 * @param aDoc
 *            the document providing the schema.
 * @param aSentence
 *            the sentence passed through to {@code parseAnnotation}.
 * @param aUnit
 *            the unit passed through to {@code parseAnnotation}.
 * @param aFields
 *            the fields of the line being parsed.
 */
private void parseAnnotations(TsvDocument aDoc, TsvSentence aSentence, TsvUnit aUnit, String[] aFields) {
    for (TsvColumn column : aDoc.getSchema().getHeaderColumns(aDoc.getSchema().getColumns())) {
        // Annotation fields are located three positions after the column index
        String cell = aFields[column.index + 3];

        if (!NULL_COLUMN.equals(cell)) {
            String[] stacked = STACK_SEP_PATTERN.split(cell);
            for (int stackIndex = 0; stackIndex < stacked.length; stackIndex++) {
                parseAnnotation(aDoc, aSentence, aUnit, column, stackIndex, stacked[stackIndex]);
            }
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XSchemaAnalyzerTest method testAnalyze.
/**
 * Analyzes the type system of a freshly created JCas and prints every column of the resulting
 * schema to standard output. The test makes no assertions; it only fails if an exception is
 * thrown.
 */
@Test
public void testAnalyze() throws Exception {
    TsvSchema analyzed = Tsv3XCasSchemaAnalyzer.analyze(JCasFactory.createJCas().getTypeSystem());

    // One line of output per column
    analyzed.getColumns().forEach(System.out::println);
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XSerializerTest method testRelation.
/**
 * Checks the TSV rendering of a dependency relation whose governor is the first token and whose
 * dependent is the second token of a one-sentence document.
 */
@Test
public void testRelation() throws Exception {
    // Create the test document and pick the two tokens taking part in the relation
    JCas cas = makeJCasOneSentence("This is a test .");
    List<Token> tokens = new ArrayList<>(select(cas, Token.class));
    Token governor = tokens.get(0);
    Token dependent = tokens.get(1);

    // The relation annotation covers the span of its dependent
    Dependency relation = new Dependency(cas);
    relation.setGovernor(governor);
    relation.setDependent(dependent);
    relation.setDependencyType("dep");
    relation.setBegin(dependent.getBegin());
    relation.setEnd(dependent.getEnd());
    relation.addToIndexes();

    // Set up a TSV schema with the two relation columns
    Type relationType = cas.getCasType(Dependency.type);
    TsvSchema schema = new TsvSchema();
    schema.addColumn(new TsvColumn(relationType, LayerType.RELATION, "DependencyType", FeatureType.PRIMITIVE));
    schema.addColumn(new TsvColumn(relationType, LayerType.RELATION, "Governor", FeatureType.RELATION_REF));

    // Convert the test document to the TSV model and attach the relation to the second token
    TsvDocument doc = Tsv3XCasDocumentBuilder.of(schema, cas);
    doc.getSentences().get(0).getTokens().get(1).addUimaAnnotation(relation, false);

    // Verify the first two tokens individually
    String expectedTokens = join(asList("1-1\t0-4\tThis\t_\t_\t", "1-2\t5-7\tis\tdep\t1-1\t"), "\n");
    String actualTokens = join(asList(doc.getToken(0, 0), doc.getToken(0, 1)), "\n");
    assertEquals(expectedTokens, actualTokens);

    // Verify the rendering of the complete sentence
    String expectedSentence = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t_\t\n"
            + "1-2\t5-7\tis\tdep\t1-1\t\n"
            + "1-3\t8-9\ta\t_\t_\t\n"
            + "1-4\t10-14\ttest\t_\t_\t\n"
            + "1-5\t15-16\t.\t_\t_\t\n";
    assertEquals(expectedSentence, doc.getSentences().get(0).toString());
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XSerializer method write.
/**
 * Writes the header section declaring the layers and columns of the schema.
 * <p>
 * The header columns are obtained via {@code aSchema.getHeaderColumns(aActiveColumns)}. A new
 * layer declaration line is started whenever the UIMA type of a column differs from the type of
 * the column before it; the line begins with the prefix matching the layer type followed by the
 * type name. Each column then appends its own declaration to the current line. If there is at
 * least one header column, the last declaration line is terminated with a line break. Finally,
 * one additional line break is always written.
 *
 * @param aOut
 *            the writer receiving the header.
 * @param aSchema
 *            the schema providing the header columns.
 * @param aActiveColumns
 *            the columns passed to the schema to determine the header columns.
 */
public void write(PrintWriter aOut, TsvSchema aSchema, Set<TsvColumn> aActiveColumns) {
    Type currentType = null;
    List<TsvColumn> headerColumns = aSchema.getHeaderColumns(aActiveColumns);

    for (TsvColumn col : headerColumns) {
        // Start a new layer declaration whenever the type changes
        if (currentType == null || !currentType.equals(col.uimaType)) {
            // Terminate the previous layer declaration, if there is one
            if (currentType != null) {
                aOut.print(LINE_BREAK);
            }
            currentType = col.uimaType;

            switch (col.layerType) {
            case SPAN:
                aOut.print(HEADER_PREFIX_SPAN_LAYER);
                break;
            case RELATION:
                aOut.print(HEADER_PREFIX_RELATION_LAYER);
                break;
            case CHAIN:
                aOut.print(HEADER_PREFIX_CHAIN_LAYER);
                break;
            }
            aOut.print(col.uimaType.getName());
        }

        if (RELATION_REF.equals(col.featureType)) {
            aOut.print(HEADER_FIELD_SEPARATOR);
            aOut.print(HEADER_PREFIX_BASE_TYPE);
            // COMPATIBILITY NOTE:
            // WebAnnoTsv3Writer obtains the type of a relation target column not from
            // the type system definition but rather by looking at target used by the
            // first actual annotation. This assumes that relations are always only on
            // a single type.
            Type targetTypeHint = col.getTargetTypeHint();
            // The name is written with print(), not printf(): it is data and must not be
            // interpreted as a format string.
            aOut.print(targetTypeHint != null ? targetTypeHint.getName()
                    : col.uimaFeature.getRange().getName());
        } else if (SLOT_TARGET.equals(col.featureType)) {
            aOut.print(HEADER_FIELD_SEPARATOR);
            // COMPATIBILITY NOTE:
            // WebAnnoTsv3Writer obtains the type of a slot target column not from
            // the type system definition but rather by looking at target used by the
            // first actual annotation.
            Type targetTypeHint = col.getTargetTypeHint();
            // The type name is written explicitly via getName() - like in all other branches -
            // instead of relying on the toString() representation of the type object.
            aOut.print(targetTypeHint != null ? targetTypeHint.getName()
                    : col.uimaFeature.getRange().getName());
        } else if (SLOT_ROLE.equals(col.featureType)) {
            aOut.print(HEADER_FIELD_SEPARATOR);
            aOut.print(HEADER_PREFIX_ROLE);
            aOut.printf("%s_%s", col.uimaFeature.getName(),
                    col.uimaFeature.getRange().getComponentType().getName());
        } else {
            // COMPATIBILITY NOTE:
            // Yes, this pipe symbol needs to be written
            aOut.print("|");
            if (col.uimaFeature != null) {
                aOut.print(col.uimaFeature.getShortName());
            }
        }
    }

    // Add line-break to terminate the final column definition
    if (!headerColumns.isEmpty()) {
        aOut.print(LINE_BREAK);
    }

    // COMPATIBILITY NOTE:
    // This is really just to make the output match exactly TSV3
    aOut.print(LINE_BREAK);
}
Aggregations