use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.
the class Tsv3XCasSchemaAnalyzer method analyze.
/**
 * Derives a {@link TsvSchema} from the given type system.
 * <p>
 * Every direct subtype of the UIMA annotation type is inspected, except for types subsumed by
 * the document annotation type and the {@code Token} / {@code Sentence} types. Depending on the
 * layer type the schema reports for a type, relation, chain or span columns are registered.
 * Afterwards the direct subtypes of the annotation base type are scanned to register the chain
 * head types whose first-link feature points at one of the chain link types found before.
 *
 * @param aTypeSystem
 *            the type system to analyze.
 * @return the schema populated with the generated columns and chain head types.
 */
public static TsvSchema analyze(TypeSystem aTypeSystem) {
    TsvSchema result = new TsvSchema();
    Set<Type> linkTypes = new HashSet<>();

    // Only direct subtypes of the UIMA Annotation type are looked at, because WebAnno
    // currently supports no other kind of layer.
    Type annotation = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION);
    Type documentAnnotation = aTypeSystem.getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION);

    for (Type candidate : aTypeSystem.getDirectSubtypes(annotation)) {
        // Document annotations as well as tokens and sentences never get columns of their own.
        if (aTypeSystem.subsumes(documentAnnotation, candidate)
                || candidate.getName().equals(Token.class.getName())
                || candidate.getName().equals(Sentence.class.getName())) {
            continue;
        }

        switch (result.getLayerType(candidate)) {
        case RELATION: {
            Feature sourceFeature = candidate.getFeatureByBaseName(FEAT_REL_SOURCE);
            result.addColumn(new TsvColumn(candidate, RELATION, sourceFeature, RELATION_REF));
            generateColumns(aTypeSystem, result, RELATION, candidate);
            break;
        }
        case CHAIN: {
            Feature elementFeature = candidate.getFeatureByBaseName(COREFERENCE_TYPE_FEATURE);
            result.addColumn(
                    new TsvColumn(candidate, CHAIN, elementFeature, CHAIN_ELEMENT_TYPE));
            Feature linkFeature = candidate.getFeatureByBaseName(COREFERENCE_RELATION_FEATURE);
            result.addColumn(new TsvColumn(candidate, CHAIN, linkFeature, CHAIN_LINK_TYPE));
            // Remember the link type so that its chain head can be located in the second pass.
            linkTypes.add(candidate);
            break;
        }
        case SPAN: {
            result.addColumn(new TsvColumn(candidate, SPAN));
            generateColumns(aTypeSystem, result, SPAN, candidate);
            break;
        }
        case INCOMPATIBLE:
            // Incompatible types do not contribute any column definition.
            break;
        }
    }

    // Second pass: locate the chain head types belonging to the collected link types.
    Type annotationBase = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION_BASE);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationBase)) {
        Feature first = candidate.getFeatureByBaseName(CHAIN_FIRST_FEAT);
        if (first == null || !linkTypes.contains(first.getRange())) {
            continue;
        }
        result.addChainHeadType(candidate);
    }

    return result;
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.
the class Tsv3XDeserializer method read.
/**
 * Reads a TSV document from the given reader into the given JCas.
 * <p>
 * The format header and the schema are read first, followed by the blank separator line and
 * the content. After the content has been read, a chain head is created and indexed for every
 * non-empty chain and its elements are linked in iteration order. Finally, all deferred
 * actions collected during reading are executed.
 *
 * @param aIn
 *            the reader providing the TSV data.
 * @param aJCas
 *            the JCas to populate.
 * @throws IOException
 *             if reading from {@code aIn} fails.
 */
public void read(LineNumberReader aIn, JCas aJCas) throws IOException {
    // Start with a fresh list of deferred actions for this read operation.
    deferredActions.set(new ArrayList<>());

    TsvFormatHeader header = readFormat(aIn);
    TsvSchema tsvSchema = readSchema(aIn, aJCas);

    // The schema declaration is followed by one extra blank line which is consumed here.
    String separatorLine = aIn.readLine();
    assert isEmpty(separatorLine);

    TsvDocument document = new TsvDocument(header, tsvSchema, aJCas);
    for (TsvColumn col : tsvSchema.getColumns()) {
        document.activateColumn(col);
        document.activateType(col.uimaType);
    }

    readContent(aIn, document);

    // Finish the chains: create a head per chain and wire the links together.
    CAS targetCas = aJCas.getCas();
    for (TsvChain tsvChain : document.getChains()) {
        Iterator<AnnotationFS> elements = tsvChain.getElements().iterator();
        if (!elements.hasNext()) {
            // Nothing to link for a chain without elements.
            continue;
        }

        AnnotationFS current = elements.next();

        // The chain head points at the first link and is added to the indexes.
        FeatureStructure chainHead = targetCas.createFS(tsvChain.getHeadType());
        setFeature(chainHead, CHAIN_FIRST_FEAT, current);
        targetCas.addFsToIndexes(chainHead);

        // Each link refers to its successor via the "next" feature.
        while (elements.hasNext()) {
            AnnotationFS successor = elements.next();
            setFeature(current, CHAIN_NEXT_FEAT, successor);
            current = successor;
        }
    }

    // Execute whatever was postponed while reading.
    for (Runnable deferred : deferredActions.get()) {
        deferred.run();
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.
the class Tsv3XSerializerTest method testSingleSubTokenWithValue.
/**
 * Checks the rendering of a single named entity with a value that covers only part of a token
 * (offsets 1-3 inside the first token), which is expected to show up as sub-token line
 * {@code 1-1.1}.
 */
@Test
public void testSingleSubTokenWithValue() throws Exception {
    // Test document with one sub-token annotation
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 1, 3, "PER");

    // Schema with a single primitive "value" column on the named entity span layer
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE));

    // Build the TSV model from the CAS
    TsvDocument document = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);

    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t1-3\thi\tPER\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actual = document.getSentences().get(0).toString();
    assertEquals(expected, actual);
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.
the class Tsv3XSerializerTest method testSingleZeroWidthTokenWithoutValue.
/**
 * Checks the rendering of a single zero-width named entity (offsets 0-0) that carries no value;
 * it is expected to show up as sub-token line {@code 1-1.1} with {@code *} in the value column.
 */
@Test
public void testSingleZeroWidthTokenWithoutValue() throws Exception {
    // Test document with one zero-width annotation without a value
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 0, 0, null);

    // Schema with a single primitive "value" column on the named entity span layer
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE));

    // Build the TSV model from the CAS
    TsvDocument document = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);

    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t0-0\t\t*\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actual = document.getSentences().get(0).toString();
    assertEquals(expected, actual);
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSchema in project webanno by webanno.
the class Tsv3XSerializerTest method testStackedSingleTokenWithValue.
/**
 * Checks the rendering of two named entities stacked on the same token (offsets 0-4); both
 * values are expected in the same cell, each carrying its own disambiguation index.
 */
@Test
public void testStackedSingleTokenWithValue() throws Exception {
    String expectedFirstToken = "1-1\t0-4\tThis\tPER[1]|ORG[2]\t";
    String expectedSentence = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\tPER[1]|ORG[2]\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";

    // Test document with two annotations on the very same span
    JCas jcas = makeJCasOneSentence("This is a test .");
    NamedEntity personEntity = addNamedEntity(jcas, 0, 4, "PER");
    NamedEntity organizationEntity = addNamedEntity(jcas, 0, 4, "ORG");

    // Schema with a single primitive "value" column on the named entity span layer
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE));

    // Build the TSV model from the CAS and register both annotations on the first token
    TsvDocument document = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);
    document.getSentences().get(0).getTokens().get(0).addUimaAnnotation(personEntity, true);
    document.getSentences().get(0).getTokens().get(0).addUimaAnnotation(organizationEntity, true);

    assertEquals(expectedFirstToken,
            document.getSentences().get(0).getTokens().get(0).toString());
    assertEquals(expectedSentence, document.getSentences().get(0).toString());
}
Aggregations