use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XDeserializer method parseColumnDeclaration.
/**
 * Parses a single column declaration from the TSV header and creates the corresponding
 * {@link TsvColumn}.
 * <p>
 * The declaration is classified, in this order, as a slot role column, a relation reference
 * column, a chain element type column, a chain link type column, a slot target column or a
 * primitive feature column.
 *
 * @param aJCas
 *            the CAS whose type system is used to resolve type names found in the declaration.
 * @param aLayerType
 *            the layer type the column belongs to.
 * @param aUimaType
 *            the UIMA type the column belongs to; feature names are looked up on this type.
 * @param aIndex
 *            the index handed to the created column.
 * @param aColDecl
 *            the raw column declaration text.
 * @param aPrevCol
 *            the column declared immediately before this one, or {@code null}; a slot target
 *            column takes its feature name from a preceding slot role column.
 * @return the column described by the declaration.
 * @throws IOException
 *             if the declaration is malformed, names a type or feature that does not exist, or
 *             is a slot target declaration that does not follow a slot role declaration.
 */
private TsvColumn parseColumnDeclaration(JCas aJCas, LayerType aLayerType, Type aUimaType,
        int aIndex, String aColDecl, TsvColumn aPrevCol) throws IOException {
    TypeSystem ts = aJCas.getTypeSystem();
    TsvColumn column;

    if (SPAN.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_ROLE)) {
        // SLOT_ROLE - starts with "ROLE_"
        String[] subFields = splitPreserveAllTokens(aColDecl, '_');
        // Both subFields[1] and subFields[2] are read below. Report a truncated declaration as
        // an IOException like every other header problem instead of failing with an
        // ArrayIndexOutOfBoundsException.
        if (subFields.length < 3) {
            throw new IOException(
                    "Malformed slot role column declaration [" + aColDecl + "]");
        }

        String featureName = substringAfter(subFields[1], ":");
        Feature feat = aUimaType.getFeatureByBaseName(featureName);
        if (feat == null) {
            throw new IOException("CAS type [" + aUimaType.getName()
                    + "] does not have a feature called [" + featureName + "]");
        }

        column = new TsvColumn(aIndex, aUimaType, aLayerType, featureName, SLOT_ROLE);

        String typeName = subFields[2];
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    }
    else if (RELATION.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_BASE_TYPE)) {
        // RELATION_REF - starts with "BT_"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, FEAT_REL_SOURCE, RELATION_REF);

        String typeName = substringAfter(aColDecl, HEADER_PREFIX_BASE_TYPE);
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    }
    else if (CHAIN.equals(aLayerType) && COREFERENCE_TYPE_FEATURE.equals(aColDecl)) {
        // CHAIN_ELEMENT_TYPE - "referenceType"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_TYPE_FEATURE,
                CHAIN_ELEMENT_TYPE);
    }
    else if (CHAIN.equals(aLayerType) && COREFERENCE_RELATION_FEATURE.equals(aColDecl)) {
        // CHAIN_LINK_TYPE - "referenceRelation"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_RELATION_FEATURE,
                CHAIN_LINK_TYPE);
    }
    else if ((SPAN.equals(aLayerType) && aColDecl.contains("."))
            || ts.getType(aColDecl) != null) {
        // SLOT_TARGET - name of the link target type
        // NOTE(review): the parentheses only spell out the existing operator precedence. As
        // written, a declaration naming an existing type matches this branch for ANY layer
        // type, not just SPAN - confirm that this is intended.

        // We may have got here merely because the declaration contains a dot, so make sure
        // the type name really exists in the target CAS.
        Type type = ts.getType(aColDecl);
        if (type == null) {
            throw new IOException(
                    "CAS type system does not contain a type named [" + aColDecl + "]");
        }

        // The feature name of a slot target column is taken from the preceding column, so
        // that column must be a SLOT_ROLE column.
        if (aPrevCol == null || !SLOT_ROLE.equals(aPrevCol.featureType)) {
            throw new IOException(
                    "Slot target column declaration must follow slot role column declaration");
        }

        column = new TsvColumn(aIndex, aUimaType, aLayerType,
                aPrevCol.uimaFeature.getShortName(), SLOT_TARGET);
        column.setTargetTypeHint(type);
    }
    else if (aUimaType.getFeatureByBaseName(aColDecl) != null) {
        // PRIMITIVE - feature name
        column = new TsvColumn(aIndex, aUimaType, aLayerType, aColDecl, PRIMITIVE);
    }
    else {
        throw new IOException("Type [" + aUimaType.getName()
                + "] does not contain a feature called [" + aColDecl + "]");
    }

    return column;
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XDeserializer method getOrCreateSpanAnnotation.
/**
 * Returns the span annotation that the given column contributes to in the given unit, creating
 * it if it cannot be found.
 * <p>
 * The lookup first consults the unit itself, then (if a disambiguation ID is given) the
 * annotations registered under that ID in the document. An annotation found via its
 * disambiguation ID is added to the unit and its end offset is set to the end of the unit.
 *
 * @param aCol
 *            the column whose UIMA type determines the annotation type.
 * @param aUnit
 *            the unit currently being processed.
 * @param aStackingIndex
 *            the stacking index used to look the annotation up in the unit.
 * @param aDisambiguationInfo
 *            the disambiguation ID as text, or {@code null} if there is none.
 * @return the existing or newly created annotation.
 */
private AnnotationFS getOrCreateSpanAnnotation(TsvColumn aCol, TsvUnit aUnit,
        int aStackingIndex, String aDisambiguationInfo) {
    // -1 stands for "no disambiguation ID"
    final int disambiguationId = aDisambiguationInfo == null ? -1
            : Integer.parseInt(aDisambiguationInfo);
    final boolean hasDisambiguationId = disambiguationId != -1;

    // First try: the annotation may already be registered on this unit by another column.
    AnnotationFS annotation = aUnit.getUimaAnnotation(aCol.uimaType, aStackingIndex);

    // Second try: the annotation may have been registered under its ID by a previous unit.
    if (annotation == null && hasDisambiguationId) {
        annotation = aUnit.getDocument().getDisambiguatedAnnotation(disambiguationId);
        if (annotation != null) {
            aUnit.addUimaAnnotation(annotation);
            // AnnotationFS offers no setEnd(), so the end offset is written through the
            // feature API to make the annotation reach to the end of this unit.
            setFeature(annotation, CAS.FEATURE_BASE_NAME_END, aUnit.getEnd());
        }
    }

    // Last resort: create a fresh annotation spanning exactly this unit.
    if (annotation == null) {
        annotation = aUnit.getDocument().getJCas().getCas().createAnnotation(aCol.uimaType,
                aUnit.getBegin(), aUnit.getEnd());
        aUnit.addUimaAnnotation(annotation);

        // Every slot target feature of this type starts out as an empty list.
        for (TsvColumn candidate : aUnit.getDocument().getSchema().getColumns(aCol.uimaType)) {
            if (!SLOT_TARGET.equals(candidate.featureType)) {
                continue;
            }
            setFeature(annotation, candidate.uimaFeature.getShortName(), emptyList());
        }

        // DKPro Core annotations that hang off the token also need to be set on the token.
        final String typeName = aCol.uimaType.getName();
        if (Lemma.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setLemma((Lemma) annotation);
        }
        else if (Stem.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setStem((Stem) annotation);
        }
        else if (MorphologicalFeatures.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setMorph((MorphologicalFeatures) annotation);
        }
        else if (POS.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setPos((POS) annotation);
        }
    }

    // Register the annotation under its disambiguation ID in the document; the lookup by ID
    // above relies on this registration.
    if (hasDisambiguationId) {
        aUnit.getDocument().addDisambiguationId(annotation, disambiguationId);
    }

    return annotation;
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XSerializerTest method testSingleSubTokenWithValue.
/**
 * A named entity with value "PER" at offsets 1-3, i.e. inside the first token, is expected to
 * be rendered as the sub-token unit {@code 1-1.1}.
 */
@Test
public void testSingleSubTokenWithValue() throws Exception {
    // Test document with a single annotation inside the first token
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 1, 3, "PER");

    // Schema with a single column: the primitive "value" feature of the named entity span
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE));

    // Build the TSV model from the CAS
    TsvDocument tsvDoc = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);

    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t1-3\thi\tPER\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actual = tsvDoc.getSentences().get(0).toString();

    assertEquals(expected, actual);
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XSerializerTest method testSingleZeroWidthTokenWithoutValue.
/**
 * A zero-width named entity at offset 0 without a value is expected to be rendered as the
 * sub-token unit {@code 1-1.1} with empty text and the value placeholder {@code *}.
 */
@Test
public void testSingleZeroWidthTokenWithoutValue() throws Exception {
    // Test document with a single zero-width annotation that carries no value
    JCas jcas = makeJCasOneSentence("This is a test .");
    addNamedEntity(jcas, 0, 0, null);

    // Schema with a single column: the primitive "value" feature of the named entity span
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE));

    // Build the TSV model from the CAS
    TsvDocument tsvDoc = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);

    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\t_\t\n"
            + "1-1.1\t0-0\t\t*\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    String actual = tsvDoc.getSentences().get(0).toString();

    assertEquals(expected, actual);
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XSerializerTest method testStackedSingleTokenWithValue.
/**
 * Two named entities ("PER" and "ORG") on the same first token are expected to be rendered
 * in one cell as {@code PER[1]|ORG[2]}.
 */
@Test
public void testStackedSingleTokenWithValue() throws Exception {
    // Test document with two annotations stacked on the first token
    JCas jcas = makeJCasOneSentence("This is a test .");
    NamedEntity perEntity = addNamedEntity(jcas, 0, 4, "PER");
    NamedEntity orgEntity = addNamedEntity(jcas, 0, 4, "ORG");

    // Schema with a single column: the primitive "value" feature of the named entity span
    Type neType = jcas.getCasType(NamedEntity.type);
    TsvSchema tsvSchema = new TsvSchema();
    tsvSchema.addColumn(new TsvColumn(neType, LayerType.SPAN, "value", FeatureType.PRIMITIVE));

    // Build the TSV model from the CAS, then add both entities to the first token explicitly
    TsvDocument tsvDoc = Tsv3XCasDocumentBuilder.of(tsvSchema, jcas);
    tsvDoc.getSentences().get(0).getTokens().get(0).addUimaAnnotation(perEntity, true);
    tsvDoc.getSentences().get(0).getTokens().get(0).addUimaAnnotation(orgEntity, true);

    // The first token on its own ...
    assertEquals("1-1\t0-4\tThis\tPER[1]|ORG[2]\t",
            tsvDoc.getSentences().get(0).getTokens().get(0).toString());

    // ... and the sentence as a whole
    String expected = "#Text=This is a test .\n"
            + "1-1\t0-4\tThis\tPER[1]|ORG[2]\t\n"
            + "1-2\t5-7\tis\t_\t\n"
            + "1-3\t8-9\ta\t_\t\n"
            + "1-4\t10-14\ttest\t_\t\n"
            + "1-5\t15-16\t.\t_\t\n";
    assertEquals(expected, tsvDoc.getSentences().get(0).toString());
}
Aggregations