use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XCasDocumentBuilder method scanUnitForActiveColumns.
/**
 * Activates the schema columns for which the given unit carries annotations and, for relation
 * reference columns, records a target type hint on the column.
 *
 * @param aUnit
 *            the unit whose annotations are inspected.
 * @throws IllegalStateException
 *             if the first relation annotation found for a relation reference column has no
 *             value for its source feature.
 */
private static void scanUnitForActiveColumns(TsvUnit aUnit) {
    for (TsvColumn column : aUnit.getDocument().getSchema().getColumns()) {
        List<AnnotationFS> columnAnnotations = aUnit.getAnnotationsForColumn(column);
        if (columnAnnotations.isEmpty()) {
            // Nothing on this unit for the column - leave the column untouched.
            continue;
        }

        // Placeholder columns are not activated by this scan.
        if (!PLACEHOLDER.equals(column.featureType)) {
            aUnit.getDocument().activateColumn(column);
        }

        boolean relationReference = RELATION.equals(column.layerType)
                && RELATION_REF.equals(column.featureType);
        if (!relationReference) {
            continue;
        }

        // The type hint of a relation reference column is derived from the first annotation
        // found for the column on this unit rather than from the column definition.
        AnnotationFS firstRelation = columnAnnotations.get(0);
        FeatureStructure endpoint = FSUtil.getFeature(firstRelation, FEAT_REL_SOURCE,
                FeatureStructure.class);
        if (endpoint == null) {
            throw new IllegalStateException("Relation does not have its source feature (" + FEAT_REL_SOURCE + ") set: " + firstRelation);
        }

        // COMPATIBILITY NOTE:
        // WebAnnoTsv3Writer hard-changes the target type for DKPro Core Dependency
        // annotations from Token to POS - the reason is not really clear. Probably because
        // the Dependency relations in the WebAnno UI attach to POS (Tokens are not visible as
        // annotations in the UI).
        boolean dependencyColumn = column.uimaType.getName().equals(Dependency.class.getName());
        column.setTargetTypeHint(dependencyColumn
                ? aUnit.getDocument().getJCas().getTypeSystem().getType(POS.class.getName())
                : endpoint.getType());
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XCasDocumentBuilder method scanUnitForAmbiguousSlotReferences.
/**
 * Registers a disambiguation ID for every slot filler that is referenced from the given unit
 * through a slot feature whose target type is the generic UIMA Annotation type.
 * <p>
 * If a slot feature has the target type Annotation, then any kind of annotation can be used as
 * slot filler. In this case, the targets are ambiguous and require a disambiguation ID.
 *
 * @param aUnit
 *            the unit whose annotations are inspected.
 * @throws IllegalStateException
 *             if a slot link does not have a target.
 */
private static void scanUnitForAmbiguousSlotReferences(TsvUnit aUnit) {
    for (TsvColumn col : aUnit.getDocument().getSchema().getColumns()) {
        if (SPAN.equals(col.layerType) && SLOT_TARGET.equals(col.featureType) && CAS.TYPE_NAME_ANNOTATION.equals(col.getTargetTypeHint().getName())) {
            List<AnnotationFS> annotationsForColumn = aUnit.getAnnotationsForColumn(col);
            for (AnnotationFS aFS : annotationsForColumn) {
                FeatureStructure[] links = getFeature(aFS, col.uimaFeature, FeatureStructure[].class);
                // NOTE(review): getFeature is assumed to return null when the slot array
                // feature has never been assigned (uimaFIT FSUtil behavior) - confirm. Such an
                // annotation has no slot fillers, so there is nothing to disambiguate and
                // iterating would only fail with a NullPointerException.
                if (links == null) {
                    continue;
                }
                for (FeatureStructure link : links) {
                    AnnotationFS targetFS = getFeature(link, TsvSchema.FEAT_SLOT_TARGET, AnnotationFS.class);
                    if (targetFS == null) {
                        throw new IllegalStateException("Slot link has no target: " + link);
                    }
                    aUnit.getDocument().addDisambiguationId(targetFS);
                }
            }
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XCasDocumentBuilder method of.
/**
 * Builds a {@link TsvDocument} for the given CAS using the given schema.
 * <p>
 * The method creates the sentences and tokens, collects the chains, attaches every annotation
 * of the schema's types to the token or sub-token units it covers and finally determines which
 * columns are active.
 *
 * @param aSchema
 *            the schema describing the types and columns to consider.
 * @param aJCas
 *            the CAS from which sentences, tokens and annotations are read.
 * @return the populated document.
 * @throws IllegalStateException
 *             if a relation annotation has no target, if no token can be located for the begin
 *             or end offset of an annotation, or if one of the unit scans fails.
 */
public static TsvDocument of(TsvSchema aSchema, JCas aJCas) {
    TsvFormatHeader format = new TsvFormatHeader("WebAnno TSV", "3.2");
    TsvDocument doc = new TsvDocument(format, aSchema, aJCas);

    // Fill document with all the sentences and tokens
    for (Sentence uimaSentence : select(aJCas, Sentence.class)) {
        TsvSentence sentence = doc.createSentence(uimaSentence);
        for (Token uimaToken : selectCovered(Token.class, uimaSentence)) {
            sentence.createToken(uimaToken);
        }
    }

    // Scan for chains: follow the links from each chain head and register non-empty chains
    for (Type headType : aSchema.getChainHeadTypes()) {
        for (FeatureStructure chainHead : CasUtil.selectFS(aJCas.getCas(), headType)) {
            List<AnnotationFS> elements = new ArrayList<>();
            AnnotationFS link = getFeature(chainHead, CHAIN_FIRST_FEAT, AnnotationFS.class);
            while (link != null) {
                elements.add(link);
                link = getFeature(link, CHAIN_NEXT_FEAT, AnnotationFS.class);
            }
            if (!elements.isEmpty()) {
                Type elementType = headType.getFeatureByBaseName(CHAIN_FIRST_FEAT).getRange();
                doc.createChain(headType, elementType, elements);
            }
        }
    }

    // Build indexes over the token start and end positions such that we can quickly locate
    // tokens based on their offsets.
    NavigableMap<Integer, TsvToken> tokenBeginIndex = new TreeMap<>();
    NavigableMap<Integer, TsvToken> tokenEndIndex = new TreeMap<>();
    List<TsvToken> tokens = new ArrayList<>();
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            tokenBeginIndex.put(token.getBegin(), token);
            tokenEndIndex.put(token.getEnd(), token);
            tokens.add(token);
        }
    }

    // Scan all annotations of the types defined in the schema, attach them to the tokens they
    // cover and create sub-token units where an annotation does not align with token
    // boundaries.
    for (Type type : aSchema.getUimaTypes()) {
        LayerType layerType = aSchema.getLayerType(type);
        boolean addDisambiguationIdIfStacked = SPAN.equals(layerType);
        for (AnnotationFS annotation : CasUtil.select(aJCas.getCas(), type)) {
            doc.activateType(annotation.getType());

            // Get the relevant begin and end offsets for the current annotation
            int begin = annotation.getBegin();
            int end = annotation.getEnd();

            // For relations, the offsets of the relation target are used instead of the
            // offsets of the relation annotation itself.
            if (RELATION.equals(layerType)) {
                AnnotationFS targetFS = getFeature(annotation, FEAT_REL_TARGET, AnnotationFS.class);
                if (targetFS == null) {
                    // Fail with a diagnostic instead of a bare NullPointerException.
                    throw new IllegalStateException("Relation does not have its target feature (" + FEAT_REL_TARGET + ") set: " + annotation);
                }
                begin = targetFS.getBegin();
                end = targetFS.getEnd();
            }

            // floorEntry/ceilingEntry return null if the annotation starts before the first
            // token or ends after the last token (or if there are no tokens at all).
            java.util.Map.Entry<Integer, TsvToken> beginEntry = tokenBeginIndex.floorEntry(begin);
            if (beginEntry == null) {
                throw new IllegalStateException("Unable to find a token starting at or before offset " + begin + " for annotation: " + annotation);
            }
            java.util.Map.Entry<Integer, TsvToken> endEntry = tokenEndIndex.ceilingEntry(end);
            if (endEntry == null) {
                throw new IllegalStateException("Unable to find a token ending at or after offset " + end + " for annotation: " + annotation);
            }
            TsvToken beginToken = beginEntry.getValue();
            TsvToken endToken = endEntry.getValue();

            // For zero-width annotations, the begin token must match the end token. The token
            // located via the tokenEndIndex wins and overrides the value obtained from the
            // tokenBeginIndex.
            if (begin == end) {
                beginToken = endToken;
            }

            boolean singleToken = beginToken == endToken;
            boolean zeroWidth = begin == end;
            boolean multiTokenCapable = SPAN.equals(layerType) || CHAIN.equals(layerType);

            if (beginToken.getBegin() == begin && endToken.getEnd() == end) {
                // Annotation exactly matches token boundaries - single-token or multi-token,
                // the annotation is attached to the begin token in either case.
                doc.mapFS2Unit(annotation, beginToken);
                beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                if (multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            } else if (zeroWidth) {
                // Zero-width annotation not aligned with the token boundaries - it gets its
                // own sub-token unit.
                TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                doc.mapFS2Unit(annotation, t);
                t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
            } else {
                if (beginToken.getBegin() < begin) {
                    // Annotation starts inside the begin token - create a sub-token unit on
                    // the begin token and map the annotation to it.
                    TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                    doc.mapFS2Unit(annotation, t);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                } else {
                    // If not the sub-token is ID-defining, then the begin token is ID-defining
                    beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    doc.mapFS2Unit(annotation, beginToken);
                }

                if (endToken.getEnd() > end) {
                    // Annotation ends inside the end token - create a sub-token unit on the
                    // end token. The annotation is only mapped to this unit if it spans more
                    // than one token, which is why singleToken is checked here.
                    TsvSubToken t = endToken.createSubToken(max(endToken.getBegin(), begin), end);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    if (!singleToken) {
                        doc.mapFS2Unit(annotation, t);
                    }
                } else if (!singleToken && multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            }

            // Multi-token annotations need to be added to all the tokens between the begin
            // and the end token
            if (multiTokenCapable && !singleToken) {
                ListIterator<TsvToken> i = tokens.listIterator(tokens.indexOf(beginToken));
                TsvToken t;
                while ((t = i.next()) != endToken) {
                    if (t != beginToken) {
                        t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    }
                }
            }

            // Multi-token span annotations must get a disambiguation ID
            if (SPAN.equals(layerType) && !singleToken) {
                doc.addDisambiguationId(annotation);
            }
        }
    }

    // Scan all created units to see which columns actually contains values
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            scanUnitForActiveColumns(token);
            scanUnitForAmbiguousSlotReferences(token);
            for (TsvSubToken subToken : token.getSubTokens()) {
                scanUnitForActiveColumns(subToken);
                scanUnitForAmbiguousSlotReferences(subToken);
            }
        }
    }

    // Activate the placeholder columns for any active types for which no other columns are
    // active.
    Set<Type> activeTypesNeedingPlaceholders = new HashSet<>(doc.getActiveTypes());
    for (TsvColumn col : doc.getActiveColumns()) {
        activeTypesNeedingPlaceholders.remove(col.uimaType);
    }
    for (TsvColumn col : doc.getSchema().getColumns()) {
        if (PLACEHOLDER.equals(col.featureType) && activeTypesNeedingPlaceholders.contains(col.uimaType)) {
            doc.activateColumn(col);
        }
    }
    return doc;
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XCasSchemaAnalyzer method analyze.
/**
 * Derives a {@link TsvSchema} from the given type system.
 * <p>
 * Columns are generated for the direct subtypes of the UIMA Annotation type (except document
 * annotations, tokens and sentences) according to their layer type. Afterwards the chain head
 * types belonging to the discovered chain link types are registered.
 *
 * @param aTypeSystem
 *            the type system to analyze.
 * @return the schema built from the type system.
 */
public static TsvSchema analyze(TypeSystem aTypeSystem) {
    TsvSchema result = new TsvSchema();
    Set<Type> linkTypes = new HashSet<>();

    // Only direct subtypes of the UIMA Annotation type are considered as layers. Currently,
    // WebAnno only supports such layers.
    Type annotationRoot = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION);
    Type documentAnnotation = aTypeSystem.getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
    for (Type layer : aTypeSystem.getDirectSubtypes(annotationRoot)) {
        // Document annotations, tokens and sentences never get columns of their own.
        boolean excluded = aTypeSystem.subsumes(documentAnnotation, layer)
                || layer.getName().equals(Token.class.getName())
                || layer.getName().equals(Sentence.class.getName());
        if (excluded) {
            continue;
        }

        switch (result.getLayerType(layer)) {
        case SPAN: {
            result.addColumn(new TsvColumn(layer, SPAN));
            generateColumns(aTypeSystem, result, SPAN, layer);
            break;
        }
        case RELATION: {
            Feature sourceFeature = layer.getFeatureByBaseName(FEAT_REL_SOURCE);
            result.addColumn(new TsvColumn(layer, RELATION, sourceFeature, RELATION_REF));
            generateColumns(aTypeSystem, result, RELATION, layer);
            break;
        }
        case CHAIN: {
            Feature elementTypeFeature = layer.getFeatureByBaseName(COREFERENCE_TYPE_FEATURE);
            result.addColumn(new TsvColumn(layer, CHAIN, elementTypeFeature, CHAIN_ELEMENT_TYPE));
            Feature linkTypeFeature = layer.getFeatureByBaseName(COREFERENCE_RELATION_FEATURE);
            result.addColumn(new TsvColumn(layer, CHAIN, linkTypeFeature, CHAIN_LINK_TYPE));
            // Remember the link type so its chain head type can be located below.
            linkTypes.add(layer);
            break;
        }
        case INCOMPATIBLE:
            // Incompatible types do not get a column definition.
            break;
        }
    }

    // Second pass: a chain head is a direct subtype of the annotation base type whose
    // "first" feature points to one of the chain link types collected above.
    Type annotationBase = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION_BASE);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationBase)) {
        Feature first = candidate.getFeatureByBaseName(CHAIN_FIRST_FEAT);
        if (first == null || !linkTypes.contains(first.getRange())) {
            continue;
        }
        result.addChainHeadType(candidate);
    }
    return result;
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn in project webanno by webanno.
the class Tsv3XDeserializer method read.
/**
 * Reads a TSV document from the given reader into the given CAS.
 * <p>
 * The format header and the schema are read first, followed by the content. Afterwards the
 * chains collected in the document are materialized in the CAS and the deferred actions are
 * executed.
 *
 * @param aIn
 *            the reader providing the TSV data.
 * @param aJCas
 *            the CAS to populate.
 * @throws IOException
 *             if reading from the input fails.
 */
public void read(LineNumberReader aIn, JCas aJCas) throws IOException {
    // Start with a fresh list of deferred actions for this read.
    deferredActions.set(new ArrayList<>());

    TsvFormatHeader header = readFormat(aIn);
    TsvSchema tsvSchema = readSchema(aIn, aJCas);

    // The schema declaration is followed by one extra blank line which is consumed here.
    String separatorLine = aIn.readLine();
    assert isEmpty(separatorLine);

    TsvDocument document = new TsvDocument(header, tsvSchema, aJCas);
    // Every column declared in the schema, and its type, is active in the document.
    for (TsvColumn declared : tsvSchema.getColumns()) {
        document.activateColumn(declared);
        document.activateType(declared.uimaType);
    }

    readContent(aIn, document);

    // Complete the addition of the chains
    CAS targetCas = aJCas.getCas();
    for (TsvChain chain : document.getChains()) {
        if (!chain.getElements().isEmpty()) {
            Iterator<AnnotationFS> remaining = chain.getElements().iterator();
            AnnotationFS previous = remaining.next();

            // The chain head points to the first link and is added to the indexes.
            FeatureStructure chainHead = targetCas.createFS(chain.getHeadType());
            setFeature(chainHead, CHAIN_FIRST_FEAT, previous);
            targetCas.addFsToIndexes(chainHead);

            // Each link points to its successor.
            while (remaining.hasNext()) {
                AnnotationFS current = remaining.next();
                setFeature(previous, CHAIN_NEXT_FEAT, current);
                previous = current;
            }
        }
    }

    // Run deferred actions
    for (Runnable deferred : deferredActions.get()) {
        deferred.run();
    }
}
Aggregations