Use of org.apache.uima.cas.Type in project webanno by webanno.
Class WebannoTsv3Writer, method setSpanAnnoPerFeature.
/**
 * Renders the feature values of one span annotation for one unit and appends the resulting
 * list of column values to {@code aAnnotationsPertype} under {@code aUnit}.
 *
 * <p>For every feature of {@code aType} (except the built-in sofa/begin/end features and the
 * governor/dependent/first/next features) either one column value (primitive feature) or two
 * column values (slot feature: roles and targets) are produced. The feature names are
 * registered in {@code featurePerLayer} as a side effect.
 *
 * @param aAnnotationsPertype
 *            per-unit collection of rendered annotations; receives the new value list
 * @param aType
 *            the type whose features are rendered
 * @param aFs
 *            the annotation whose feature values are read
 * @param aUnit
 *            the unit the rendered values are attached to
 * @param aIsMultiToken
 *            passed through to {@code setAnnoFeature}
 * @param aIsFirst
 *            passed through to {@code setAnnoFeature}
 */
private void setSpanAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType, AnnotationFS aFs, AnnotationUnit aUnit, boolean aIsMultiToken, boolean aIsFirst) {
    List<String> annoPerFeatures = new ArrayList<>();
    featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());

    // Disambiguation id of the annotation itself. It is dropped (set to 0) when the unit is
    // recorded as not ambiguous. The entry of the first unit takes precedence; the entry of
    // aUnit is only consulted when the first unit has none.
    int ref = getRefId(aType, aFs, aUnit);
    Boolean ambiguous = ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit));
    if (ambiguous == null) {
        ambiguous = ambigUnits.get(aType.getName()).get(aUnit);
    }
    // Boolean.FALSE.equals(...) is null-safe: a unit without any entry keeps its ref id
    // instead of failing with a NullPointerException.
    if (Boolean.FALSE.equals(ambiguous)) {
        ref = 0;
    }

    for (Feature feature : aType.getFeatures()) {
        if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end") || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT) || feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
            continue;
        }
        // if slot feature
        if (slotFeatures != null && slotFeatures.contains(feature.getName())) {
            if (aFs.getFeatureValue(feature) != null) {
                ArrayFS array = (ArrayFS) aFs.getFeatureValue(feature);
                StringBuilder sbRole = new StringBuilder();
                StringBuilder sbTarget = new StringBuilder();
                // Separators are driven by the link position, not by the builder content, so
                // that the role list and the target list always have the same number of
                // entries (even if a role renders as an empty string).
                boolean firstLink = true;
                for (FeatureStructure linkFS : array.toArray()) {
                    String role = linkFS.getStringValue(linkFS.getType().getFeatureByBaseName("role"));
                    AnnotationFS targetFs = (AnnotationFS) linkFS.getFeatureValue(linkFS.getType().getFeatureByBaseName("target"));
                    Type tType = targetFs.getType();
                    AnnotationUnit firstUnit = getFirstUnit(targetFs);
                    // Keep the target's id in its own variable: reusing "ref" here would leak
                    // the target's id into the values of features rendered after this slot.
                    int targetRef = getRefId(tType, targetFs, firstUnit);
                    // Check if the target is ambiguous or not
                    if (Boolean.FALSE.equals(ambigUnits.get(tType.getName()).get(firstUnit))) {
                        targetRef = 0;
                    }
                    // A missing role is rendered as "*", otherwise special characters are escaped
                    role = (role == null) ? "*" : replaceEscapeChars(role);

                    if (!firstLink) {
                        sbRole.append(";");
                        sbTarget.append(";");
                    }
                    firstLink = false;

                    sbRole.append(role);
                    // record the actual target type column number if slot target is
                    // uima.tcas.Annotation
                    int targetTypeNumber = 0;
                    if (slotFeatureTypes.get(feature).getName().equals(CAS.TYPE_NAME_ANNOTATION)) {
                        targetTypeNumber = layerMaps.get(tType);
                    }
                    sbTarget.append(unitsLineNumber.get(firstUnit));
                    if (targetTypeNumber != 0) {
                        sbTarget.append("-").append(targetTypeNumber);
                    }
                    if (targetRef > 0) {
                        sbTarget.append("[").append(targetRef).append("]");
                    }
                }
                annoPerFeatures.add(sbRole.length() == 0 ? "_" : sbRole.toString());
                annoPerFeatures.add(sbTarget.length() == 0 ? "_" : sbTarget.toString());
            } else {
                // No links at all: both the role and the target column are empty
                annoPerFeatures.add("_");
                annoPerFeatures.add("_");
            }
            featurePerLayer.get(aType.getName()).add(ROLE + feature.getName() + "_" + slotLinkTypes.get(feature.getName()));
            featurePerLayer.get(aType.getName()).add(slotFeatureTypes.get(feature).getName());
        } else {
            String annotation = aFs.getFeatureValueAsString(feature);
            // A missing value is rendered as "*", otherwise special characters are escaped
            annotation = (annotation == null) ? "*" : replaceEscapeChars(annotation);
            annotation = annotation + (ref > 0 ? "[" + ref + "]" : "");
            // only add BIO markers to multiple annotations
            setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, annotation);
            featurePerLayer.get(aType.getName()).add(feature.getShortName());
        }
    }

    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    // If the layer does not have a feature at all, add a dummy * as a placeholder
    if (annoPerFeatures.isEmpty()) {
        setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, "*" + (ref > 0 ? "[" + ref + "]" : ""));
    }
    aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
}
Use of org.apache.uima.cas.Type in project webanno by webanno.
Class Tsv3XCasDocumentBuilder, method of.
/**
 * Builds a {@link TsvDocument} for the given CAS using the given schema.
 *
 * <p>The method creates the sentences and tokens, registers the chains found via the schema's
 * chain head types, attaches every annotation of every schema type to the tokens/sub-tokens
 * it covers, and finally activates the columns (or placeholder columns) that are in use.
 *
 * @param aSchema
 *            the schema describing the types and columns to consider
 * @param aJCas
 *            the CAS to read sentences, tokens and annotations from
 * @return the populated document
 * @throws IllegalStateException
 *             if an annotation starts before the first token or ends after the last token
 *             (this includes the case that the CAS contains no tokens at all)
 */
public static TsvDocument of(TsvSchema aSchema, JCas aJCas) {
    TsvFormatHeader format = new TsvFormatHeader("WebAnno TSV", "3.2");
    TsvDocument doc = new TsvDocument(format, aSchema, aJCas);

    // Fill document with all the sentences and tokens
    for (Sentence uimaSentence : select(aJCas, Sentence.class)) {
        TsvSentence sentence = doc.createSentence(uimaSentence);
        for (Token uimaToken : selectCovered(Token.class, uimaSentence)) {
            sentence.createToken(uimaToken);
        }
    }

    // Scan for chains: follow the first/next features from every chain head and register
    // each non-empty chain with the document
    for (Type headType : aSchema.getChainHeadTypes()) {
        for (FeatureStructure chainHead : CasUtil.selectFS(aJCas.getCas(), headType)) {
            List<AnnotationFS> elements = new ArrayList<>();
            AnnotationFS link = getFeature(chainHead, CHAIN_FIRST_FEAT, AnnotationFS.class);
            while (link != null) {
                elements.add(link);
                link = getFeature(link, CHAIN_NEXT_FEAT, AnnotationFS.class);
            }
            if (!elements.isEmpty()) {
                Type elementType = headType.getFeatureByBaseName(CHAIN_FIRST_FEAT).getRange();
                doc.createChain(headType, elementType, elements);
            }
        }
    }

    // Build indexes over the token start and end positions such that we can quickly locate
    // tokens based on their offsets.
    NavigableMap<Integer, TsvToken> tokenBeginIndex = new TreeMap<>();
    NavigableMap<Integer, TsvToken> tokenEndIndex = new TreeMap<>();
    List<TsvToken> tokens = new ArrayList<>();
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            tokenBeginIndex.put(token.getBegin(), token);
            tokenEndIndex.put(token.getEnd(), token);
            tokens.add(token);
        }
    }

    // Attach every annotation of every type known to the schema to the token/sub-token
    // units it covers.
    for (Type type : aSchema.getUimaTypes()) {
        LayerType layerType = aSchema.getLayerType(type);
        boolean addDisambiguationIdIfStacked = SPAN.equals(layerType);
        for (AnnotationFS annotation : CasUtil.select(aJCas.getCas(), type)) {
            doc.activateType(annotation.getType());

            // Get the relevant begin and end offsets for the current annotation
            int begin = annotation.getBegin();
            int end = annotation.getEnd();
            // For relations, the offsets of the relation target are used instead of the
            // offsets stored on the relation annotation itself.
            if (RELATION.equals(layerType)) {
                AnnotationFS targetFS = getFeature(annotation, FEAT_REL_TARGET, AnnotationFS.class);
                begin = targetFS.getBegin();
                end = targetFS.getEnd();
            }

            // floorEntry/ceilingEntry return null if there is no matching token, i.e. if the
            // annotation lies (partially) outside the token range. Fail with a message that
            // names the culprit instead of an anonymous NullPointerException.
            java.util.Map.Entry<Integer, TsvToken> beginEntry = tokenBeginIndex.floorEntry(begin);
            if (beginEntry == null) {
                throw new IllegalStateException("Unable to find a token starting at or before offset [" + begin + "] for annotation of type [" + annotation.getType().getName() + "] at [" + begin + "-" + end + "]");
            }
            java.util.Map.Entry<Integer, TsvToken> endEntry = tokenEndIndex.ceilingEntry(end);
            if (endEntry == null) {
                throw new IllegalStateException("Unable to find a token ending at or after offset [" + end + "] for annotation of type [" + annotation.getType().getName() + "] at [" + begin + "-" + end + "]");
            }
            TsvToken beginToken = beginEntry.getValue();
            TsvToken endToken = endEntry.getValue();

            // For a zero-width annotation the token located via the end index is used as the
            // begin token as well, replacing the value obtained from the tokenBeginIndex.
            if (begin == end) {
                beginToken = endToken;
            }

            boolean singleToken = beginToken == endToken;
            boolean zeroWitdh = begin == end;
            boolean multiTokenCapable = SPAN.equals(layerType) || CHAIN.equals(layerType);

            if (beginToken.getBegin() == begin && endToken.getEnd() == end) {
                // The annotation boundaries coincide with token boundaries: the begin token
                // is the unit the annotation is mapped to.
                doc.mapFS2Unit(annotation, beginToken);
                beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                if (multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            } else if (zeroWitdh) {
                // Zero-width annotation that does not sit on a token boundary: represent it
                // by a sub-token of its own.
                TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                doc.mapFS2Unit(annotation, t);
                t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
            } else {
                // If the annotation starts inside the begin token, a sub-token covering the
                // overlap is created and becomes the unit mapped to the annotation.
                if (beginToken.getBegin() < begin) {
                    TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                    doc.mapFS2Unit(annotation, t);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                } else {
                    // If not the sub-token is ID-defining, then the begin token is ID-defining
                    beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    doc.mapFS2Unit(annotation, beginToken);
                }

                // If the annotation ends inside the end token, a sub-token covering the
                // overlap is created. It is only mapped to the annotation if begin and end
                // token differ (singleToken is false).
                if (endToken.getEnd() > end) {
                    TsvSubToken t = endToken.createSubToken(max(endToken.getBegin(), begin), end);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    if (!singleToken) {
                        doc.mapFS2Unit(annotation, t);
                    }
                } else if (!singleToken && multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            }

            // Add the annotation to all tokens strictly between the begin token and the end
            // token
            if (multiTokenCapable && !singleToken) {
                ListIterator<TsvToken> i = tokens.listIterator(tokens.indexOf(beginToken));
                TsvToken t;
                while ((t = i.next()) != endToken) {
                    if (t != beginToken) {
                        t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    }
                }
            }

            // Multi-token span annotations must get a disambiguation ID
            if (SPAN.equals(layerType) && !singleToken) {
                doc.addDisambiguationId(annotation);
            }
        }
    }

    // Scan all created units to see which columns actually contains values
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            scanUnitForActiveColumns(token);
            scanUnitForAmbiguousSlotReferences(token);
            for (TsvSubToken subToken : token.getSubTokens()) {
                scanUnitForActiveColumns(subToken);
                scanUnitForAmbiguousSlotReferences(subToken);
            }
        }
    }

    // Activate the placeholder columns for any active types for which no other columns are
    // active.
    Set<Type> activeTypesNeedingPlaceholders = new HashSet<>(doc.getActiveTypes());
    for (TsvColumn col : doc.getActiveColumns()) {
        activeTypesNeedingPlaceholders.remove(col.uimaType);
    }
    for (TsvColumn col : doc.getSchema().getColumns()) {
        if (PLACEHOLDER.equals(col.featureType) && activeTypesNeedingPlaceholders.contains(col.uimaType)) {
            doc.activateColumn(col);
        }
    }
    return doc;
}
Use of org.apache.uima.cas.Type in project webanno by webanno.
Class Tsv3XCasSchemaAnalyzer, method analyze.
/**
 * Derives a {@link TsvSchema} from the given type system.
 *
 * <p>Only direct subtypes of the UIMA annotation type are inspected. Types subsumed by the
 * document annotation type as well as the token and sentence types are ignored. Depending on
 * the layer type the schema reports for a type, relation, chain or span columns are
 * registered. Afterwards, the direct subtypes of the annotation base type are searched for
 * chain heads, i.e. types whose "first" feature points to one of the chain link types.
 *
 * @param aTypeSystem
 *            the type system to analyze
 * @return the schema with all generated columns and chain head types
 */
public static TsvSchema analyze(TypeSystem aTypeSystem) {
    TsvSchema result = new TsvSchema();
    Set<Type> linkTypes = new HashSet<>();

    // Consider only direct subtypes of the UIMA Annotation type. Currently, WebAnno only
    // supports such layers.
    Type annotationRoot = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION);
    Type docAnnotation = aTypeSystem.getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationRoot)) {
        boolean ignored = aTypeSystem.subsumes(docAnnotation, candidate)
                || candidate.getName().equals(Token.class.getName())
                || candidate.getName().equals(Sentence.class.getName());
        if (ignored) {
            continue;
        }

        switch (result.getLayerType(candidate)) {
        case RELATION: {
            Feature sourceFeat = candidate.getFeatureByBaseName(FEAT_REL_SOURCE);
            result.addColumn(new TsvColumn(candidate, RELATION, sourceFeat, RELATION_REF));
            generateColumns(aTypeSystem, result, RELATION, candidate);
            break;
        }
        case CHAIN: {
            Feature elementFeat = candidate.getFeatureByBaseName(COREFERENCE_TYPE_FEATURE);
            result.addColumn(new TsvColumn(candidate, CHAIN, elementFeat, CHAIN_ELEMENT_TYPE));
            Feature relationFeat = candidate.getFeatureByBaseName(COREFERENCE_RELATION_FEATURE);
            result.addColumn(new TsvColumn(candidate, CHAIN, relationFeat, CHAIN_LINK_TYPE));
            linkTypes.add(candidate);
            break;
        }
        case SPAN: {
            result.addColumn(new TsvColumn(candidate, SPAN));
            generateColumns(aTypeSystem, result, SPAN, candidate);
            break;
        }
        case INCOMPATIBLE:
            // Incompatible types do not get any column definition.
            break;
        }
    }

    // Second pass: locate the chain head types below the annotation base type
    Type annotationBase = aTypeSystem.getType(CAS.TYPE_NAME_ANNOTATION_BASE);
    for (Type candidate : aTypeSystem.getDirectSubtypes(annotationBase)) {
        Feature first = candidate.getFeatureByBaseName(CHAIN_FIRST_FEAT);
        if (first == null) {
            continue;
        }
        if (linkTypes.contains(first.getRange())) {
            result.addChainHeadType(candidate);
        }
    }
    return result;
}
Use of org.apache.uima.cas.Type in project webanno by webanno.
Class Tsv3XDeserializer, method parseColumnDeclaration.
/**
 * Parses a single column declaration from the file header into a {@link TsvColumn}.
 *
 * <p>The kind of column is derived from the layer type and the declaration text:
 * slot role (span layer, role prefix), relation reference (relation layer, base type prefix),
 * chain element type / chain link type (chain layer, fixed feature names), slot target (span
 * layer, declaration is a type name) or primitive feature (declaration is a feature name of
 * {@code aUimaType}).
 *
 * @param aJCas
 *            the CAS whose type system is used to resolve type names
 * @param aLayerType
 *            the layer type the column belongs to
 * @param aUimaType
 *            the UIMA type the column belongs to
 * @param aIndex
 *            the index passed on to the created column
 * @param aColDecl
 *            the column declaration text
 * @param aPrevCol
 *            the previously parsed column; must be a slot role column if the current
 *            declaration is a slot target
 * @return the parsed column
 * @throws IOException
 *             if the declaration is malformed or refers to an unknown type or feature
 */
private TsvColumn parseColumnDeclaration(JCas aJCas, LayerType aLayerType, Type aUimaType, int aIndex, String aColDecl, TsvColumn aPrevCol) throws IOException {
    TypeSystem ts = aJCas.getTypeSystem();
    TsvColumn column;

    if (SPAN.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_ROLE)) {
        // SLOT_ROLE - starts with "ROLE_"
        String[] subFields = splitPreserveAllTokens(aColDecl, '_');
        // Index 1 holds the feature and index 2 the type name; report a truncated
        // declaration as a format error instead of an ArrayIndexOutOfBoundsException.
        if (subFields.length < 3) {
            throw new IOException("Invalid slot role column declaration [" + aColDecl + "]");
        }
        String featureName = substringAfter(subFields[1], ":");
        Feature feat = aUimaType.getFeatureByBaseName(featureName);
        if (feat == null) {
            throw new IOException("CAS type [" + aUimaType.getName() + "] does not have a feature called [" + featureName + "]");
        }
        column = new TsvColumn(aIndex, aUimaType, aLayerType, featureName, SLOT_ROLE);
        String typeName = subFields[2];
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    } else if (RELATION.equals(aLayerType) && startsWith(aColDecl, HEADER_PREFIX_BASE_TYPE)) {
        // RELATION_REF - starts with "BT_"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, FEAT_REL_SOURCE, RELATION_REF);
        String typeName = substringAfter(aColDecl, HEADER_PREFIX_BASE_TYPE);
        Type type = ts.getType(typeName);
        if (type == null) {
            throw new IOException("CAS does not contain a type called [" + typeName + "]");
        }
        column.setTargetTypeHint(type);
    } else if (CHAIN.equals(aLayerType) && COREFERENCE_TYPE_FEATURE.equals(aColDecl)) {
        // CHAIN_ELEMENT_TYPE - "referenceType"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_TYPE_FEATURE, CHAIN_ELEMENT_TYPE);
    } else if (CHAIN.equals(aLayerType) && COREFERENCE_RELATION_FEATURE.equals(aColDecl)) {
        // CHAIN_LINK_TYPE - "referenceRelation"
        column = new TsvColumn(aIndex, aUimaType, aLayerType, COREFERENCE_RELATION_FEATURE, CHAIN_LINK_TYPE);
    } else if (SPAN.equals(aLayerType) && (aColDecl.contains(".") || ts.getType(aColDecl) != null)) {
        // SLOT_TARGET - name of the link target type.
        // The parentheses matter: without them "&&" binds tighter than "||" and a column of
        // any layer type would be treated as a slot target whenever its declaration happens
        // to be a type name.
        Type type = ts.getType(aColDecl);
        // A declaration containing a "." merely looks like a type name; make sure that
        // the type really exists in the target CAS.
        if (type == null) {
            throw new IOException("CAS type system does not contain a type named [" + aColDecl + "]");
        }
        // The feature name is taken from the preceding slot role column.
        if (aPrevCol == null || !SLOT_ROLE.equals(aPrevCol.featureType)) {
            throw new IOException("Slot target column declaration must follow slot role column declaration");
        }
        column = new TsvColumn(aIndex, aUimaType, aLayerType, aPrevCol.uimaFeature.getShortName(), SLOT_TARGET);
        column.setTargetTypeHint(type);
    } else if (aUimaType.getFeatureByBaseName(aColDecl) != null) {
        // PRIMITIVE - feature name
        column = new TsvColumn(aIndex, aUimaType, aLayerType, aColDecl, PRIMITIVE);
    } else {
        throw new IOException("Type [" + aUimaType.getName() + "] does not contain a feature called [" + aColDecl + "]");
    }
    return column;
}
Use of org.apache.uima.cas.Type in project webanno by webanno.
Class Conll2009Reader, method convert.
/**
 * Reads sentences from the given reader and adds their content to the given CAS.
 *
 * <p>For every sentence the tokens are added to the document text (separated by a single
 * space, one sentence per line). Depending on the reader's flags, lemmas, part-of-speech
 * tags, morphological features, semantic predicates with their arguments and dependency
 * relations are created as well. Finally a sentence annotation is added.
 *
 * @param aJCas
 *            the CAS to populate
 * @param aReader
 *            the reader providing the input
 * @throws IOException
 *             if the part-of-speech mapping cannot be configured; presumably also if reading
 *             from {@code aReader} fails (depends on {@code readSentence})
 */
public void convert(JCas aJCas, BufferedReader aReader) throws IOException {
    if (readPos) {
        try {
            posMappingProvider.configure(aJCas.getCas());
        } catch (AnalysisEngineProcessException e) {
            throw new IOException(e);
        }
    }

    JCasBuilder builder = new JCasBuilder(aJCas);
    for (List<String[]> rows = readSentence(aReader); rows != null; rows = readSentence(aReader)) {
        if (rows.isEmpty()) {
            // Nothing was read for this sentence, so there is nothing to add.
            continue;
        }

        int sentenceStart = builder.getPosition();
        int sentenceStop = sentenceStart;

        // Tokens, Lemma, POS
        Map<Integer, Token> tokenById = new HashMap<>();
        List<SemPred> predicates = new ArrayList<>();
        int rowsLeft = rows.size();
        for (String[] row : rows) {
            rowsLeft--;

            // Token text goes into the document; remember the token under its id
            Token token = builder.add(row[FORM], Token.class);
            tokenById.put(Integer.parseInt(row[ID]), token);
            if (rowsLeft > 0) {
                // Tokens within a sentence are separated by a single space
                builder.add(" ");
            }

            // Lemma
            if (!UNUSED.equals(row[LEMMA]) && readLemma) {
                Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
                lemma.setValue(row[LEMMA]);
                lemma.addToIndexes();
                token.setLemma(lemma);
            }

            // Part-of-speech tag
            if (!UNUSED.equals(row[POS]) && readPos) {
                Type posType = posMappingProvider.getTagType(row[POS]);
                POS pos = (POS) aJCas.getCas().createAnnotation(posType, token.getBegin(), token.getEnd());
                pos.setPosValue(row[POS].intern());
                // WebAnno did not yet backport the coarse grained POS feature from
                // DKPro Core 1.9.0
                // POSUtils.assignCoarseValue(pos);
                pos.addToIndexes();
                token.setPos(pos);
            }

            // Morphological features
            if (!UNUSED.equals(row[FEAT]) && readMorph) {
                MorphologicalFeatures morph = new MorphologicalFeatures(aJCas, token.getBegin(), token.getEnd());
                morph.setValue(row[FEAT]);
                morph.addToIndexes();
            }

            // Semantic predicate
            if (!UNUSED.equals(row[PRED]) && readSemanticPredicate) {
                SemPred predicate = new SemPred(aJCas, token.getBegin(), token.getEnd());
                predicate.setCategory(row[PRED]);
                predicate.addToIndexes();
                predicates.add(predicate);
            }

            sentenceStop = token.getEnd();
        }

        // Dependencies
        if (readDependency) {
            for (String[] row : rows) {
                if (UNUSED.equals(row[DEPREL])) {
                    continue;
                }
                int depId = Integer.parseInt(row[ID]);
                int govId = Integer.parseInt(row[HEAD]);

                // Model the root as a loop onto itself. Not using ROOT here because WebAnno
                // cannot deal with elevated types.
                Token dependent = tokenById.get(depId);
                Token governor = (govId == 0) ? dependent : tokenById.get(govId);

                Dependency rel = new Dependency(aJCas);
                rel.setGovernor(governor);
                rel.setDependent(dependent);
                rel.setDependencyType(row[DEPREL]);
                rel.setBegin(rel.getDependent().getBegin());
                rel.setEnd(rel.getDependent().getEnd());
                // This is set via FSUtil because we still use the DKPro Core 1.7.0 JCas
                // classes
                FSUtil.setFeature(rel, "flavor", DependencyFlavor.BASIC);
                rel.addToIndexes();
            }
        }

        // Semantic arguments
        if (readSemanticPredicate) {
            // Each predicate has its own argument column: APRED + index of the predicate
            for (int p = 0; p < predicates.size(); p++) {
                int argColumn = APRED + p;
                List<SemArgLink> links = new ArrayList<>();
                for (String[] row : rows) {
                    if (UNUSED.equals(row[argColumn])) {
                        continue;
                    }
                    Token token = tokenById.get(Integer.parseInt(row[ID]));
                    SemArg arg = new SemArg(aJCas, token.getBegin(), token.getEnd());
                    arg.addToIndexes();

                    SemArgLink link = new SemArgLink(aJCas);
                    link.setRole(row[argColumn]);
                    link.setTarget(arg);
                    links.add(link);
                }
                predicates.get(p).setArguments(FSCollectionFactory.createFSArray(aJCas, links));
            }
        }

        // Sentence
        Sentence sentence = new Sentence(aJCas, sentenceStart, sentenceStop);
        sentence.addToIndexes();

        // One sentence per line.
        builder.add("\n");
    }
    builder.close();
}
Aggregations