use of org.apache.uima.cas.Feature in project webanno by webanno.
the class WebannoTsv3Reader method addAnnotationWithNoFeature.
/**
 * Registers the annotations found in the first stored column of {@code aUnit} for a layer
 * that carries no features.
 * <p>
 * The cell is split on unescaped {@code |} (stacked annotations) and then on unescaped
 * {@code ;}. An entry ending in {@code ]} carries an explicit reference id in brackets which
 * replaces {@code aRef} (an id containing {@code _} is read as 0). If an annotation is already
 * registered under that id in {@code aMultiTokUnits}, its end offset is moved to {@code aEnd};
 * otherwise the annotation taken from {@code aAnnos} is registered and added to the CAS
 * indexes.
 *
 * @param aJCas
 *            the JCas whose indexes receive newly started annotations
 * @param aType
 *            the layer type being read
 * @param aUnit
 *            the annotation unit whose cell is processed
 * @param aAnnos
 *            candidate annotations, indexed by the position of the stacked annotation in the
 *            cell
 * @param aMultiTokUnits
 *            annotations already started, per unit and reference id; updated in place
 * @param aEnd
 *            end offset written to an annotation that is being continued
 * @param aRef
 *            the current reference id
 * @return the reference id after processing; it is incremented once per processed entry
 */
private int addAnnotationWithNoFeature(JCas aJCas, Type aType, AnnotationUnit aUnit, List<AnnotationFS> aAnnos, Map<AnnotationUnit, Map<Integer, AnnotationFS>> aMultiTokUnits, int aEnd, int aRef) {
    String cell = annotationsPerPostion.get(aType).get(aUnit).get(0);
    if (cell.equals("_")) {
        // Nothing annotated on this unit for this layer.
        return aRef;
    }

    // Separators only count when they are not preceded by a backslash.
    String stackSeparator = "(?<!\\\\)" + Pattern.quote("|");
    String slotSeparator = "(?<!\\\\)" + Pattern.quote(";");

    int stackIndex = 0;
    for (String stacked : cell.split(stackSeparator)) {
        for (String entry : stacked.split(slotSeparator)) {
            if (entry.endsWith("]")) {
                String bracketed = entry.substring(entry.indexOf("[") + 1, entry.length() - 1);
                aRef = bracketed.contains("_") ? 0 : Integer.parseInt(bracketed);
                // The stripped label is not used afterwards; the call is kept so that an
                // entry without an opening bracket still fails exactly as before.
                entry = entry.substring(0, entry.indexOf("["));
            }

            // Look for an annotation already started under this reference id. The scan
            // does not stop at the first unit that matches, so the last matching unit wins.
            boolean continuesExisting = false;
            AnnotationFS existing = null;
            for (Map.Entry<AnnotationUnit, Map<Integer, AnnotationFS>> perUnit : aMultiTokUnits.entrySet()) {
                for (Map.Entry<Integer, AnnotationFS> candidate : perUnit.getValue().entrySet()) {
                    if (aRef == candidate.getKey()) {
                        continuesExisting = true;
                        existing = candidate.getValue();
                        break;
                    }
                }
            }

            if (continuesExisting) {
                // Extend the running annotation so that it ends at the current position.
                Feature endFeature = aType.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
                existing.setIntValue(endFeature, aEnd);
                setAnnoRefPerUnit(aUnit, aType, aRef, existing);
            } else {
                aMultiTokUnits.putIfAbsent(aUnit, new HashMap<>());
                AnnotationFS started = aAnnos.get(stackIndex);
                aMultiTokUnits.get(aUnit).put(aRef, started);
                aJCas.addFsToIndexes(started);
                setAnnoRefPerUnit(aUnit, aType, aRef, started);
            }
            aRef++;
        }
        stackIndex++;
    }
    return aRef;
}
use of org.apache.uima.cas.Feature in project webanno by webanno.
the class WebannoTsv3Reader method setAnnosPerTypePerUnit.
/**
 * Collects the annotation columns of one {@link AnnotationUnit} and saves them per layer type
 * for later access, while reading the document the first time. <br>
 * Layers are visited in the iteration order of {@code allLayers}. A layer without features
 * consumes exactly one column; a layer with features consumes one column per feature and
 * additionally gets {@code lines[0]} appended as its last stored value.
 *
 * @param lines
 *            the TAB separated values of a TSV line exported from WebAnno; element 0 is
 *            stored as the line number
 * @param unit
 *            the annotation unit (Token or sub-tokens)
 * @param ind
 *            index in {@code lines} of the first annotation column to read
 */
private void setAnnosPerTypePerUnit(String[] lines, AnnotationUnit unit, int ind) {
    for (Type type : allLayers.keySet()) {
        annotationsPerPostion.putIfAbsent(type, new LinkedHashMap<>());
        int featureCount = allLayers.get(type).size();
        // Look the unit's list up once instead of re-putting it for every column.
        List<String> unitAnnotations = annotationsPerPostion.get(type).computeIfAbsent(unit, key -> new ArrayList<>());
        if (featureCount == 0) {
            // A layer without features still occupies a single column.
            unitAnnotations.add(lines[ind]);
            ind++;
            continue;
        }
        for (int column = 0; column < featureCount; column++) {
            unitAnnotations.add(lines[ind]);
            ind++;
        }
        // Add at the last position the line number
        // It will be used to get Annotation unit
        unitAnnotations.add(lines[0]);
    }
}
use of org.apache.uima.cas.Feature in project webanno by webanno.
the class WebannoTsv3Writer method setRelationAnnotation.
/**
 * Collects the relation annotations of every layer listed in {@code relationLayers}.
 * <p>
 * The Token layer is skipped. For each remaining layer the governor and dependent features are
 * located by their short names, and every annotation of the layer is handed to
 * {@code setRelationAnnoPerFeature} together with the units and reference ids of both
 * endpoints. A reference id is only looked up when the endpoint type is not Token and the
 * corresponding unit is flagged in {@code ambigUnits}; otherwise it is 0. The collected map is
 * stored in {@code annotationsPerPostion} only if it is not empty.
 *
 * @param aJCas
 *            the JCas holding the annotations
 */
private void setRelationAnnotation(JCas aJCas) {
    for (String layerName : relationLayers) {
        if (layerName.equals(Token.class.getName())) {
            continue;
        }

        // Reuse what was already collected for this layer, if anything.
        Map<AnnotationUnit, List<List<String>>> annotationsPertype = annotationsPerPostion.get(layerName);
        if (annotationsPertype == null) {
            annotationsPertype = new HashMap<>();
        }

        Type type = getType(aJCas.getCas(), layerName);
        Feature dependentFeature = null;
        Feature governorFeature = null;
        for (Feature candidate : type.getFeatures()) {
            String shortName = candidate.getShortName();
            if (shortName.equals(DEPENDENT)) {
                dependentFeature = candidate;
            }
            if (shortName.equals(GOVERNOR)) {
                governorFeature = candidate;
            }
        }

        for (AnnotationFS relation : CasUtil.select(aJCas.getCas(), type)) {
            AnnotationFS depFs = (AnnotationFS) relation.getFeatureValue(dependentFeature);
            AnnotationFS govFs = (AnnotationFS) relation.getFeatureValue(governorFeature);
            Type govType = govFs.getType();

            // Prefer the first unit of the span; fall back to the unit of the whole span
            // when the first unit has no ambiguity entry.
            AnnotationUnit govUnit = getFirstUnit(getUnit(govFs.getBegin(), govFs.getEnd(), govFs.getCoveredText()));
            if (ambigUnits.get(govType.getName()).get(govUnit) == null) {
                govUnit = getUnit(govFs.getBegin(), govFs.getEnd(), govFs.getCoveredText());
            }

            // NOTE(review): the dependent is looked up under the governor's type as well;
            // presumably both endpoints always belong to the same layer - confirm.
            AnnotationUnit depUnit = getFirstUnit(getUnit(depFs.getBegin(), depFs.getEnd(), depFs.getCoveredText()));
            if (ambigUnits.get(govType.getName()).get(depUnit) == null) {
                depUnit = getUnit(depFs.getBegin(), depFs.getEnd(), depFs.getCoveredText());
            }

            // Dependency relations are resolved against the POS type from here on.
            if (type.getName().equals(Dependency.class.getName())) {
                govType = aJCas.getCas().getTypeSystem().getType(POS.class.getName());
            }

            // The WebAnno world do not ever process Token as an annotation
            boolean endpointIsToken = govType.getName().equals(Token.class.getName());
            int govRef = !endpointIsToken && ambigUnits.get(govType.getName()).get(govUnit).equals(true)
                    ? annotaionRefPerType.get(govType).get(govFs)
                    : 0;
            int depRef = !endpointIsToken && ambigUnits.get(govType.getName()).get(depUnit).equals(true)
                    ? annotaionRefPerType.get(govType).get(depFs)
                    : 0;

            setRelationAnnoPerFeature(annotationsPertype, type, relation, depUnit, govUnit, govRef, depRef, govType);
        }

        if (!annotationsPertype.isEmpty()) {
            annotationsPerPostion.put(layerName, annotationsPertype);
        }
    }
}
use of org.apache.uima.cas.Feature in project webanno by webanno.
the class WebannoTsv3Writer method setChainAnnoPerFeature.
/**
 * Builds the column values of one chain annotation and appends them to the values collected
 * for {@code aUnit}.
 * <p>
 * The sofa, begin, end, governor, dependent, first and next features are skipped. A missing
 * feature value is written as {@code *}; other values are passed through
 * {@code replaceEscapeChars}. The {@code REF_REL} feature is suffixed with
 * {@code ->chainNo-linkNo}, every other feature with {@code [chainNo]}. If no feature
 * produced a value, a single {@code *[chainNo]} placeholder is written. The unit is always
 * flagged as ambiguous for this layer.
 *
 * @param aAnnotationsPertype
 *            values collected so far for the layer, per unit; updated in place
 * @param aType
 *            the layer type of the chain annotation
 * @param aFs
 *            the chain annotation
 * @param aUnit
 *            the unit the values are recorded for
 * @param aLinkNo
 *            the link number used in the {@code REF_REL} suffix
 * @param achainNo
 *            the chain number used in all suffixes
 * @param aMultiUnit
 *            not used; the output is the same for single and multi unit annotations
 * @param aFirst
 *            not used
 */
private void setChainAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType, AnnotationFS aFs, AnnotationUnit aUnit, int aLinkNo, int achainNo, boolean aMultiUnit, boolean aFirst) {
    List<String> annoPerFeatures = new ArrayList<>();
    for (Feature feature : aType.getFeatures()) {
        if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end") || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT) || feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
            continue;
        }
        String annotation = aFs.getFeatureValueAsString(feature);
        if (annotation == null) {
            annotation = "*";
        } else {
            annotation = replaceEscapeChars(annotation);
        }
        if (feature.getShortName().equals(REF_REL)) {
            annotation = annotation + "->" + achainNo + "-" + aLinkNo;
        } else {
            // Single and multi unit annotations are written the same way.
            annotation = annotation + "[" + achainNo + "]";
        }
        // Create the feature set on first use so a layer that has not been registered yet
        // does not fail here.
        featurePerLayer.computeIfAbsent(aType.getName(), key -> new LinkedHashSet<>()).add(feature.getShortName());
        annoPerFeatures.add(annotation);
    }
    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    ambigUnits.putIfAbsent(aType.getName(), new HashMap<>());
    // coref are always ambig
    ambigUnits.get(aType.getName()).put(aUnit, true);
    if (annoPerFeatures.isEmpty()) {
        annoPerFeatures.add("*" + "[" + achainNo + "]");
    }
    aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
}
use of org.apache.uima.cas.Feature in project webanno by webanno.
the class WebannoTsv3Writer method setSpanAnnoPerFeature.
/**
 * Builds the column values of one span annotation and appends them to the values collected
 * for {@code aUnit}.
 * <p>
 * The sofa, begin, end, governor, dependent, first and next features are skipped. A slot
 * feature contributes two columns: the {@code ;}-separated roles and the {@code ;}-separated
 * targets, or {@code _} for both when the feature has no value. Each target is written as the
 * line number of its first unit, optionally followed by {@code -typeNumber} (when the slot
 * target type is {@code uima.tcas.Annotation}) and by {@code [targetRef]} (when the target
 * unit is flagged as ambiguous). Every other feature contributes one column: the escaped value
 * or {@code *} when missing, followed by {@code [ref]} when the annotation's own reference id
 * is greater than 0. A layer without any written feature gets a single {@code *} placeholder.
 *
 * @param aAnnotationsPertype
 *            values collected so far for the layer, per unit; updated in place
 * @param aType
 *            the layer type of the span annotation
 * @param aFs
 *            the span annotation
 * @param aUnit
 *            the unit the values are recorded for
 * @param aIsMultiToken
 *            passed through to {@code setAnnoFeature}
 * @param aIsFirst
 *            passed through to {@code setAnnoFeature}
 */
private void setSpanAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType, AnnotationFS aFs, AnnotationUnit aUnit, boolean aIsMultiToken, boolean aIsFirst) {
    List<String> annoPerFeatures = new ArrayList<>();
    featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());
    // Reference id of this annotation; dropped (0) when its unit is flagged as unambiguous.
    int ref = getRefId(aType, aFs, aUnit);
    if (ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)) != null && ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)).equals(false)) {
        ref = 0;
    }
    if (ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)) == null && ambigUnits.get(aType.getName()).get(aUnit).equals(false)) {
        ref = 0;
    }
    for (Feature feature : aType.getFeatures()) {
        if (feature.toString().equals("uima.cas.AnnotationBase:sofa") || feature.toString().equals("uima.tcas.Annotation:begin") || feature.toString().equals("uima.tcas.Annotation:end") || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT) || feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
            continue;
        }
        // if slot feature
        if (slotFeatures != null && slotFeatures.contains(feature.getName())) {
            if (aFs.getFeatureValue(feature) != null) {
                ArrayFS array = (ArrayFS) aFs.getFeatureValue(feature);
                StringBuilder sbRole = new StringBuilder();
                StringBuilder sbTarget = new StringBuilder();
                for (FeatureStructure linkFS : array.toArray()) {
                    String role = linkFS.getStringValue(linkFS.getType().getFeatureByBaseName("role"));
                    AnnotationFS targetFs = (AnnotationFS) linkFS.getFeatureValue(linkFS.getType().getFeatureByBaseName("target"));
                    Type tType = targetFs.getType();
                    AnnotationUnit firstUnit = getFirstUnit(targetFs);
                    // Keep the target's id in its own variable: reusing `ref` here would
                    // replace this annotation's id in the columns written after the slot.
                    int targetRef = getRefId(tType, targetFs, firstUnit);
                    // Check if the target is ambiguous or not
                    if (ambigUnits.get(tType.getName()).get(firstUnit).equals(false)) {
                        targetRef = 0;
                    }
                    if (role == null) {
                        role = "*";
                    } else {
                        // Escape special character
                        role = replaceEscapeChars(role);
                    }
                    // Separate from what was written for the preceding links.
                    if (sbRole.length() >= 1) {
                        sbRole.append(";");
                        sbTarget.append(";");
                    }
                    sbRole.append(role);
                    // record the actual target type column number if slot target is
                    // uima.tcas.Annotation
                    int targetTypeNumber = 0;
                    if (slotFeatureTypes.get(feature).getName().equals(CAS.TYPE_NAME_ANNOTATION)) {
                        targetTypeNumber = layerMaps.get(tType);
                    }
                    sbTarget.append(unitsLineNumber.get(firstUnit)).append(targetTypeNumber == 0 ? "" : "-" + targetTypeNumber).append(targetRef > 0 ? "[" + targetRef + "]" : "");
                }
                annoPerFeatures.add(sbRole.length() == 0 ? "_" : sbRole.toString());
                annoPerFeatures.add(sbTarget.length() == 0 ? "_" : sbTarget.toString());
            } else {
                // No links: both the role and the target column are empty.
                annoPerFeatures.add("_");
                annoPerFeatures.add("_");
            }
            featurePerLayer.get(aType.getName()).add(ROLE + feature.getName() + "_" + slotLinkTypes.get(feature.getName()));
            featurePerLayer.get(aType.getName()).add(slotFeatureTypes.get(feature).getName());
        } else {
            String annotation = aFs.getFeatureValueAsString(feature);
            if (annotation == null) {
                annotation = "*";
            } else {
                // Escape special character
                annotation = replaceEscapeChars(annotation);
            }
            annotation = annotation + (ref > 0 ? "[" + ref + "]" : "");
            // only add BIO markers to multiple annotations
            setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, annotation);
            featurePerLayer.get(aType.getName()).add(feature.getShortName());
        }
    }
    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    // If the layer do not have a feature at all, add dummy * as a place holder
    if (annoPerFeatures.isEmpty()) {
        setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, "*" + (ref > 0 ? "[" + ref + "]" : ""));
    }
    aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
}
Aggregations