use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.
the class WebannoTsv3Writer method process.
/**
 * Writes the annotations of the given CAS to the output stream obtained from
 * {@code getOutputStream(aJCas, filenameSuffix)}.
 *
 * <p>The per-document state is reset and rebuilt first (slot link types, link maps,
 * token/sentence units, ambiguity flags, span, chain and relation annotations), then the
 * header is written, followed by one line per unit:</p>
 * <ul>
 * <li>before the first unit of a sentence, an empty line and one {@code #Text=} line per
 * line of the sentence text (the text is split on {@code "\n"});</li>
 * <li>the unit's line number, its {@code begin-end} offsets and its token text;</li>
 * <li>for every layer in {@code featurePerLayer}, the annotation values recorded for the
 * unit, with the values of stacked annotations joined position by position with
 * {@code "|"}, or {@code "_"} placeholders when nothing is recorded for the unit.</li>
 * </ul>
 *
 * @param aJCas the CAS to export
 * @throws AnalysisEngineProcessException wrapping any exception raised while collecting
 *         or writing the annotations
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    try (OutputStream docOS = getOutputStream(aJCas, filenameSuffix)) {
        resetVariables();
        setSlotLinkTypes();
        setLinkMaps(aJCas);
        setTokenSentenceAddress(aJCas);
        setAmbiguity(aJCas);
        setSpanAnnotation(aJCas);
        setChainAnnotation(aJCas);
        setRelationAnnotation(aJCas);
        writeHeader(docOS);

        for (AnnotationUnit unit : units) {
            // Only the first unit of a sentence is registered in sentenceUnits.
            if (sentenceUnits.containsKey(unit)) {
                String[] textLines = sentenceUnits.get(unit).split("\n");
                IOUtils.write(LF + "#Text=" + escapeSpecial(textLines[0]) + LF, docOS, encoding);
                // GITHUB ISSUE 318: New line in sentence should be exported as is
                for (int i = 1; i < textLines.length; i++) {
                    IOUtils.write("#Text=" + escapeSpecial(textLines[i]) + LF, docOS, encoding);
                }
            }

            // Sub-token units and full-token units share the same line layout.
            IOUtils.write(unitsLineNumber.get(unit) + TAB + unit.begin + "-" + unit.end + TAB
                    + unit.token + TAB, docOS, encoding);

            for (String type : featurePerLayer.keySet()) {
                Map<AnnotationUnit, List<List<String>>> annosPerUnit = annotationsPerPostion.get(type);
                List<List<String>> stacked = annosPerUnit == null ? null : annosPerUnit.get(unit);

                List<String> merged = null;
                if (stacked != null) {
                    for (List<String> values : stacked) {
                        if (merged == null) {
                            // Merge into a copy so the lists kept in annotationsPerPostion
                            // are not modified by writing them out.
                            merged = new ArrayList<>(values);
                        } else {
                            for (int i = 0; i < values.size(); i++) {
                                merged.set(i, merged.get(i) + "|" + values.get(i));
                            }
                        }
                    }
                }

                if (merged != null) {
                    for (String anno : merged) {
                        IOUtils.write(anno + TAB, docOS, encoding);
                    }
                } else {
                    // No annotation of this layer on this unit: one placeholder per feature,
                    // and a single placeholder for a layer that has no features at all.
                    int placeholders = Math.max(1, featurePerLayer.get(type).size());
                    for (int i = 0; i < placeholders; i++) {
                        IOUtils.write("_" + TAB, docOS, encoding);
                    }
                }
            }
            IOUtils.write(LF, docOS, encoding);
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.
the class WebannoTsv3Writer method setTokenSentenceAddress.
/**
 * Registers one {@link AnnotationUnit} per token and assigns it a line number of the form
 * {@code <sentence number>-<token number>}, both counted from 1.
 *
 * <p>Every unit is appended to {@code units}. The first token unit of each sentence is
 * additionally mapped to the covered text of that sentence in {@code sentenceUnits}.</p>
 *
 * @param aJCas the CAS whose sentences and tokens are indexed
 */
private void setTokenSentenceAddress(JCas aJCas) {
    int sentenceNo = 0;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        sentenceNo++;
        int tokenNo = 0;
        for (Token token : selectCovered(Token.class, sentence)) {
            tokenNo++;
            AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(), false,
                    token.getCoveredText());
            units.add(tokenUnit);
            // The sentence text is attached to the first token of the sentence only.
            if (tokenNo == 1) {
                sentenceUnits.put(tokenUnit, sentence.getCoveredText());
            }
            unitsLineNumber.put(tokenUnit, sentenceNo + "-" + tokenNo);
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.
the class WebannoTsv3Writer method setAmbiguity.
/**
 * Records in {@code ambigUnits}, per layer type name, whether each annotated unit is
 * ambiguous ({@code true}) or not ({@code false}).
 *
 * <p>For every annotation of every layer in {@code spanLayers} plus the token layer:</p>
 * <ul>
 * <li>if {@code isMultipleTokenAnnotation} reports a multi-token span, every unit returned
 * by {@code getSubUnits} is flagged {@code true};</li>
 * <li>otherwise the first unit of the annotation is flagged {@code true} when the layer
 * already has an entry for it (stacked annotation) and {@code false} when this is the
 * first annotation seen on that unit.</li>
 * </ul>
 *
 * <p>The token layer name is added to the {@code spanLayers} field itself, but only if it
 * is not already present. Appending it unconditionally on every call would make the field
 * grow with each processed document and would make this method visit the token layer
 * more than once, which marks every token as stacked on the second visit.</p>
 *
 * @param aJCas the CAS whose annotations are inspected
 */
private void setAmbiguity(JCas aJCas) {
    // NOTE(review): the entry is deliberately kept in the shared spanLayers field rather
    // than in a local copy, because setSpanAnnotation explicitly skips the token layer
    // when iterating spanLayers; confirm whether any other code relies on its presence.
    String tokenLayer = Token.class.getName();
    if (!spanLayers.contains(tokenLayer)) {
        spanLayers.add(tokenLayer);
    }

    for (String layer : spanLayers) {
        Type type = getType(aJCas.getCas(), layer);
        ambigUnits.putIfAbsent(type.getName(), new HashMap<>());
        Map<AnnotationUnit, Boolean> ambiguity = ambigUnits.get(type.getName());

        for (AnnotationFS fs : CasUtil.select(aJCas.getCas(), type)) {
            AnnotationUnit unit = getFirstUnit(fs);
            if (isMultipleTokenAnnotation(fs.getBegin(), fs.getEnd())) {
                // multiple token anno: every covered unit is ambiguous
                SubTokenAnno sta = new SubTokenAnno();
                sta.setBegin(fs.getBegin());
                sta.setEnd(fs.getEnd());
                sta.setText(fs.getCoveredText());
                Set<AnnotationUnit> sus = new LinkedHashSet<>();
                for (AnnotationUnit newUnit : getSubUnits(sta, sus)) {
                    ambiguity.put(newUnit, true);
                }
            } else {
                // An existing entry means another annotation of this layer was already
                // seen on the unit (stacked); otherwise this is the single/first one.
                boolean stacked = ambiguity.get(unit) != null;
                ambiguity.put(unit, stacked);
            }
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.
the class WebannoTsv3Writer method setSpanAnnotation.
/**
 * Collects the span annotations of every layer in {@code spanLayers} (except the token
 * layer) into {@code annotationsPerPostion}, keyed by layer name and unit.
 *
 * <p>As a first step, every feature whose name is listed in {@code slotFeatures} is mapped
 * in {@code slotFeatureTypes} to the type named by the next entry of {@code slotTargets},
 * in the order the features are encountered.</p>
 *
 * @param aJCas the CAS whose span annotations are collected
 */
private void setSpanAnnotation(JCas aJCas) {
    // store slot targets for each slot features
    int slotTargetIndex = 0;
    for (String layerName : spanLayers) {
        Type layerType = getType(aJCas.getCas(), layerName);
        for (Feature feature : layerType.getFeatures()) {
            boolean isSlotFeature = slotFeatures != null && slotFeatures.contains(feature.getName());
            if (!isSlotFeature) {
                continue;
            }
            slotFeatureTypes.put(feature, getType(aJCas.getCas(), slotTargets.get(slotTargetIndex)));
            slotTargetIndex++;
        }
    }

    for (String layerName : spanLayers) {
        if (layerName.equals(Token.class.getName())) {
            continue;
        }

        // Continue with what was already collected for this layer, if anything.
        Map<AnnotationUnit, List<List<String>>> perUnit = annotationsPerPostion.get(layerName);
        if (perUnit == null) {
            perUnit = new HashMap<>();
        }

        Type layerType = getType(aJCas.getCas(), layerName);
        for (AnnotationFS anno : CasUtil.select(aJCas.getCas(), layerType)) {
            AnnotationUnit exactUnit = new AnnotationUnit(anno.getBegin(), anno.getEnd(), false,
                    anno.getCoveredText());

            if (units.contains(exactUnit)) {
                // The annotation matches a registered unit: record it on that unit.
                setSpanAnnoPerFeature(perUnit, layerType, anno, exactUnit, false, false);
                continue;
            }

            // Annotation is on sub-token or multiple tokens
            SubTokenAnno subTokenAnno = new SubTokenAnno();
            subTokenAnno.setBegin(anno.getBegin());
            subTokenAnno.setEnd(anno.getEnd());
            subTokenAnno.setText(anno.getCoveredText());
            boolean multiToken = isMultiToken(anno);
            Set<AnnotationUnit> collected = new LinkedHashSet<>();
            boolean first = true;
            for (AnnotationUnit subUnit : getSubUnits(subTokenAnno, collected)) {
                setSpanAnnoPerFeature(perUnit, layerType, anno, subUnit, multiToken, first);
                first = false;
            }
        }

        // Layers without any collected annotation are not registered.
        if (!perUnit.isEmpty()) {
            annotationsPerPostion.put(layerName, perUnit);
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit in project webanno by webanno.
the class WebannoTsv3Writer method setChainAnnotation.
/**
 * Collects the chain annotations of every layer in {@code chainLayers} (except the token
 * layer) into {@code annotationsPerPostion}.
 *
 * <p>For each chain of the type {@code <layer> + CHAIN}, the links are followed from the
 * chain's {@code FIRST} feature through each link's {@code NEXT} feature. Every link is
 * passed to {@code addChinFeatureAnno} together with its unit, its 1-based position in
 * the chain and the 1-based chain number. The collected annotations are stored under the
 * type name of the chain's first link.</p>
 *
 * <p>A chain whose {@code FIRST} feature is unset has no links to export; it is skipped
 * and does not consume a chain number.</p>
 *
 * @param aJCas the CAS whose chain annotations are collected
 */
private void setChainAnnotation(JCas aJCas) {
    for (String l : chainLayers) {
        if (l.equals(Token.class.getName())) {
            continue;
        }
        Type type = getType(aJCas.getCas(), l + CHAIN);
        Feature chainFirst = type.getFeatureByBaseName(FIRST);
        int chainNo = 1;
        for (FeatureStructure chainFs : selectFS(aJCas.getCas(), type)) {
            AnnotationFS linkFs = (AnnotationFS) chainFs.getFeatureValue(chainFirst);
            if (linkFs == null) {
                // Empty chain: nothing to write, and dereferencing it would fail.
                continue;
            }

            // this is the layer with annotations
            Type lType = linkFs.getType();
            String linkLayer = lType.getName();

            Map<AnnotationUnit, List<List<String>>> annotationsPertype = annotationsPerPostion.get(linkLayer);
            if (annotationsPertype == null) {
                annotationsPertype = new HashMap<>();
            }

            Feature linkNext = lType.getFeatureByBaseName(NEXT);
            int linkNo = 1;
            while (linkFs != null) {
                AnnotationUnit unit = getUnit(linkFs.getBegin(), linkFs.getEnd(), linkFs.getCoveredText());
                AnnotationFS nextLinkFs = (AnnotationFS) linkFs.getFeatureValue(linkNext);
                addChinFeatureAnno(annotationsPertype, lType, linkFs, unit, linkNo, chainNo);
                linkFs = nextLinkFs;
                linkNo++;
            }

            if (!annotationsPertype.isEmpty()) {
                annotationsPerPostion.put(linkLayer, annotationsPertype);
            }
            chainNo++;
        }
    }
}
Aggregations