use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.
the class Tsv3XDeserializer method getOrCreateSpanAnnotation.
/**
 * Looks up the span annotation for the given column in the given unit and creates it if it
 * cannot be found.
 * <p>
 * The lookup order is: (1) an annotation of the column's type already registered on the unit
 * at the given stacking index, (2) an annotation registered on the document under the
 * disambiguation ID, whose end offset is then moved to the end of this unit, (3) a freshly
 * created annotation covering exactly this unit.
 *
 * @param aCol
 *            the column whose UIMA type the annotation must have
 * @param aUnit
 *            the unit the annotation is attached to
 * @param aStackingIndex
 *            the stacking index used for the lookup on the unit
 * @param aDisambiguationInfo
 *            the disambiguation ID as a decimal string, or {@code null} if there is none
 * @return the existing or newly created annotation
 * @throws NumberFormatException
 *             if {@code aDisambiguationInfo} is not {@code null} and not a parsable integer
 */
private AnnotationFS getOrCreateSpanAnnotation(TsvColumn aCol, TsvUnit aUnit, int aStackingIndex, String aDisambiguationInfo) {
    // -1 is the marker for "no disambiguation ID"
    int disambiguationId = -1;
    if (aDisambiguationInfo != null) {
        disambiguationId = Integer.parseInt(aDisambiguationInfo);
    }
    boolean hasDisambiguationId = disambiguationId != -1;

    // First choice: the annotation was already seen in this unit, but in another column.
    AnnotationFS annotation = aUnit.getUimaAnnotation(aCol.uimaType, aStackingIndex);

    // Second choice: the annotation was already seen in a previous unit.
    if (annotation == null && hasDisambiguationId) {
        annotation = aUnit.getDocument().getDisambiguatedAnnotation(disambiguationId);
        if (annotation != null) {
            aUnit.addUimaAnnotation(annotation);
            // Extend the existing annotation up to the end of this unit. AnnotationFS has no
            // setEnd() method, so the end offset is written through the feature name.
            setFeature(annotation, CAS.FEATURE_BASE_NAME_END, aUnit.getEnd());
        }
    }

    // Last resort: create a new annotation spanning this unit.
    if (annotation == null) {
        annotation = aUnit.getDocument().getJCas().getCas().createAnnotation(aCol.uimaType, aUnit.getBegin(), aUnit.getEnd());
        aUnit.addUimaAnnotation(annotation);

        // Slot features of the type start out as empty lists.
        for (TsvColumn col : aUnit.getDocument().getSchema().getColumns(aCol.uimaType)) {
            if (SLOT_TARGET.equals(col.featureType)) {
                setFeature(annotation, col.uimaFeature.getShortName(), emptyList());
            }
        }

        // DKPro Core token-attached annotations are additionally linked from their token.
        String typeName = aCol.uimaType.getName();
        if (Lemma.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setLemma((Lemma) annotation);
        } else if (Stem.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setStem((Stem) annotation);
        } else if (MorphologicalFeatures.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setMorph((MorphologicalFeatures) annotation);
        } else if (POS.class.getName().equals(typeName)) {
            ((TsvToken) aUnit).getUimaToken().setPos((POS) annotation);
        }
    }

    // Register the annotation under its disambiguation ID so that later units carrying the
    // same ID can find and extend it.
    if (hasDisambiguationId) {
        aUnit.getDocument().addDisambiguationId(annotation, disambiguationId);
    }
    return annotation;
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.
the class WebAnnoTsv3WriterTestBase method testTokenAttachedAnnotationsWithValues.
/**
 * Attaches a lemma, morphological features, a POS tag and a stem (each with a value) to the
 * first token of a one-sentence CAS and checks the writer output for these four span layers.
 */
@Test
public void testTokenAttachedAnnotationsWithValues() throws Exception {
    JCas jcas = makeJCasOneSentence();
    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));
    Token firstToken = allTokens.get(0);
    int begin = firstToken.getBegin();
    int end = firstToken.getEnd();

    Lemma lemma = new Lemma(jcas, begin, end);
    lemma.setValue("lemma1");
    lemma.addToIndexes();
    firstToken.setLemma(lemma);

    MorphologicalFeatures morph = new MorphologicalFeatures(jcas, begin, end);
    morph.setValue("morph");
    morph.setTense("tense1");
    morph.addToIndexes();
    firstToken.setMorph(morph);

    POS pos = new POS(jcas, begin, end);
    pos.setPosValue("pos1");
    pos.addToIndexes();
    firstToken.setPos(pos);

    Stem stem = new Stem(jcas, begin, end);
    stem.setValue("stem1");
    stem.addToIndexes();
    firstToken.setStem(stem);

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS,
            asList(MorphologicalFeatures.class, POS.class, Lemma.class, Stem.class));
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.
the class MergeCas method reMergeCas.
/**
 * Prunes the merge CAS (stored under {@code WebAnnoConst.CURATION_USER}) based on a
 * {@code DiffResult}, so that only annotations the users agree on remain.
 * <p>
 * For every diff position that has a configuration for the curation user, the annotations of
 * all users at that position are collected and every merge annotation is classified:
 * <ul>
 * <li>if the number of per-user entries differs from the number of CASes (incomplete
 * annotations), it is scheduled for deletion;</li>
 * <li>if {@code isAgree} reports agreement, it is kept and scheduled for slot clean-up; a
 * relation is only kept if its source and its target are agreed on as well;</li>
 * <li>otherwise (disagreement or stacked annotations) it is scheduled for deletion.</li>
 * </ul>
 * Deleted POS, stem, lemma and morphological feature annotations are also detached from the
 * token they cover. For kept annotations, every link-mode feature is rebuilt so that it only
 * contains links whose role and target are agreed on.
 *
 * @param aDiff
 *            the {@code CasDiff2.DiffResult}
 * @param aJCases
 *            a map of {@code JCas}es, one per user plus the merge CAS
 * @return the merge {@code JCas}
 */
public static JCas reMergeCas(DiffResult aDiff, Map<String, JCas> aJCases) {
    Set<FeatureStructure> slotFeaturesToReset = new HashSet<>();
    Set<FeatureStructure> annotationsToDelete = new HashSet<>();
    Set<String> users = aJCases.keySet();

    for (Position position : aDiff.getPositions()) {
        ConfigurationSet cfgs = aDiff.getConfigurtionSet(position);
        if (cfgs.getConfigurations(WebAnnoConst.CURATION_USER).isEmpty()) {
            // nothing from the curation user at this position
            continue;
        }

        AnnotationFS mergeAnno = (AnnotationFS) cfgs.getConfigurations(WebAnnoConst.CURATION_USER).get(0)
                .getFs(WebAnnoConst.CURATION_USER, aJCases);

        // Annotations of every user at this position
        Map<String, List<FeatureStructure>> annosPerUser = new HashMap<>();
        getAllAnnosOnPosition(aJCases, annosPerUser, users, mergeAnno);

        for (FeatureStructure mergeFs : annosPerUser.get(WebAnnoConst.CURATION_USER)) {
            // incomplete annotations
            if (aJCases.size() != annosPerUser.size()) {
                annotationsToDelete.add(mergeFs);
                continue;
            }

            // disagreement or stacked annotations
            if (!isAgree(mergeFs, annosPerUser)) {
                annotationsToDelete.add(mergeFs);
                continue;
            }

            // agreed and not stacked
            Type mergeType = mergeFs.getType();
            Feature sourceFeat = mergeType.getFeatureByBaseName(WebAnnoConst.FEAT_REL_SOURCE);
            Feature targetFeat = mergeType.getFeatureByBaseName(WebAnnoConst.FEAT_REL_TARGET);
            boolean isRelation = sourceFeat != null && targetFeat != null;
            if (!isRelation) {
                slotFeaturesToReset.add(mergeFs);
                continue;
            }

            // A relation is only agreed on if both of its end points are agreed on too.
            AnnotationFS source = (AnnotationFS) mergeFs.getFeatureValue(sourceFeat);
            AnnotationFS target = (AnnotationFS) mergeFs.getFeatureValue(targetFeat);
            Map<String, List<FeatureStructure>> sourceAnnosPerUser = new HashMap<>();
            Map<String, List<FeatureStructure>> targetAnnosPerUser = new HashMap<>();
            getAllAnnosOnPosition(aJCases, sourceAnnosPerUser, users, source);
            getAllAnnosOnPosition(aJCases, targetAnnosPerUser, users, target);
            if (isAgree(source, sourceAnnosPerUser) && isAgree(target, targetAnnosPerUser)) {
                slotFeaturesToReset.add(mergeFs);
            } else {
                annotationsToDelete.add(mergeFs);
            }
        }
    }

    // Remove the annotations that are not agreed on or that are stacked.
    JCas mergeCas = aJCases.get(WebAnnoConst.CURATION_USER);
    for (FeatureStructure doomed : annotationsToDelete) {
        if (slotFeaturesToReset.contains(doomed)) {
            continue;
        }

        String typeName = doomed.getType().getName();
        int begin = ((AnnotationFS) doomed).getBegin();
        int end = ((AnnotationFS) doomed).getEnd();

        // POS, stem, lemma and morphological features are also referenced from the token they
        // cover, so that reference has to be cleared as well.
        if (typeName.equals(POS.class.getName())) {
            JCasUtil.selectCovered(mergeCas, Token.class, begin, end).get(0).setPos(null);
        } else if (typeName.equals(Stem.class.getName())) {
            JCasUtil.selectCovered(mergeCas, Token.class, begin, end).get(0).setStem(null);
        } else if (typeName.equals(Lemma.class.getName())) {
            JCasUtil.selectCovered(mergeCas, Token.class, begin, end).get(0).setLemma(null);
        } else if (typeName.equals(MorphologicalFeatures.class.getName())) {
            JCasUtil.selectCovered(mergeCas, Token.class, begin, end).get(0).setMorph(null);
        }

        mergeCas.removeFsFromIndexes(doomed);
    }

    // Clean the link features of the slot-bearing annotations that were kept.
    for (FeatureStructure baseFs : slotFeaturesToReset) {
        for (Feature roleFeature : baseFs.getType().getFeatures()) {
            if (!isLinkMode(baseFs, roleFeature)) {
                continue;
            }

            ArrayFS roleFss = (ArrayFS) WebAnnoCasUtil.getFeatureFS(baseFs, roleFeature.getShortName());
            if (roleFss == null) {
                continue;
            }

            Map<String, ArrayFS> roleAnnosPerUser = new HashMap<>();
            setAllRoleAnnosOnPosition(aJCases, roleAnnosPerUser, users, baseFs, roleFeature);

            // Start with all links and drop the ones that are not agreed on.
            FeatureStructure[] currentLinks = roleFss.toArray();
            List<FeatureStructure> linkFSes = new LinkedList<>(Arrays.asList(currentLinks));
            for (FeatureStructure roleFs : currentLinks) {
                // do not agree on some role features
                if (!isRoleAgree(roleFs, roleAnnosPerUser)) {
                    linkFSes.remove(roleFs);
                    continue;
                }

                for (Feature targetFeature : roleFs.getType().getFeatures()) {
                    if (isBasicFeature(targetFeature) || !targetFeature.getShortName().equals("target")) {
                        continue;
                    }

                    AnnotationFS targetFs = (AnnotationFS) roleFs.getFeatureValue(targetFeature);
                    if (targetFs == null) {
                        continue;
                    }

                    Map<String, List<FeatureStructure>> targetAnnosPerUser = new HashMap<>();
                    getAllAnnosOnPosition(aJCases, targetAnnosPerUser, users, targetFs);
                    // do not agree on targets
                    if (!isAgree(targetFs, targetAnnosPerUser)) {
                        linkFSes.remove(roleFs);
                    }
                }
            }

            ArrayFS array = baseFs.getCAS().createArrayFS(linkFSes.size());
            array.copyFromArray(linkFSes.toArray(new FeatureStructure[linkFSes.size()]), 0, 0, linkFSes.size());
            baseFs.setFeatureValue(roleFeature, array);
        }
    }

    return mergeCas;
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.
the class WebannoTsv3Reader method addAnnotations.
/**
 * Importing span annotations including slot annotations.
 * <p>
 * For every type in {@code annotationsPerPostion} and every unit of that type, the column
 * values read for the unit are applied, feature by feature, to the annotations supplied in
 * {@code aAnnosPerTypePerUnit}. A column value may hold several stacked annotations
 * (separated by an unescaped {@code |}), each of which may hold several slot values
 * (separated by an unescaped {@code ;}) and may carry a bracketed reference suffix
 * ({@code [...]}). The per-unit, per-reference annotations collected for a type are stored
 * in {@code annosPerRef} once the type has been processed.
 *
 * @param aJCas
 *            the JCas in which link feature structures are created and to whose indexes the
 *            annotations are added
 * @param aAnnosPerTypePerUnit
 *            for each type and unit, the annotations that receive the feature values
 */
private void addAnnotations(JCas aJCas, Map<Type, Map<AnnotationUnit, List<AnnotationFS>>> aAnnosPerTypePerUnit) {
for (Type type : annotationsPerPostion.keySet()) {
// unit -> (reference number -> annotation); stored in annosPerRef at the end of this iteration
Map<AnnotationUnit, Map<Integer, AnnotationFS>> multiTokUnits = new HashMap<>();
// Current reference number; replaced by the bracketed number of a value when there is one,
// otherwise advanced below for stacked annotations and after each unit that set prevAnnoFs
int ref = 1;
// to see if it is on multiple token
AnnotationFS prevAnnoFs = null;
for (AnnotationUnit unit : annotationsPerPostion.get(type).keySet()) {
int end = unit.end;
List<AnnotationFS> annos = aAnnosPerTypePerUnit.get(type).get(unit);
// Index of the current feature's value in the list of column values of this unit
int j = 0;
// The most recently seen role-link feature; handed to addSlotAnnotations once a target is set
Feature linkeF = null;
Map<AnnotationFS, List<FeatureStructure>> linkFSesPerSlotAnno = new HashMap<>();
// Types without any feature are handled by a dedicated helper
if (allLayers.get(type).size() == 0) {
ref = addAnnotationWithNoFeature(aJCas, type, unit, annos, multiTokUnits, end, ref);
continue;
}
for (Feature feat : allLayers.get(type)) {
String anno = annotationsPerPostion.get(type).get(unit).get(j);
// A value of "_" is skipped: nothing is set for this feature on this unit
if (!anno.equals("_")) {
// Index into annos; advances once per stacked annotation
int i = 0;
// if it is a slot annotation (multiple slots per
// single annotation
// (Target1<--role1--Base--role2-->Target2)
int slot = 0;
boolean targetAdd = false;
// Split on "|" unless it is preceded by a backslash
String stackedAnnoRegex = "(?<!\\\\)" + Pattern.quote("|");
String[] stackedAnnos = anno.split(stackedAnnoRegex);
for (String mAnnos : stackedAnnos) {
// Split on ";" unless it is preceded by a backslash
String multipleSlotAnno = "(?<!\\\\)" + Pattern.quote(";");
for (String mAnno : mAnnos.split(multipleSlotAnno)) {
// Text between the first "[" and the last character of the value, if present
String depRef = "";
String multSpliter = "(?<!\\\\)" + Pattern.quote("[");
// is this slot target ambiguous?
boolean ambigTarget = false;
if (mAnno.split(multSpliter).length > 1) {
ambigTarget = true;
depRef = mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1);
// A suffix containing "_" leaves ref untouched (it is parsed as two numbers in the
// dependency branch below); otherwise the suffix is the reference number
ref = depRef.contains("_") ? ref : Integer.valueOf(mAnno.substring(mAnno.indexOf("[") + 1, mAnno.length() - 1));
// Strip the bracketed suffix from the value
mAnno = mAnno.substring(0, mAnno.indexOf("["));
}
// "*" is turned into a null value
if (mAnno.equals("*")) {
mAnno = null;
}
// Look for an annotation already registered under the current reference number; if one
// is found, this unit continues that annotation instead of starting a new one
boolean isMultitoken = false;
if (!multiTokUnits.isEmpty() && prevAnnoFs != null && prevAnnoFs.getBegin() != unit.begin) {
contAnno: for (AnnotationUnit u : multiTokUnits.keySet()) {
for (Integer r : multiTokUnits.get(u).keySet()) {
if (ref == r) {
isMultitoken = true;
prevAnnoFs = multiTokUnits.get(u).get(r);
break contAnno;
}
}
}
}
if (isMultitoken) {
// Extend the existing annotation to the end of this unit and (re)set the feature value
Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
prevAnnoFs.setIntValue(endF, end);
mAnno = getEscapeChars(mAnno);
prevAnnoFs.setFeatureValueFromString(feat, mAnno);
if (feat.getShortName().equals(REF_LINK)) {
// since REF_REL does not start with BIO,
// update it...
annos.set(i, prevAnnoFs);
}
setAnnoRefPerUnit(unit, type, ref, prevAnnoFs);
} else {
if (roleLinks.containsKey(feat)) {
// Role column of a slot feature: create the link FS, set its "role" and remember it
// for the annotation it belongs to
linkeF = feat;
FeatureStructure link = aJCas.getCas().createFS(slotLinkTypes.get(feat));
Feature roleFeat = link.getType().getFeatureByBaseName("role");
mAnno = getEscapeChars(mAnno);
link.setStringValue(roleFeat, mAnno);
linkFSesPerSlotAnno.putIfAbsent(annos.get(i), new ArrayList<>());
linkFSesPerSlotAnno.get(annos.get(i)).add(link);
} else if (roleTargets.containsKey(feat)) {
// Target column of a slot feature: complete the slot-th link created above
FeatureStructure link = linkFSesPerSlotAnno.get(annos.get(i)).get(slot);
int customTypeNumber = 0;
// More than two "-"-separated parts: the last part is a number that selects the target
// type from layerMaps and is removed from the value
if (mAnno.split("-").length > 2) {
customTypeNumber = Integer.valueOf(mAnno.substring(mAnno.lastIndexOf("-") + 1));
mAnno = mAnno.substring(0, mAnno.lastIndexOf("-"));
}
AnnotationUnit targetUnit = token2Units.get(mAnno);
Type tType = null;
if (customTypeNumber == 0) {
tType = roleTargets.get(feat);
} else {
tType = layerMaps.get(customTypeNumber);
}
AnnotationFS targetFs;
if (ambigTarget) {
// A bracketed reference was given: pick the target registered under that reference
targetFs = annosPerRef.get(tType).get(targetUnit).get(ref);
} else {
// No reference given: take the first annotation registered for the target unit
targetFs = annosPerRef.get(tType).get(targetUnit).entrySet().iterator().next().getValue();
}
link.setFeatureValue(feat, targetFs);
addSlotAnnotations(linkFSesPerSlotAnno, linkeF);
targetAdd = true;
slot++;
} else if (feat.getShortName().equals(REF_REL)) {
// The part after "->" holds two "-"-separated numbers: the chain number and the link number
int chainNo = Integer.valueOf(mAnno.split("->")[1].split("-")[0]);
int LinkNo = Integer.valueOf(mAnno.split("->")[1].split("-")[1]);
chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
// Skip values whose chain/link pair has already been registered
if (chainAnnosPerTyep.get(type).get(chainNo) != null && chainAnnosPerTyep.get(type).get(chainNo).get(LinkNo) != null) {
continue;
}
// The part before "->" is the feature value; "*" stands for null
String refRel = mAnno.split("->")[0];
refRel = getEscapeChars(refRel);
if (refRel.equals("*")) {
refRel = null;
}
annos.get(i).setFeatureValueFromString(feat, refRel);
chainAnnosPerTyep.putIfAbsent(type, new TreeMap<>());
chainAnnosPerTyep.get(type).putIfAbsent(chainNo, new TreeMap<>());
chainAnnosPerTyep.get(type).get(chainNo).put(LinkNo, annos.get(i));
} else if (feat.getShortName().equals(REF_LINK)) {
mAnno = getEscapeChars(mAnno);
annos.get(i).setFeatureValueFromString(feat, mAnno);
aJCas.addFsToIndexes(annos.get(i));
} else if (depFeatures.get(type) != null && depFeatures.get(type).equals(feat)) {
// Dependency feature: the bracketed suffix, if any, holds two "_"-separated reference
// numbers; g selects the governor annotation and d the dependent annotation (0 = none)
int g = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[0]);
int d = depRef.isEmpty() ? 0 : Integer.valueOf(depRef.split("_")[1]);
Type depType = depTypess.get(type);
AnnotationUnit govUnit = token2Units.get(mAnno);
// The last column value of this unit is looked up in token2Units to obtain the dependent unit
int l = annotationsPerPostion.get(type).get(unit).size();
String thisUnit = annotationsPerPostion.get(type).get(unit).get(l - 1);
AnnotationUnit depUnit = token2Units.get(thisUnit);
AnnotationFS govFs;
AnnotationFS depFs;
if (depType.getName().equals(POS.class.getName())) {
// Relations declared on POS are attached to the tokens instead
depType = aJCas.getCas().getTypeSystem().getType(Token.class.getName());
govFs = units2Tokens.get(govUnit);
depFs = units2Tokens.get(unit);
} else // relation declared directly on tokens
if (depType.getName().equals(Token.class.getName())) {
govFs = units2Tokens.get(govUnit);
depFs = units2Tokens.get(unit);
} else if (g == 0 && d == 0) {
// No reference numbers: take the first registered annotation on each side
govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
} else if (g == 0) {
// Only the dependent is selected by reference number
govFs = annosPerRef.get(depType).get(govUnit).entrySet().iterator().next().getValue();
depFs = annosPerRef.get(depType).get(depUnit).get(d);
} else {
// The governor is selected by reference number; the dependent is the first registered one
// NOTE(review): d is ignored here even when it is non-zero - confirm this is intended
govFs = annosPerRef.get(depType).get(govUnit).get(g);
depFs = annosPerRef.get(depType).get(depUnit).entrySet().iterator().next().getValue();
}
annos.get(i).setFeatureValue(feat, depFs);
annos.get(i).setFeatureValue(type.getFeatureByBaseName(GOVERNOR), govFs);
// Adjust the offsets of the relation annotation: the begin is set to the dependent's
// begin when the dependent does not start after it, otherwise the end is set to the
// dependent's end
if (depFs.getBegin() <= annos.get(i).getBegin()) {
Feature beginF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_BEGIN);
annos.get(i).setIntValue(beginF, depFs.getBegin());
} else {
Feature endF = type.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
annos.get(i).setIntValue(endF, depFs.getEnd());
}
aJCas.addFsToIndexes(annos.get(i));
} else {
// Plain feature value: set it, index the annotation and register it under the current
// reference number so that later units can continue it
mAnno = getEscapeChars(mAnno);
multiTokUnits.putIfAbsent(unit, new HashMap<>());
multiTokUnits.get(unit).put(ref, annos.get(i));
prevAnnoFs = annos.get(i);
annos.get(i).setFeatureValueFromString(feat, mAnno);
aJCas.addFsToIndexes(annos.get(i));
setAnnoRefPerUnit(unit, type, ref, annos.get(i));
}
}
// Each stacked annotation gets its own reference number
if (stackedAnnos.length > 1) {
ref++;
}
}
// Token-attached annotation types are additionally linked from the token of this unit
if (type.getName().equals(POS.class.getName())) {
units2Tokens.get(unit).setPos((POS) annos.get(i));
}
if (type.getName().equals(Lemma.class.getName())) {
units2Tokens.get(unit).setLemma((Lemma) annos.get(i));
}
if (type.getName().equals(Stem.class.getName())) {
units2Tokens.get(unit).setStem((Stem) annos.get(i));
}
if (type.getName().equals(MorphologicalFeatures.class.getName())) {
units2Tokens.get(unit).setMorph((MorphologicalFeatures) annos.get(i));
}
i++;
}
// Once a slot target has been set, start with a fresh link map
if (targetAdd) {
linkFSesPerSlotAnno = new HashMap<>();
}
} else {
prevAnnoFs = null;
}
j++;
}
if (prevAnnoFs != null) {
ref++;
}
}
annosPerRef.put(type, multiTokUnits);
}
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem in project webanno by webanno.
the class RemoveZeroSizeTokensAndSentencesRepair method repair.
/**
 * Removes sentences and tokens whose begin offset is not smaller than their end offset from
 * the CAS indexes. For every removed token, the lemma, POS and stem attached to it are removed
 * from the indexes as well. One INFO message is added to {@code aMessages} per removed
 * annotation.
 * <p>
 * If the JCas cannot be obtained from the CAS, the problem is logged and reported as an ERROR
 * message; no exception is propagated.
 *
 * @param aProject
 *            the project the CAS belongs to (not used by this repair)
 * @param aCas
 *            the CAS to repair
 * @param aMessages
 *            receives the messages describing what was removed or what went wrong
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    try {
        // Iterate over a snapshot: removing feature structures from the indexes while
        // iterating over the index-backed collection returned by select() can invalidate the
        // underlying iterator (ConcurrentModificationException).
        for (Sentence s : new java.util.ArrayList<Sentence>(select(aCas.getJCas(), Sentence.class))) {
            if (s.getBegin() >= s.getEnd()) {
                s.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed sentence with illegal span: %s", s));
            }
        }

        // Same snapshot approach for the tokens and the annotations attached to them.
        for (Token t : new java.util.ArrayList<Token>(select(aCas.getJCas(), Token.class))) {
            if (t.getBegin() >= t.getEnd()) {
                Lemma lemma = t.getLemma();
                if (lemma != null) {
                    lemma.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed lemma attached to token with illegal span: %s", t));
                }
                POS pos = t.getPos();
                if (pos != null) {
                    pos.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed POS attached to token with illegal span: %s", t));
                }
                Stem stem = t.getStem();
                if (stem != null) {
                    stem.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed stem attached to token with illegal span: %s", t));
                }
                t.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed token with illegal span: %s", t));
            }
        }
    } catch (CASException e) {
        log.error("Unabled to access JCas", e);
        // The format needs a placeholder, otherwise the exception message passed as argument
        // never shows up in the reported message.
        aMessages.add(new LogMessage(this, LogLevel.ERROR, "Unabled to access JCas: %s", e.getMessage()));
    }
}
Aggregations