use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter in project webanno by webanno.
the class MergeCasTest method simpleRelGovDiffTest.
/**
 * Merges the relation annotations of two TSV documents and verifies that diffing the merged CAS
 * against the expected document ({@code 1sentencesamerel3.tsv}) yields neither differing nor
 * incomplete configuration sets.
 */
@Test
public void simpleRelGovDiffTest() throws Exception {
    // Phase 1: load both annotator documents and merge them.
    Map<String, List<JCas>> annotatorCases = DiffUtils.loadWebAnnoTSV(null,
            "mergecas/rels/1sentencesamerel.tsv", "mergecas/rels/1sentencesamerel2.tsv");

    String dependencyTypeName = Dependency.class.getName();
    List<String> entryTypes = asList(dependencyTypeName, POS.class.getName());
    List<? extends DiffAdapter> diffAdapters = asList(
            new ArcDiffAdapter(dependencyTypeName, "Dependent", "Governor", "DependencyType"),
            SpanDiffAdapter.POS);

    addRandomMergeCas(annotatorCases);
    DiffResult annotatorDiff = CasDiff2.doDiff(entryTypes, diffAdapters,
            LinkCompareBehavior.LINK_TARGET_AS_LABEL, annotatorCases);
    JCas mergeCas = MergeCas.reMergeCas(annotatorDiff, getSingleCasByUser(annotatorCases));

    // Phase 2: diff the merge result against the expected document.
    JCas expectedCas = DiffUtils.readWebAnnoTSV("mergecas/rels/1sentencesamerel3.tsv", null);
    Map<String, List<JCas>> comparisonCases = new HashMap<>();
    comparisonCases.put("actual", asList(expectedCas));
    comparisonCases.put("merge", asList(mergeCas));

    DiffResult comparisonDiff = CasDiff2.doDiff(entryTypes, diffAdapters,
            LinkCompareBehavior.LINK_TARGET_AS_LABEL, comparisonCases);

    assertEquals(0, comparisonDiff.getDifferingConfigurationSets().size());
    assertEquals(0, comparisonDiff.getIncompleteConfigurationSets().size());
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter in project webanno by webanno.
the class MergeCasTest method relStackedTest.
/**
 * Merges two XMI documents that use a custom span layer and a custom relation layer, then checks
 * that both the merged CAS and the reference CAS ({@code stackedmerge.xmi}) contain two relation
 * annotations and four span annotations.
 */
@Test
public void relStackedTest() throws Exception {
    final String spanTypeName = "webanno.custom.Multivalspan";
    final String relTypeName = "webanno.custom.Multivalrel";

    // Build a combined type system holding the custom span and relation layers.
    TypeSystemDescription spanTypeSystem = DiffUtils.createCustomTypeSystem(SPAN_TYPE,
            spanTypeName, asList("f1", "f2"), null);
    TypeSystemDescription relTypeSystem = DiffUtils.createCustomTypeSystem(RELATION_TYPE,
            relTypeName, asList("rel1", "rel2"), spanTypeName);
    List<TypeSystemDescription> customTypes = new ArrayList<>(
            asList(spanTypeSystem, relTypeSystem));
    TypeSystemDescription customType = CasCreationUtils.mergeTypeSystems(customTypes);

    // Load both annotator documents and merge them.
    Map<String, List<JCas>> casByUser = DiffUtils.loadXMI(customType,
            "mergecas/multivalspanrel/stackedrel1.xmi",
            "mergecas/multivalspanrel/stackedrel2.xmi");
    List<String> entryTypes = asList(spanTypeName, relTypeName);
    List<? extends DiffAdapter> diffAdapters = asList(
            new ArcDiffAdapter(relTypeName, "Dependent", "Governor", "rel1", "rel2"),
            new SpanDiffAdapter(spanTypeName, "f1", "f2"));
    addRandomMergeCas(casByUser);
    DiffResult diff = CasDiff2.doDiff(entryTypes, diffAdapters,
            LinkCompareBehavior.LINK_TARGET_AS_LABEL, casByUser);
    JCas mergeCas = MergeCas.reMergeCas(diff, getSingleCasByUser(casByUser));

    // Reference document to compare the annotation counts against.
    JCas referenceCas = DiffUtils.readXMI("mergecas/multivalspanrel/stackedmerge.xmi",
            customType);

    // Both types are resolved through the merge CAS type system, as in the counts below.
    Type relType = mergeCas.getTypeSystem().getType(relTypeName);
    int mergedRelCount = CasUtil.select(mergeCas.getCas(), relType).size();
    int referenceRelCount = CasUtil.select(referenceCas.getCas(), relType).size();

    Type spanType = mergeCas.getTypeSystem().getType(spanTypeName);
    int mergedSpanCount = CasUtil.select(mergeCas.getCas(), spanType).size();
    int referenceSpanCount = CasUtil.select(referenceCas.getCas(), spanType).size();

    assertEquals(2, mergedRelCount);
    assertEquals(2, referenceRelCount);
    assertEquals(4, mergedSpanCount);
    assertEquals(4, referenceSpanCount);
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter in project webanno by webanno.
the class TwoPairedKappaTest method testTwoUserDiffArcAnnotation.
/**
 * Diffs the dependency annotations of two users and checks the resulting Cohen's kappa
 * agreement on the {@code DependencyType} feature together with the number of positions and of
 * differing / incomplete configuration sets.
 */
@Test
public void testTwoUserDiffArcAnnotation() throws Exception {
    // Per-user document lists. NOTE(review): not consumed by the assertions in this method.
    Map<User, List<SourceDocument>> docsPerUser = new HashMap<>();
    docsPerUser.put(user1, asList(document));
    docsPerUser.put(user2, asList(document));

    // One CAS per user: the baseline and the variant with a differing arc annotation.
    Map<User, JCas> casPerUser = new HashMap<>();
    casPerUser.put(user1, kappatestCas.getJCas());
    casPerUser.put(user2, kappaarcdiff.getJCas());

    // NOTE(review): also not consumed by the assertions in this method.
    Map<SourceDocument, Map<User, JCas>> casPerDocument = new HashMap<>();
    casPerDocument.put(document, casPerUser);

    // Check against new impl
    ArcDiffAdapter dependencyAdapter = new ArcDiffAdapter(Dependency.class, "Dependent",
            "Governor", "DependencyType");
    DiffResult diff = CasDiff2.doDiff(Dependency.class, dependencyAdapter,
            LinkCompareBehavior.LINK_TARGET_AS_LABEL, convert(casPerUser));
    AgreementResult kappa = AgreementUtils.getCohenKappaAgreement(diff,
            Dependency.class.getName(), "DependencyType", convert(casPerUser));

    // Dump the results to ease debugging, then assert.
    System.out.printf("Agreement: %s%n", kappa.toString());
    diff.print(System.out);

    assertEquals(0.86153d, kappa.getAgreement(), 0.00001d);
    assertEquals(9, diff.size());
    assertEquals(1, diff.getDifferingConfigurationSets().size());
    assertEquals(0, diff.getIncompleteConfigurationSets().size());
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter in project webanno by webanno.
the class AgreementUtils method makeStudy.
/**
 * Builds a coding annotation study for one feature of one annotation type from a diff result
 * and classifies every configuration set of that type as complete, irrelevant, differing,
 * incomplete (by position or by label) or plural (stacked).
 *
 * @param aDiff
 *            the diff result whose positions are examined.
 * @param aUsers
 *            the raters; they are copied and sorted, and the sorted order determines the
 *            column order of the values added to the study.
 * @param aType
 *            name of the annotation type to calculate agreement for.
 * @param aFeature
 *            base name of the feature to calculate agreement for.
 * @param aExcludeIncomplete
 *            if {@code true}, positions where a rater has no annotation or no label are not
 *            added to the study.
 * @param aNullLabelsAsEmpty
 *            if {@code true}, {@code null} labels are replaced by the empty string.
 * @param aCasMap
 *            the CASes per rater, used to look up the type system and the annotations.
 * @return the agreement result wrapping the study and the classified configuration sets.
 * @throws IllegalArgumentException
 *             if the type exists but has no feature with the given name.
 */
private static AgreementResult makeStudy(DiffResult aDiff, Collection<String> aUsers, String aType, String aFeature, boolean aExcludeIncomplete, boolean aNullLabelsAsEmpty, Map<String, List<JCas>> aCasMap) {
    List<String> users = new ArrayList<>(aUsers);
    Collections.sort(users);
    List<ConfigurationSet> completeSets = new ArrayList<>();
    List<ConfigurationSet> setsWithDifferences = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByPosition = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByLabel = new ArrayList<>();
    List<ConfigurationSet> pluralitySets = new ArrayList<>();
    List<ConfigurationSet> irrelevantSets = new ArrayList<>();
    CodingAnnotationStudy study = new CodingAnnotationStudy(users.size());
    // Check if the feature we are looking at is a primitive feature or a link feature
    // We do this by looking it up in the first available CAS. Mind that at this point all
    // CASes should have exactly the same typesystem.
    JCas someCas = findSomeCas(aCasMap);
    if (someCas == null) {
        // Well... there is NOTHING here!
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurtionSet(p)));
        return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }
    TypeSystem ts = someCas.getTypeSystem();
    // The requested type is unknown to the type system.
    // We should just do the right thing here which is: do nothing
    if (ts.getType(aType) == null) {
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurtionSet(p)));
        return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }
    // Check that the feature really exists instead of just getting a NPE later
    if (ts.getType(aType).getFeatureByBaseName(aFeature) == null) {
        throw new IllegalArgumentException("Type [" + aType + "] has no feature called [" + aFeature + "]");
    }
    boolean isPrimitiveFeature = ts.getType(aType).getFeatureByBaseName(aFeature).getRange().isPrimitive();
    nextPosition: for (Position p : aDiff.getPositions()) {
        ConfigurationSet cfgSet = aDiff.getConfigurtionSet(p);
        // Only calculate agreement for the given layer
        if (!cfgSet.getPosition().getType().equals(aType)) {
            // We don't even consider these as irrelevant, they are just filtered out
            continue;
        }
        // If the feature on a position is set, then it is a subposition
        boolean isSubPosition = p.getFeature() != null;
        // A position is irrelevant if a primitive feature meets a subposition or a
        // non-primitive feature meets a primary position - this is an inverted XOR!
        if (!(isPrimitiveFeature ^ isSubPosition)) {
            irrelevantSets.add(cfgSet);
            continue;
        }
        // A subposition is only relevant if it belongs to the requested feature
        if (isSubPosition && !aFeature.equals(cfgSet.getPosition().getFeature())) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }
        // If non of the current users has made any annotation at this position, then skip it
        if (users.stream().filter(u -> cfgSet.getCasGroupIds().contains(u)).count() == 0) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }
        Object[] values = new Object[users.size()];
        int i = 0;
        for (String user : users) {
            // The current user has made no annotation in this configuration set.
            if (!cfgSet.getCasGroupIds().contains(user)) {
                // NOTE(review): with more than two raters the same set may be added once per
                // missing rater - confirm whether consumers expect unique entries.
                incompleteSetsByPosition.add(cfgSet);
                if (aExcludeIncomplete) {
                    // Record as incomplete
                    continue nextPosition;
                } else {
                    // Record as missing value
                    values[i] = null;
                    i++;
                    continue;
                }
            }
            // Make sure a single user didn't do multiple alternative annotations at a single
            // position. So there is currently no support for calculating agreement on stacking
            // annotations.
            List<Configuration> cfgs = cfgSet.getConfigurations(user);
            if (cfgs.size() > 1) {
                pluralitySets.add(cfgSet);
                continue nextPosition;
            }
            Configuration cfg = cfgs.get(0);
            // Check if source and/or targets of a relation are stacked
            if (cfg.getPosition() instanceof ArcPosition) {
                ArcPosition pos = (ArcPosition) cfg.getPosition();
                FeatureStructure arc = cfg.getFs(user, pos.getCasId(), aCasMap);
                ArcDiffAdapter adapter = (ArcDiffAdapter) aDiff.getDiffAdapter(pos.getType());
                // Check if the source of the relation is stacked
                AnnotationFS source = FSUtil.getFeature(arc, adapter.getSourceFeature(), AnnotationFS.class);
                List<AnnotationFS> sourceCandidates = CasUtil.selectAt(arc.getCAS(), source.getType(), source.getBegin(), source.getEnd());
                if (sourceCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }
                // Check if the target of the relation is stacked
                AnnotationFS target = FSUtil.getFeature(arc, adapter.getTargetFeature(), AnnotationFS.class);
                List<AnnotationFS> targetCandidates = CasUtil.selectAt(arc.getCAS(), target.getType(), target.getBegin(), target.getEnd());
                if (targetCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }
            }
            // Only calculate agreement for the given feature
            FeatureStructure fs = cfg.getFs(user, cfg.getPosition().getCasId(), aCasMap);
            // BEGIN PARANOIA
            assert fs.getType().getFeatureByBaseName(aFeature).getRange().isPrimitive() == isPrimitiveFeature;
            // Primitive implies not subposition - a primitive feature on a subposition
            // should never have gotten here in the first place.
            assert !isPrimitiveFeature || !isSubPosition;
            // END PARANOIA
            if (isPrimitiveFeature && !isSubPosition) {
                // Primitive feature / primary position
                values[i] = getFeature(fs, aFeature);
            } else if (!isPrimitiveFeature && isSubPosition) {
                // Link feature / sub-position
                ArrayFS links = (ArrayFS) fs.getFeatureValue(fs.getType().getFeatureByBaseName(aFeature));
                FeatureStructure link = links.get(cfg.getAID(user).index);
                switch(cfg.getPosition().getLinkCompareBehavior()) {
                    case LINK_TARGET_AS_LABEL:
                        // FIXME The target feature name should be obtained from the feature
                        // definition!
                        AnnotationFS target = (AnnotationFS) link.getFeatureValue(link.getType().getFeatureByBaseName("target"));
                        values[i] = target.getBegin() + "-" + target.getEnd() + " [" + target.getCoveredText() + "]";
                        break;
                    case LINK_ROLE_AS_LABEL:
                        // FIXME The role feature name should be obtained from the feature
                        // definition!
                        String role = link.getStringValue(link.getType().getFeatureByBaseName("role"));
                        values[i] = role;
                        break;
                    default:
                        throw new IllegalStateException("Unknown link target comparison mode [" + cfg.getPosition().getLinkCompareBehavior() + "]");
                }
            } else {
                throw new IllegalStateException("Should never get here: primitive: " + fs.getType().getFeatureByBaseName(aFeature).getRange().isPrimitive() + "; subpos: " + isSubPosition);
            }
            // Optionally treat null labels as empty ones so they are not excluded from the
            // agreement calculation. The empty label is still a valid label.
            if (aNullLabelsAsEmpty && values[i] == null) {
                values[i] = "";
            }
            // "null" cannot be used in agreement calculations. We treat these as incomplete
            if (values[i] == null) {
                incompleteSetsByLabel.add(cfgSet);
                if (aExcludeIncomplete) {
                    continue nextPosition;
                }
            }
            i++;
        }
        // The set has differences if any rater's value deviates from the first rater's value.
        // Comparing against every rater (instead of only the first two) avoids an
        // ArrayIndexOutOfBoundsException for a single rater and no longer ignores
        // disagreements introduced by a third or later rater.
        for (int rater = 1; rater < values.length; rater++) {
            if (ObjectUtils.notEqual(values[0], values[rater])) {
                setsWithDifferences.add(cfgSet);
                break;
            }
        }
        // If the position feature is set (subposition), then it must match the feature we
        // are calculating agreement over
        assert cfgSet.getPosition().getFeature() == null || cfgSet.getPosition().getFeature().equals(aFeature);
        completeSets.add(cfgSet);
        study.addItemAsArray(values);
    }
    return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter in project webanno by webanno.
the class CasDiff2Test method relationDistanceTest.
/**
 * Diffs the dependency relations of two CoNLL documents and checks the number of positions, the
 * number of differing / incomplete configuration sets and the Cohen's kappa agreement on the
 * {@code DependencyType} feature.
 */
@Test
public void relationDistanceTest() throws Exception {
    Map<String, List<JCas>> casByUser = DiffUtils.load(
            "casdiff/relationDistance/user1.conll",
            "casdiff/relationDistance/user2.conll");

    // The dependency layer is the only layer under comparison.
    String dependencyTypeName = Dependency.class.getName();
    List<String> entryTypes = asList(dependencyTypeName);
    List<? extends DiffAdapter> diffAdapters = asList(
            new ArcDiffAdapter(Dependency.class.getName(), "Dependent", "Governor",
                    "DependencyType"));

    DiffResult diff = CasDiff2.doDiff(entryTypes, diffAdapters,
            LinkCompareBehavior.LINK_TARGET_AS_LABEL, casByUser);
    diff.print(System.out);

    assertEquals(27, diff.size());
    assertEquals(0, diff.getDifferingConfigurationSets().size());
    assertEquals(2, diff.getIncompleteConfigurationSets().size());

    // Agreement is computed on the first (and only) entry type.
    AgreementResult kappa = AgreementUtils.getCohenKappaAgreement(diff, entryTypes.get(0),
            "DependencyType", casByUser);
    assertEquals(1.0, kappa.getAgreement(), 0.000001d);
    assertEquals(2, kappa.getIncompleteSetsByPosition().size());
}
Aggregations