use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter in project webanno by webanno.
the class CasDiffTest method relationStackedSpansTest.
/**
 * Diffs two annotators' relation annotations where the first annotator has two
 * {@code NamedEntity} annotations stacked on the governor token and the second annotator has
 * only one. The diff is expected to contain a single position with neither differing nor
 * incomplete configuration sets.
 */
@Test
public void relationStackedSpansTest() throws Exception {
    // Combine the auto-detected type system with the custom test types
    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(
            TypeSystemDescriptionFactory.createTypeSystemDescription(),
            TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/type/webannoTestTypes.xml")));

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class, Sentence.class);

    // Annotator A: governor span is stacked
    JCas jcasA = JCasFactory.createJCas(merged);
    {
        CAS cas = jcasA.getCas();
        tokenBuilder.buildTokens(jcasA, "This is a test .");

        List<Token> tokens = new ArrayList<>(select(jcasA, Token.class));
        Token first = tokens.get(0);
        Token last = tokens.get(tokens.size() - 1);

        NamedEntity governor = new NamedEntity(jcasA, first.getBegin(), first.getEnd());
        governor.addToIndexes();

        // Second named entity on exactly the same span as the governor -> stacking
        NamedEntity stacked = new NamedEntity(jcasA, first.getBegin(), first.getEnd());
        stacked.addToIndexes();

        NamedEntity dependent = new NamedEntity(jcasA, last.getBegin(), last.getEnd());
        dependent.addToIndexes();

        // The relation annotation covers the span of the dependent
        Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation");
        AnnotationFS relation = cas.createAnnotation(relationType, dependent.getBegin(),
                dependent.getEnd());
        FSUtil.setFeature(relation, "Governor", governor);
        FSUtil.setFeature(relation, "Dependent", dependent);
        FSUtil.setFeature(relation, "value", "REL");
        cas.addFsToIndexes(relation);
    }

    // Annotator B: same relation, but without the stacked span
    JCas jcasB = JCasFactory.createJCas(merged);
    {
        CAS cas = jcasB.getCas();
        tokenBuilder.buildTokens(jcasB, "This is a test .");

        List<Token> tokens = new ArrayList<>(select(jcasB, Token.class));
        Token first = tokens.get(0);
        Token last = tokens.get(tokens.size() - 1);

        NamedEntity governor = new NamedEntity(jcasB, first.getBegin(), first.getEnd());
        governor.addToIndexes();

        NamedEntity dependent = new NamedEntity(jcasB, last.getBegin(), last.getEnd());
        dependent.addToIndexes();

        Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation");
        AnnotationFS relation = cas.createAnnotation(relationType, dependent.getBegin(),
                dependent.getEnd());
        FSUtil.setFeature(relation, "Governor", governor);
        FSUtil.setFeature(relation, "Dependent", dependent);
        FSUtil.setFeature(relation, "value", "REL");
        cas.addFsToIndexes(relation);
    }

    // Insertion-ordered map so that user1 is processed before user2
    Map<String, List<CAS>> casByUser = new LinkedHashMap<>();
    casByUser.put("user1", asList(jcasA.getCas()));
    casByUser.put("user2", asList(jcasB.getCas()));

    List<? extends DiffAdapter> diffAdapters = asList(
            new RelationDiffAdapter("webanno.custom.Relation", WebAnnoConst.FEAT_REL_TARGET,
                    WebAnnoConst.FEAT_REL_SOURCE, "value"));

    DiffResult result = doDiff(diffAdapters, LINK_TARGET_AS_LABEL, casByUser).toResult();

    // For debugging: result.print(System.out);

    assertEquals(1, result.size());
    assertEquals(0, result.getDifferingConfigurationSets().size());
    assertEquals(0, result.getIncompleteConfigurationSets().size());

    // TODO: Agreement has moved to a separate project - an agreement test should be created
    // there. The check that used to live here computed Cohen's kappa for
    // "webanno.custom.Relation"/"value" and expected exactly 1 plurality set.
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter in project webanno by webanno.
the class CasDiffTest method relationLabelTest.
/**
 * Diffs the dependency relations of two annotators loaded from the WebAnno TSV3 files of this
 * test method's test suite folder. Expects 26 positions, exactly one of which has differing
 * configurations, and no incomplete ones.
 */
@Test
public void relationLabelTest() throws Exception {
    CAS user1Cas = loadWebAnnoTsv3("testsuite/" + testContext.getMethodName() + "/user1.tsv")
            .getCas();
    CAS user2Cas = loadWebAnnoTsv3("testsuite/" + testContext.getMethodName() + "/user2.tsv")
            .getCas();

    Map<String, List<CAS>> casByUser = new HashMap<>();
    casByUser.put("user1", asList(user1Cas));
    casByUser.put("user2", asList(user2Cas));

    RelationDiffAdapter dependencyAdapter = new RelationDiffAdapter(Dependency.class.getName(),
            "Dependent", "Governor", "DependencyType");
    List<? extends DiffAdapter> diffAdapters = asList(dependencyAdapter);

    DiffResult result = doDiff(diffAdapters, LINK_TARGET_AS_LABEL, casByUser).toResult();

    // For debugging: result.print(System.out);

    assertEquals(26, result.size());
    assertEquals(1, result.getDifferingConfigurationSets().size());
    assertEquals(0, result.getIncompleteConfigurationSets().size());

    // TODO: Agreement has moved to a separate project - an agreement test should be created
    // there. The check that used to live here computed Cohen's kappa on "DependencyType" and
    // expected an agreement of 0.958199 (+/- 0.000001) with 0 sets incomplete by position.
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter in project webanno by webanno.
the class CasDiffTest method relationDistanceTest.
/**
 * Diffs the dependency relations of two annotators loaded from the
 * {@code casdiff/relationDistance} CoNLL files. Expects 27 positions, none with differing
 * configurations and two that are incomplete.
 */
@Test
public void relationDistanceTest() throws Exception {
    Map<String, List<CAS>> casByUser = load(
            "casdiff/relationDistance/user1.conll",
            "casdiff/relationDistance/user2.conll");

    RelationDiffAdapter dependencyAdapter = new RelationDiffAdapter(Dependency.class.getName(),
            "Dependent", "Governor", "DependencyType");
    List<? extends DiffAdapter> diffAdapters = asList(dependencyAdapter);

    DiffResult result = doDiff(diffAdapters, LINK_TARGET_AS_LABEL, casByUser).toResult();

    // For debugging: result.print(System.out);

    assertEquals(27, result.size());
    assertEquals(0, result.getDifferingConfigurationSets().size());
    assertEquals(2, result.getIncompleteConfigurationSets().size());

    // TODO: Agreement has moved to a separate project - an agreement test should be created
    // there. The check that used to live here computed Cohen's kappa on "DependencyType" and
    // expected an agreement of 1.0 (+/- 0.000001) with 2 sets incomplete by position.
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter in project webanno by webanno.
the class AgreementUtils method makeCodingStudy.
/**
 * Builds a coding annotation study for one feature of one annotation type from a CAS diff and
 * classifies every configuration set of the diff along the way.
 * <p>
 * Configuration sets whose position belongs to another type are silently skipped. All other
 * sets end up in one or more of these lists, which are handed to the returned result:
 * irrelevant sets, sets incomplete by position (a user has no annotation there), sets incomplete
 * by label (a user's value is {@code null}), plurality sets (stacked annotations), sets with
 * differences, and complete sets. Only sets that reach the end of the loop body are added to
 * the study and to the complete sets.
 *
 * @param aDiff
 *            the diff providing positions, configuration sets and type adapters.
 * @param aUsers
 *            the users to compare; copied and sorted before use.
 * @param aType
 *            name of the annotation type to calculate agreement for.
 * @param aFeature
 *            base name of the feature to calculate agreement for.
 * @param aExcludeIncomplete
 *            if {@code true}, a set is skipped as soon as one user has no annotation or a
 *            {@code null} value in it; if {@code false}, the set is still added to the study
 *            with a {@code null} value for that user.
 * @param aNullLabelsAsEmpty
 *            if {@code true}, {@code null} feature values are replaced by the empty string.
 * @param aCasMap
 *            the CASes per user; used to look up the type system and the feature structures.
 * @return the agreement result wrapping the study and the classified configuration sets.
 * @throws IllegalArgumentException
 *             if the type exists in the type system but has no feature named {@code aFeature}.
 */
private static CodingAgreementResult makeCodingStudy(CasDiff aDiff, Collection<String> aUsers, String aType, String aFeature, boolean aExcludeIncomplete, boolean aNullLabelsAsEmpty, Map<String, List<CAS>> aCasMap) {
// Work on a sorted copy so that the position of a user in the values array is stable
List<String> users = new ArrayList<>(aUsers);
Collections.sort(users);
List<ConfigurationSet> completeSets = new ArrayList<>();
List<ConfigurationSet> setsWithDifferences = new ArrayList<>();
List<ConfigurationSet> incompleteSetsByPosition = new ArrayList<>();
List<ConfigurationSet> incompleteSetsByLabel = new ArrayList<>();
List<ConfigurationSet> pluralitySets = new ArrayList<>();
List<ConfigurationSet> irrelevantSets = new ArrayList<>();
CodingAnnotationStudy study = new CodingAnnotationStudy(users.size());
// Check if the feature we are looking at is a primitive feature or a link feature
// We do this by looking it up in the first available CAS. Mind that at this point all
// CASes should have exactly the same typesystem.
CAS someCas = findSomeCas(aCasMap);
if (someCas == null) {
// Well... there is NOTHING here!
// All positions are irrelevant
aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
TypeSystem ts = someCas.getTypeSystem();
// If the type is not defined in the type system, there is nothing to compare. We do not fail
// here but return a result with an empty study.
if (ts.getType(aType) == null) {
// All positions are irrelevant
aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
// Check that the feature really exists instead of just getting a NPE later
if (ts.getType(aType).getFeatureByBaseName(aFeature) == null) {
throw new IllegalArgumentException("Type [" + aType + "] has no feature called [" + aFeature + "]");
}
boolean isPrimitiveFeature = ts.getType(aType).getFeatureByBaseName(aFeature).getRange().isPrimitive();
nextPosition: for (Position p : aDiff.getPositions()) {
ConfigurationSet cfgSet = aDiff.getConfigurationSet(p);
// Only calculate agreement for the given layer
if (!cfgSet.getPosition().getType().equals(aType)) {
// We don't even consider these as irrelevant, they are just filtered out
continue;
}
// If the feature on a position is set, then it is a subposition
boolean isSubPosition = p.getFeature() != null;
// This is an inverted XOR: the position is only relevant if exactly one of the two flags
// is set, i.e. a primitive feature on a regular position or a non-primitive feature on a
// sub-position. Every other combination is irrelevant.
if (!(isPrimitiveFeature ^ isSubPosition)) {
irrelevantSets.add(cfgSet);
continue;
}
// A sub-position is only relevant if it belongs to the feature we are calculating
// agreement for
if (isSubPosition && !aFeature.equals(cfgSet.getPosition().getFeature())) {
irrelevantSets.add(cfgSet);
continue nextPosition;
}
// If none of the current users has made any annotation at this position, then skip it
if (users.stream().filter(u -> cfgSet.getCasGroupIds().contains(u)).count() == 0) {
irrelevantSets.add(cfgSet);
continue nextPosition;
}
// One value per user, in the order of the sorted user list
Object[] values = new Object[users.size()];
int i = 0;
for (String user : users) {
// The user has not made any annotation in this configuration set.
// Note that the set is added to the incomplete list once per missing user.
if (!cfgSet.getCasGroupIds().contains(user)) {
incompleteSetsByPosition.add(cfgSet);
if (aExcludeIncomplete) {
// Record as incomplete
continue nextPosition;
} else {
// Record as missing value
values[i] = null;
i++;
continue;
}
}
// Make sure a single user didn't do multiple alternative annotations at a single
// position. So there is currently no support for calculating agreement on stacking
// annotations.
List<Configuration> cfgs = cfgSet.getConfigurations(user);
if (cfgs.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
Configuration cfg = cfgs.get(0);
// Check if source and/or targets of a relation are stacked
if (cfg.getPosition() instanceof RelationPosition) {
RelationPosition pos = (RelationPosition) cfg.getPosition();
FeatureStructure arc = cfg.getFs(user, pos.getCasId(), aCasMap);
RelationDiffAdapter adapter = (RelationDiffAdapter) aDiff.getTypeAdapters().get(pos.getType());
// Check if the source of the relation is stacked
// NOTE(review): source is dereferenced without a null check - confirm that the source
// feature of a relation can never be unset here
AnnotationFS source = FSUtil.getFeature(arc, adapter.getSourceFeature(), AnnotationFS.class);
List<AnnotationFS> sourceCandidates = CasUtil.selectAt(arc.getCAS(), source.getType(), source.getBegin(), source.getEnd());
if (sourceCandidates.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
// Check if the target of the relation is stacked
// NOTE(review): same as above, target is dereferenced without a null check
AnnotationFS target = FSUtil.getFeature(arc, adapter.getTargetFeature(), AnnotationFS.class);
List<AnnotationFS> targetCandidates = CasUtil.selectAt(arc.getCAS(), target.getType(), target.getBegin(), target.getEnd());
if (targetCandidates.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
}
// Only calculate agreement for the given feature
FeatureStructure fs = cfg.getFs(user, cfg.getPosition().getCasId(), aCasMap);
values[i] = extractValueForAgreement(fs, aFeature, cfg.getAID(user).index, cfg.getPosition().getLinkCompareBehavior());
// If requested, replace null labels by the empty string so that they take part in the
// agreement calculation. The empty label is still a valid label.
if (aNullLabelsAsEmpty && values[i] == null) {
values[i] = "";
}
// "null" cannot be used in agreement calculations. We treat these as incomplete
if (values[i] == null) {
incompleteSetsByLabel.add(cfgSet);
if (aExcludeIncomplete) {
continue nextPosition;
}
}
i++;
}
// NOTE(review): only the values of the first two users are compared, and the array access
// requires at least two users - presumably callers always pass exactly two; confirm
if (ObjectUtils.notEqual(values[0], values[1])) {
setsWithDifferences.add(cfgSet);
}
// Sanity check: a sub-position reaching this point must belong to the feature we
// are calculating agreement over
assert cfgSet.getPosition().getFeature() == null || cfgSet.getPosition().getFeature().equals(aFeature);
completeSets.add(cfgSet);
study.addItemAsArray(values);
}
return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter in project webanno by webanno.
the class CasDiff method getDiffAdapters.
/**
 * Creates one diff adapter per enabled span or relation layer.
 * <p>
 * Enabled features without link mode are passed to the adapter as label features; enabled
 * features with link mode {@code SIMPLE} or {@code WITH_ROLE} are registered on the adapter as
 * link features. Layers of any other type are logged at debug level and ignored.
 *
 * @param schemaService
 *            used to list the supported features of a layer and to obtain the relation adapter
 *            of relation layers.
 * @param aLayers
 *            the layers to create adapters for.
 * @return the adapters, in the iteration order of the supported layers.
 * @throws IllegalStateException
 *             if an enabled feature has a link mode that is not handled here.
 */
public static List<DiffAdapter> getDiffAdapters(AnnotationSchemaService schemaService, Iterable<AnnotationLayer> aLayers) {
    List<DiffAdapter> result = new ArrayList<>();

    for (AnnotationLayer layer : aLayers) {
        if (!layer.isEnabled()) {
            continue;
        }

        // Collect the enabled non-link features. Link features are treated separately from
        // primitive label features (see the second pass below).
        Set<String> labelFeatures = new LinkedHashSet<>();
        for (AnnotationFeature feature : schemaService.listSupportedFeatures(layer)) {
            if (feature.isEnabled() && NONE.equals(feature.getLinkMode())) {
                labelFeatures.add(feature.getName());
            }
        }

        DiffAdapter_ImplBase adapter;
        switch (layer.getType()) {
        case SPAN_TYPE:
            adapter = new SpanDiffAdapter(layer.getName(), labelFeatures);
            break;
        case RELATION_TYPE:
            RelationAdapter relationAdapter = (RelationAdapter) schemaService.getAdapter(layer);
            adapter = new RelationDiffAdapter(layer.getName(),
                    relationAdapter.getSourceFeatureName(),
                    relationAdapter.getTargetFeatureName(), labelFeatures);
            break;
        default:
            LOG.debug("Curation for layer type [{}] not supported - ignoring", layer.getType());
            continue;
        }
        result.add(adapter);

        // Second pass: register the link features on the freshly created adapter
        for (AnnotationFeature feature : schemaService.listSupportedFeatures(layer)) {
            if (!feature.isEnabled()) {
                continue;
            }

            switch (feature.getLinkMode()) {
            case NONE:
                // Not a link feature - nothing to register
                break;
            case SIMPLE:
                adapter.addLinkFeature(feature.getName(), feature.getLinkTypeRoleFeatureName(),
                        null);
                break;
            case WITH_ROLE:
                adapter.addLinkFeature(feature.getName(), feature.getLinkTypeRoleFeatureName(),
                        feature.getLinkTypeTargetFeatureName());
                break;
            default:
                throw new IllegalStateException("Unknown link mode [" + feature.getLinkMode() + "]");
            }

            // NOTE(review): this adds every enabled feature (link features included) to the set
            // after the adapter has already been constructed with it. Whether that has any
            // effect depends on whether the adapter constructors copy the set - confirm.
            labelFeatures.add(feature.getName());
        }
    }

    return result;
}
Aggregations