use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet in project webanno by webanno.
the class AgreementUtils method makeCodingStudy.
private static CodingAgreementResult makeCodingStudy(CasDiff aDiff, Collection<String> aUsers, String aType, String aFeature, boolean aExcludeIncomplete, boolean aNullLabelsAsEmpty, Map<String, List<CAS>> aCasMap) {
List<String> users = new ArrayList<>(aUsers);
Collections.sort(users);
List<ConfigurationSet> completeSets = new ArrayList<>();
List<ConfigurationSet> setsWithDifferences = new ArrayList<>();
List<ConfigurationSet> incompleteSetsByPosition = new ArrayList<>();
List<ConfigurationSet> incompleteSetsByLabel = new ArrayList<>();
List<ConfigurationSet> pluralitySets = new ArrayList<>();
List<ConfigurationSet> irrelevantSets = new ArrayList<>();
CodingAnnotationStudy study = new CodingAnnotationStudy(users.size());
// Check if the feature we are looking at is a primitive feature or a link feature
// We do this by looking it up in the first available CAS. Mind that at this point all
// CASes should have exactly the same typesystem.
CAS someCas = findSomeCas(aCasMap);
if (someCas == null) {
// Well... there is NOTHING here!
// All positions are irrelevant
aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
TypeSystem ts = someCas.getTypeSystem();
// We should just do the right thing here which is: do nothing
if (ts.getType(aType) == null) {
// All positions are irrelevant
aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
// Check that the feature really exists instead of just getting a NPE later
if (ts.getType(aType).getFeatureByBaseName(aFeature) == null) {
throw new IllegalArgumentException("Type [" + aType + "] has no feature called [" + aFeature + "]");
}
boolean isPrimitiveFeature = ts.getType(aType).getFeatureByBaseName(aFeature).getRange().isPrimitive();
nextPosition: for (Position p : aDiff.getPositions()) {
ConfigurationSet cfgSet = aDiff.getConfigurationSet(p);
// Only calculate agreement for the given layer
if (!cfgSet.getPosition().getType().equals(aType)) {
// We don't even consider these as irrelevant, they are just filtered out
continue;
}
// If the feature on a position is set, then it is a subposition
boolean isSubPosition = p.getFeature() != null;
// this is an inverted XOR!
if (!(isPrimitiveFeature ^ isSubPosition)) {
irrelevantSets.add(cfgSet);
continue;
}
// feature
if (isSubPosition && !aFeature.equals(cfgSet.getPosition().getFeature())) {
irrelevantSets.add(cfgSet);
continue nextPosition;
}
// If non of the current users has made any annotation at this position, then skip it
if (users.stream().filter(u -> cfgSet.getCasGroupIds().contains(u)).count() == 0) {
irrelevantSets.add(cfgSet);
continue nextPosition;
}
Object[] values = new Object[users.size()];
int i = 0;
for (String user : users) {
// this configuration set.
if (!cfgSet.getCasGroupIds().contains(user)) {
incompleteSetsByPosition.add(cfgSet);
if (aExcludeIncomplete) {
// Record as incomplete
continue nextPosition;
} else {
// Record as missing value
values[i] = null;
i++;
continue;
}
}
// Make sure a single user didn't do multiple alternative annotations at a single
// position. So there is currently no support for calculating agreement on stacking
// annotations.
List<Configuration> cfgs = cfgSet.getConfigurations(user);
if (cfgs.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
Configuration cfg = cfgs.get(0);
// Check if source and/or targets of a relation are stacked
if (cfg.getPosition() instanceof RelationPosition) {
RelationPosition pos = (RelationPosition) cfg.getPosition();
FeatureStructure arc = cfg.getFs(user, pos.getCasId(), aCasMap);
RelationDiffAdapter adapter = (RelationDiffAdapter) aDiff.getTypeAdapters().get(pos.getType());
// Check if the source of the relation is stacked
AnnotationFS source = FSUtil.getFeature(arc, adapter.getSourceFeature(), AnnotationFS.class);
List<AnnotationFS> sourceCandidates = CasUtil.selectAt(arc.getCAS(), source.getType(), source.getBegin(), source.getEnd());
if (sourceCandidates.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
// Check if the target of the relation is stacked
AnnotationFS target = FSUtil.getFeature(arc, adapter.getTargetFeature(), AnnotationFS.class);
List<AnnotationFS> targetCandidates = CasUtil.selectAt(arc.getCAS(), target.getType(), target.getBegin(), target.getEnd());
if (targetCandidates.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
}
// Only calculate agreement for the given feature
FeatureStructure fs = cfg.getFs(user, cfg.getPosition().getCasId(), aCasMap);
values[i] = extractValueForAgreement(fs, aFeature, cfg.getAID(user).index, cfg.getPosition().getLinkCompareBehavior());
// agreement calculation. The empty label is still a valid label.
if (aNullLabelsAsEmpty && values[i] == null) {
values[i] = "";
}
// "null" cannot be used in agreement calculations. We treat these as incomplete
if (values[i] == null) {
incompleteSetsByLabel.add(cfgSet);
if (aExcludeIncomplete) {
continue nextPosition;
}
}
i++;
}
if (ObjectUtils.notEqual(values[0], values[1])) {
setsWithDifferences.add(cfgSet);
}
// are calculating agreement over
assert cfgSet.getPosition().getFeature() == null || cfgSet.getPosition().getFeature().equals(aFeature);
completeSets.add(cfgSet);
study.addItemAsArray(values);
}
return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet in project webanno by webanno.
the class SuggestionBuilder method buildCurationContainer.
public CurationContainer buildCurationContainer(AnnotatorState aState) throws UIMAException, ClassNotFoundException, IOException, AnnotationException {
CurationContainer curationContainer = new CurationContainer();
// initialize Variables
SourceDocument sourceDocument = aState.getDocument();
Map<Integer, Integer> segmentBeginEnd = new HashMap<>();
Map<Integer, Integer> segmentNumber = new HashMap<>();
Map<String, Map<Integer, Integer>> segmentAdress = new HashMap<>();
// get annotation documents
List<AnnotationDocument> finishedAnnotationDocuments = new ArrayList<>();
for (AnnotationDocument annotationDocument : documentService.listAnnotationDocuments(aState.getDocument())) {
if (annotationDocument.getState().equals(AnnotationDocumentState.FINISHED)) {
finishedAnnotationDocuments.add(annotationDocument);
}
}
Map<String, CAS> casses = new HashMap<>();
AnnotationDocument randomAnnotationDocument = null;
CAS mergeCas;
// get the correction/automation CAS for the logged in user
if (aState.getMode().equals(AUTOMATION) || aState.getMode().equals(CORRECTION)) {
casses = listCasesforCorrection(randomAnnotationDocument, sourceDocument, aState.getMode());
mergeCas = getMergeCas(aState, sourceDocument, casses, randomAnnotationDocument, false, false, false);
String username = casses.keySet().iterator().next();
updateSegment(aState, segmentBeginEnd, segmentNumber, segmentAdress, casses.get(username), username, aState.getWindowBeginOffset(), aState.getWindowEndOffset());
} else {
casses = listCassesforCuration(finishedAnnotationDocuments, aState.getMode());
mergeCas = getMergeCas(aState, sourceDocument, casses, randomAnnotationDocument, false, false, false);
updateSegment(aState, segmentBeginEnd, segmentNumber, segmentAdress, mergeCas, CURATION_USER, getFirstSentence(mergeCas).getBegin(), mergeCas.getDocumentText().length());
}
segmentAdress.put(CURATION_USER, new HashMap<>());
Type sentenceType = getType(mergeCas, Sentence.class);
for (AnnotationFS s : selectCovered(mergeCas, sentenceType, diffRangeBegin, diffRangeEnd)) {
segmentAdress.get(CURATION_USER).put(s.getBegin(), getAddr(s));
}
List<DiffAdapter> adapters = getDiffAdapters(schemaService, aState.getAnnotationLayers());
long diffStart = System.currentTimeMillis();
log.debug("Calculating differences...");
int count = 0;
for (Integer begin : segmentBeginEnd.keySet()) {
Integer end = segmentBeginEnd.get(begin);
count++;
if (count % 100 == 0) {
log.debug("Processing differences: {} of {} sentences...", count, segmentBeginEnd.size());
}
DiffResult diff = doDiffSingle(adapters, LINK_ROLE_AS_LABEL, casses, begin, end).toResult();
SourceListView curationSegment = new SourceListView();
curationSegment.setBegin(begin);
curationSegment.setEnd(end);
curationSegment.setSentenceNumber(segmentNumber.get(begin));
if (diff.hasDifferences() || !diff.getIncompleteConfigurationSets().isEmpty()) {
// Is this confSet a diff due to stacked annotations (with same configuration)?
boolean stackedDiff = false;
stackedDiffSet: for (ConfigurationSet d : diff.getDifferingConfigurationSets().values()) {
for (Configuration c : d.getConfigurations()) {
if (c.getCasGroupIds().size() != d.getCasGroupIds().size()) {
stackedDiff = true;
break stackedDiffSet;
}
}
}
if (stackedDiff) {
curationSegment.setSentenceState(DISAGREE);
} else if (!diff.getIncompleteConfigurationSets().isEmpty()) {
curationSegment.setSentenceState(DISAGREE);
} else {
curationSegment.setSentenceState(AGREE);
}
} else {
curationSegment.setSentenceState(AGREE);
}
for (String username : segmentAdress.keySet()) {
curationSegment.getSentenceAddress().put(username, segmentAdress.get(username).get(begin));
}
curationContainer.getCurationViewByBegin().put(begin, curationSegment);
}
log.debug("Difference calculation completed in {}ms", (System.currentTimeMillis() - diffStart));
return curationContainer;
}
Aggregations