use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.
the class CasDiff method addCas.
/**
 * Adds the annotations of one type from one CAS to the diff. CASes are added to the diff one
 * after another, building the diff iteratively. A CAS can be added multiple times for different
 * types. Make sure a CAS is not added twice with the same type!
 *
 * @param aCasGroupId
 *            the ID of the CAS group to add.
 * @param aCasId
 *            the index of the CAS within its CAS group.
 * @param aCas
 *            the CAS itself. If {@code null}, the CAS is still recorded in the internal list of
 *            the group, but no annotations are processed.
 * @param aType
 *            the type on which to calculate the diff.
 */
private void addCas(String aCasGroupId, int aCasId, CAS aCas, String aType) {
    // Remember that we have already seen this CAS.
    List<CAS> casList = cases.computeIfAbsent(aCasGroupId, key -> new ArrayList<>());

    // The same CAS may be passed in several times (once per type). Whether it still has to be
    // recorded is decided by its index and not by object identity, so that several "null"
    // CASes in one group are handled as well.
    if ((casList.size() - 1) < aCasId) {
        casList.add(aCas);
    }
    assert (casList.size() - 1) == aCasId : "Expected CAS ID [" + (casList.size() - 1) + "] but was [" + aCasId + "]";

    // Null CASes are recorded in the internal list above, but there is nothing to process.
    if (aCas == null) {
        LOG.debug("CAS group [" + aCasGroupId + "] does not contain a CAS at index [" + aCasId + "].");
        return;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing CAS group [" + aCasGroupId + "] CAS [" + aCasId + "].");
        try {
            FeatureStructure dmd = WebAnnoCasUtil.getDocumentMetadata(aCas);
            String collectionId = FSUtil.getFeature(dmd, "collectionId", String.class);
            String documentId = FSUtil.getFeature(dmd, "documentId", String.class);
            LOG.debug("User [" + collectionId + "] - Document [" + documentId + "]");
        } catch (IllegalArgumentException ignored) {
            // We use this information only for debugging - so we can ignore if the information
            // is missing.
        }
    }

    Type type = aCas.getTypeSystem().getType(aType);
    if (type == null) {
        // The type is not even known to the type system of this CAS
        LOG.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains no annotations of type [" + aType + "]");
        return;
    }

    // A range of (-1, -1) means that the whole CAS is considered; otherwise only annotations
    // covered by [begin, end] are taken into account.
    Collection<AnnotationFS> annotations;
    if (begin == -1 && end == -1) {
        annotations = select(aCas, type);
    } else {
        annotations = selectCovered(aCas, type, begin, end);
    }

    if (annotations.isEmpty()) {
        LOG.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains no annotations of type [" + aType + "]");
        return;
    }

    LOG.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains [" + annotations.size() + "] annotations of type [" + aType + "]");
    int posBefore = configSets.size();
    LOG.debug("Positions before: [" + posBefore + "]");

    // The adapter depends only on the type, so it is looked up once instead of twice per
    // annotation.
    DiffAdapter adapter = getAdapter(aType);
    for (AnnotationFS fs : annotations) {
        List<Position> positions = new ArrayList<>();
        // Primary position of the annotation
        positions.add(adapter.getPosition(aCasId, fs));
        // Generate secondary positions for multi-link features
        positions.addAll(adapter.generateSubPositions(aCasId, fs, linkCompareBehavior));

        for (Position pos : positions) {
            // Get/create configuration set at the current position
            ConfigurationSet configSet = configSets.get(pos);
            if (configSet == null) {
                configSet = new ConfigurationSet(pos);
                configSets.put(pos, configSet);
            }
            assert pos.getClass() == configSet.position.getClass() : "Position type mismatch [" + pos.getClass() + "] vs [" + configSet.position.getClass() + "]";
            // Merge FS into current set
            addConfiguration(configSet, aCasGroupId, fs);
        }
    }

    int posAfter = configSets.size();
    LOG.debug("Positions after: [" + posAfter + "] (delta: " + (posAfter - posBefore) + ")");
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.
the class CasDiff method equalsAnnotationFS.
/**
 * Tells whether two annotations resolve to the same diff position.
 *
 * @param aFS1
 *            the first annotation; its type selects the adapter used for both annotations.
 * @param aFS2
 *            the second annotation.
 * @return {@code true} if both annotations are non-null and their positions compare as equal;
 *         {@code false} otherwise (including when either argument is {@code null}).
 */
private boolean equalsAnnotationFS(AnnotationFS aFS1, AnnotationFS aFS2) {
    // Without two annotations there is nothing to compare
    boolean bothPresent = aFS1 != null && aFS2 != null;
    if (!bothPresent) {
        return false;
    }

    // Both positions are computed with the adapter of the first annotation's type and with
    // the same CAS ID (0)
    String typeName = aFS1.getType().getName();
    DiffAdapter typeAdapter = getAdapter(typeName);
    Position first = typeAdapter.getPosition(0, aFS1);
    Position second = typeAdapter.getPosition(0, aFS2);

    int order = first.compareTo(second);
    return order == 0;
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.
the class CasMerge method reMergeCas.
/**
 * Clears the target CAS and re-populates it from the given {@code DiffResult}. Positions are
 * processed in three passes: span annotations first, then slot features, and finally relations.
 * A position is only merged if {@code shouldMerge} accepts its configuration set; the annotation
 * merged is the representative of the first configuration at that position.
 * <p>
 * According to the original documentation of this method, the agreement rules are:
 * <p>
 * Any similar annotations stacked in a {@code CasDiff2.Position} will be assumed a difference
 * <p>
 * Any two annotations with different values will be assumed a difference
 * <p>
 * Any non-stacked empty/null annotations are assumed agreement
 * <p>
 * Any non-stacked annotations with similar values for each of the features are assumed
 * agreement
 * <p>
 * Any two link mode / slotable annotations which agree on the base features are assumed
 * agreement
 * <p>
 * Annotations that cannot be merged because of an {@link AnnotationException} are skipped; the
 * problem is logged and merging continues with the next position.
 *
 * @param aDiff
 *            the {@link DiffResult}
 * @param aTargetDocument
 *            the document the target CAS belongs to; its project is used to look up the layers.
 * @param aTargetUsername
 *            the user name passed on to the merge operations and to the published event.
 * @param aTargetCas
 *            the CAS which is cleared and then receives the merged annotations.
 * @param aCases
 *            a map of {@code CAS}s for each users and the random merge
 * @throws AnnotationException
 *             declared by the original signature; exceptions of this type raised while merging
 *             an individual position are caught inside this method.
 * @throws UIMAException
 *             if a UIMA operation on one of the CASes fails.
 */
public void reMergeCas(DiffResult aDiff, SourceDocument aTargetDocument, String aTargetUsername,
        CAS aTargetCas, Map<String, CAS> aCases) throws AnnotationException, UIMAException {
    // NOTE(review): this flag is set here and not reset anywhere in this method - confirm that
    // this is intended.
    silenceEvents = true;

    int updated = 0;
    int created = 0;
    Set<LogMessage> messages = new LinkedHashSet<>();

    // Remove any annotations from the target CAS - keep type system, sentences and tokens
    clearAnnotations(aTargetCas);

    // If there is nothing to merge, bail out
    if (aCases.isEmpty()) {
        return;
    }

    // The representative lookup expects a list of CASes per key. The mapping does not change
    // while merging, so it is built once here instead of once per position.
    Map<String, List<CAS>> casMap = new LinkedHashMap<>();
    aCases.forEach((k, v) -> casMap.put(k, asList(v)));

    // Set up a cache for resolving type to layer to avoid hammering the DB as we process each
    // position
    Map<String, AnnotationLayer> type2layer = aDiff.getPositions().stream()
            .map(Position::getType)
            .distinct()
            .map(type -> schemaService.findLayer(aTargetDocument.getProject(), type))
            .collect(toMap(AnnotationLayer::getName, identity()));

    List<String> layerNames = new ArrayList<>(type2layer.keySet());

    // Move token layer to front
    if (layerNames.contains(Token.class.getName())) {
        layerNames.remove(Token.class.getName());
        layerNames.add(0, Token.class.getName());
    }

    // Move sentence layer to front - it ends up before the token layer moved above
    if (layerNames.contains(Sentence.class.getName())) {
        layerNames.remove(Sentence.class.getName());
        layerNames.add(0, Sentence.class.getName());
    }

    // First pass: span positions in the layer order established above (sentences, then tokens,
    // then the other layers). Positions carrying a feature (slots) are left for the second pass.
    for (String layerName : layerNames) {
        List<SpanPosition> positions = aDiff.getPositions().stream()
                .filter(pos -> layerName.equals(pos.getType()))
                .filter(pos -> pos instanceof SpanPosition)
                .map(pos -> (SpanPosition) pos)
                .filter(pos -> pos.getFeature() == null)
                .collect(Collectors.toList());

        if (positions.isEmpty()) {
            continue;
        }

        LOG.debug("Processing {} span positions on layer {}", positions.size(), layerName);

        for (SpanPosition position : positions) {
            LOG.trace(" | processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);

            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }

            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                CasMergeOperationResult result = mergeSpanAnnotation(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, false);
                LOG.trace(" `-> merged annotation with agreement");

                switch (result.getState()) {
                case CREATED:
                    created++;
                    break;
                case UPDATED:
                    updated++;
                    break;
                default:
                    // Any other state is not counted
                    break;
                }
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }

    // After the spans are in place, we can merge the slot features
    for (String layerName : layerNames) {
        List<SpanPosition> positions = aDiff.getPositions().stream()
                .filter(pos -> layerName.equals(pos.getType()))
                .filter(pos -> pos instanceof SpanPosition)
                .map(pos -> (SpanPosition) pos)
                .filter(pos -> pos.getFeature() != null)
                .collect(Collectors.toList());

        if (positions.isEmpty()) {
            continue;
        }

        LOG.debug("Processing {} slot positions on layer [{}]", positions.size(), layerName);

        for (SpanPosition position : positions) {
            LOG.trace(" | processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);

            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }

            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                // The AID identifies which feature and which index within it is to be merged
                AID sourceFsAid = cfgs.getConfigurations().get(0).getRepresentativeAID();
                mergeSlotFeature(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, sourceFsAid.feature, sourceFsAid.index);
                LOG.trace(" `-> merged annotation with agreement");
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }

    // Finally, we merge the relations
    for (String layerName : layerNames) {
        List<RelationPosition> positions = aDiff.getPositions().stream()
                .filter(pos -> layerName.equals(pos.getType()))
                .filter(pos -> pos instanceof RelationPosition)
                .map(pos -> (RelationPosition) pos)
                .collect(Collectors.toList());

        if (positions.isEmpty()) {
            continue;
        }

        LOG.debug("Processing {} relation positions on layer [{}]", positions.size(), layerName);

        for (RelationPosition position : positions) {
            LOG.trace(" | processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);

            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }

            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                CasMergeOperationResult result = mergeRelationAnnotation(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, false);
                LOG.trace(" `-> merged annotation with agreement");

                switch (result.getState()) {
                case CREATED:
                    created++;
                    break;
                case UPDATED:
                    updated++;
                    break;
                default:
                    // Any other state is not counted
                    break;
                }
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }

    // Report the outcome so that skipped annotations are visible without enabling TRACE
    LOG.debug("Re-merge finished: {} created, {} updated, {} problems", created, updated, messages.size());

    if (eventPublisher != null) {
        eventPublisher.publishEvent(new BulkAnnotationEvent(this, aTargetDocument, aTargetUsername, null));
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.
the class AgreementUtils method configurationSetsWithItemsToCsv.
/**
 * Writes a header record followed by one record per item of the agreement study. Each record
 * starts with the position information of the configuration set at the same index as the item
 * and is followed by the category of each unit of the item.
 *
 * @param aOut
 *            the CSV printer to write to.
 * @param aAgreement
 *            the agreement result providing the CAS group IDs, the feature and the study items.
 * @param aSets
 *            the configuration sets; accessed by the running index of the study items.
 * @throws IOException
 *             if a record cannot be written.
 */
private static void configurationSetsWithItemsToCsv(CSVPrinter aOut, AgreementResult<ICodingAnnotationStudy> aAgreement, List<ConfigurationSet> aSets) throws IOException {
    // Header: fixed columns first, then one column per CAS group
    List<String> headerRow = new ArrayList<>(asList("Type", "Collection", "Document", "Layer", "Feature", "Position"));
    headerRow.addAll(aAgreement.getCasGroupIds());
    aOut.printRecord(headerRow);

    int index = 0;
    for (ICodingAnnotationItem studyItem : aAgreement.getStudy().getItems()) {
        // Items and configuration sets are matched up by their index
        ConfigurationSet cfgSet = aSets.get(index);
        Position pos = cfgSet.getPosition();

        List<String> row = new ArrayList<>();
        row.add(pos.getClass().getSimpleName());
        row.add(pos.getCollectionId());
        row.add(pos.getDocumentId());
        row.add(pos.getType());
        row.add(aAgreement.getFeature());
        row.add(pos.toMinimalString());

        // One column per unit of the item
        for (IAnnotationUnit unit : studyItem.getUnits()) {
            row.add(String.valueOf(unit.getCategory()));
        }

        aOut.printRecord(row);
        index++;
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.
the class AgreementUtils method makeCodingStudy.
/**
 * Builds a coding study for one feature of one type from the positions of a diff. Every
 * configuration set of the given type is sorted into one of the result lists (complete,
 * irrelevant, incomplete by position, incomplete by label, plurality, with differences) and
 * the complete ones are added as items to the study.
 *
 * @param aDiff
 *            the diff providing positions, configuration sets and type adapters.
 * @param aUsers
 *            the users (CAS group IDs) to calculate agreement for; they are processed in sorted
 *            order.
 * @param aType
 *            the name of the type to calculate agreement on.
 * @param aFeature
 *            the base name of the feature to calculate agreement on.
 * @param aExcludeIncomplete
 *            whether positions with a missing annotation or a {@code null} label are left out of
 *            the study.
 * @param aNullLabelsAsEmpty
 *            whether a {@code null} label is replaced by the empty string.
 * @param aCasMap
 *            the CASes per user.
 * @return the agreement result holding the study and the categorized configuration sets.
 * @throws IllegalArgumentException
 *             if the type exists but has no feature with the given name.
 */
private static CodingAgreementResult makeCodingStudy(CasDiff aDiff, Collection<String> aUsers, String aType, String aFeature, boolean aExcludeIncomplete, boolean aNullLabelsAsEmpty, Map<String, List<CAS>> aCasMap) {
    List<String> users = new ArrayList<>(aUsers);
    Collections.sort(users);

    List<ConfigurationSet> completeSets = new ArrayList<>();
    List<ConfigurationSet> setsWithDifferences = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByPosition = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByLabel = new ArrayList<>();
    List<ConfigurationSet> pluralitySets = new ArrayList<>();
    List<ConfigurationSet> irrelevantSets = new ArrayList<>();
    CodingAnnotationStudy study = new CodingAnnotationStudy(users.size());

    // Check if the feature we are looking at is a primitive feature or a link feature
    // We do this by looking it up in the first available CAS. Mind that at this point all
    // CASes should have exactly the same typesystem.
    CAS someCas = findSomeCas(aCasMap);
    if (someCas == null) {
        // Well... there is NOTHING here!
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
        return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }

    TypeSystem ts = someCas.getTypeSystem();

    // The type is unknown to the type system: there is nothing to calculate agreement on, so
    // we return an empty study instead of failing.
    if (ts.getType(aType) == null) {
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
        return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }

    // Check that the feature really exists instead of just getting a NPE later
    if (ts.getType(aType).getFeatureByBaseName(aFeature) == null) {
        throw new IllegalArgumentException("Type [" + aType + "] has no feature called [" + aFeature + "]");
    }

    boolean isPrimitiveFeature = ts.getType(aType).getFeatureByBaseName(aFeature).getRange().isPrimitive();

    nextPosition: for (Position p : aDiff.getPositions()) {
        ConfigurationSet cfgSet = aDiff.getConfigurationSet(p);

        // Only calculate agreement for the given layer
        if (!cfgSet.getPosition().getType().equals(aType)) {
            // We don't even consider these as irrelevant, they are just filtered out
            continue;
        }

        // If the feature on a position is set, then it is a subposition
        boolean isSubPosition = p.getFeature() != null;

        // Primitive features are evaluated on main positions and non-primitive features on
        // subpositions. Every other combination is irrelevant for this study.
        if (isPrimitiveFeature == isSubPosition) {
            irrelevantSets.add(cfgSet);
            continue;
        }

        // A subposition is only relevant if it belongs to the feature we are looking at
        if (isSubPosition && !aFeature.equals(cfgSet.getPosition().getFeature())) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }

        // If none of the current users has made any annotation at this position, then skip it
        if (users.stream().noneMatch(u -> cfgSet.getCasGroupIds().contains(u))) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }

        Object[] values = new Object[users.size()];
        int i = 0;
        for (String user : users) {
            // The user has no annotation in this configuration set.
            // NOTE(review): the set is added once per missing user, so it can end up in the
            // list more than once - confirm whether consumers expect that.
            if (!cfgSet.getCasGroupIds().contains(user)) {
                incompleteSetsByPosition.add(cfgSet);
                if (aExcludeIncomplete) {
                    // Record as incomplete
                    continue nextPosition;
                } else {
                    // Record as missing value
                    values[i] = null;
                    i++;
                    continue;
                }
            }

            // Make sure a single user didn't do multiple alternative annotations at a single
            // position. So there is currently no support for calculating agreement on stacking
            // annotations.
            List<Configuration> cfgs = cfgSet.getConfigurations(user);
            if (cfgs.size() > 1) {
                pluralitySets.add(cfgSet);
                continue nextPosition;
            }

            Configuration cfg = cfgs.get(0);

            // Check if source and/or targets of a relation are stacked
            if (cfg.getPosition() instanceof RelationPosition) {
                RelationPosition pos = (RelationPosition) cfg.getPosition();
                FeatureStructure arc = cfg.getFs(user, pos.getCasId(), aCasMap);
                RelationDiffAdapter adapter = (RelationDiffAdapter) aDiff.getTypeAdapters().get(pos.getType());

                // Check if the source of the relation is stacked
                AnnotationFS source = FSUtil.getFeature(arc, adapter.getSourceFeature(), AnnotationFS.class);
                List<AnnotationFS> sourceCandidates = CasUtil.selectAt(arc.getCAS(), source.getType(), source.getBegin(), source.getEnd());
                if (sourceCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }

                // Check if the target of the relation is stacked
                AnnotationFS target = FSUtil.getFeature(arc, adapter.getTargetFeature(), AnnotationFS.class);
                List<AnnotationFS> targetCandidates = CasUtil.selectAt(arc.getCAS(), target.getType(), target.getBegin(), target.getEnd());
                if (targetCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }
            }

            // Only calculate agreement for the given feature
            FeatureStructure fs = cfg.getFs(user, cfg.getPosition().getCasId(), aCasMap);
            values[i] = extractValueForAgreement(fs, aFeature, cfg.getAID(user).index, cfg.getPosition().getLinkCompareBehavior());

            // If requested, a missing label is turned into the empty label so that it takes
            // part in the agreement calculation. The empty label is still a valid label.
            if (aNullLabelsAsEmpty && values[i] == null) {
                values[i] = "";
            }

            // "null" cannot be used in agreement calculations. We treat these as incomplete
            if (values[i] == null) {
                incompleteSetsByLabel.add(cfgSet);
                if (aExcludeIncomplete) {
                    continue nextPosition;
                }
            }

            i++;
        }

        // The position has a difference as soon as any rater's value deviates from the first
        // one. For two raters this is the plain pairwise comparison; it also works for a single
        // rater (no difference) and for more than two raters.
        boolean hasDifference = false;
        for (int k = 1; k < values.length && !hasDifference; k++) {
            hasDifference = ObjectUtils.notEqual(values[0], values[k]);
        }
        if (hasDifference) {
            setsWithDifferences.add(cfgSet);
        }

        // Sanity check: a set that makes it into the study is either a main position or a
        // subposition of the feature we are calculating agreement over
        assert cfgSet.getPosition().getFeature() == null || cfgSet.getPosition().getFeature().equals(aFeature);

        completeSets.add(cfgSet);
        study.addItemAsArray(values);
    }

    return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
Aggregations