Search in sources :

Example 1 with SpanPosition

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition in project webanno by webanno.

the class CasMergeTest method thatIncompleteAnnotationIsMerged.

/**
 * Checks the merge outcome when only one of two annotators supplied a POS value at a position
 * and merging of incomplete annotations has been switched on via
 * {@code setMergeIncompleteAnnotations(true)}.
 * <p>
 * Expected: the diff reports no differing configuration sets, it reports exactly one incomplete
 * configuration set located at the POS span 0-4 ("word"), and after re-merging the curator CAS
 * contains exactly one POS annotation.
 */
@Test
public void thatIncompleteAnnotationIsMerged() throws Exception {
    // First annotator: POS value "X" on [0,4]
    // (presumably the helper also creates the token - confirm against createTokenAndOptionalPos)
    CAS annotatedCas = CasFactory.createText("word");
    createTokenAndOptionalPos(annotatedCas, 0, 4, "X");

    // Second annotator: same span, but no POS value is passed
    CAS unannotatedCas = CasFactory.createText("word");
    createTokenAndOptionalPos(unannotatedCas, 0, 4, null);

    Map<String, List<CAS>> casByUser = new LinkedHashMap<>();
    casByUser.put("user1", asList(annotatedCas));
    casByUser.put("user2", asList(unannotatedCas));

    // The curator CAS gets the same document text as the first annotator CAS
    String documentText = casByUser.values().stream()
            .flatMap(Collection::stream)
            .findFirst()
            .get()
            .getDocumentText();
    JCas curatorCas = createText(documentText);

    DiffResult result = doDiff(diffAdapters, LINK_TARGET_AS_LABEL, casByUser).toResult();

    sut.setMergeIncompleteAnnotations(true);
    sut.reMergeCas(result, document, null, curatorCas.getCas(), getSingleCasByUser(casByUser));

    assertThat(result.getDifferingConfigurationSets()).isEmpty();

    SpanPosition expectedPosition = new SpanPosition(null, null, 0, POS.class.getName(), 0, 4, "word", null, null, -1, -1, null, null);
    assertThat(result.getIncompleteConfigurationSets().values())
            .extracting(cfgSet -> cfgSet.getPosition())
            .usingFieldByFieldElementComparator()
            .containsExactly(expectedPosition);

    // The incomplete annotation must have been copied into the curator CAS
    assertThat(select(curatorCas, POS.class)).hasSize(1);
}
Also used : CAS(org.apache.uima.cas.CAS) JCas(org.apache.uima.jcas.JCas) JCasFactory.createJCas(org.apache.uima.fit.factory.JCasFactory.createJCas) Collection(java.util.Collection) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.DiffResult) SpanPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Example 2 with SpanPosition

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition in project webanno by webanno.

the class CasMergeTest method thatIncompleteAnnotationIsNotMerged.

/**
 * Checks the merge outcome when only one of two annotators supplied a POS value at a position
 * and the merger is used as configured by the fixture (this test does not call
 * {@code setMergeIncompleteAnnotations}).
 * <p>
 * Expected: the diff reports no differing configuration sets, it reports exactly one incomplete
 * configuration set located at the POS span 0-4 ("word"), and after re-merging the curator CAS
 * contains no POS annotation.
 */
@Test
public void thatIncompleteAnnotationIsNotMerged() throws Exception {
    // First annotator: POS value "X" on [0,4]
    // (presumably the helper also creates the token - confirm against createTokenAndOptionalPos)
    CAS annotatedCas = CasFactory.createText("word");
    createTokenAndOptionalPos(annotatedCas, 0, 4, "X");

    // Second annotator: same span, but no POS value is passed
    CAS unannotatedCas = CasFactory.createText("word");
    createTokenAndOptionalPos(unannotatedCas, 0, 4, null);

    Map<String, List<CAS>> casByUser = new LinkedHashMap<>();
    casByUser.put("user1", asList(annotatedCas));
    casByUser.put("user2", asList(unannotatedCas));

    // The curator CAS gets the same document text as the first annotator CAS
    String documentText = casByUser.values().stream()
            .flatMap(Collection::stream)
            .findFirst()
            .get()
            .getDocumentText();
    JCas curatorCas = createText(documentText);

    DiffResult result = doDiff(diffAdapters, LINK_TARGET_AS_LABEL, casByUser).toResult();

    sut.reMergeCas(result, document, null, curatorCas.getCas(), getSingleCasByUser(casByUser));

    assertThat(result.getDifferingConfigurationSets()).isEmpty();

    SpanPosition expectedPosition = new SpanPosition(null, null, 0, POS.class.getName(), 0, 4, "word", null, null, -1, -1, null, null);
    assertThat(result.getIncompleteConfigurationSets().values())
            .extracting(cfgSet -> cfgSet.getPosition())
            .usingFieldByFieldElementComparator()
            .containsExactly(expectedPosition);

    // The incomplete annotation must not have reached the curator CAS
    assertThat(select(curatorCas, POS.class)).isEmpty();
}
Also used : CAS(org.apache.uima.cas.CAS) JCas(org.apache.uima.jcas.JCas) JCasFactory.createJCas(org.apache.uima.fit.factory.JCasFactory.createJCas) Collection(java.util.Collection) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.DiffResult) SpanPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Example 3 with SpanPosition

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition in project webanno by webanno.

the class CasMerge method reMergeCas.

/**
 * Clears the target CAS and re-populates it from the given {@code DiffResult}: for every
 * position whose configuration set passes {@code shouldMerge}, the representative annotation of
 * the first configuration is merged into the target CAS. Positions are processed in three
 * passes - spans, then slot features, then relations - and within each pass layer by layer with
 * the sentence and token layers first.
 * <p>
 * The initial merge CAS is stored under a name {@code CurationPanel#CURATION_USER}.
 * <p>
 * Any similar annotations stacked in a {@link Position} will be assumed a difference
 * <p>
 * Any two annotation with different value will be assumed a difference
 * <p>
 * Any non stacked empty/null annotations are assumed agreement
 * <p>
 * Any non stacked annotations with similar values for each of the features are assumed
 * agreement
 * <p>
 * Any two link mode / slotable annotations which agree on the base features are assumed
 * agreement
 * <p>
 * An {@link AnnotationException} raised while merging an individual position is caught, logged
 * at trace level and recorded as an error message; processing continues with the next position.
 *
 * @param aDiff
 *            the {@link DiffResult}
 * @param aTargetDocument
 *            the document being merged; its project is used to look up the annotation layers
 * @param aTargetUsername
 *            user name passed on to the merge operations and to the published
 *            {@code BulkAnnotationEvent}
 * @param aTargetCas
 *            the CAS that is cleared and then receives the merged annotations
 * @param aCases
 *            a map of {@code CAS}s for each users and the random merge
 * @throws AnnotationException
 *             declared by this method; exceptions of this type thrown by the per-position merge
 *             calls are handled internally
 * @throws UIMAException
 *             declared by this method (presumably raised while clearing the target CAS - confirm)
 */
public void reMergeCas(DiffResult aDiff, SourceDocument aTargetDocument, String aTargetUsername, CAS aTargetCas, Map<String, CAS> aCases) throws AnnotationException, UIMAException {
    // NOTE(review): the flag is not reset anywhere in this method - confirm that is intended
    silenceEvents = true;
    // NOTE(review): the counters and messages below are collected but not read in this method
    int updated = 0;
    int created = 0;
    Set<LogMessage> messages = new LinkedHashSet<>();
    // Remove any annotations from the target CAS - keep type system, sentences and tokens
    clearAnnotations(aTargetCas);
    // If there is nothing to merge, bail out
    // NOTE(review): this path returns without publishing the BulkAnnotationEvent
    if (aCases.isEmpty()) {
        return;
    }
    // The representative lookup expects a list of CASes per user. This view depends only on
    // aCases, so it is built once here instead of once per processed position.
    Map<String, List<CAS>> casMap = new LinkedHashMap<>();
    aCases.forEach((user, cas) -> casMap.put(user, asList(cas)));
    // Set up a cache for resolving type to layer to avoid hammering the DB as we process each
    // position
    Map<String, AnnotationLayer> type2layer = aDiff.getPositions().stream().map(Position::getType).distinct().map(type -> schemaService.findLayer(aTargetDocument.getProject(), type)).collect(toMap(AnnotationLayer::getName, identity()));
    List<String> layerNames = new ArrayList<>(type2layer.keySet());
    // Move token layer to front
    if (layerNames.contains(Token.class.getName())) {
        layerNames.remove(Token.class.getName());
        layerNames.add(0, Token.class.getName());
    }
    // Move sentence layer to front
    if (layerNames.contains(Sentence.class.getName())) {
        layerNames.remove(Sentence.class.getName());
        layerNames.add(0, Sentence.class.getName());
    }
    // First pass: merge the span annotations, layer by layer (sentences and tokens before the
    // others because of the reordering above)
    for (String layerName : layerNames) {
        // Span positions without a feature are plain spans; those with a feature are slots
        List<SpanPosition> positions = aDiff.getPositions().stream().filter(pos -> layerName.equals(pos.getType())).filter(pos -> pos instanceof SpanPosition).map(pos -> (SpanPosition) pos).filter(pos -> pos.getFeature() == null).collect(Collectors.toList());
        if (positions.isEmpty()) {
            continue;
        }
        LOG.debug("Processing {} span positions on layer {}", positions.size(), layerName);
        // Slot positions were filtered out above; they are handled in the second pass
        for (SpanPosition position : positions) {
            LOG.trace(" |   processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);
            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }
            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                CasMergeOperationResult result = mergeSpanAnnotation(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, false);
                LOG.trace(" `-> merged annotation with agreement");
                switch(result.getState()) {
                    case CREATED:
                        created++;
                        break;
                    case UPDATED:
                        updated++;
                        break;
                }
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }
    // After the spans are in place, we can merge the slot features
    for (String layerName : layerNames) {
        List<SpanPosition> positions = aDiff.getPositions().stream().filter(pos -> layerName.equals(pos.getType())).filter(pos -> pos instanceof SpanPosition).map(pos -> (SpanPosition) pos).filter(pos -> pos.getFeature() != null).collect(Collectors.toList());
        if (positions.isEmpty()) {
            continue;
        }
        LOG.debug("Processing {} slot positions on layer [{}]", positions.size(), layerName);
        for (SpanPosition position : positions) {
            LOG.trace(" |   processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);
            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }
            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                // The AID carries the feature name and index identifying the slot to merge
                AID sourceFsAid = cfgs.getConfigurations().get(0).getRepresentativeAID();
                mergeSlotFeature(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, sourceFsAid.feature, sourceFsAid.index);
                LOG.trace(" `-> merged annotation with agreement");
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }
    // Finally, we merge the relations
    for (String layerName : layerNames) {
        List<RelationPosition> positions = aDiff.getPositions().stream().filter(pos -> layerName.equals(pos.getType())).filter(pos -> pos instanceof RelationPosition).map(pos -> (RelationPosition) pos).collect(Collectors.toList());
        if (positions.isEmpty()) {
            continue;
        }
        LOG.debug("Processing {} relation positions on layer [{}]", positions.size(), layerName);
        for (RelationPosition position : positions) {
            LOG.trace(" |   processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);
            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }
            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                CasMergeOperationResult result = mergeRelationAnnotation(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, false);
                LOG.trace(" `-> merged annotation with agreement");
                switch(result.getState()) {
                    case CREATED:
                        created++;
                        break;
                    case UPDATED:
                        updated++;
                        break;
                }
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }
    // Announce the bulk change once, if an event publisher is available
    if (eventPublisher != null) {
        eventPublisher.publishEvent(new BulkAnnotationEvent(this, aTargetDocument, aTargetUsername, null));
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) WebAnnoCasUtil.isBasicFeature(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isBasicFeature) TypeAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.TypeAdapter) Configuration(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.Configuration) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet) SpanAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.SpanAdapter) LoggerFactory(org.slf4j.LoggerFactory) WebAnnoCasUtil(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AnnotationException(de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException) LinkMode(de.tudarmstadt.ukp.clarin.webanno.model.LinkMode) FSUtil(org.apache.uima.fit.util.FSUtil) Type(org.apache.uima.cas.Type) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) CASImpl(org.apache.uima.cas.impl.CASImpl) Collectors.toMap(java.util.stream.Collectors.toMap) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) ApplicationEventPublisher(org.springframework.context.ApplicationEventPublisher) WebAnnoCasUtil.selectAnnotationByAddr(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectAnnotationByAddr) FeatureStructure(org.apache.uima.cas.FeatureStructure) Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) WebAnnoCasUtil.createToken(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createToken) UIMAException(org.apache.uima.UIMAException) MultiValueMode(de.tudarmstadt.ukp.clarin.webanno.model.MultiValueMode) WebAnnoCasUtil.isEquivalentSpanAnnotation(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isEquivalentSpanAnnotation) 
LoadingCache(com.github.benmanes.caffeine.cache.LoadingCache) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.DiffResult) WebAnnoCasUtil.copyDocumentMetadata(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.copyDocumentMetadata) Set(java.util.Set) CASCompleteSerializer(org.apache.uima.cas.impl.CASCompleteSerializer) WebAnnoCasUtil.exists(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.exists) Collectors(java.util.stream.Collectors) WebAnnoCasUtil.selectSentences(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectSentences) Serialization.serializeCASComplete(org.apache.uima.cas.impl.Serialization.serializeCASComplete) List(java.util.List) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) FSUtil.getFeature(org.apache.uima.fit.util.FSUtil.getFeature) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) Function.identity(java.util.function.Function.identity) WebAnnoCasUtil.isPrimitiveType(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isPrimitiveType) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) SpanPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) WebAnnoCasUtil.selectTokens(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectTokens) FEAT_REL_SOURCE(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_SOURCE) FEAT_REL_TARGET(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_TARGET) AID(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.internal.AID) LogMessage(de.tudarmstadt.ukp.clarin.webanno.support.logging.LogMessage) CAS(org.apache.uima.cas.CAS) Feature(org.apache.uima.cas.Feature) HashMap(java.util.HashMap) 
WebAnnoCasUtil.setFeature(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.setFeature) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) AnnotationSchemaService(de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CasFactory(org.apache.uima.fit.factory.CasFactory) WebAnnoCasUtil.createSentence(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createSentence) Serialization.deserializeCASComplete(org.apache.uima.cas.impl.Serialization.deserializeCASComplete) LinkedHashSet(java.util.LinkedHashSet) LinkWithRoleModel(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.LinkWithRoleModel) WebAnnoCasUtil.getAddr(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getAddr) Caffeine(com.github.benmanes.caffeine.cache.Caffeine) Logger(org.slf4j.Logger) CasUtil.selectAt(org.apache.uima.fit.util.CasUtil.selectAt) WebAnnoCasUtil.getRealCas(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getRealCas) RelationAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.RelationAdapter) CasUtil(org.apache.uima.fit.util.CasUtil) CasUtil.selectCovered(org.apache.uima.fit.util.CasUtil.selectCovered) Collectors.toList(java.util.stream.Collectors.toList) BulkAnnotationEvent(de.tudarmstadt.ukp.clarin.webanno.api.annotation.event.BulkAnnotationEvent) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) IllegalFeatureValueException(de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.IllegalFeatureValueException) VID(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.VID) ArrayList(java.util.ArrayList) WebAnnoCasUtil.createToken(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createToken) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) 
LinkedHashMap(java.util.LinkedHashMap) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) AnnotationException(de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) Collectors.toList(java.util.stream.Collectors.toList) AID(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.internal.AID) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) WebAnnoCasUtil.createSentence(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createSentence) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet) LogMessage(de.tudarmstadt.ukp.clarin.webanno.support.logging.LogMessage) BulkAnnotationEvent(de.tudarmstadt.ukp.clarin.webanno.api.annotation.event.BulkAnnotationEvent) SpanPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition)

Aggregations

DiffResult (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.DiffResult)3 SpanPosition (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition)3 ArrayList (java.util.ArrayList)3 Arrays.asList (java.util.Arrays.asList)3 LinkedHashMap (java.util.LinkedHashMap)3 List (java.util.List)3 CAS (org.apache.uima.cas.CAS)3 Collection (java.util.Collection)2 JCasFactory.createJCas (org.apache.uima.fit.factory.JCasFactory.createJCas)2 JCas (org.apache.uima.jcas.JCas)2 Test (org.junit.Test)2 Caffeine (com.github.benmanes.caffeine.cache.Caffeine)1 LoadingCache (com.github.benmanes.caffeine.cache.LoadingCache)1 AnnotationSchemaService (de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService)1 FEAT_REL_SOURCE (de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_SOURCE)1 FEAT_REL_TARGET (de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_TARGET)1 RelationAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.RelationAdapter)1 SpanAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.SpanAdapter)1 TypeAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.TypeAdapter)1 BulkAnnotationEvent (de.tudarmstadt.ukp.clarin.webanno.api.annotation.event.BulkAnnotationEvent)1