Search in sources :

Example 1 with Position

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.

the class CasDiff method addCas.

/**
 * Adds one CAS to the diff. CASes are added to the diff one after another, building the diff
 * iteratively. A CAS can be added multiple times for different types. Make sure a CAS is not
 * added twice with the same type!
 *
 * @param aCasGroupId
 *            the ID of the CAS group to add.
 * @param aCasId
 *            the index of the CAS within its CAS group.
 * @param aCas
 *            the CAS itself. May be {@code null}; the slot in the group is then still recorded
 *            but no annotations are processed.
 * @param aType
 *            the type on which to calculate the diff.
 */
private void addCas(String aCasGroupId, int aCasId, CAS aCas, String aType) {
    // Remember that we have already seen this CAS.
    List<CAS> casList = cases.computeIfAbsent(aCasGroupId, key -> new ArrayList<>());
    // Only append when the given index lies beyond the last one recorded so far, so that adding
    // the same CAS again for another type does not create a second entry. The check is
    // index-based; according to the original note an earlier approach failed when we had
    // multiple "null" CASes.
    if ((casList.size() - 1) < aCasId) {
        casList.add(aCas);
    }
    assert (casList.size() - 1) == aCasId : "Expected CAS ID [" + (casList.size() - 1) + "] but was [" + aCasId + "]";
    // Null CASes are added to the internal list above, but then we bail out here.
    if (aCas == null) {
        LOG.debug("CAS group [" + aCasGroupId + "] does not contain a CAS at index [" + aCasId + "].");
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing CAS group [" + aCasGroupId + "] CAS [" + aCasId + "].");
        try {
            FeatureStructure dmd = WebAnnoCasUtil.getDocumentMetadata(aCas);
            String collectionId = FSUtil.getFeature(dmd, "collectionId", String.class);
            String documentId = FSUtil.getFeature(dmd, "documentId", String.class);
            LOG.debug("User [" + collectionId + "] - Document [" + documentId + "]");
        } catch (IllegalArgumentException ignored) {
            // We use this information only for debugging - so we can ignore if the information
            // is missing.
        }
    }
    // The type may not exist in this CAS' type system at all.
    Type type = aCas.getTypeSystem().getType(aType);
    if (type == null) {
        LOG.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains no annotations of type [" + aType + "]");
        return;
    }
    // begin == -1 and end == -1 means "no window": take every annotation of the type.
    // Otherwise only the annotations covered by [begin, end] are considered.
    Collection<AnnotationFS> annotations;
    if (begin == -1 && end == -1) {
        annotations = select(aCas, type);
    } else {
        annotations = selectCovered(aCas, type, begin, end);
    }
    if (annotations.isEmpty()) {
        LOG.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains no annotations of type [" + aType + "]");
        return;
    }
    LOG.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains [" + annotations.size() + "] annotations of type [" + aType + "]");
    int posBefore = configSets.size();
    LOG.debug("Positions before: [" + posBefore + "]");
    // The adapter depends only on the type, so look it up once instead of twice per annotation.
    DiffAdapter adapter = getAdapter(aType);
    for (AnnotationFS fs : annotations) {
        List<Position> positions = new ArrayList<>();
        // Primary position of the annotation
        positions.add(adapter.getPosition(aCasId, fs));
        // Generate secondary positions for multi-link features
        positions.addAll(adapter.generateSubPositions(aCasId, fs, linkCompareBehavior));
        for (Position pos : positions) {
            // Get/create configuration set at the current position
            ConfigurationSet configSet = configSets.get(pos);
            if (configSet == null) {
                configSet = new ConfigurationSet(pos);
                configSets.put(pos, configSet);
            }
            assert pos.getClass() == configSet.position.getClass() : "Position type mismatch [" + pos.getClass() + "] vs [" + configSet.position.getClass() + "]";
            // Merge FS into current set
            addConfiguration(configSet, aCasGroupId, fs);
        }
    }
    LOG.debug("Positions after: [" + configSets.size() + "] (delta: " + (configSets.size() - posBefore) + ")");
}
Also used : Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) ArrayList(java.util.ArrayList) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS)

Example 2 with Position

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.

the class CasDiff method equalsAnnotationFS.

/**
 * Tells whether two annotations are located at the same position. The positions of both
 * annotations are obtained from the diff adapter registered for the type of {@code aFS1}
 * (using CAS ID 0 for both) and then compared via {@code compareTo}.
 *
 * @param aFS1
 *            the first annotation.
 * @param aFS2
 *            the second annotation.
 * @return {@code true} if both positions compare as equal; {@code false} if they differ or if
 *         either argument is {@code null}.
 */
private boolean equalsAnnotationFS(AnnotationFS aFS1, AnnotationFS aFS2) {
    if (aFS1 != null && aFS2 != null) {
        // The adapter is selected by the type of the first annotation only.
        DiffAdapter positionSource = getAdapter(aFS1.getType().getName());
        Position first = positionSource.getPosition(0, aFS1);
        Position second = positionSource.getPosition(0, aFS2);
        return first.compareTo(second) == 0;
    }
    // A missing annotation never equals anything (not even another missing one).
    return false;
}
Also used : Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) SpanDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanDiffAdapter) RelationDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter) DiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter)

Example 3 with Position

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.

the class CasMerge method reMergeCas.

/**
 * Rebuilds the target (merge) CAS from a {@code DiffResult}. All annotations are first removed
 * from the target CAS; then every position of the diff for which {@code shouldMerge} holds is
 * merged back in three passes: span annotations, slot features, relation annotations.
 * <p>
 * Agreement rules as documented by the original author (the initial merge CAS is stored under
 * the name {@code CurationPanel#CURATION_USER}):
 * <p>
 * Any similar annotations stacked in a {@code CasDiff2.Position} will be assumed a difference
 * <p>
 * Any two annotations with different values will be assumed a difference
 * <p>
 * Any non-stacked empty/null annotations are assumed agreement
 * <p>
 * Any non-stacked annotations with similar values for each of the features are assumed
 * agreement
 * <p>
 * Any two link mode / slotable annotations which agree on the base features are assumed
 * agreement
 *
 * @param aDiff
 *            the {@link DiffResult}
 * @param aTargetDocument
 *            the document the target CAS belongs to; its project is used to resolve layers.
 * @param aTargetUsername
 *            the user name passed on to the merge operations and to the published event.
 * @param aTargetCas
 *            the CAS that is cleared and then receives the merged annotations.
 * @param aCases
 *            a map of {@code CAS}s for each user and the random merge
 * @throws AnnotationException
 *             declared by the signature; exceptions of this type raised while merging an
 *             individual position are caught and recorded instead of being propagated.
 * @throws UIMAException
 *             declared by the signature (presumably raised while clearing the target CAS -
 *             TODO confirm).
 */
public void reMergeCas(DiffResult aDiff, SourceDocument aTargetDocument, String aTargetUsername, CAS aTargetCas, Map<String, CAS> aCases) throws AnnotationException, UIMAException {
    // NOTE(review): this flag is not reset anywhere in this method - confirm that is intended.
    silenceEvents = true;
    int updated = 0;
    int created = 0;
    Set<LogMessage> messages = new LinkedHashSet<>();
    // Remove any annotations from the target CAS - keep type system, sentences and tokens
    clearAnnotations(aTargetCas);
    // If there is nothing to merge, bail out
    // NOTE(review): no event is published on this path - confirm that is intended.
    if (aCases.isEmpty()) {
        return;
    }
    // Set up a cache for resolving type to layer to avoid hammering the DB as we process each
    // position
    Map<String, AnnotationLayer> type2layer = aDiff.getPositions().stream()
            .map(Position::getType)
            .distinct()
            .map(type -> schemaService.findLayer(aTargetDocument.getProject(), type))
            .collect(toMap(AnnotationLayer::getName, identity()));
    List<String> layerNames = new ArrayList<>(type2layer.keySet());
    // Move token layer to front
    if (layerNames.remove(Token.class.getName())) {
        layerNames.add(0, Token.class.getName());
    }
    // Move sentence layer to front (it ends up before the token layer)
    if (layerNames.remove(Sentence.class.getName())) {
        layerNames.add(0, Sentence.class.getName());
    }
    // Every user's CAS wrapped in a single-element list, as expected by getRepresentative().
    // The map depends only on aCases, so it is built once instead of once per position.
    Map<String, List<CAS>> casMap = new LinkedHashMap<>();
    aCases.forEach((user, cas) -> casMap.put(user, asList(cas)));
    // First pass: span annotations, processed layer by layer in the order established above
    // (tokens and sentences before the others)
    for (String layerName : layerNames) {
        // Span positions without a feature are the annotations themselves; those with a
        // feature are slots, which are excluded for the moment and handled in the next pass.
        List<SpanPosition> positions = aDiff.getPositions().stream()
                .filter(pos -> layerName.equals(pos.getType()))
                .filter(pos -> pos instanceof SpanPosition)
                .map(pos -> (SpanPosition) pos)
                .filter(pos -> pos.getFeature() == null)
                .collect(Collectors.toList());
        if (positions.isEmpty()) {
            continue;
        }
        LOG.debug("Processing {} span positions on layer {}", positions.size(), layerName);
        for (SpanPosition position : positions) {
            LOG.trace(" |   processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);
            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }
            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                CasMergeOperationResult result = mergeSpanAnnotation(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, false);
                LOG.trace(" `-> merged annotation with agreement");
                switch (result.getState()) {
                    case CREATED:
                        created++;
                        break;
                    case UPDATED:
                        updated++;
                        break;
                    default:
                        // Other states are not counted
                        break;
                }
            } catch (AnnotationException e) {
                // A failure on one position must not abort the whole re-merge
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }
    // After the spans are in place, we can merge the slot features
    for (String layerName : layerNames) {
        List<SpanPosition> positions = aDiff.getPositions().stream()
                .filter(pos -> layerName.equals(pos.getType()))
                .filter(pos -> pos instanceof SpanPosition)
                .map(pos -> (SpanPosition) pos)
                .filter(pos -> pos.getFeature() != null)
                .collect(Collectors.toList());
        if (positions.isEmpty()) {
            continue;
        }
        LOG.debug("Processing {} slot positions on layer [{}]", positions.size(), layerName);
        for (SpanPosition position : positions) {
            LOG.trace(" |   processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);
            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }
            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                AID sourceFsAid = cfgs.getConfigurations().get(0).getRepresentativeAID();
                mergeSlotFeature(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, sourceFsAid.feature, sourceFsAid.index);
                LOG.trace(" `-> merged annotation with agreement");
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }
    // Finally, we merge the relations
    for (String layerName : layerNames) {
        List<RelationPosition> positions = aDiff.getPositions().stream()
                .filter(pos -> layerName.equals(pos.getType()))
                .filter(pos -> pos instanceof RelationPosition)
                .map(pos -> (RelationPosition) pos)
                .collect(Collectors.toList());
        if (positions.isEmpty()) {
            continue;
        }
        LOG.debug("Processing {} relation positions on layer [{}]", positions.size(), layerName);
        for (RelationPosition position : positions) {
            LOG.trace(" |   processing {}", position);
            ConfigurationSet cfgs = aDiff.getConfigurationSet(position);
            if (!shouldMerge(aDiff, cfgs)) {
                continue;
            }
            try {
                AnnotationFS sourceFS = (AnnotationFS) cfgs.getConfigurations().get(0).getRepresentative(casMap);
                CasMergeOperationResult result = mergeRelationAnnotation(aTargetDocument, aTargetUsername, type2layer.get(position.getType()), aTargetCas, sourceFS, false);
                LOG.trace(" `-> merged annotation with agreement");
                switch (result.getState()) {
                    case CREATED:
                        created++;
                        break;
                    case UPDATED:
                        updated++;
                        break;
                    default:
                        // Other states are not counted
                        break;
                }
            } catch (AnnotationException e) {
                LOG.trace(" `-> not merged annotation: {}", e.getMessage());
                messages.add(LogMessage.error(this, "%s", e.getMessage()));
            }
        }
    }
    // Report the outcome so the collected counters and problems are not silently dropped
    LOG.debug("Re-merge created [{}] and updated [{}] annotations with [{}] problems", created, updated, messages.size());
    if (eventPublisher != null) {
        eventPublisher.publishEvent(new BulkAnnotationEvent(this, aTargetDocument, aTargetUsername, null));
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) WebAnnoCasUtil.isBasicFeature(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isBasicFeature) TypeAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.TypeAdapter) Configuration(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.Configuration) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet) SpanAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.SpanAdapter) LoggerFactory(org.slf4j.LoggerFactory) WebAnnoCasUtil(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AnnotationException(de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException) LinkMode(de.tudarmstadt.ukp.clarin.webanno.model.LinkMode) FSUtil(org.apache.uima.fit.util.FSUtil) Type(org.apache.uima.cas.Type) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData) CASImpl(org.apache.uima.cas.impl.CASImpl) Collectors.toMap(java.util.stream.Collectors.toMap) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) ApplicationEventPublisher(org.springframework.context.ApplicationEventPublisher) WebAnnoCasUtil.selectAnnotationByAddr(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectAnnotationByAddr) FeatureStructure(org.apache.uima.cas.FeatureStructure) Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) WebAnnoCasUtil.createToken(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createToken) UIMAException(org.apache.uima.UIMAException) MultiValueMode(de.tudarmstadt.ukp.clarin.webanno.model.MultiValueMode) WebAnnoCasUtil.isEquivalentSpanAnnotation(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isEquivalentSpanAnnotation) 
LoadingCache(com.github.benmanes.caffeine.cache.LoadingCache) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.DiffResult) WebAnnoCasUtil.copyDocumentMetadata(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.copyDocumentMetadata) Set(java.util.Set) CASCompleteSerializer(org.apache.uima.cas.impl.CASCompleteSerializer) WebAnnoCasUtil.exists(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.exists) Collectors(java.util.stream.Collectors) WebAnnoCasUtil.selectSentences(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectSentences) Serialization.serializeCASComplete(org.apache.uima.cas.impl.Serialization.serializeCASComplete) List(java.util.List) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) FSUtil.getFeature(org.apache.uima.fit.util.FSUtil.getFeature) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) Function.identity(java.util.function.Function.identity) WebAnnoCasUtil.isPrimitiveType(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isPrimitiveType) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) SpanPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) WebAnnoCasUtil.selectTokens(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.selectTokens) FEAT_REL_SOURCE(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_SOURCE) FEAT_REL_TARGET(de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_TARGET) AID(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.internal.AID) LogMessage(de.tudarmstadt.ukp.clarin.webanno.support.logging.LogMessage) CAS(org.apache.uima.cas.CAS) Feature(org.apache.uima.cas.Feature) HashMap(java.util.HashMap) 
WebAnnoCasUtil.setFeature(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.setFeature) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) AnnotationSchemaService(de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CasFactory(org.apache.uima.fit.factory.CasFactory) WebAnnoCasUtil.createSentence(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createSentence) Serialization.deserializeCASComplete(org.apache.uima.cas.impl.Serialization.deserializeCASComplete) LinkedHashSet(java.util.LinkedHashSet) LinkWithRoleModel(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.LinkWithRoleModel) WebAnnoCasUtil.getAddr(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getAddr) Caffeine(com.github.benmanes.caffeine.cache.Caffeine) Logger(org.slf4j.Logger) CasUtil.selectAt(org.apache.uima.fit.util.CasUtil.selectAt) WebAnnoCasUtil.getRealCas(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getRealCas) RelationAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.RelationAdapter) CasUtil(org.apache.uima.fit.util.CasUtil) CasUtil.selectCovered(org.apache.uima.fit.util.CasUtil.selectCovered) Collectors.toList(java.util.stream.Collectors.toList) BulkAnnotationEvent(de.tudarmstadt.ukp.clarin.webanno.api.annotation.event.BulkAnnotationEvent) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) IllegalFeatureValueException(de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.IllegalFeatureValueException) VID(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.VID) ArrayList(java.util.ArrayList) WebAnnoCasUtil.createToken(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createToken) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) 
LinkedHashMap(java.util.LinkedHashMap) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) AnnotationException(de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) Collectors.toList(java.util.stream.Collectors.toList) AID(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.internal.AID) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) WebAnnoCasUtil.createSentence(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.createSentence) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet) LogMessage(de.tudarmstadt.ukp.clarin.webanno.support.logging.LogMessage) BulkAnnotationEvent(de.tudarmstadt.ukp.clarin.webanno.api.annotation.event.BulkAnnotationEvent) SpanPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanPosition)

Example 4 with Position

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.

the class AgreementUtils method configurationSetsWithItemsToCsv.

/**
 * Prints the items of an agreement study as CSV: first a header record, then one record per
 * study item. Each item record holds the position details of the configuration set at the same
 * index in {@code aSets}, followed by the category of every unit of the item.
 *
 * @param aOut
 *            the printer receiving the records.
 * @param aAgreement
 *            the agreement result providing the CAS group IDs, the feature and the study items.
 * @param aSets
 *            the configuration sets, accessed by the running index of the study items.
 * @throws IOException
 *             declared by the signature; presumably raised by the printer - confirm.
 */
private static void configurationSetsWithItemsToCsv(CSVPrinter aOut, AgreementResult<ICodingAnnotationStudy> aAgreement, List<ConfigurationSet> aSets) throws IOException {
    // Fixed columns first, then one column per CAS group
    List<String> headerRow = new ArrayList<>();
    headerRow.add("Type");
    headerRow.add("Collection");
    headerRow.add("Document");
    headerRow.add("Layer");
    headerRow.add("Feature");
    headerRow.add("Position");
    headerRow.addAll(aAgreement.getCasGroupIds());
    aOut.printRecord(headerRow);

    // Study items and configuration sets are walked in lockstep
    int setIndex = 0;
    for (ICodingAnnotationItem item : aAgreement.getStudy().getItems()) {
        Position pos = aSets.get(setIndex).getPosition();
        List<String> row = new ArrayList<>(asList(
                pos.getClass().getSimpleName(),
                pos.getCollectionId(),
                pos.getDocumentId(),
                pos.getType(),
                aAgreement.getFeature(),
                pos.toMinimalString()));
        // One value per unit of the item
        for (IAnnotationUnit unit : item.getUnits()) {
            row.add(String.valueOf(unit.getCategory()));
        }
        aOut.printRecord(row);
        setIndex++;
    }
}
Also used : Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition) IAnnotationUnit(org.dkpro.statistics.agreement.IAnnotationUnit) ArrayList(java.util.ArrayList) ICodingAnnotationItem(org.dkpro.statistics.agreement.coding.ICodingAnnotationItem)

Example 5 with Position

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position in project webanno by webanno.

the class AgreementUtils method makeCodingStudy.

/**
 * Builds a coding study for one feature of one type from the positions of a diff and sorts the
 * configuration sets into the categories reported in the result (complete, irrelevant, with
 * differences, incomplete by position, incomplete by label, plurality).
 *
 * @param aDiff
 *            the diff whose positions are evaluated.
 * @param aUsers
 *            the users (CAS group IDs) to compare; they are processed in sorted order.
 * @param aType
 *            the name of the type to calculate agreement for.
 * @param aFeature
 *            the base name of the feature to calculate agreement for.
 * @param aExcludeIncomplete
 *            if {@code true}, positions with a missing annotation or a missing label are not
 *            added to the study.
 * @param aNullLabelsAsEmpty
 *            if {@code true}, a {@code null} label is replaced by the empty string.
 * @param aCasMap
 *            the CASes per user.
 * @return the agreement result holding the study and the categorized configuration sets.
 * @throws IllegalArgumentException
 *             if the type exists but has no feature with the given name.
 */
private static CodingAgreementResult makeCodingStudy(CasDiff aDiff, Collection<String> aUsers, String aType, String aFeature, boolean aExcludeIncomplete, boolean aNullLabelsAsEmpty, Map<String, List<CAS>> aCasMap) {
    List<String> users = new ArrayList<>(aUsers);
    Collections.sort(users);
    List<ConfigurationSet> completeSets = new ArrayList<>();
    List<ConfigurationSet> setsWithDifferences = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByPosition = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByLabel = new ArrayList<>();
    List<ConfigurationSet> pluralitySets = new ArrayList<>();
    List<ConfigurationSet> irrelevantSets = new ArrayList<>();
    CodingAnnotationStudy study = new CodingAnnotationStudy(users.size());
    // Check if the feature we are looking at is a primitive feature or a link feature
    // We do this by looking it up in the first available CAS. Mind that at this point all
    // CASes should have exactly the same typesystem.
    CAS someCas = findSomeCas(aCasMap);
    if (someCas == null) {
        // Well... there is NOTHING here!
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
        return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }
    TypeSystem ts = someCas.getTypeSystem();
    // If the type is not part of the type system, there is nothing to compare. We should just
    // do the right thing here which is: do nothing
    if (ts.getType(aType) == null) {
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurationSet(p)));
        return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }
    // Check that the feature really exists instead of just getting a NPE later
    if (ts.getType(aType).getFeatureByBaseName(aFeature) == null) {
        throw new IllegalArgumentException("Type [" + aType + "] has no feature called [" + aFeature + "]");
    }
    boolean isPrimitiveFeature = ts.getType(aType).getFeatureByBaseName(aFeature).getRange().isPrimitive();
    nextPosition: for (Position p : aDiff.getPositions()) {
        ConfigurationSet cfgSet = aDiff.getConfigurationSet(p);
        // Only calculate agreement for the given layer
        if (!cfgSet.getPosition().getType().equals(aType)) {
            // We don't even consider these as irrelevant, they are just filtered out
            continue;
        }
        // If the feature on a position is set, then it is a subposition
        boolean isSubPosition = p.getFeature() != null;
        // A primitive feature is only evaluated on primary positions and a non-primitive
        // feature only on subpositions; any other combination is irrelevant (this is an
        // inverted XOR!)
        if (isPrimitiveFeature == isSubPosition) {
            irrelevantSets.add(cfgSet);
            continue;
        }
        // A subposition is only relevant if it belongs to the feature we are looking at
        if (isSubPosition && !aFeature.equals(cfgSet.getPosition().getFeature())) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }
        // If none of the current users has made any annotation at this position, then skip it
        if (users.stream().noneMatch(u -> cfgSet.getCasGroupIds().contains(u))) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }
        // One value per user, in the order of the sorted user list
        Object[] values = new Object[users.size()];
        int i = 0;
        for (String user : users) {
            // Handle the case that the user has no annotation at this position, i.e. the user
            // is not part of this configuration set.
            if (!cfgSet.getCasGroupIds().contains(user)) {
                incompleteSetsByPosition.add(cfgSet);
                if (aExcludeIncomplete) {
                    // Record as incomplete
                    continue nextPosition;
                } else {
                    // Record as missing value
                    values[i] = null;
                    i++;
                    continue;
                }
            }
            // Make sure a single user didn't do multiple alternative annotations at a single
            // position. So there is currently no support for calculating agreement on stacking
            // annotations.
            List<Configuration> cfgs = cfgSet.getConfigurations(user);
            if (cfgs.size() > 1) {
                pluralitySets.add(cfgSet);
                continue nextPosition;
            }
            Configuration cfg = cfgs.get(0);
            // Check if source and/or targets of a relation are stacked
            if (cfg.getPosition() instanceof RelationPosition) {
                RelationPosition pos = (RelationPosition) cfg.getPosition();
                FeatureStructure arc = cfg.getFs(user, pos.getCasId(), aCasMap);
                RelationDiffAdapter adapter = (RelationDiffAdapter) aDiff.getTypeAdapters().get(pos.getType());
                // Check if the source of the relation is stacked
                AnnotationFS source = FSUtil.getFeature(arc, adapter.getSourceFeature(), AnnotationFS.class);
                List<AnnotationFS> sourceCandidates = CasUtil.selectAt(arc.getCAS(), source.getType(), source.getBegin(), source.getEnd());
                if (sourceCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }
                // Check if the target of the relation is stacked
                AnnotationFS target = FSUtil.getFeature(arc, adapter.getTargetFeature(), AnnotationFS.class);
                List<AnnotationFS> targetCandidates = CasUtil.selectAt(arc.getCAS(), target.getType(), target.getBegin(), target.getEnd());
                if (targetCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }
            }
            // Only calculate agreement for the given feature
            FeatureStructure fs = cfg.getFs(user, cfg.getPosition().getCasId(), aCasMap);
            values[i] = extractValueForAgreement(fs, aFeature, cfg.getAID(user).index, cfg.getPosition().getLinkCompareBehavior());
            // Optionally treat a missing label as the empty label so that it takes part in the
            // agreement calculation. The empty label is still a valid label.
            if (aNullLabelsAsEmpty && values[i] == null) {
                values[i] = "";
            }
            // "null" cannot be used in agreement calculations. We treat these as incomplete
            if (values[i] == null) {
                incompleteSetsByLabel.add(cfgSet);
                if (aExcludeIncomplete) {
                    continue nextPosition;
                }
            }
            i++;
        }
        // The set has differences if any value deviates from the first one. Comparing against
        // every value (instead of only the second one) also covers a single user and more than
        // two users.
        boolean hasDifference = false;
        for (int v = 1; v < values.length && !hasDifference; v++) {
            hasDifference = ObjectUtils.notEqual(values[0], values[v]);
        }
        if (hasDifference) {
            setsWithDifferences.add(cfgSet);
        }
        // Sanity check: a subposition must belong to the feature that we are calculating
        // agreement over
        assert cfgSet.getPosition().getFeature() == null || cfgSet.getPosition().getFeature().equals(aFeature);
        completeSets.add(cfgSet);
        study.addItemAsArray(values);
    }
    return new CodingAgreementResult(aType, aFeature, aDiff.toResult(), study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
Also used : LinkCompareBehavior(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.LinkCompareBehavior) WebAnnoCasUtil.getFeature(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getFeature) CasDiff(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Configuration(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.Configuration) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet) ByteArrayOutputStream(java.io.ByteArrayOutputStream) CAS(org.apache.uima.cas.CAS) IAnnotationUnit(org.dkpro.statistics.agreement.IAnnotationUnit) FSUtil(org.apache.uima.fit.util.FSUtil) ArrayList(java.util.ArrayList) ByteArrayInputStream(java.io.ByteArrayInputStream) CSVFormat(org.apache.commons.csv.CSVFormat) Arrays.asList(java.util.Arrays.asList) ObjectUtils(org.apache.commons.lang3.ObjectUtils) Map(java.util.Map) OutputStreamWriter(java.io.OutputStreamWriter) FeatureStructure(org.apache.uima.cas.FeatureStructure) Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) PrintStream(java.io.PrintStream) TypeSystem(org.apache.uima.cas.TypeSystem) CodingAnnotationStudy(org.dkpro.statistics.agreement.coding.CodingAnnotationStudy) CodingAgreementResult(de.tudarmstadt.ukp.clarin.webanno.agreement.results.coding.CodingAgreementResult) ArrayFS(org.apache.uima.cas.ArrayFS) Collection(java.util.Collection) ICodingAnnotationItem(org.dkpro.statistics.agreement.coding.ICodingAnnotationItem) IOException(java.io.IOException) RelationDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter) CasUtil(org.apache.uima.fit.util.CasUtil) List(java.util.List) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition) ICodingAnnotationStudy(org.dkpro.statistics.agreement.coding.ICodingAnnotationStudy) Collections(java.util.Collections) CSVPrinter(org.apache.commons.csv.CSVPrinter) 
InputStream(java.io.InputStream) ExceptionUtils(org.apache.commons.lang3.exception.ExceptionUtils) TypeSystem(org.apache.uima.cas.TypeSystem) Configuration(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.Configuration) Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition) ArrayList(java.util.ArrayList) CodingAnnotationStudy(org.dkpro.statistics.agreement.coding.CodingAnnotationStudy) ICodingAnnotationStudy(org.dkpro.statistics.agreement.coding.ICodingAnnotationStudy) CodingAgreementResult(de.tudarmstadt.ukp.clarin.webanno.agreement.results.coding.CodingAgreementResult) FeatureStructure(org.apache.uima.cas.FeatureStructure) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) CAS(org.apache.uima.cas.CAS) RelationDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter) ArrayList(java.util.ArrayList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) RelationPosition(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition)

Aggregations

Position (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position)5 ArrayList (java.util.ArrayList)4 RelationPosition (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationPosition)3 CAS (org.apache.uima.cas.CAS)3 FeatureStructure (org.apache.uima.cas.FeatureStructure)3 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)3 Configuration (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.Configuration)2 ConfigurationSet (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet)2 RelationDiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter)2 Type (org.apache.uima.cas.Type)2 IAnnotationUnit (org.dkpro.statistics.agreement.IAnnotationUnit)2 ICodingAnnotationItem (org.dkpro.statistics.agreement.coding.ICodingAnnotationItem)2 Caffeine (com.github.benmanes.caffeine.cache.Caffeine)1 LoadingCache (com.github.benmanes.caffeine.cache.LoadingCache)1 CodingAgreementResult (de.tudarmstadt.ukp.clarin.webanno.agreement.results.coding.CodingAgreementResult)1 AnnotationSchemaService (de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService)1 FEAT_REL_SOURCE (de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_SOURCE)1 FEAT_REL_TARGET (de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.FEAT_REL_TARGET)1 RelationAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.RelationAdapter)1 SpanAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.SpanAdapter)1