Search in sources :

Example 1 with DiffAdapter

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter in project webanno by webanno.

the class PairwiseCodingAgreementTable method exportAllAgreements.

/**
 * Creates a resource stream which, when opened, computes the coding agreement for the
 * currently selected feature and delivers it as a CSV report.
 *
 * @return a resource stream producing the CSV report on demand.
 */
private IResourceStream exportAllAgreements() {
    return new AbstractResourceStream() {

        private static final long serialVersionUID = 1L;

        /**
         * Runs the diff over the supplied CASes, builds the coding study from it and renders
         * the study as CSV.
         *
         * @throws ResourceStreamNotFoundException
         *             if the report could not be generated (the cause is attached).
         */
        @Override
        public InputStream getInputStream() throws ResourceStreamNotFoundException {
            final AnnotationFeature selectedFeature = getModelObject().getFeature();
            final DefaultAgreementTraits agreementTraits = getModelObject().getTraits();
            final Map<String, List<CAS>> groupedCases = casMapSupplier.get();

            // The diff is restricted to the layer that owns the selected feature
            final List<DiffAdapter> layerAdapters = CasDiff.getDiffAdapters(annotationService,
                    asList(selectedFeature.getLayer()));
            final CasDiff casDiff = doDiff(layerAdapters,
                    agreementTraits.getLinkCompareBehavior(), groupedCases);

            // TODO: for the moment, incomplete annotations are always included in this export
            // (the flag is hard-wired to false instead of being taken from the preferences).
            final boolean excludeIncomplete = false;
            final CodingAgreementResult study = makeCodingStudy(casDiff,
                    selectedFeature.getLayer().getName(), selectedFeature.getName(),
                    excludeIncomplete, groupedCases);

            try {
                return AgreementUtils.generateCsvReport(study);
            } catch (Exception e) {
                // FIXME Is there some better error handling here?
                LOG.error("Unable to generate report", e);
                throw new ResourceStreamNotFoundException(e);
            }
        }

        @Override
        public void close() throws IOException {
            // No resources are held by this stream, so there is nothing to release
        }
    };
}
Also used : AbstractResourceStream(org.apache.wicket.util.resource.AbstractResourceStream) CasDiff(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff) DefaultAgreementTraits(de.tudarmstadt.ukp.clarin.webanno.agreement.measures.DefaultAgreementTraits) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) DiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter) ResourceStreamNotFoundException(org.apache.wicket.util.resource.ResourceStreamNotFoundException) ResourceStreamNotFoundException(org.apache.wicket.util.resource.ResourceStreamNotFoundException) IOException(java.io.IOException) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)

Example 2 with DiffAdapter

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter in project webanno by webanno.

the class CasDiff method equalsFS.

/**
 * Compare two feature structures to each other. Only the label features declared by the
 * {@link DiffAdapter} registered for the type are compared; if no adapter is registered for
 * the type, the two feature structures are treated as equal and a warning is logged.
 * <p>
 * Comparison is done recursively, but stops at feature values that are annotations. For these,
 * only offsets are checked, but feature values are not inspected further. If the annotations
 * are relevant, their type should be added to the entry types and will then be checked and
 * grouped separately.
 * <p>
 * NOTE(review): the description of the annotation handling above is the original contract; the
 * default branch of the switch below does not obviously implement it as written -- see the
 * review note there.
 *
 * @param aFS1
 *            first feature structure.
 * @param aFS2
 *            second feature structure.
 * @return {@code true} if they are equal.
 */
private boolean equalsFS(FeatureStructure aFS1, FeatureStructure aFS2) {
    // Trivial case: same object reference (this also covers both being null)
    if (aFS1 == aFS2) {
        return true;
    }
    // Null check: exactly one of the two is null at this point
    if (aFS1 == null || aFS2 == null) {
        return false;
    }
    // Trivial case: same CAS and same address
    if (aFS1.getCAS() == aFS2.getCAS() && getAddr(aFS1) == getAddr(aFS2)) {
        return true;
    }
    Type type1 = aFS1.getType();
    Type type2 = aFS2.getType();
    // Types must be the same (compared by name, since the two type objects may differ)
    if (!type1.getName().equals(type2.getName())) {
        return false;
    }
    assert type1.getNumberOfFeatures() == type2.getNumberOfFeatures();
    // Sort features by name to be independent over implementation details that may change the
    // order of the features as returned from Type.getFeatures().
    // NOTE(review): no sort is actually performed in this block -- the short names are cached
    // in the order Type.getFeatures() returns them. Since the loop below only ever returns
    // false on the first mismatch, the order should not affect the result; confirm whether the
    // sort was dropped intentionally.
    String[] cachedSortedFeatures = sortedFeaturesCache.get(type1.getName());
    if (cachedSortedFeatures == null) {
        cachedSortedFeatures = new String[type1.getNumberOfFeatures()];
        int i = 0;
        for (Feature f : aFS1.getType().getFeatures()) {
            cachedSortedFeatures[i] = f.getShortName();
            i++;
        }
        // Cache per type name so the feature names are collected only once per type
        sortedFeaturesCache.put(type1.getName(), cachedSortedFeatures);
    }
    DiffAdapter adapter = typeAdapters.get(type1.getName());
    if (adapter == null) {
        // Without an adapter the label features are unknown; the pair is reported as equal
        LOG.warn("No diff adapter for type [" + type1.getName() + "] -- ignoring!");
        return true;
    }
    // Only consider label features. In particular these must not include position features
    // such as begin, end, etc.
    // A mutable copy is made so the cached array is not affected by the removals below.
    List<String> sortedFeatures = new ArrayList<>(asList(cachedSortedFeatures));
    Set<String> labelFeatures = adapter.getLabelFeatures();
    sortedFeatures.removeIf(f -> !labelFeatures.contains(f));
    if (!recurseIntoLinkFeatures) {
        // #1795 Chili REC: We can/should change CasDiff2 such that it does not recurse into
        // link features (or rather into any features that are covered by their own
        // sub-positions). So when comparing two spans that differ only in their slots
        // (sub-positions) the main position could still exhibit agreement.
        sortedFeatures.removeIf(f -> adapter.getLinkFeature(f) != null);
    }
    nextFeature: for (String feature : sortedFeatures) {
        Feature f1 = type1.getFeatureByBaseName(feature);
        Feature f2 = type2.getFeatureByBaseName(feature);
        // Take the range from whichever side declares the feature
        Type range = (f1 != null) ? f1.getRange() : (f2 != null ? f2.getRange() : null);
        // Reached when neither type declares the feature: both sides then have the same
        // value (no value), so move on to the next feature
        if (range == null) {
            continue nextFeature;
        }
        // If both features are declared but their range differs, then the comparison is false
        if (f1 != null && f2 != null && !f1.getRange().equals(f2.getRange())) {
            return false;
        }
        // In every primitive case below, a side that does not declare the feature contributes
        // the default value (false / 0 / null) to the comparison.
        switch(range.getName()) {
            case CAS.TYPE_NAME_BOOLEAN:
                {
                    boolean value1 = f1 != null ? aFS1.getBooleanValue(f1) : false;
                    boolean value2 = f2 != null ? aFS2.getBooleanValue(f2) : false;
                    if (value1 != value2) {
                        return false;
                    }
                    break;
                }
            case CAS.TYPE_NAME_BYTE:
                {
                    byte value1 = f1 != null ? aFS1.getByteValue(f1) : 0;
                    byte value2 = f2 != null ? aFS2.getByteValue(f2) : 0;
                    if (value1 != value2) {
                        return false;
                    }
                    break;
                }
            case CAS.TYPE_NAME_DOUBLE:
                {
                    // Compared with != (exact comparison, no tolerance)
                    double value1 = f1 != null ? aFS1.getDoubleValue(f1) : 0.0d;
                    double value2 = f2 != null ? aFS2.getDoubleValue(f2) : 0.0d;
                    if (value1 != value2) {
                        return false;
                    }
                    break;
                }
            case CAS.TYPE_NAME_FLOAT:
                {
                    // Compared with != (exact comparison, no tolerance)
                    float value1 = f1 != null ? aFS1.getFloatValue(f1) : 0.0f;
                    float value2 = f2 != null ? aFS2.getFloatValue(f2) : 0.0f;
                    if (value1 != value2) {
                        return false;
                    }
                    break;
                }
            case CAS.TYPE_NAME_INTEGER:
                {
                    int value1 = f1 != null ? aFS1.getIntValue(f1) : 0;
                    int value2 = f2 != null ? aFS2.getIntValue(f2) : 0;
                    if (value1 != value2) {
                        return false;
                    }
                    break;
                }
            case CAS.TYPE_NAME_LONG:
                {
                    long value1 = f1 != null ? aFS1.getLongValue(f1) : 0l;
                    long value2 = f2 != null ? aFS2.getLongValue(f2) : 0l;
                    if (value1 != value2) {
                        return false;
                    }
                    break;
                }
            case CAS.TYPE_NAME_SHORT:
                {
                    short value1 = f1 != null ? aFS1.getShortValue(f1) : 0;
                    short value2 = f2 != null ? aFS2.getShortValue(f2) : 0;
                    if (value1 != value2) {
                        return false;
                    }
                    break;
                }
            case CAS.TYPE_NAME_STRING:
                {
                    // StringUtils.equals is used so that null values can be compared
                    String value1 = f1 != null ? aFS1.getStringValue(f1) : null;
                    String value2 = f2 != null ? aFS2.getStringValue(f2) : null;
                    if (!StringUtils.equals(value1, value2)) {
                        return false;
                    }
                    break;
                }
            default:
                {
                    // Must be some kind of feature structure then
                    FeatureStructure valueFS1 = f1 != null ? aFS1.getFeatureValue(f1) : null;
                    FeatureStructure valueFS2 = f2 != null ? aFS2.getFeatureValue(f2) : null;
                    // Ignore the SofaFS - we already checked that the CAS is the same.
                    // (Only the first value is inspected; the continue applies to the
                    // enclosing feature loop, not to the switch.)
                    if (valueFS1 instanceof SofaFS) {
                        continue;
                    }
                    // If the feature value is an annotation, we just check the position is the same,
                    // but we do not go in deeper. If we wanted to know differences on this type,
                    // then it should have been added as an entry type.
                    //
                    // Q: Why do we not check if they are the same based on the CAS address?
                    // A: Because we are checking across CASes and addresses can differ.
                    //
                    // Q: Why do we not check recursively?
                    // A: Because e.g. for chains, this would mean we consider the whole chain as a
                    // single annotation, but we want to consider each link as an annotation
                    //
                    // NOTE(review): the check below tests type1 (the type of aFS1) and compares
                    // the positions of aFS1/aFS2 themselves, not of valueFS1/valueFS2 as the
                    // comment above describes -- confirm which of the two is intended.
                    TypeSystem ts1 = aFS1.getCAS().getTypeSystem();
                    if (ts1.subsumes(ts1.getType(CAS.TYPE_NAME_ANNOTATION), type1)) {
                        if (!equalsAnnotationFS((AnnotationFS) aFS1, (AnnotationFS) aFS2)) {
                            return false;
                        }
                    }
                    // Structured feature values are compared deeply via recursion.
                    // NOTE(review): this recursion runs regardless of the outcome of the
                    // annotation check above (there is no else) -- confirm this is intended.
                    if (!equalsFS(valueFS1, valueFS2)) {
                        return false;
                    }
                }
        }
    }
    // No compared feature differed
    return true;
}
Also used : TypeSystem(org.apache.uima.cas.TypeSystem) ArrayList(java.util.ArrayList) SpanDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanDiffAdapter) RelationDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter) DiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter) SofaFS(org.apache.uima.cas.SofaFS) Feature(org.apache.uima.cas.Feature) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type)

Example 3 with DiffAdapter

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter in project webanno by webanno.

the class CasDiff method addConfiguration.

/**
 * Adds a feature structure to the configuration of the given configuration set that it
 * matches, creating a new configuration if none of the existing ones matches. Finally, the CAS
 * group ID is recorded in the set.
 * <p>
 * For a main position (the position has no feature), the feature structure is matched against
 * the representative of each configuration using {@code equalsFS}. For a sub-position (the
 * position names a link feature), every link in that feature is considered separately and is
 * matched according to the link compare behavior of the position: either by link target (for
 * links carrying the role of the position) or by link role (for links pointing at the target
 * offsets of the position).
 * <p>
 * Roles are compared null-safely: a link without a role matches a position / representative
 * link without a role instead of causing a {@link NullPointerException}.
 *
 * @param aSet
 *            the configuration set to add to.
 * @param aCasGroupId
 *            the ID of the CAS group the feature structure belongs to.
 * @param aFS
 *            the feature structure to add; {@code SofaFS} instances are ignored.
 * @throws IllegalStateException
 *             if the position specifies a link compare behavior that is not handled here.
 */
private void addConfiguration(ConfigurationSet aSet, String aCasGroupId, FeatureStructure aFS) {
    if (aFS instanceof SofaFS) {
        return;
    }
    if (aSet.position.getFeature() == null) {
        // Handle main positions: check if this configuration is already present
        Configuration configuration = null;
        for (Configuration cfg : aSet.configurations) {
            if (equalsFS(cfg.getRepresentative(cases), aFS)) {
                configuration = cfg;
                break;
            }
        }
        // Not found, add new one
        if (configuration == null) {
            configuration = new Configuration(aSet.position);
            aSet.configurations.add(configuration);
        }
        configuration.add(aCasGroupId, aFS);
    } else {
        Feature feat = aFS.getType().getFeatureByBaseName(aSet.position.getFeature());
        // If the type of the feature structure does not declare the feature of the position,
        // there are no configurations for it. Note that the CAS group ID is not recorded in
        // this case either.
        if (feat == null) {
            return;
        }
        // For each slot at the given position in the FS-to-be-added, we need find a
        // corresponding configuration
        ArrayFS links = (ArrayFS) aFS.getFeatureValue(feat);
        for (int i = 0; i < links.size(); i++) {
            FeatureStructure link = links.get(i);
            DiffAdapter adapter = getAdapter(aFS.getType().getName());
            LinkFeatureDecl decl = adapter.getLinkFeature(aSet.position.getFeature());
            // Check if this configuration is already present
            Configuration configuration = null;
            switch(aSet.position.getLinkCompareBehavior()) {
                case LINK_TARGET_AS_LABEL:
                    {
                        // Only links carrying the role of the position are relevant here.
                        // Either role may be null (no role set), so compare null-safely.
                        String role = link.getStringValue(link.getType().getFeatureByBaseName(decl.getRoleFeature()));
                        String positionRole = aSet.position.getRole();
                        boolean sameRole = (role == null) ? positionRole == null : role.equals(positionRole);
                        if (!sameRole) {
                            // Skip this link and go on with the next one
                            continue;
                        }
                        AnnotationFS target = (AnnotationFS) link.getFeatureValue(link.getType().getFeatureByBaseName(decl.getTargetFeature()));
                        for (Configuration cfg : aSet.configurations) {
                            FeatureStructure repFS = cfg.getRepresentative(cases);
                            AID repAID = cfg.getRepresentativeAID();
                            FeatureStructure repLink = ((ArrayFS) repFS.getFeatureValue(repFS.getType().getFeatureByBaseName(decl.getName()))).get(repAID.index);
                            AnnotationFS repTarget = (AnnotationFS) repLink.getFeatureValue(repLink.getType().getFeatureByBaseName(decl.getTargetFeature()));
                            // Compare targets
                            if (equalsAnnotationFS(repTarget, target)) {
                                configuration = cfg;
                                break;
                            }
                        }
                        break;
                    }
                case LINK_ROLE_AS_LABEL:
                    {
                        // Only links pointing at the target offsets of the position are
                        // relevant here
                        AnnotationFS target = (AnnotationFS) link.getFeatureValue(link.getType().getFeatureByBaseName(decl.getTargetFeature()));
                        if (!(target.getBegin() == aSet.position.getLinkTargetBegin() && target.getEnd() == aSet.position.getLinkTargetEnd())) {
                            // Skip this link and go on with the next one
                            continue;
                        }
                        String role = link.getStringValue(link.getType().getFeatureByBaseName(decl.getRoleFeature()));
                        for (Configuration cfg : aSet.configurations) {
                            FeatureStructure repFS = cfg.getRepresentative(cases);
                            AID repAID = cfg.getRepresentativeAID();
                            FeatureStructure repLink = ((ArrayFS) repFS.getFeatureValue(repFS.getType().getFeatureByBaseName(decl.getName()))).get(repAID.index);
                            String linkRole = repLink.getStringValue(repLink.getType().getFeatureByBaseName(decl.getRoleFeature()));
                            // Compare roles; either one may be null (no role set)
                            boolean sameRole = (role == null) ? linkRole == null : role.equals(linkRole);
                            if (sameRole) {
                                configuration = cfg;
                                break;
                            }
                        }
                        break;
                    }
                default:
                    // Report the value that was actually switched on
                    throw new IllegalStateException("Unknown link target comparison mode [" + aSet.position.getLinkCompareBehavior() + "]");
            }
            // Not found, add new one
            if (configuration == null) {
                configuration = new Configuration(aSet.position);
                aSet.configurations.add(configuration);
            }
            configuration.add(aCasGroupId, aFS, aSet.position.getFeature(), i);
        }
    }
    aSet.casGroupIds.add(aCasGroupId);
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) ArrayFS(org.apache.uima.cas.ArrayFS) SofaFS(org.apache.uima.cas.SofaFS) SpanDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanDiffAdapter) RelationDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter) DiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter) AID(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.internal.AID) Feature(org.apache.uima.cas.Feature) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)

Example 4 with DiffAdapter

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter in project webanno by webanno.

the class CasDiff method doDiff.

/**
 * Calculate the differences between CASes. This method scopes the calculation of differences to
 * a span instead of calculating them on the whole text.
 * <p>
 * If the CAS map is empty, or the first list of CASes in it is empty, an empty diff over the
 * span {@code [0, 0]} is returned without performing the sanity check.
 *
 * @param aAdapters
 *            a set of diff adapters telling how the diff algorithm should handle different
 *            features
 * @param aLinkCompareBehavior
 *            the link compare behavior handed on to the created diff
 * @param aCasMap
 *            a set of CASes, each associated with an ID
 * @param aBegin
 *            begin of the span for which differences should be calculated.
 * @param aEnd
 *            end of the span for which differences should be calculated.
 * @return a diff.
 */
public static CasDiff doDiff(Iterable<? extends DiffAdapter> aAdapters, LinkCompareBehavior aLinkCompareBehavior, Map<String, List<CAS>> aCasMap, int aBegin, int aEnd) {
    // Nothing to compare: no groups at all, or the first group holds no CASes
    boolean nothingToDiff = aCasMap.isEmpty() || aCasMap.values().iterator().next().isEmpty();
    if (nothingToDiff) {
        return new CasDiff(0, 0, aAdapters, aLinkCompareBehavior);
    }

    final long begunAt = System.currentTimeMillis();
    sanityCheck(aCasMap);

    CasDiff result = new CasDiff(aBegin, aEnd, aAdapters, aLinkCompareBehavior);
    for (Entry<String, List<CAS>> group : aCasMap.entrySet()) {
        List<CAS> groupCases = group.getValue();
        // The index of a CAS within its group serves as its CAS ID
        for (int casId = 0; casId < groupCases.size(); casId++) {
            // null elements in the list can occur if a user has never worked on a CAS; they
            // are handed on to the diff unchanged
            CAS cas = groupCases.get(casId);
            for (DiffAdapter adapter : aAdapters) {
                result.addCas(group.getKey(), casId, cas, adapter.getType());
            }
        }
    }

    LOG.trace("CASDiff completed in {} ms", System.currentTimeMillis() - begunAt);
    return result;
}
Also used : CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) SpanDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanDiffAdapter) RelationDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter) DiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter)

Example 5 with DiffAdapter

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter in project webanno by webanno.

the class CasDiff method equalsAnnotationFS.

/**
 * Checks whether two annotations are at the same position. The positions are obtained from
 * the diff adapter registered for the type of the first annotation and are compared using
 * {@code compareTo}.
 *
 * @param aFS1
 *            first annotation.
 * @param aFS2
 *            second annotation.
 * @return {@code true} if both annotations are non-null and their positions compare as equal;
 *         {@code false} otherwise, in particular if either annotation is {@code null}.
 */
private boolean equalsAnnotationFS(AnnotationFS aFS1, AnnotationFS aFS2) {
    if (aFS1 != null && aFS2 != null) {
        // The adapter is looked up by the type of the first annotation and used for both
        DiffAdapter positionProvider = getAdapter(aFS1.getType().getName());
        Position first = positionProvider.getPosition(0, aFS1);
        Position second = positionProvider.getPosition(0, aFS2);
        return 0 == first.compareTo(second);
    }
    // At least one side is missing, so there is nothing that could be equal
    return false;
}
Also used : Position(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position) SpanDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanDiffAdapter) RelationDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter) DiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter)

Aggregations

DiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter)15 ArrayList (java.util.ArrayList)8 RelationDiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter)7 SpanDiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanDiffAdapter)7 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)7 DiffResult (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.DiffResult)5 CAS (org.apache.uima.cas.CAS)5 CasDiff (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff)4 DefaultAgreementTraits (de.tudarmstadt.ukp.clarin.webanno.agreement.measures.DefaultAgreementTraits)3 CodingAgreementResult (de.tudarmstadt.ukp.clarin.webanno.agreement.results.coding.CodingAgreementResult)3 ConfigurationSet (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.ConfigurationSet)3 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)3 Arrays.asList (java.util.Arrays.asList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 Map (java.util.Map)3 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)3 IAgreementMeasure (org.dkpro.statistics.agreement.IAgreementMeasure)3 ColoringStrategy (de.tudarmstadt.ukp.clarin.webanno.api.annotation.coloring.ColoringStrategy)2 AnnotatorState (de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState)2