Search in sources :

Example 1 with SofaFS

use of org.apache.uima.cas.SofaFS in project webanno by webanno.

the class CasDiff2 method equalsFS.

/**
 * Compare two feature structures to each other.
 * <p>
 * Two references to the same object (including two {@code null}s) are equal, and so are two
 * feature structures that live in the same CAS at the same address. Otherwise both must have the
 * same type name and a diff adapter must be registered for that type; if no adapter is
 * registered, a warning is logged and the pair is treated as equal.
 * <p>
 * Only the label features reported by the adapter are compared. Link features are skipped unless
 * {@code recurseIntoLinkFeatures} is set. Primitive and string features are compared by value.
 * Feature-structure-valued features are compared by a recursive call (values that are
 * {@link SofaFS} are skipped), so nested structures only contribute if an adapter is registered
 * for their type as well. If nested annotations are relevant, their type should be added to the
 * entry types so that they are checked and grouped separately.
 *
 * @param aFS1
 *            first feature structure, may be {@code null}.
 * @param aFS2
 *            second feature structure, may be {@code null}.
 * @return {@code true} if they are equal.
 */
public boolean equalsFS(FeatureStructure aFS1, FeatureStructure aFS2) {
    // Trivial case: same reference (this also covers both being null)
    if (aFS1 == aFS2) {
        return true;
    }
    // Exactly one of the two is null
    if (aFS1 == null || aFS2 == null) {
        return false;
    }
    // Trivial case: same CAS and same address
    if (aFS1.getCAS() == aFS2.getCAS() && getAddr(aFS1) == getAddr(aFS2)) {
        return true;
    }
    Type type1 = aFS1.getType();
    Type type2 = aFS2.getType();
    // Types must be the same
    if (!type1.getName().equals(type2.getName())) {
        return false;
    }
    assert type1.getNumberOfFeatures() == type2.getNumberOfFeatures();
    // Sort features by name to be independent over implementation details that may change the
    // order of the features as returned from Type.getFeatures().
    String[] cachedSortedFeatures = sortedFeaturesCache.get(type1.getName());
    if (cachedSortedFeatures == null) {
        List<String> featureNames = new ArrayList<>(type1.getNumberOfFeatures());
        for (Feature f : type1.getFeatures()) {
            featureNames.add(f.getShortName());
        }
        // Actually establish the order that the cache name and the comment above promise
        featureNames.sort(String::compareTo);
        cachedSortedFeatures = featureNames.toArray(new String[0]);
        sortedFeaturesCache.put(type1.getName(), cachedSortedFeatures);
    }
    DiffAdapter adapter = typeAdapters.get(type1.getName());
    if (adapter == null) {
        log.warn("No diff adapter for type [" + type1.getName() + "] -- ignoring!");
        return true;
    }
    // Only consider label features. In particular these must not include position features
    // such as begin, end, etc.
    List<String> sortedFeatures = new ArrayList<>(asList(cachedSortedFeatures));
    Set<String> labelFeatures = adapter.getLabelFeatures();
    sortedFeatures.removeIf(f -> !labelFeatures.contains(f));
    if (!recurseIntoLinkFeatures) {
        // #1795 Chili REC: We can/should change CasDiff2 such that it does not recurse into
        // link features (or rather into any features that are covered by their own
        // sub-positions). So when comparing two spans that differ only in their slots
        // (sub-positions) the main position could still exhibit agreement.
        sortedFeatures.removeIf(f -> adapter.getLinkFeature(f) != null);
    }
    for (String feature : sortedFeatures) {
        Feature f1 = type1.getFeatureByBaseName(feature);
        Feature f2 = type2.getFeatureByBaseName(feature);
        // The feature-count assertion above is disabled in production, and the name cache is
        // keyed by type name only, so a lookup may come back empty when the two sides use
        // differing type definitions. Decide explicitly instead of failing with an NPE.
        if (f1 == null && f2 == null) {
            // Feature exists on neither side - nothing to compare
            continue;
        }
        if (f1 == null || f2 == null) {
            // Feature exists on one side only - the structures cannot be equal
            return false;
        }
        switch(f1.getRange().getName()) {
            case CAS.TYPE_NAME_BOOLEAN:
                if (aFS1.getBooleanValue(f1) != aFS2.getBooleanValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_BYTE:
                if (aFS1.getByteValue(f1) != aFS2.getByteValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_DOUBLE:
                if (aFS1.getDoubleValue(f1) != aFS2.getDoubleValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_FLOAT:
                if (aFS1.getFloatValue(f1) != aFS2.getFloatValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_INTEGER:
                if (aFS1.getIntValue(f1) != aFS2.getIntValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_LONG:
                if (aFS1.getLongValue(f1) != aFS2.getLongValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_SHORT:
                if (aFS1.getShortValue(f1) != aFS2.getShortValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_STRING:
                if (!StringUtils.equals(aFS1.getStringValue(f1), aFS2.getStringValue(f2))) {
                    return false;
                }
                break;
            default:
                {
                    // Must be some kind of feature structure then
                    FeatureStructure valueFS1 = aFS1.getFeatureValue(f1);
                    FeatureStructure valueFS2 = aFS2.getFeatureValue(f2);
                    // Ignore the SofaFS - we already checked that the CAS is the same.
                    if (valueFS1 instanceof SofaFS) {
                        // Moves on to the next feature of the enclosing for-loop
                        continue;
                    }
                    // If the structure owning this feature is an annotation, make sure both
                    // owners are at the same position before looking at the feature value.
                    // 
                    // Q: Why do we not check if they are the same based on the CAS address?
                    // A: Because we are checking across CASes and addresses can differ.
                    // 
                    // Q: Why do we not check recursively?
                    // A: Because e.g. for chains, this would mean we consider the whole chain as a
                    // single annotation, but we want to consider each link as an annotation
                    // 
                    // NOTE(review): this tests the type and position of the owners
                    // (aFS1/aFS2), not of the feature values (valueFS1/valueFS2), and the
                    // recursion below happens in either case. Earlier comments described an
                    // offsets-only check of the values - confirm which behavior is intended.
                    TypeSystem ts1 = aFS1.getCAS().getTypeSystem();
                    if (ts1.subsumes(ts1.getType(CAS.TYPE_NAME_ANNOTATION), type1)) {
                        if (!equalsAnnotationFS((AnnotationFS) aFS1, (AnnotationFS) aFS2)) {
                            return false;
                        }
                    }
                    // Compare the feature values deeply. The recursion only inspects features
                    // of types that have a diff adapter; other types are reported equal there.
                    if (!equalsFS(valueFS1, valueFS2)) {
                        return false;
                    }
                }
        }
    }
    return true;
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) TypeSystem(org.apache.uima.cas.TypeSystem) ArrayList(java.util.ArrayList) SofaFS(org.apache.uima.cas.SofaFS) Feature(org.apache.uima.cas.Feature) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)

Aggregations

AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)1 ArrayList (java.util.ArrayList)1 Feature (org.apache.uima.cas.Feature)1 FeatureStructure (org.apache.uima.cas.FeatureStructure)1 SofaFS (org.apache.uima.cas.SofaFS)1 Type (org.apache.uima.cas.Type)1 TypeSystem (org.apache.uima.cas.TypeSystem)1 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)1