use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
the class AgreementUtils method makeStudy.
/**
 * Builds the coding study for a single feature of a single layer and, while doing so, sorts
 * the configuration sets of the diff into the bookkeeping lists that are handed to the returned
 * {@link AgreementResult}.
 * <p>
 * Positions whose type differs from {@code aType} are skipped without being recorded anywhere.
 * A position only contributes an item to the study if it passes all checks below; otherwise it
 * is recorded as irrelevant, plural (stacked) or incomplete and the next position is processed.
 *
 * @param aDiff
 *            the diff result providing the positions, their configuration sets and the diff
 *            adapters.
 * @param aUsers
 *            the users to consider; a sorted copy is used to fix the order of the values in
 *            each study item.
 * @param aType
 *            name of the type (layer) for which agreement is calculated.
 * @param aFeature
 *            base name of the feature for which agreement is calculated.
 * @param aExcludeIncomplete
 *            if {@code true}, positions with a missing user or a {@code null} label are not
 *            added to the study; if {@code false}, they are added with {@code null} values.
 * @param aNullLabelsAsEmpty
 *            if {@code true}, a {@code null} label is replaced by the empty string before the
 *            incompleteness check.
 * @param aCasMap
 *            the CASes; used to look up the type system and to resolve feature structures.
 * @return the agreement result wrapping the study and the bookkeeping lists.
 * @throws IllegalArgumentException
 *             if the type exists but has no feature called {@code aFeature}.
 * @throws IllegalStateException
 *             if a link position uses an unknown link compare behavior.
 */
private static AgreementResult makeStudy(DiffResult aDiff, Collection<String> aUsers, String aType, String aFeature, boolean aExcludeIncomplete, boolean aNullLabelsAsEmpty, Map<String, List<JCas>> aCasMap) {
    // Work on a sorted copy so that the caller's collection is left untouched and the column
    // order of the study items is stable.
    List<String> users = new ArrayList<>(aUsers);
    Collections.sort(users);
    // Bookkeeping lists - a configuration set may end up in more than one of them.
    List<ConfigurationSet> completeSets = new ArrayList<>();
    List<ConfigurationSet> setsWithDifferences = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByPosition = new ArrayList<>();
    List<ConfigurationSet> incompleteSetsByLabel = new ArrayList<>();
    List<ConfigurationSet> pluralitySets = new ArrayList<>();
    List<ConfigurationSet> irrelevantSets = new ArrayList<>();
    CodingAnnotationStudy study = new CodingAnnotationStudy(users.size());
    // Check if the feature we are looking at is a primitive feature or a link feature
    // We do this by looking it up in the first available CAS. Mind that at this point all
    // CASes should have exactly the same typesystem.
    JCas someCas = findSomeCas(aCasMap);
    if (someCas == null) {
        // Well... there is NOTHING here!
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurtionSet(p)));
        return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }
    TypeSystem ts = someCas.getTypeSystem();
    // The type does not exist in the type system. We should just do the right thing here
    // which is: do nothing
    if (ts.getType(aType) == null) {
        // All positions are irrelevant
        aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurtionSet(p)));
        return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
    }
    // Check that the feature really exists instead of just getting a NPE later
    if (ts.getType(aType).getFeatureByBaseName(aFeature) == null) {
        throw new IllegalArgumentException("Type [" + aType + "] has no feature called [" + aFeature + "]");
    }
    boolean isPrimitiveFeature = ts.getType(aType).getFeatureByBaseName(aFeature).getRange().isPrimitive();
    nextPosition: for (Position p : aDiff.getPositions()) {
        ConfigurationSet cfgSet = aDiff.getConfigurtionSet(p);
        // Only calculate agreement for the given layer
        if (!cfgSet.getPosition().getType().equals(aType)) {
            // We don't even consider these as irrelevant, they are just filtered out
            continue;
        }
        // If the feature on a position is set, then it is a subposition
        boolean isSubPosition = p.getFeature() != null;
        // this is an inverted XOR! It is true for "primitive feature on a sub-position" and
        // for "non-primitive feature on a primary position" - neither combination is
        // evaluated here.
        if (!(isPrimitiveFeature ^ isSubPosition)) {
            irrelevantSets.add(cfgSet);
            continue;
        }
        // Check if the sub-position belongs to the feature we are looking at or to a different
        // feature
        if (isSubPosition && !aFeature.equals(cfgSet.getPosition().getFeature())) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }
        // If none of the current users has made any annotation at this position, then skip it
        if (users.stream().filter(u -> cfgSet.getCasGroupIds().contains(u)).count() == 0) {
            irrelevantSets.add(cfgSet);
            continue nextPosition;
        }
        // One value per user, in the order of the sorted user list
        Object[] values = new Object[users.size()];
        int i = 0;
        for (String user : users) {
            // The user is not among the CAS group IDs of this configuration set.
            // NOTE(review): with aExcludeIncomplete == false the set is added once per missing
            // user and may later also be added to completeSets - confirm this is intended.
            if (!cfgSet.getCasGroupIds().contains(user)) {
                incompleteSetsByPosition.add(cfgSet);
                if (aExcludeIncomplete) {
                    // Record as incomplete
                    continue nextPosition;
                } else {
                    // Record as missing value
                    values[i] = null;
                    i++;
                    continue;
                }
            }
            // Make sure a single user didn't do multiple alternative annotations at a single
            // position. So there is currently no support for calculating agreement on stacking
            // annotations.
            List<Configuration> cfgs = cfgSet.getConfigurations(user);
            if (cfgs.size() > 1) {
                pluralitySets.add(cfgSet);
                continue nextPosition;
            }
            Configuration cfg = cfgs.get(0);
            // Check if source and/or targets of a relation are stacked
            if (cfg.getPosition() instanceof ArcPosition) {
                ArcPosition pos = (ArcPosition) cfg.getPosition();
                FeatureStructure arc = cfg.getFs(user, pos.getCasId(), aCasMap);
                ArcDiffAdapter adapter = (ArcDiffAdapter) aDiff.getDiffAdapter(pos.getType());
                // Check if the source of the relation is stacked, i.e. more than one annotation
                // of the source's type is found at the source's offsets
                AnnotationFS source = FSUtil.getFeature(arc, adapter.getSourceFeature(), AnnotationFS.class);
                List<AnnotationFS> sourceCandidates = CasUtil.selectAt(arc.getCAS(), source.getType(), source.getBegin(), source.getEnd());
                if (sourceCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }
                // Check if the target of the relation is stacked
                AnnotationFS target = FSUtil.getFeature(arc, adapter.getTargetFeature(), AnnotationFS.class);
                List<AnnotationFS> targetCandidates = CasUtil.selectAt(arc.getCAS(), target.getType(), target.getBegin(), target.getEnd());
                if (targetCandidates.size() > 1) {
                    pluralitySets.add(cfgSet);
                    continue nextPosition;
                }
            }
            // Only calculate agreement for the given feature
            FeatureStructure fs = cfg.getFs(user, cfg.getPosition().getCasId(), aCasMap);
            // BEGIN PARANOIA
            // The feature must have the same kind (primitive or not) in this CAS as in the CAS
            // used for the lookup above
            assert fs.getType().getFeatureByBaseName(aFeature).getRange().isPrimitive() == isPrimitiveFeature;
            // A primitive feature on a sub-position was already filtered out above, so we
            // should never have gotten here in the first place.
            assert !isPrimitiveFeature || !isSubPosition;
            if (isPrimitiveFeature && !isSubPosition) {
                // Primitive feature / primary position
                values[i] = getFeature(fs, aFeature);
            } else if (!isPrimitiveFeature && isSubPosition) {
                // Link feature / sub-position: pick the link addressed by the user's
                // annotation ID index and derive the label from it
                ArrayFS links = (ArrayFS) fs.getFeatureValue(fs.getType().getFeatureByBaseName(aFeature));
                FeatureStructure link = links.get(cfg.getAID(user).index);
                switch(cfg.getPosition().getLinkCompareBehavior()) {
                    case LINK_TARGET_AS_LABEL:
                        // FIXME The target feature name should be obtained from the feature
                        // definition!
                        AnnotationFS target = (AnnotationFS) link.getFeatureValue(link.getType().getFeatureByBaseName("target"));
                        // The label is built from the offsets and the covered text of the target
                        values[i] = target.getBegin() + "-" + target.getEnd() + " [" + target.getCoveredText() + "]";
                        break;
                    case LINK_ROLE_AS_LABEL:
                        // FIXME The role feature name should be obtained from the feature
                        // definition!
                        String role = link.getStringValue(link.getType().getFeatureByBaseName("role"));
                        values[i] = role;
                        break;
                    default:
                        throw new IllegalStateException("Unknown link target comparison mode [" + cfg.getPosition().getLinkCompareBehavior() + "]");
                }
            } else {
                throw new IllegalStateException("Should never get here: primitive: " + fs.getType().getFeatureByBaseName(aFeature).getRange().isPrimitive() + "; subpos: " + isSubPosition);
            }
            // If requested, treat null labels as empty labels so that they take part in the
            // agreement calculation. The empty label is still a valid label.
            if (aNullLabelsAsEmpty && values[i] == null) {
                values[i] = "";
            }
            // "null" cannot be used in agreement calculations. We treat these as incomplete
            if (values[i] == null) {
                incompleteSetsByLabel.add(cfgSet);
                if (aExcludeIncomplete) {
                    continue nextPosition;
                }
            }
            i++;
        }
        // NOTE(review): only the values of the first two users are compared here, and the
        // array access requires at least two users - confirm callers always pass exactly two.
        if (ObjectUtils.notEqual(values[0], values[1])) {
            setsWithDifferences.add(cfgSet);
        }
        // Check that the position is either a primary position or a sub-position of the
        // feature we are calculating agreement over
        assert cfgSet.getPosition().getFeature() == null || cfgSet.getPosition().getFeature().equals(aFeature);
        completeSets.add(cfgSet);
        study.addItemAsArray(values);
    }
    return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
the class CasDiff2 method equalsFS.
/**
 * Compare two feature structures to each other based on their label features.
 * <p>
 * Two feature structures are considered equal if they are the same object, or live in the same
 * CAS at the same address, or have the same type name and agree on every label feature
 * reported by the diff adapter registered for that type. Primitive and string features are
 * compared by value. For any other feature the two values are compared by a recursive call to
 * this method; additionally, if the type of the compared feature structures is an annotation
 * type, the two feature structures themselves are compared using
 * {@code equalsAnnotationFS}.
 * <p>
 * If no diff adapter is registered for the type, a warning is logged and the feature structures
 * are reported as equal. If link features are not to be recursed into, features for which the
 * adapter reports a link feature are left out of the comparison.
 *
 * @param aFS1
 *            first feature structure.
 * @param aFS2
 *            second feature structure.
 * @return {@code true} if they are equal.
 */
public boolean equalsFS(FeatureStructure aFS1, FeatureStructure aFS2) {
    // Trivial case: same object (also covers both being null)
    if (aFS1 == aFS2) {
        return true;
    }
    // Null check: exactly one of them is null
    if (aFS1 == null || aFS2 == null) {
        return false;
    }
    // Trivial case: same feature structure in the same CAS
    if (aFS1.getCAS() == aFS2.getCAS() && getAddr(aFS1) == getAddr(aFS2)) {
        return true;
    }
    Type type1 = aFS1.getType();
    Type type2 = aFS2.getType();
    String typeName = type1.getName();
    // Types must be the same
    if (!typeName.equals(type2.getName())) {
        return false;
    }
    assert type1.getNumberOfFeatures() == type2.getNumberOfFeatures();
    // Sort features by name to be independent over implementation details that may change the
    // order of the features as returned from Type.getFeatures(). The sorted names are cached
    // per type name so the work is done only once per type.
    String[] cachedSortedFeatures = sortedFeaturesCache.get(typeName);
    if (cachedSortedFeatures == null) {
        List<String> featureNames = new ArrayList<>();
        for (Feature f : type1.getFeatures()) {
            featureNames.add(f.getShortName());
        }
        // A null comparator requests natural (lexicographic) ordering
        featureNames.sort(null);
        cachedSortedFeatures = featureNames.toArray(new String[0]);
        sortedFeaturesCache.put(typeName, cachedSortedFeatures);
    }
    DiffAdapter adapter = typeAdapters.get(typeName);
    if (adapter == null) {
        log.warn("No diff adapter for type [" + type1.getName() + "] -- ignoring!");
        return true;
    }
    // Only consider label features. In particular these must not include position features
    // such as begin, end, etc. Work on a copy so the cached array is never modified.
    List<String> sortedFeatures = new ArrayList<>(asList(cachedSortedFeatures));
    Set<String> labelFeatures = adapter.getLabelFeatures();
    sortedFeatures.removeIf(f -> !labelFeatures.contains(f));
    if (!recurseIntoLinkFeatures) {
        // #1795 Chili REC: We can/should change CasDiff2 such that it does not recurse into
        // link features (or rather into any features that are covered by their own
        // sub-positions). So when comparing two spans that differ only in their slots
        // (sub-positions) the main position could still exhibit agreement.
        sortedFeatures.removeIf(f -> adapter.getLinkFeature(f) != null);
    }
    for (String feature : sortedFeatures) {
        // Look the feature up on each side separately - the two feature structures may come
        // from different CASes
        Feature f1 = type1.getFeatureByBaseName(feature);
        Feature f2 = type2.getFeatureByBaseName(feature);
        switch (f1.getRange().getName()) {
            case CAS.TYPE_NAME_BOOLEAN:
                if (aFS1.getBooleanValue(f1) != aFS2.getBooleanValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_BYTE:
                if (aFS1.getByteValue(f1) != aFS2.getByteValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_DOUBLE:
                if (aFS1.getDoubleValue(f1) != aFS2.getDoubleValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_FLOAT:
                if (aFS1.getFloatValue(f1) != aFS2.getFloatValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_INTEGER:
                if (aFS1.getIntValue(f1) != aFS2.getIntValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_LONG:
                if (aFS1.getLongValue(f1) != aFS2.getLongValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_SHORT:
                if (aFS1.getShortValue(f1) != aFS2.getShortValue(f2)) {
                    return false;
                }
                break;
            case CAS.TYPE_NAME_STRING:
                // Null-safe string comparison
                if (!StringUtils.equals(aFS1.getStringValue(f1), aFS2.getStringValue(f2))) {
                    return false;
                }
                break;
            default:
            {
                // Must be some kind of feature structure then
                FeatureStructure valueFS1 = aFS1.getFeatureValue(f1);
                FeatureStructure valueFS2 = aFS2.getFeatureValue(f2);
                // Ignore the SofaFS - we already checked that the CAS is the same.
                if (valueFS1 instanceof SofaFS) {
                    // Proceeds with the next feature of the enclosing for-loop
                    continue;
                }
                // If the compared feature structures are annotations, make sure they are at
                // the same position.
                //
                // Q: Why do we not check if they are the same based on the CAS address?
                // A: Because we are checking across CASes and addresses can differ.
                //
                // NOTE(review): this tests the type of the owning feature structures
                // (type1), not the type of the feature value, and the values are compared
                // recursively below in any case. Earlier comments suggested that annotation
                // values should only be compared by position without going deeper - confirm
                // which behavior is intended before changing it.
                TypeSystem ts1 = aFS1.getCAS().getTypeSystem();
                if (ts1.subsumes(ts1.getType(CAS.TYPE_NAME_ANNOTATION), type1)) {
                    if (!equalsAnnotationFS((AnnotationFS) aFS1, (AnnotationFS) aFS2)) {
                        return false;
                    }
                }
                // Compare the structured feature values deeply.
                if (!equalsFS(valueFS1, valueFS2)) {
                    return false;
                }
            }
        }
    }
    return true;
}
use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
the class CasDoctorUtils method collectIndexed.
/**
 * Collects all feature structures returned by the index repository of the given CAS for the
 * top type.
 *
 * @param aCas
 *            the CAS to inspect.
 * @return the indexed feature structures in a set ordered by their low-level FS reference.
 */
public static Set<FeatureStructure> collectIndexed(CAS aCas) {
    LowLevelCAS lowLevelCas = aCas.getLowLevelCAS();
    // Order (and de-duplicate) the feature structures by their low-level reference
    Comparator<FeatureStructure> byFsRef = Comparator.comparingInt(lowLevelCas::ll_getFSRef);
    Set<FeatureStructure> indexed = new TreeSet<>(byFsRef);
    FSIterator<FeatureStructure> iterator = aCas.getIndexRepository().getAllIndexedFS(aCas.getTypeSystem().getTopType());
    while (iterator.hasNext()) {
        indexed.add(iterator.next());
    }
    return indexed;
}
use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
the class CasDoctorUtils method getNonIndexedFSesWithOwner.
/**
 * Determines the annotations that are collected starting from the indexed feature structures
 * of the given CAS but that are not mapped to themselves, together with the feature structure
 * they are mapped to.
 *
 * @param aCas
 *            the CAS to inspect.
 * @return a map ordered by the low-level FS reference of its keys. Presumably each key is a
 *         non-indexed annotation and each value the indexed feature structure through which
 *         it was reached - this depends on {@code collect}, which is not shown here.
 */
public static Map<FeatureStructure, FeatureStructure> getNonIndexedFSesWithOwner(CAS aCas) {
    TypeSystem typeSystem = aCas.getTypeSystem();
    LowLevelCAS lowLevelCas = aCas.getLowLevelCAS();
    Set<FeatureStructure> indexed = collectIndexed(aCas);
    Comparator<FeatureStructure> byFsRef = Comparator.comparingInt(lowLevelCas::ll_getFSRef);
    Map<FeatureStructure, FeatureStructure> reachable = new TreeMap<>(byFsRef);
    // Start the collection from every indexed feature structure, passing it in as both of the
    // last two arguments
    FSIterator<FeatureStructure> iterator = aCas.getIndexRepository().getAllIndexedFS(typeSystem.getTopType());
    while (iterator.hasNext()) {
        FeatureStructure root = iterator.next();
        collect(reachable, indexed, root, root);
    }
    // Drop everything that is not an annotation as well as every entry that is mapped to
    // itself (the indexed ones). What is left are the non-indexed annotations.
    reachable.entrySet().removeIf(entry -> {
        boolean isAnnotation = typeSystem.subsumes(aCas.getAnnotationType(), entry.getKey().getType());
        boolean isOwnOwner = entry.getKey() == entry.getValue();
        return !isAnnotation || isOwnOwner;
    });
    return reachable;
}
use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
the class LinksReachableThroughChainsCheck method check.
/**
 * Checks for every chain layer of the project that each link annotation in the CAS can be
 * reached from some chain by following the {@code first} and {@code next} features. For each
 * layer with unreachable links, one summary error message and one error message per link are
 * added to the given message list.
 *
 * @param aProject
 *            the project whose annotation layers are inspected.
 * @param aCas
 *            the CAS to check.
 * @param aMessages
 *            receives the error messages.
 * @return {@code true} if no unreachable link was found, {@code false} otherwise.
 */
@Override
public boolean check(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    boolean allReachable = true;
    for (AnnotationLayer layer : annotationService.listAnnotationLayer(aProject)) {
        // Only chain layers have chains and links
        boolean isChainLayer = WebAnnoConst.CHAIN_TYPE.equals(layer.getType());
        if (!isChainLayer) {
            continue;
        }
        Type chainType;
        Type linkType;
        try {
            chainType = getType(aCas, layer.getName() + "Chain");
            linkType = getType(aCas, layer.getName() + "Link");
        } catch (IllegalArgumentException e) {
            // Presumably the types do not exist in this CAS, so there is nothing to check
            // for this layer
            continue;
        }
        List<FeatureStructure> chains = new ArrayList<>(selectFS(aCas, chainType));
        // Start with all links and strike out every link that is visited while walking the
        // chains; whatever remains afterwards is unreachable
        List<AnnotationFS> unreached = new ArrayList<>(select(aCas, linkType));
        for (FeatureStructure chain : chains) {
            for (AnnotationFS link = FSUtil.getFeature(chain, "first", AnnotationFS.class);
                    link != null;
                    link = FSUtil.getFeature(link, "next", AnnotationFS.class)) {
                unreached.remove(link);
            }
        }
        if (unreached.isEmpty()) {
            continue;
        }
        allReachable = false;
        aMessages.add(new LogMessage(this, LogLevel.ERROR, "CoreferenceLinks not reachable through chains: %d", unreached.size()));
        for (AnnotationFS orphan : unreached) {
            aMessages.add(new LogMessage(this, LogLevel.ERROR, "Unreachable CoreferenceLink [%s]", orphan));
        }
    }
    return allReachable;
}
Aggregations