use of org.dishevelled.bio.assembly.gfa2.Position in project webanno by webanno.
the class AgreementUtils method configurationSetsWithItemsToCsv.
private static void configurationSetsWithItemsToCsv(CSVPrinter aOut, AgreementResult aAgreement, List<ConfigurationSet> aSets) throws IOException {
List<String> headers = new ArrayList<>(asList("Type", "Collection", "Document", "Layer", "Feature", "Position"));
headers.addAll(aAgreement.getCasGroupIds());
aOut.printRecord(headers);
int i = 0;
for (ICodingAnnotationItem item : aAgreement.getStudy().getItems()) {
Position pos = aSets.get(i).getPosition();
List<String> values = new ArrayList<>();
values.add(pos.getClass().getSimpleName());
values.add(pos.getCollectionId());
values.add(pos.getDocumentId());
values.add(pos.getType());
values.add(aAgreement.getFeature());
values.add(aSets.get(i).getPosition().toMinimalString());
for (IAnnotationUnit unit : item.getUnits()) {
values.add(String.valueOf(unit.getCategory()));
}
aOut.printRecord(values);
i++;
}
}
use of org.dishevelled.bio.assembly.gfa2.Position in project dishevelled-bio by heuermh.
the class Gfa1ToGfa2 method call.
@Override
public Integer call() throws Exception {
PrintWriter writer = null;
try {
writer = writer(outputGfa2File);
final PrintWriter w = writer;
Gfa1Reader.stream(reader(inputGfa1File), new Gfa1Adapter() {
@Override
public boolean header(final Header header) {
// convert VN:Z:1.0 to VN:Z:2.0 annotation if present
if (header.getAnnotations().containsKey("VN")) {
if (!"1.0".equals(header.getAnnotations().get("VN").getValue())) {
throw new RuntimeException("cannot convert input as GFA 1.0, was " + header.getAnnotations().get("VN").getValue());
}
Map<String, Annotation> annotations = new HashMap<String, Annotation>();
annotations.put("VN", new Annotation("VN", "Z", "2.0"));
for (Annotation annotation : header.getAnnotations().values()) {
if (!"VN".equals(annotation.getName())) {
annotations.put(annotation.getName(), annotation);
}
}
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(annotations), w);
} else {
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Header(header.getAnnotations()), w);
}
return true;
}
@Override
public boolean segment(final Segment segment) {
if (segment.getSequence() != null) {
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), segment.getSequence().length(), segment.getSequence(), segment.getAnnotations()), w);
} else if (segment.getAnnotations().containsKey("LN")) {
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), Integer.parseInt(segment.getAnnotations().get("LN").getValue()), segment.getSequence(), segment.getAnnotations()), w);
} else {
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Segment(segment.getId(), 0, segment.getSequence(), segment.getAnnotations()), w);
}
return true;
}
@Override
public boolean link(final Link link) {
Position unknown = new Position(0, false);
Alignment alignment = link.getOverlap() == null ? null : Alignment.valueOf(link.getOverlap());
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(link.getSource()), toGfa2Reference(link.getTarget()), unknown, unknown, unknown, unknown, alignment, link.getAnnotations()), w);
return true;
}
@Override
public boolean containment(final Containment containment) {
Position unknown = new Position(0, false);
Position targetStart = new Position(containment.getPosition(), false);
Alignment alignment = containment.getOverlap() == null ? null : Alignment.valueOf(containment.getOverlap());
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Edge(null, toGfa2Reference(containment.getContainer()), toGfa2Reference(containment.getContained()), unknown, unknown, targetStart, unknown, alignment, containment.getAnnotations()), w);
return true;
}
@Override
public boolean path(final Path path) {
Gfa2Writer.write(new org.dishevelled.bio.assembly.gfa2.Path(path.getName(), toGfa2References(path.getSegments()), path.getAnnotations()), w);
return true;
}
});
return 0;
} finally {
try {
writer.close();
} catch (Exception e) {
// empty
}
}
}
use of org.dishevelled.bio.assembly.gfa2.Position in project webanno by webanno.
the class MergeCas method reMergeCas.
/**
* Using {@code DiffResult}, determine the annotations to be deleted from the randomly generated
* MergeCase. The initial Merge CAs is stored under a name {@code CurationPanel#CURATION_USER}.
* <p>
* Any similar annotations stacked in a {@code CasDiff2.Position} will be assumed a difference
* <p>
* Any two annotation with different value will be assumed a difference
* <p>
* Any non stacked empty/null annotations are assumed agreement
* <p>
* Any non stacked annotations with similar values for each of the features are assumed
* agreement
* <p>
* Any two link mode / slotable annotations which agree on the base features are assumed
* agreement
*
* @param aDiff
* the {@code CasDiff2.DiffResult}
* @param aJCases
* a map of{@code JCas}s for each users and the random merge
* @return the actual merge {@code JCas}
*/
public static JCas reMergeCas(DiffResult aDiff, Map<String, JCas> aJCases) {
Set<FeatureStructure> slotFeaturesToReset = new HashSet<>();
Set<FeatureStructure> annotationsToDelete = new HashSet<>();
Set<String> users = aJCases.keySet();
for (Position position : aDiff.getPositions()) {
Map<String, List<FeatureStructure>> annosPerUser = new HashMap<>();
ConfigurationSet cfgs = aDiff.getConfigurtionSet(position);
if (cfgs.getConfigurations(WebAnnoConst.CURATION_USER).isEmpty()) {
// annotations
continue;
}
AnnotationFS mergeAnno = (AnnotationFS) cfgs.getConfigurations(WebAnnoConst.CURATION_USER).get(0).getFs(WebAnnoConst.CURATION_USER, aJCases);
// Get Annotations per user in this position
getAllAnnosOnPosition(aJCases, annosPerUser, users, mergeAnno);
for (FeatureStructure mergeFs : annosPerUser.get(WebAnnoConst.CURATION_USER)) {
// incomplete annotations
if (aJCases.size() != annosPerUser.size()) {
annotationsToDelete.add(mergeFs);
} else // agreed and not stacked
if (isAgree(mergeFs, annosPerUser)) {
Type t = mergeFs.getType();
Feature sourceFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_SOURCE);
Feature targetFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_TARGET);
// Is this a relation?
if (sourceFeat != null && targetFeat != null) {
AnnotationFS source = (AnnotationFS) mergeFs.getFeatureValue(sourceFeat);
AnnotationFS target = (AnnotationFS) mergeFs.getFeatureValue(targetFeat);
// all span anno on this source positions
Map<String, List<FeatureStructure>> sourceAnnosPerUser = new HashMap<>();
// all span anno on this target positions
Map<String, List<FeatureStructure>> targetAnnosPerUser = new HashMap<>();
getAllAnnosOnPosition(aJCases, sourceAnnosPerUser, users, source);
getAllAnnosOnPosition(aJCases, targetAnnosPerUser, users, target);
if (isAgree(source, sourceAnnosPerUser) && isAgree(target, targetAnnosPerUser)) {
slotFeaturesToReset.add(mergeFs);
} else {
annotationsToDelete.add(mergeFs);
}
} else {
slotFeaturesToReset.add(mergeFs);
}
} else // disagree or stacked annotations
{
annotationsToDelete.add(mergeFs);
}
// remove dangling rels
// setDanglingRelToDel(aJCases.get(CurationPanel.CURATION_USER),
// mergeFs, annotationsToDelete);
}
}
// remove annotations that do not agree or are a stacked ones
for (FeatureStructure fs : annotationsToDelete) {
if (!slotFeaturesToReset.contains(fs)) {
JCas mergeCas = aJCases.get(WebAnnoConst.CURATION_USER);
// Check if this difference is on POS, STEM and LEMMA (so remove from the token too)
Type type = fs.getType();
int fsBegin = ((AnnotationFS) fs).getBegin();
int fsEnd = ((AnnotationFS) fs).getEnd();
if (type.getName().equals(POS.class.getName())) {
Token t = JCasUtil.selectCovered(mergeCas, Token.class, fsBegin, fsEnd).get(0);
t.setPos(null);
}
if (type.getName().equals(Stem.class.getName())) {
Token t = JCasUtil.selectCovered(mergeCas, Token.class, fsBegin, fsEnd).get(0);
t.setStem(null);
}
if (type.getName().equals(Lemma.class.getName())) {
Token t = JCasUtil.selectCovered(mergeCas, Token.class, fsBegin, fsEnd).get(0);
t.setLemma(null);
}
if (type.getName().equals(MorphologicalFeatures.class.getName())) {
Token t = JCasUtil.selectCovered(mergeCas, Token.class, fsBegin, fsEnd).get(0);
t.setMorph(null);
}
mergeCas.removeFsFromIndexes(fs);
}
}
// if slot bearing annotation, clean
for (FeatureStructure baseFs : slotFeaturesToReset) {
for (Feature roleFeature : baseFs.getType().getFeatures()) {
if (isLinkMode(baseFs, roleFeature)) {
// FeatureStructure roleFs = baseFs.getFeatureValue(f);
ArrayFS roleFss = (ArrayFS) WebAnnoCasUtil.getFeatureFS(baseFs, roleFeature.getShortName());
if (roleFss == null) {
continue;
}
Map<String, ArrayFS> roleAnnosPerUser = new HashMap<>();
setAllRoleAnnosOnPosition(aJCases, roleAnnosPerUser, users, baseFs, roleFeature);
List<FeatureStructure> linkFSes = new LinkedList<>(Arrays.asList(roleFss.toArray()));
for (FeatureStructure roleFs : roleFss.toArray()) {
if (isRoleAgree(roleFs, roleAnnosPerUser)) {
for (Feature targetFeature : roleFs.getType().getFeatures()) {
if (isBasicFeature(targetFeature)) {
continue;
}
if (!targetFeature.getShortName().equals("target")) {
continue;
}
AnnotationFS targetFs = (AnnotationFS) roleFs.getFeatureValue(targetFeature);
if (targetFs == null) {
continue;
}
Map<String, List<FeatureStructure>> targetAnnosPerUser = new HashMap<>();
getAllAnnosOnPosition(aJCases, targetAnnosPerUser, users, targetFs);
// do not agree on targets
if (!isAgree(targetFs, targetAnnosPerUser)) {
linkFSes.remove(roleFs);
}
}
} else // do not agree on some role features
{
linkFSes.remove(roleFs);
}
}
ArrayFS array = baseFs.getCAS().createArrayFS(linkFSes.size());
array.copyFromArray(linkFSes.toArray(new FeatureStructure[linkFSes.size()]), 0, 0, linkFSes.size());
baseFs.setFeatureValue(roleFeature, array);
}
}
}
return aJCases.get(WebAnnoConst.CURATION_USER);
}
use of org.dishevelled.bio.assembly.gfa2.Position in project webanno by webanno.
the class AgreementUtils method makeStudy.
private static AgreementResult makeStudy(DiffResult aDiff, Collection<String> aUsers, String aType, String aFeature, boolean aExcludeIncomplete, boolean aNullLabelsAsEmpty, Map<String, List<JCas>> aCasMap) {
List<String> users = new ArrayList<>(aUsers);
Collections.sort(users);
List<ConfigurationSet> completeSets = new ArrayList<>();
List<ConfigurationSet> setsWithDifferences = new ArrayList<>();
List<ConfigurationSet> incompleteSetsByPosition = new ArrayList<>();
List<ConfigurationSet> incompleteSetsByLabel = new ArrayList<>();
List<ConfigurationSet> pluralitySets = new ArrayList<>();
List<ConfigurationSet> irrelevantSets = new ArrayList<>();
CodingAnnotationStudy study = new CodingAnnotationStudy(users.size());
// Check if the feature we are looking at is a primitive feature or a link feature
// We do this by looking it up in the first available CAS. Mind that at this point all
// CASes should have exactly the same typesystem.
JCas someCas = findSomeCas(aCasMap);
if (someCas == null) {
// Well... there is NOTHING here!
// All positions are irrelevant
aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurtionSet(p)));
return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
TypeSystem ts = someCas.getTypeSystem();
// We should just do the right thing here which is: do nothing
if (ts.getType(aType) == null) {
// All positions are irrelevant
aDiff.getPositions().forEach(p -> irrelevantSets.add(aDiff.getConfigurtionSet(p)));
return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
// Check that the feature really exists instead of just getting a NPE later
if (ts.getType(aType).getFeatureByBaseName(aFeature) == null) {
throw new IllegalArgumentException("Type [" + aType + "] has no feature called [" + aFeature + "]");
}
boolean isPrimitiveFeature = ts.getType(aType).getFeatureByBaseName(aFeature).getRange().isPrimitive();
nextPosition: for (Position p : aDiff.getPositions()) {
ConfigurationSet cfgSet = aDiff.getConfigurtionSet(p);
// Only calculate agreement for the given layer
if (!cfgSet.getPosition().getType().equals(aType)) {
// We don't even consider these as irrelevant, they are just filtered out
continue;
}
// If the feature on a position is set, then it is a subposition
boolean isSubPosition = p.getFeature() != null;
// this is an inverted XOR!
if (!(isPrimitiveFeature ^ isSubPosition)) {
irrelevantSets.add(cfgSet);
continue;
}
// feature
if (isSubPosition && !aFeature.equals(cfgSet.getPosition().getFeature())) {
irrelevantSets.add(cfgSet);
continue nextPosition;
}
// If non of the current users has made any annotation at this position, then skip it
if (users.stream().filter(u -> cfgSet.getCasGroupIds().contains(u)).count() == 0) {
irrelevantSets.add(cfgSet);
continue nextPosition;
}
Object[] values = new Object[users.size()];
int i = 0;
for (String user : users) {
// this configuration set.
if (!cfgSet.getCasGroupIds().contains(user)) {
incompleteSetsByPosition.add(cfgSet);
if (aExcludeIncomplete) {
// Record as incomplete
continue nextPosition;
} else {
// Record as missing value
values[i] = null;
i++;
continue;
}
}
// Make sure a single user didn't do multiple alternative annotations at a single
// position. So there is currently no support for calculating agreement on stacking
// annotations.
List<Configuration> cfgs = cfgSet.getConfigurations(user);
if (cfgs.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
Configuration cfg = cfgs.get(0);
// Check if source and/or targets of a relation are stacked
if (cfg.getPosition() instanceof ArcPosition) {
ArcPosition pos = (ArcPosition) cfg.getPosition();
FeatureStructure arc = cfg.getFs(user, pos.getCasId(), aCasMap);
ArcDiffAdapter adapter = (ArcDiffAdapter) aDiff.getDiffAdapter(pos.getType());
// Check if the source of the relation is stacked
AnnotationFS source = FSUtil.getFeature(arc, adapter.getSourceFeature(), AnnotationFS.class);
List<AnnotationFS> sourceCandidates = CasUtil.selectAt(arc.getCAS(), source.getType(), source.getBegin(), source.getEnd());
if (sourceCandidates.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
// Check if the target of the relation is stacked
AnnotationFS target = FSUtil.getFeature(arc, adapter.getTargetFeature(), AnnotationFS.class);
List<AnnotationFS> targetCandidates = CasUtil.selectAt(arc.getCAS(), target.getType(), target.getBegin(), target.getEnd());
if (targetCandidates.size() > 1) {
pluralitySets.add(cfgSet);
continue nextPosition;
}
}
// Only calculate agreement for the given feature
FeatureStructure fs = cfg.getFs(user, cfg.getPosition().getCasId(), aCasMap);
// BEGIN PARANOIA
assert fs.getType().getFeatureByBaseName(aFeature).getRange().isPrimitive() == isPrimitiveFeature;
// should never have gotten here in the first place.
assert !isPrimitiveFeature || !isSubPosition;
if (isPrimitiveFeature && !isSubPosition) {
// Primitive feature / primary position
values[i] = getFeature(fs, aFeature);
} else if (!isPrimitiveFeature && isSubPosition) {
// Link feature / sub-position
ArrayFS links = (ArrayFS) fs.getFeatureValue(fs.getType().getFeatureByBaseName(aFeature));
FeatureStructure link = links.get(cfg.getAID(user).index);
switch(cfg.getPosition().getLinkCompareBehavior()) {
case LINK_TARGET_AS_LABEL:
// FIXME The target feature name should be obtained from the feature
// definition!
AnnotationFS target = (AnnotationFS) link.getFeatureValue(link.getType().getFeatureByBaseName("target"));
values[i] = target.getBegin() + "-" + target.getEnd() + " [" + target.getCoveredText() + "]";
break;
case LINK_ROLE_AS_LABEL:
// FIXME The role feature name should be obtained from the feature
// definition!
String role = link.getStringValue(link.getType().getFeatureByBaseName("role"));
values[i] = role;
break;
default:
throw new IllegalStateException("Unknown link target comparison mode [" + cfg.getPosition().getLinkCompareBehavior() + "]");
}
} else {
throw new IllegalStateException("Should never get here: primitive: " + fs.getType().getFeatureByBaseName(aFeature).getRange().isPrimitive() + "; subpos: " + isSubPosition);
}
// agreement calculation. The empty label is still a valid label.
if (aNullLabelsAsEmpty && values[i] == null) {
values[i] = "";
}
// "null" cannot be used in agreement calculations. We treat these as incomplete
if (values[i] == null) {
incompleteSetsByLabel.add(cfgSet);
if (aExcludeIncomplete) {
continue nextPosition;
}
}
i++;
}
if (ObjectUtils.notEqual(values[0], values[1])) {
setsWithDifferences.add(cfgSet);
}
// are calculating agreement over
assert cfgSet.getPosition().getFeature() == null || cfgSet.getPosition().getFeature().equals(aFeature);
completeSets.add(cfgSet);
study.addItemAsArray(values);
}
return new AgreementResult(aType, aFeature, aDiff, study, users, completeSets, irrelevantSets, setsWithDifferences, incompleteSetsByPosition, incompleteSetsByLabel, pluralitySets, aExcludeIncomplete);
}
Aggregations