Use of org.molgenis.data.annotation.core.datastructures.Location in project molgenis by molgenis.
Class MultiAllelicResultFilter, method merge.
/**
 * Collapses resource entities that share the same reference allele into a single, multi-allelic style entity.
 * <p>
 * In a VCF there is only one REF per line and the ALT varies, but a resource may list one ALT per row and may even
 * use different REF values at the same position. Rows such as:
 * <pre>
 * 3 300 G  A  0.2|23.1
 * 3 300 G  T  -2.4|0.123
 * 3 300 G  X  -0.002|2.3
 * 3 300 G  C  0.5|14.5
 * 3 300 GC A  0.2|23.1
 * 3 300 GC T  -2.4|0.123
 * 3 300 C  GX -0.002|2.3
 * 3 300 C  GC 0.5|14.5
 * </pre>
 * are turned into:
 * <pre>
 * 3 300 G  A,T,X,C 0.2|23.1,-2.4|0.123,-0.002|2.3,0.5|14.5
 * 3 300 GC A,T     0.2|23.1,-2.4|0.123
 * 3 300 C  GX,GC   -0.002|2.3,0.5|14.5
 * </pre>
 * so that the multi-allelic filter can look the values up as if they came from a multi-allelic VCF line.
 * <p>
 * The first entity encountered for each REF is updated in place (its ALT and allele-specific attribute values get
 * the values of the later entities appended, comma-separated) and is the one that ends up in the result.
 * <p>
 * NOTE(review): values are read with {@code get(...).toString()}, so a missing (null) ALT or allele-specific value
 * presumably ends in a NullPointerException - confirm against the Entity implementation.
 *
 * @param resourceEntities entities that are all expected to be at the same location
 * @return one entity per distinct REF, in order of first appearance; empty when there are no input entities
 * @throws MolgenisDataException when an entity's location is not equal to the location of the first entity
 */
public Iterable<Entity> merge(Iterable<Entity> resourceEntities) {
    ArrayList<Entity> mergedPerRef = new ArrayList<>();
    PeekingIterator<Entity> pending = Iterators.peekingIterator(resourceEntities.iterator());

    if (pending.hasNext()) {
        // every entity is compared against the location of the very first one
        Location expectedLocation = Location.create(pending.peek());

        // pass 1: bucket the entities by REF, keeping the order in which they arrived
        Multimap<String, Entity> entitiesByRef = LinkedListMultimap.create();
        do {
            Entity candidate = pending.next();
            Location candidateLocation = Location.create(candidate);
            // NOTE(review): location equality is expected to cover chrom and pos only, REF may differ - confirm
            if (!expectedLocation.equals(candidateLocation)) {
                throw new MolgenisDataException("Mismatch in location! " + expectedLocation + " vs " + candidateLocation);
            }
            entitiesByRef.put(candidate.getString(REF), candidate);
        } while (pending.hasNext());

        // pass 2: fold every bucket into its first entity
        for (String ref : entitiesByRef.keySet()) {
            mergedPerRef.add(mergeEntitiesSharingRef(entitiesByRef.get(ref)));
        }
    }
    return mergedPerRef;
}

/**
 * Folds all given entities into the first one by appending ALT and every allele-specific attribute value.
 *
 * @param sameRefEntities entities that share one REF value
 * @return the first entity, carrying the combined values; null when no entities are given
 */
private Entity mergeEntitiesSharingRef(Iterable<Entity> sameRefEntities) {
    Entity combined = null;
    for (Entity current : sameRefEntities) {
        if (combined == null) {
            // the first entity is the one all following entities are merged into
            combined = current;
            continue;
        }
        // alleles first, then each allele-specific attribute, so positions stay aligned
        appendCommaSeparated(combined, current, ALT);
        for (Attribute alleleSpecificAttribute : attributes) {
            appendCommaSeparated(combined, current, alleleSpecificAttribute.getName());
        }
    }
    return combined;
}

/**
 * Replaces the named value on the target with "target value,source value".
 */
private void appendCommaSeparated(Entity target, Entity source, String attrName) {
    String targetValue = target.get(attrName).toString();
    String sourceValue = source.get(attrName).toString();
    target.set(attrName, targetValue + "," + sourceValue);
}
Aggregations