Use of org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator in project gatk by broadinstitute.
From the class MarkDuplicatesSparkUtils, method handleFragments.
/**
 * Produces the duplicate-marked fragment reads for one group of {@code PairedEnds}.
 *
 * <p>A copy of the first read of every entry is taken, and the copies are split by
 * {@code ReadUtils.readHasMappedMate}. Only copies without a mapped mate ("fragments")
 * are ever returned:
 * <ul>
 *   <li>if at least one copy has a mapped mate, every fragment is flagged as a duplicate;</li>
 *   <li>otherwise the fragments are ordered by descending score (ties broken by
 *       {@code ReadCoordinateComparator}); the first keeps its current duplicate flag and
 *       all the others are flagged as duplicates.</li>
 * </ul>
 *
 * @param pairedEnds      the group of paired-end records to examine
 * @param scoringStrategy supplies the score used to rank fragments
 * @param header          header handed to the coordinate comparator used for tie-breaking
 * @return the fragment copies, in the order they were processed
 */
private static List<GATKRead> handleFragments(Iterable<PairedEnds> pairedEnds, final MarkDuplicatesScoringStrategy scoringStrategy, final SAMFileHeader header) {
    // Copy the first read of every entry so that flagging never touches the caller's objects.
    final Iterable<GATKRead> firstReadCopies = Iterables.transform(pairedEnds, ends -> ends.first().copy());
    final Map<Boolean, List<GATKRead>> byMappedMate = Utils.stream(firstReadCopies)
            .collect(Collectors.partitioningBy(candidate -> ReadUtils.readHasMappedMate(candidate)));

    // Note that only fragments are emitted from this mapper.
    final List<GATKRead> fragments = byMappedMate.get(false);
    final List<GATKRead> emitted = Lists.newArrayList();

    if (!byMappedMate.get(true).isEmpty()) {
        // Paired reads are present, so every fragment is a duplicate.
        for (final GATKRead fragment : fragments) {
            fragment.setIsDuplicate(true);
            emitted.add(fragment);
        }
        return emitted;
    }

    // No paired reads: rank fragments best-score-first, coordinate order breaking ties.
    final Comparator<GATKRead> bestScoreFirst =
            Comparator.<GATKRead, Integer>comparing(candidate -> scoringStrategy.score(candidate)).reversed();
    final Comparator<GATKRead> ranking = bestScoreFirst.thenComparing(new ReadCoordinateComparator(header));

    final List<GATKRead> ranked = Lists.newArrayList(fragments);
    ranked.sort(ranking);

    for (int rank = 0; rank < ranked.size(); rank++) {
        final GATKRead fragment = ranked.get(rank);
        if (rank > 0) {
            // Everything after the top-ranked fragment is a duplicate.
            fragment.setIsDuplicate(true);
        }
        emitted.add(fragment);
    }
    return emitted;
}
Use of org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator in project gatk by broadinstitute.
From the class CompareDuplicatesSpark, method getDupes.
/**
 * Classifies how closely the duplicate marking of two collections of reads agrees.
 *
 * <p>Both collections are copied into lists and sorted with a
 * {@code ReadCoordinateComparator} built from {@code header}; the sorted lists are then
 * walked in lockstep. The checks are applied in this order:
 * <ol>
 *   <li>{@code SIZE_UNEQUAL} - the collections hold a different number of reads;</li>
 *   <li>{@code READ_MISMATCH} - the reads at some position have different names;</li>
 *   <li>{@code DIFF_NUM_DUPES} - the number of reads flagged as duplicate differs;</li>
 *   <li>{@code DIFFERENT_REPRESENTATIVE_READ} - the two sets of duplicate reads are not equal;</li>
 *   <li>{@code EQUAL} - none of the above.</li>
 * </ol>
 *
 * @param f reads from the first bam
 * @param s reads from the second bam
 * @param header header used for coordinate sorting (should be the same for both)
 * @return the type of the match, EQUAL, DIFFERENT_REPRESENTATIVE_READ, etc.
 */
static MatchType getDupes(Iterable<GATKRead> f, Iterable<GATKRead> s, SAMFileHeader header) {
    final List<GATKRead> readsA = Lists.newArrayList(f);
    final List<GATKRead> readsB = Lists.newArrayList(s);

    final int readCount = readsA.size();
    if (readCount != readsB.size()) {
        return MatchType.SIZE_UNEQUAL;
    }

    // Bring both lists into the same coordinate order so they can be compared position by position.
    final ReadCoordinateComparator coordinateOrder = new ReadCoordinateComparator(header);
    readsA.sort(coordinateOrder);
    readsB.sort(coordinateOrder);

    final Set<GATKRead> dupesA = Sets.newLinkedHashSet();
    final Set<GATKRead> dupesB = Sets.newLinkedHashSet();
    for (int idx = 0; idx < readCount; idx++) {
        final GATKRead a = readsA.get(idx);
        final GATKRead b = readsB.get(idx);

        final boolean sameName = a.getName().equals(b.getName());
        if (!sameName) {
            return MatchType.READ_MISMATCH;
        }

        if (a.isDuplicate()) {
            dupesA.add(a);
        }
        if (b.isDuplicate()) {
            dupesB.add(b);
        }
    }

    if (dupesA.size() != dupesB.size()) {
        return MatchType.DIFF_NUM_DUPES;
    }
    // Same number of duplicates: equal only if the duplicate sets themselves match.
    return dupesA.equals(dupesB) ? MatchType.EQUAL : MatchType.DIFFERENT_REPRESENTATIVE_READ;
}
Aggregations