use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class FragmentCollection method create.
/**
* Generic algorithm that takes an iterable over T objects, a getter routine to extract the reads in T,
* and returns a FragmentCollection that contains the T objects whose underlying reads either overlap (or
* not) with their mate pairs.
*
* @param readContainingObjects An iterator of objects that contain SAMRecords
* @param nElements the number of elements to be provided by the iterator, which is usually known upfront and
* greatly improves the efficiency of the fragment calculation
* @param getter a helper function that takes an object of type T and returns is associated SAMRecord
* @param <T>
* @return a fragment collection
*/
private static <T> FragmentCollection<T> create(final Iterable<T> readContainingObjects, final int nElements, final Function<T, GATKRead> getter) {
Collection<T> singletons = null;
Collection<List<T>> overlapping = null;
Map<String, T> nameMap = null;
int lastStart = -1;
// build an initial map, grabbing all of the multi-read fragments
for (final T p : readContainingObjects) {
final GATKRead read = getter.apply(p);
if (read.getStart() < lastStart) {
throw new IllegalArgumentException(String.format("FragmentUtils.create assumes that the incoming objects are ordered by " + "SAMRecord alignment start, but saw a read %s with alignment start " + "%d before the previous start %d", read.getName(), read.getStart(), lastStart));
}
lastStart = read.getStart();
if (!read.isPaired() || read.mateIsUnmapped() || read.getMateStart() == 0 || read.getMateStart() > read.getEnd()) {
// if we know that this read won't overlap its mate, or doesn't have one, jump out early
if (singletons == null) {
// lazy init
singletons = new ArrayList<>(nElements);
}
singletons.add(p);
} else {
// the read might overlap it's mate, or is the rightmost read of a pair
final String readName = read.getName();
final T pe1 = nameMap == null ? null : nameMap.get(readName);
if (pe1 != null) {
// assumes we have at most 2 reads per fragment
if (overlapping == null) {
// lazy init
overlapping = new ArrayList<>();
}
overlapping.add(Arrays.asList(pe1, p));
nameMap.remove(readName);
} else {
if (nameMap == null) {
// lazy init
nameMap = new LinkedHashMap<>(nElements);
}
nameMap.put(readName, p);
}
}
}
// up to the oneReadPile
if (nameMap != null && !nameMap.isEmpty()) {
if (singletons == null) {
singletons = nameMap.values();
} else {
singletons.addAll(nameMap.values());
}
}
return new FragmentCollection<>(singletons, overlapping);
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class FragmentUtils method adjustQualsOfOverlappingPairedFragments.
public static void adjustQualsOfOverlappingPairedFragments(final List<GATKRead> overlappingPair) {
Utils.validateArg(overlappingPair.size() == 2, () -> "Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2.");
final GATKRead firstRead = overlappingPair.get(0);
final GATKRead secondRead = overlappingPair.get(1);
if (ReadUtils.getSoftStart(secondRead) < ReadUtils.getSoftStart(firstRead)) {
adjustQualsOfOverlappingPairedFragments(secondRead, firstRead);
} else {
adjustQualsOfOverlappingPairedFragments(firstRead, secondRead);
}
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class ClippingOp method applyHARDCLIP_BASES.
/**
* Hard clip bases from read, from start to stop in base coordinates
*
* If start == 0, then we will clip from the front of the read, otherwise we clip
* from the right. If start == 0 and stop == 10, this would clip out the first
* 10 bases of the read.
*
* Note that this function works with reads with negative alignment starts, in order to
* allow us to hardClip reads that have had their soft clips reverted and so might have
* negative alignment starts
*
* Works properly with reduced reads and insertion/deletion base qualities
*
* Note: this method does not assume that the read is directly modifiable
* and makes a copy of it.
*
* @param read a non-null read
* @param start a start >= 0 and < read.length
* @param stop a stop >= 0 and < read.length.
* @return a cloned version of read that has been properly trimmed down
*/
private GATKRead applyHARDCLIP_BASES(final GATKRead read, final int start, final int stop) {
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
//Get the cigar once to avoid multiple calls because each makes a copy of the cigar
final Cigar cigar = read.getCigar();
final CigarShift cigarShift = read.isUnmapped() ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(cigar, start, stop);
// the cigar may force a shift left or right (or both) in case we are left with insertions
// starting or ending the read after applying the hard clip on start/stop.
final int newLength = read.getLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
final byte[] newBases = new byte[newLength];
final byte[] newQuals = new byte[newLength];
final int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
System.arraycopy(read.getBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
final GATKRead hardClippedRead = read.copy();
hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setBases(newBases);
hardClippedRead.setCigar(cigarShift.cigar);
if (start == 0) {
hardClippedRead.setPosition(read.getContig(), read.getStart() + calculateAlignmentStartShift(cigar, cigarShift.cigar));
}
if (hasBaseIndelQualities(read)) {
final byte[] newBaseInsertionQuals = new byte[newLength];
final byte[] newBaseDeletionQuals = new byte[newLength];
System.arraycopy(getBaseInsertionQualities(read), copyStart, newBaseInsertionQuals, 0, newLength);
System.arraycopy(getBaseDeletionQualities(read), copyStart, newBaseDeletionQuals, 0, newLength);
setInsertionBaseQualities(hardClippedRead, newBaseInsertionQuals);
setDeletionBaseQualities(hardClippedRead, newBaseDeletionQuals);
}
return hardClippedRead;
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class ReadLikelihoods method removeSampleReads.
// Requires that the collection passed iterator can remove elements, and it can be modified.
public void removeSampleReads(final int sampleIndex, final Collection<GATKRead> readsToRemove, final int alleleCount) {
final GATKRead[] sampleReads = readsBySampleIndex[sampleIndex];
final int sampleReadCount = sampleReads.length;
final Object2IntMap<GATKRead> indexByRead = readIndexBySampleIndex(sampleIndex);
// Count how many we are going to remove, which ones (indexes) and remove entry from the read-index map.
final boolean[] removeIndex = new boolean[sampleReadCount];
// captures the number of deletions.
int removeCount = 0;
// captures the first position that was deleted.
int firstDeleted = sampleReadCount;
final Iterator<GATKRead> readsToRemoveIterator = readsToRemove.iterator();
while (readsToRemoveIterator.hasNext()) {
final GATKRead read = readsToRemoveIterator.next();
if (indexByRead.containsKey(read)) {
final int index = indexByRead.getInt(read);
if (firstDeleted > index) {
firstDeleted = index;
}
removeCount++;
removeIndex[index] = true;
readsToRemoveIterator.remove();
indexByRead.remove(read);
}
}
// Nothing to remove we just finish here.
if (removeCount == 0) {
return;
}
final int newSampleReadCount = sampleReadCount - removeCount;
// Now we skim out the removed reads from the read array.
final GATKRead[] oldSampleReads = readsBySampleIndex[sampleIndex];
final GATKRead[] newSampleReads = new GATKRead[newSampleReadCount];
System.arraycopy(oldSampleReads, 0, newSampleReads, 0, firstDeleted);
Utils.skimArray(oldSampleReads, firstDeleted, newSampleReads, firstDeleted, removeIndex, firstDeleted);
// Update the indices for the extant reads from the first deletion onwards.
for (int r = firstDeleted; r < newSampleReadCount; r++) {
indexByRead.put(newSampleReads[r], r);
}
// Then we skim out the likelihoods of the removed reads.
final double[][] oldSampleValues = valuesBySampleIndex[sampleIndex];
final double[][] newSampleValues = new double[alleleCount][newSampleReadCount];
for (int a = 0; a < alleleCount; a++) {
System.arraycopy(oldSampleValues[a], 0, newSampleValues[a], 0, firstDeleted);
Utils.skimArray(oldSampleValues[a], firstDeleted, newSampleValues[a], firstDeleted, removeIndex, firstDeleted);
}
valuesBySampleIndex[sampleIndex] = newSampleValues;
readsBySampleIndex[sampleIndex] = newSampleReads;
// reset the unmodifiable list.
readListBySampleIndex[sampleIndex] = null;
}
use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.
the class ReadLikelihoods method overlappingReadIndicesBySampleIndex.
private int[][] overlappingReadIndicesBySampleIndex(final Locatable overlap) {
if (overlap == null) {
return null;
}
final int sampleCount = samples.numberOfSamples();
final int[][] result = new int[sampleCount][];
final IntArrayList buffer = new IntArrayList(200);
final String contig = overlap.getContig();
final int overlapStart = overlap.getStart();
final int overlapEnd = overlap.getEnd();
for (int s = 0; s < sampleCount; s++) {
buffer.clear();
final GATKRead[] sampleReads = readsBySampleIndex[s];
final int sampleReadCount = sampleReads.length;
buffer.ensureCapacity(sampleReadCount);
for (int r = 0; r < sampleReadCount; r++) {
if (unclippedReadOverlapsRegion(sampleReads[r], contig, overlapStart, overlapEnd)) {
buffer.add(r);
}
}
result[s] = buffer.toIntArray();
}
return result;
}
Aggregations