Usage of org.broadinstitute.hellbender.exceptions.GATKException in the GATK project (Broad Institute): class AbstractAlignmentMerger, method mergeAlignment.
/**
* /**
* Merges the alignment data with the non-aligned records from the source BAM file.
*/
public void mergeAlignment(final File referenceFasta) {
// Open the file of unmapped records and write the read groups to the the header for the merged file
final SamReader unmappedSam = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(this.unmappedBamFile);
final CloseableIterator<SAMRecord> unmappedIterator = unmappedSam.iterator();
this.header.setReadGroups(unmappedSam.getFileHeader().getReadGroups());
int aligned = 0;
int unmapped = 0;
// Get the aligned records and set up the first one
alignedIterator = new MultiHitAlignedReadIterator(new FilteringSamIterator(getQuerynameSortedAlignedRecords(), alignmentFilter), primaryAlignmentSelectionStrategy);
HitsForInsert nextAligned = nextAligned();
// sets the program group
if (getProgramRecord() != null) {
for (final SAMProgramRecord pg : unmappedSam.getFileHeader().getProgramRecords()) {
if (pg.getId().equals(getProgramRecord().getId())) {
throw new GATKException("Program Record ID already in use in unmapped BAM file.");
}
}
}
// Create the sorting collection that will write the records in the coordinate order
// to the final bam file
final SortingCollection<SAMRecord> sorted = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(), MAX_RECORDS_IN_RAM);
while (unmappedIterator.hasNext()) {
// Load next unaligned read or read pair.
final SAMRecord rec = unmappedIterator.next();
rec.setHeaderStrict(this.header);
maybeSetPgTag(rec);
final SAMRecord secondOfPair;
if (rec.getReadPairedFlag()) {
secondOfPair = unmappedIterator.next();
secondOfPair.setHeaderStrict(this.header);
maybeSetPgTag(secondOfPair);
// Validate that paired reads arrive as first of pair followed by second of pair
if (!rec.getReadName().equals(secondOfPair.getReadName()))
throw new GATKException("Second read from pair not found in unmapped bam: " + rec.getReadName() + ", " + secondOfPair.getReadName());
if (!rec.getFirstOfPairFlag())
throw new GATKException("First record in unmapped bam is not first of pair: " + rec.getReadName());
if (!secondOfPair.getReadPairedFlag())
throw new GATKException("Second record in unmapped bam is not marked as paired: " + secondOfPair.getReadName());
if (!secondOfPair.getSecondOfPairFlag())
throw new GATKException("Second record in unmapped bam is not second of pair: " + secondOfPair.getReadName());
} else {
secondOfPair = null;
}
// See if there are alignments for current unaligned read or read pair.
if (nextAligned != null && rec.getReadName().equals(nextAligned.getReadName())) {
// If there are multiple alignments for a read (pair), then the unaligned SAMRecord must be cloned
// before copying info from the aligned record to the unaligned.
final boolean clone = nextAligned.numHits() > 1 || nextAligned.hasSupplementalHits();
SAMRecord r1Primary = null, r2Primary = null;
if (rec.getReadPairedFlag()) {
for (int i = 0; i < nextAligned.numHits(); ++i) {
// firstAligned or secondAligned may be null, if there wasn't an alignment for the end,
// or if the alignment was rejected by ignoreAlignment.
final SAMRecord firstAligned = nextAligned.getFirstOfPair(i);
final SAMRecord secondAligned = nextAligned.getSecondOfPair(i);
final boolean isPrimaryAlignment = (firstAligned != null && !firstAligned.isSecondaryOrSupplementary()) || (secondAligned != null && !secondAligned.isSecondaryOrSupplementary());
final SAMRecord firstToWrite;
final SAMRecord secondToWrite;
if (clone) {
firstToWrite = ReadUtils.cloneSAMRecord(rec);
secondToWrite = ReadUtils.cloneSAMRecord(secondOfPair);
} else {
firstToWrite = rec;
secondToWrite = secondOfPair;
}
// If these are the primary alignments then stash them for use on any supplemental alignments
if (isPrimaryAlignment) {
r1Primary = firstToWrite;
r2Primary = secondToWrite;
}
transferAlignmentInfoToPairedRead(firstToWrite, secondToWrite, firstAligned, secondAligned);
// Only write unmapped read when it has the mate info from the primary alignment.
if (!firstToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
addIfNotFiltered(sorted, firstToWrite);
if (firstToWrite.getReadUnmappedFlag())
++unmapped;
else
++aligned;
}
if (!secondToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
addIfNotFiltered(sorted, secondToWrite);
if (!secondToWrite.getReadUnmappedFlag())
++aligned;
else
++unmapped;
}
}
// Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
for (final boolean isRead1 : new boolean[] { true, false }) {
final List<SAMRecord> supplementals = isRead1 ? nextAligned.getSupplementalFirstOfPairOrFragment() : nextAligned.getSupplementalSecondOfPair();
final SAMRecord sourceRec = isRead1 ? rec : secondOfPair;
final SAMRecord matePrimary = isRead1 ? r2Primary : r1Primary;
for (final SAMRecord supp : supplementals) {
final SAMRecord out = ReadUtils.cloneSAMRecord(sourceRec);
transferAlignmentInfoToFragment(out, supp);
if (matePrimary != null)
SamPairUtil.setMateInformationOnSupplementalAlignment(out, matePrimary, addMateCigar);
++aligned;
addIfNotFiltered(sorted, out);
}
}
} else {
for (int i = 0; i < nextAligned.numHits(); ++i) {
final SAMRecord recToWrite = clone ? ReadUtils.cloneSAMRecord(rec) : rec;
transferAlignmentInfoToFragment(recToWrite, nextAligned.getFragment(i));
addIfNotFiltered(sorted, recToWrite);
if (recToWrite.getReadUnmappedFlag())
++unmapped;
else
++aligned;
}
// Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
for (final SAMRecord supplementalRec : nextAligned.getSupplementalFirstOfPairOrFragment()) {
final SAMRecord recToWrite = ReadUtils.cloneSAMRecord(rec);
transferAlignmentInfoToFragment(recToWrite, supplementalRec);
addIfNotFiltered(sorted, recToWrite);
++aligned;
}
}
nextAligned = nextAligned();
} else {
// There was no alignment for this read or read pair.
if (nextAligned != null && SAMRecordQueryNameComparator.compareReadNames(rec.getReadName(), nextAligned.getReadName()) > 0) {
throw new IllegalStateException("Aligned record iterator (" + nextAligned.getReadName() + ") is behind the unmapped reads (" + rec.getReadName() + ")");
}
// No matching read from alignedIterator -- just output reads as is.
if (!alignedReadsOnly) {
sorted.add(rec);
++unmapped;
if (secondOfPair != null) {
sorted.add(secondOfPair);
++unmapped;
}
}
}
}
unmappedIterator.close();
Utils.validate(!alignedIterator.hasNext(), () -> "Reads remaining on alignment iterator: " + alignedIterator.next().getReadName() + "!");
alignedIterator.close();
// Write the records to the output file in specified sorted order,
header.setSortOrder(this.sortOrder);
final boolean presorted = this.sortOrder == SAMFileHeader.SortOrder.coordinate;
final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(header, presorted, this.targetBamFile, referenceFasta);
writer.setProgressLogger(new ProgressLogger(logger, (int) 1e7, "Wrote", "records from a sorting collection"));
final ProgressLogger finalProgress = new ProgressLogger(logger, 10000000, "Written in coordinate order to output", "records");
for (final SAMRecord rec : sorted) {
if (!rec.getReadUnmappedFlag()) {
if (refSeq != null) {
final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));
if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
}
}
}
writer.addAlignment(rec);
finalProgress.record(rec);
}
writer.close();
sorted.cleanup();
CloserUtil.close(unmappedSam);
logger.info("Wrote " + aligned + " alignment records and " + (alignedReadsOnly ? 0 : unmapped) + " unmapped reads.");
}
Usage of org.broadinstitute.hellbender.exceptions.GATKException in the GATK project (Broad Institute): class AlignmentUtils, method getMismatchCount.
/**
* Count how many bases mismatch the reference. Indels are not considered mismatching.
*
* @param r the sam record to check against
* @param refSeq the byte array representing the reference sequence
* @param refIndex the index in the reference byte array of the read's first base (the reference index
* is matching the alignment start, there may be tons of soft-clipped bases before/after
* that so it's wrong to compare with getLength() here.). Note that refIndex is
* zero based, not 1 based
* @param startOnRead the index in the read's bases from which we start counting
* @param nReadBases the number of bases after (but including) startOnRead that we check
* @return non-null object representing the mismatch count
*/
@SuppressWarnings("fallthrough")
public static MismatchCount getMismatchCount(GATKRead r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
Utils.nonNull(r);
Utils.nonNull(refSeq);
if (refIndex < 0)
throw new IllegalArgumentException("attempting to calculate the mismatch count with a reference index that is negative");
if (startOnRead < 0)
throw new IllegalArgumentException("attempting to calculate the mismatch count with a read start that is negative");
if (nReadBases < 0)
throw new IllegalArgumentException("attempting to calculate the mismatch count for a negative number of read bases");
if (refSeq.length - refIndex < (r.getEnd() - r.getStart()))
throw new IllegalArgumentException("attempting to calculate the mismatch count against a reference string that is smaller than the read");
MismatchCount mc = new MismatchCount();
int readIdx = 0;
// index of the last base on read we want to count (note we are including soft-clipped bases with this math)
final int endOnRead = startOnRead + nReadBases - 1;
final byte[] readSeq = r.getBases();
final Cigar c = r.getCigar();
final byte[] readQuals = r.getBaseQualities();
for (final CigarElement ce : c.getCigarElements()) {
if (readIdx > endOnRead)
break;
final int elementLength = ce.getLength();
switch(ce.getOperator()) {
case X:
mc.numMismatches += elementLength;
for (int j = 0; j < elementLength; j++) mc.mismatchQualities += readQuals[readIdx + j];
case EQ:
refIndex += elementLength;
readIdx += elementLength;
break;
case M:
for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) {
if (refIndex >= refSeq.length)
// TODO : It should never happen, we should throw exception here
continue;
if (readIdx < startOnRead)
continue;
if (readIdx > endOnRead)
break;
byte refChr = refSeq[refIndex];
byte readChr = readSeq[readIdx];
// continue; // do not count Ns/Xs/etc ?
if (readChr != refChr) {
mc.numMismatches++;
mc.mismatchQualities += readQuals[readIdx];
}
}
break;
case I:
case S:
readIdx += elementLength;
break;
case D:
case N:
refIndex += elementLength;
break;
case H:
case P:
break;
default:
throw new GATKException("The " + ce.getOperator() + " cigar element is not currently supported");
}
}
return mc;
}
Usage of org.broadinstitute.hellbender.exceptions.GATKException in the GATK project (Broad Institute): class QualQuantizer, method intervalsToMap.
/**
* Given a final forest of intervals constructs a list mapping
* list.get(i) => quantized qual to use for original quality score i
*
* This function should be called only once to initialize the corresponding
* cached value in this object, as the calculation is a bit costly.
*
* @param intervals
* @return
*/
private List<Byte> intervalsToMap(final TreeSet<QualInterval> intervals) {
final List<Byte> map = new ArrayList<>(getNQualsInHistogram());
map.addAll(Collections.nCopies(getNQualsInHistogram(), Byte.MIN_VALUE));
for (final QualInterval interval : intervals) {
for (int q = interval.qStart; q <= interval.qEnd; q++) {
map.set(q, interval.getQual());
}
}
if (Collections.min(map) == Byte.MIN_VALUE)
throw new GATKException("quantized quality score map contains an un-initialized value");
return map;
}
Usage of org.broadinstitute.hellbender.exceptions.GATKException in the GATK project (Broad Institute): class CigarUtils, method countRefBasesBasedOnCigar.
/**
* Compute the number of reference bases between the start (inclusive) and end (exclusive) cigar elements.
* Note: The method does NOT use CigarOperator.consumesReferenceBases, since it checks something different.
* The idea is you remove some elements from the beginning of the cigar string,
* and want to recalculate what if the new starting reference position,
* you want to count all the elements that indicate existing bases in the reference
* (everything but I and P).
* For example original position = 10. cigar: 2M3I2D1M. If you remove the 2M the new starting position is 12.
* If you remove the 2M3I it is still 12. If you remove 2M3I2D (not reasonable cigar), you will get position 14.
*/
@SuppressWarnings("fallthru")
public static int countRefBasesBasedOnCigar(final GATKRead read, final int cigarStartIndex, final int cigarEndIndex) {
if (read == null) {
throw new IllegalArgumentException("null read");
}
final List<CigarElement> elems = read.getCigarElements();
if (cigarStartIndex < 0 || cigarEndIndex > elems.size() || cigarStartIndex > cigarEndIndex) {
throw new IllegalArgumentException("invalid index:" + 0 + " -" + elems.size());
}
int result = 0;
for (int i = cigarStartIndex; i < cigarEndIndex; i++) {
final CigarElement cigarElement = elems.get(i);
switch(cigarElement.getOperator()) {
case M:
case D:
case N:
case EQ:
case X:
case S:
case H:
result += cigarElement.getLength();
break;
case I:
case //for these two, nothing happens.
P:
break;
default:
throw new GATKException("Unsupported cigar operator: " + cigarElement.getOperator());
}
}
return result;
}
Usage of org.broadinstitute.hellbender.exceptions.GATKException in the GATK project (Broad Institute): class FeatureCache, method trimToNewStartPosition.
/**
* Trims the cache to the specified new start position by discarding all records that end before it
* while preserving relative ordering of records.
*
* @param newStart new start position on the current contig to which to trim the cache
*/
public void trimToNewStartPosition(final int newStart) {
if (newStart > cachedInterval.getEnd()) {
throw new GATKException(String.format("BUG: attempted to trim Feature cache to an improper new start position (%d). Cache stop = %d", newStart, cachedInterval.getEnd()));
}
List<CACHED_FEATURE> overlappingFeaturesBeforeNewStart = new ArrayList<>(EXPECTED_MAX_OVERLAPPING_FEATURES_DURING_CACHE_TRIM);
// by start position.
while (!cache.isEmpty() && cache.getFirst().getStart() < newStart) {
CACHED_FEATURE featureBeforeNewStart = cache.removeFirst();
if (featureBeforeNewStart.getEnd() >= newStart) {
overlappingFeaturesBeforeNewStart.add(featureBeforeNewStart);
}
}
// relative ordering in the cache is restored.
for (int i = overlappingFeaturesBeforeNewStart.size() - 1; i >= 0; --i) {
cache.addFirst(overlappingFeaturesBeforeNewStart.get(i));
}
// Record our new start boundary
cachedInterval = new SimpleInterval(cachedInterval.getContig(), newStart, cachedInterval.getEnd());
}
Aggregations