Search in sources :

Example 31 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class AbstractAlignmentMerger method mergeAlignment.

/**
     * /**
     * Merges the alignment data with the non-aligned records from the source BAM file.
     */
public void mergeAlignment(final File referenceFasta) {
    // Open the file of unmapped records and write the read groups to the the header for the merged file
    final SamReader unmappedSam = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(this.unmappedBamFile);
    final CloseableIterator<SAMRecord> unmappedIterator = unmappedSam.iterator();
    this.header.setReadGroups(unmappedSam.getFileHeader().getReadGroups());
    int aligned = 0;
    int unmapped = 0;
    // Get the aligned records and set up the first one
    alignedIterator = new MultiHitAlignedReadIterator(new FilteringSamIterator(getQuerynameSortedAlignedRecords(), alignmentFilter), primaryAlignmentSelectionStrategy);
    HitsForInsert nextAligned = nextAligned();
    // sets the program group
    if (getProgramRecord() != null) {
        for (final SAMProgramRecord pg : unmappedSam.getFileHeader().getProgramRecords()) {
            if (pg.getId().equals(getProgramRecord().getId())) {
                throw new GATKException("Program Record ID already in use in unmapped BAM file.");
            }
        }
    }
    // Create the sorting collection that will write the records in the coordinate order
    // to the final bam file
    final SortingCollection<SAMRecord> sorted = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(), MAX_RECORDS_IN_RAM);
    while (unmappedIterator.hasNext()) {
        // Load next unaligned read or read pair.
        final SAMRecord rec = unmappedIterator.next();
        rec.setHeaderStrict(this.header);
        maybeSetPgTag(rec);
        final SAMRecord secondOfPair;
        if (rec.getReadPairedFlag()) {
            secondOfPair = unmappedIterator.next();
            secondOfPair.setHeaderStrict(this.header);
            maybeSetPgTag(secondOfPair);
            // Validate that paired reads arrive as first of pair followed by second of pair
            if (!rec.getReadName().equals(secondOfPair.getReadName()))
                throw new GATKException("Second read from pair not found in unmapped bam: " + rec.getReadName() + ", " + secondOfPair.getReadName());
            if (!rec.getFirstOfPairFlag())
                throw new GATKException("First record in unmapped bam is not first of pair: " + rec.getReadName());
            if (!secondOfPair.getReadPairedFlag())
                throw new GATKException("Second record in unmapped bam is not marked as paired: " + secondOfPair.getReadName());
            if (!secondOfPair.getSecondOfPairFlag())
                throw new GATKException("Second record in unmapped bam is not second of pair: " + secondOfPair.getReadName());
        } else {
            secondOfPair = null;
        }
        // See if there are alignments for current unaligned read or read pair.
        if (nextAligned != null && rec.getReadName().equals(nextAligned.getReadName())) {
            // If there are multiple alignments for a read (pair), then the unaligned SAMRecord must be cloned
            // before copying info from the aligned record to the unaligned.
            final boolean clone = nextAligned.numHits() > 1 || nextAligned.hasSupplementalHits();
            SAMRecord r1Primary = null, r2Primary = null;
            if (rec.getReadPairedFlag()) {
                for (int i = 0; i < nextAligned.numHits(); ++i) {
                    // firstAligned or secondAligned may be null, if there wasn't an alignment for the end,
                    // or if the alignment was rejected by ignoreAlignment.
                    final SAMRecord firstAligned = nextAligned.getFirstOfPair(i);
                    final SAMRecord secondAligned = nextAligned.getSecondOfPair(i);
                    final boolean isPrimaryAlignment = (firstAligned != null && !firstAligned.isSecondaryOrSupplementary()) || (secondAligned != null && !secondAligned.isSecondaryOrSupplementary());
                    final SAMRecord firstToWrite;
                    final SAMRecord secondToWrite;
                    if (clone) {
                        firstToWrite = ReadUtils.cloneSAMRecord(rec);
                        secondToWrite = ReadUtils.cloneSAMRecord(secondOfPair);
                    } else {
                        firstToWrite = rec;
                        secondToWrite = secondOfPair;
                    }
                    // If these are the primary alignments then stash them for use on any supplemental alignments
                    if (isPrimaryAlignment) {
                        r1Primary = firstToWrite;
                        r2Primary = secondToWrite;
                    }
                    transferAlignmentInfoToPairedRead(firstToWrite, secondToWrite, firstAligned, secondAligned);
                    // Only write unmapped read when it has the mate info from the primary alignment.
                    if (!firstToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
                        addIfNotFiltered(sorted, firstToWrite);
                        if (firstToWrite.getReadUnmappedFlag())
                            ++unmapped;
                        else
                            ++aligned;
                    }
                    if (!secondToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
                        addIfNotFiltered(sorted, secondToWrite);
                        if (!secondToWrite.getReadUnmappedFlag())
                            ++aligned;
                        else
                            ++unmapped;
                    }
                }
                // Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
                for (final boolean isRead1 : new boolean[] { true, false }) {
                    final List<SAMRecord> supplementals = isRead1 ? nextAligned.getSupplementalFirstOfPairOrFragment() : nextAligned.getSupplementalSecondOfPair();
                    final SAMRecord sourceRec = isRead1 ? rec : secondOfPair;
                    final SAMRecord matePrimary = isRead1 ? r2Primary : r1Primary;
                    for (final SAMRecord supp : supplementals) {
                        final SAMRecord out = ReadUtils.cloneSAMRecord(sourceRec);
                        transferAlignmentInfoToFragment(out, supp);
                        if (matePrimary != null)
                            SamPairUtil.setMateInformationOnSupplementalAlignment(out, matePrimary, addMateCigar);
                        ++aligned;
                        addIfNotFiltered(sorted, out);
                    }
                }
            } else {
                for (int i = 0; i < nextAligned.numHits(); ++i) {
                    final SAMRecord recToWrite = clone ? ReadUtils.cloneSAMRecord(rec) : rec;
                    transferAlignmentInfoToFragment(recToWrite, nextAligned.getFragment(i));
                    addIfNotFiltered(sorted, recToWrite);
                    if (recToWrite.getReadUnmappedFlag())
                        ++unmapped;
                    else
                        ++aligned;
                }
                // Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
                for (final SAMRecord supplementalRec : nextAligned.getSupplementalFirstOfPairOrFragment()) {
                    final SAMRecord recToWrite = ReadUtils.cloneSAMRecord(rec);
                    transferAlignmentInfoToFragment(recToWrite, supplementalRec);
                    addIfNotFiltered(sorted, recToWrite);
                    ++aligned;
                }
            }
            nextAligned = nextAligned();
        } else {
            // There was no alignment for this read or read pair.
            if (nextAligned != null && SAMRecordQueryNameComparator.compareReadNames(rec.getReadName(), nextAligned.getReadName()) > 0) {
                throw new IllegalStateException("Aligned record iterator (" + nextAligned.getReadName() + ") is behind the unmapped reads (" + rec.getReadName() + ")");
            }
            // No matching read from alignedIterator -- just output reads as is.
            if (!alignedReadsOnly) {
                sorted.add(rec);
                ++unmapped;
                if (secondOfPair != null) {
                    sorted.add(secondOfPair);
                    ++unmapped;
                }
            }
        }
    }
    unmappedIterator.close();
    Utils.validate(!alignedIterator.hasNext(), () -> "Reads remaining on alignment iterator: " + alignedIterator.next().getReadName() + "!");
    alignedIterator.close();
    // Write the records to the output file in specified sorted order,
    header.setSortOrder(this.sortOrder);
    final boolean presorted = this.sortOrder == SAMFileHeader.SortOrder.coordinate;
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(header, presorted, this.targetBamFile, referenceFasta);
    writer.setProgressLogger(new ProgressLogger(logger, (int) 1e7, "Wrote", "records from a sorting collection"));
    final ProgressLogger finalProgress = new ProgressLogger(logger, 10000000, "Written in coordinate order to output", "records");
    for (final SAMRecord rec : sorted) {
        if (!rec.getReadUnmappedFlag()) {
            if (refSeq != null) {
                final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
                rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));
                if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
                    rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
                }
            }
        }
        writer.addAlignment(rec);
        finalProgress.record(rec);
    }
    writer.close();
    sorted.cleanup();
    CloserUtil.close(unmappedSam);
    logger.info("Wrote " + aligned + " alignment records and " + (alignedReadsOnly ? 0 : unmapped) + " unmapped reads.");
}
Also used : ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) FilteringSamIterator(htsjdk.samtools.filter.FilteringSamIterator) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 32 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class AlignmentUtils method getMismatchCount.

/**
     * Count how many bases mismatch the reference.  Indels are not considered mismatching.
     *
     * @param r                   the sam record to check against
     * @param refSeq              the byte array representing the reference sequence
     * @param refIndex            the index in the reference byte array of the read's first base (the reference index
     *                            is matching the alignment start, there may be tons of soft-clipped bases before/after
     *                            that so it's wrong to compare with getLength() here.).  Note that refIndex is
     *                            zero based, not 1 based
     * @param startOnRead         the index in the read's bases from which we start counting
     * @param nReadBases          the number of bases after (but including) startOnRead that we check
     * @return non-null object representing the mismatch count
     */
@SuppressWarnings("fallthrough")
public static MismatchCount getMismatchCount(GATKRead r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
    Utils.nonNull(r);
    Utils.nonNull(refSeq);
    if (refIndex < 0)
        throw new IllegalArgumentException("attempting to calculate the mismatch count with a reference index that is negative");
    if (startOnRead < 0)
        throw new IllegalArgumentException("attempting to calculate the mismatch count with a read start that is negative");
    if (nReadBases < 0)
        throw new IllegalArgumentException("attempting to calculate the mismatch count for a negative number of read bases");
    if (refSeq.length - refIndex < (r.getEnd() - r.getStart()))
        throw new IllegalArgumentException("attempting to calculate the mismatch count against a reference string that is smaller than the read");
    MismatchCount mc = new MismatchCount();
    int readIdx = 0;
    // index of the last base on read we want to count (note we are including soft-clipped bases with this math)
    final int endOnRead = startOnRead + nReadBases - 1;
    final byte[] readSeq = r.getBases();
    final Cigar c = r.getCigar();
    final byte[] readQuals = r.getBaseQualities();
    for (final CigarElement ce : c.getCigarElements()) {
        if (readIdx > endOnRead)
            break;
        final int elementLength = ce.getLength();
        switch(ce.getOperator()) {
            case X:
                mc.numMismatches += elementLength;
                for (int j = 0; j < elementLength; j++) mc.mismatchQualities += readQuals[readIdx + j];
            case EQ:
                refIndex += elementLength;
                readIdx += elementLength;
                break;
            case M:
                for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) {
                    if (refIndex >= refSeq.length)
                        // TODO : It should never happen, we should throw exception here
                        continue;
                    if (readIdx < startOnRead)
                        continue;
                    if (readIdx > endOnRead)
                        break;
                    byte refChr = refSeq[refIndex];
                    byte readChr = readSeq[readIdx];
                    //    continue; // do not count Ns/Xs/etc ?
                    if (readChr != refChr) {
                        mc.numMismatches++;
                        mc.mismatchQualities += readQuals[readIdx];
                    }
                }
                break;
            case I:
            case S:
                readIdx += elementLength;
                break;
            case D:
            case N:
                refIndex += elementLength;
                break;
            case H:
            case P:
                break;
            default:
                throw new GATKException("The " + ce.getOperator() + " cigar element is not currently supported");
        }
    }
    return mc;
}
Also used : Cigar(htsjdk.samtools.Cigar) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 33 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class QualQuantizer method intervalsToMap.

/**
     * Given a final forest of intervals constructs a list mapping
     * list.get(i) => quantized qual to use for original quality score i
     *
     * This function should be called only once to initialize the corresponding
     * cached value in this object, as the calculation is a bit costly.
     *
     * @param intervals
     * @return
     */
private List<Byte> intervalsToMap(final TreeSet<QualInterval> intervals) {
    final List<Byte> map = new ArrayList<>(getNQualsInHistogram());
    map.addAll(Collections.nCopies(getNQualsInHistogram(), Byte.MIN_VALUE));
    for (final QualInterval interval : intervals) {
        for (int q = interval.qStart; q <= interval.qEnd; q++) {
            map.set(q, interval.getQual());
        }
    }
    if (Collections.min(map) == Byte.MIN_VALUE)
        throw new GATKException("quantized quality score map contains an un-initialized value");
    return map;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 34 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class CigarUtils method countRefBasesBasedOnCigar.

/**
     * Compute the number of reference bases between the start (inclusive) and end (exclusive) cigar elements.
     * Note: The method does NOT use CigarOperator.consumesReferenceBases, since it checks something different.
     * The idea is you remove some elements from the beginning of the cigar string,
     * and want to recalculate what if the new starting reference position,
     * you want to count all the elements that indicate existing bases in the reference
     * (everything but I and P).
     * For example original position = 10. cigar: 2M3I2D1M. If you remove the 2M the new starting position is 12.
     * If you remove the 2M3I it is still 12. If you remove 2M3I2D (not reasonable cigar), you will get position 14.
     */
@SuppressWarnings("fallthru")
public static int countRefBasesBasedOnCigar(final GATKRead read, final int cigarStartIndex, final int cigarEndIndex) {
    if (read == null) {
        throw new IllegalArgumentException("null read");
    }
    final List<CigarElement> elems = read.getCigarElements();
    if (cigarStartIndex < 0 || cigarEndIndex > elems.size() || cigarStartIndex > cigarEndIndex) {
        throw new IllegalArgumentException("invalid index:" + 0 + " -" + elems.size());
    }
    int result = 0;
    for (int i = cigarStartIndex; i < cigarEndIndex; i++) {
        final CigarElement cigarElement = elems.get(i);
        switch(cigarElement.getOperator()) {
            case M:
            case D:
            case N:
            case EQ:
            case X:
            case S:
            case H:
                result += cigarElement.getLength();
                break;
            case I:
            case //for these two, nothing happens.
            P:
                break;
            default:
                throw new GATKException("Unsupported cigar operator: " + cigarElement.getOperator());
        }
    }
    return result;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 35 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class FeatureCache method trimToNewStartPosition.

/**
     * Trims the cache to the specified new start position by discarding all records that end before it
     * while preserving relative ordering of records.
     *
     * @param newStart new start position on the current contig to which to trim the cache
     */
public void trimToNewStartPosition(final int newStart) {
    if (newStart > cachedInterval.getEnd()) {
        throw new GATKException(String.format("BUG: attempted to trim Feature cache to an improper new start position (%d). Cache stop = %d", newStart, cachedInterval.getEnd()));
    }
    List<CACHED_FEATURE> overlappingFeaturesBeforeNewStart = new ArrayList<>(EXPECTED_MAX_OVERLAPPING_FEATURES_DURING_CACHE_TRIM);
    // by start position.
    while (!cache.isEmpty() && cache.getFirst().getStart() < newStart) {
        CACHED_FEATURE featureBeforeNewStart = cache.removeFirst();
        if (featureBeforeNewStart.getEnd() >= newStart) {
            overlappingFeaturesBeforeNewStart.add(featureBeforeNewStart);
        }
    }
    // relative ordering in the cache is restored.
    for (int i = overlappingFeaturesBeforeNewStart.size() - 1; i >= 0; --i) {
        cache.addFirst(overlappingFeaturesBeforeNewStart.get(i));
    }
    // Record our new start boundary
    cachedInterval = new SimpleInterval(cachedInterval.getContig(), newStart, cachedInterval.getEnd());
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Aggregations

GATKException (org.broadinstitute.hellbender.exceptions.GATKException)96 IOException (java.io.IOException)19 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 CigarElement (htsjdk.samtools.CigarElement)12 ArrayList (java.util.ArrayList)10 UserException (org.broadinstitute.hellbender.exceptions.UserException)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 Cigar (htsjdk.samtools.Cigar)7 File (java.io.File)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 OutputStream (java.io.OutputStream)5 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)5 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 Utils (org.broadinstitute.hellbender.utils.Utils)4 Tuple2 (scala.Tuple2)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 BufferedOutputStream (java.io.BufferedOutputStream)3 InputStream (java.io.InputStream)3 BigInteger (java.math.BigInteger)3 java.util (java.util)3