Search in sources :

Example 16 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class AlignmentUtils method createReadAlignedToRef.

/**
     * Aligns reads the haplotype, and then projects this alignment of read -> hap onto the reference
     * via the alignment of haplotype (via its getCigar) method.
     *
     * @param originalRead the read we want to write aligned to the reference genome
     * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
     * @param referenceStart the start of the reference that haplotype is aligned to.  Provides global coordinate frame.
     * @param isInformative true if the read is differentially informative for one of the haplotypes
     *
     * @throws IllegalArgumentException if {@code originalRead} is {@code null} or {@code haplotype} is {@code null} or it
     *   does not have a Cigar or the {@code referenceStart} is invalid (less than 1).
     *
     * @return a GATKRead aligned to reference. Never {@code null}.
     */
public static GATKRead createReadAlignedToRef(final GATKRead originalRead, final Haplotype haplotype, final Haplotype refHaplotype, final int referenceStart, final boolean isInformative) {
    Utils.nonNull(originalRead);
    Utils.nonNull(haplotype);
    Utils.nonNull(refHaplotype);
    Utils.nonNull(haplotype.getCigar());
    if (referenceStart < 1) {
        throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
    }
    // compute the smith-waterman alignment of read -> haplotype
    final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), originalRead.getBases(), CigarUtils.NEW_SW_PARAMETERS);
    if (swPairwiseAlignment.getAlignmentStart2wrt1() == -1) {
        // sw can fail (reasons not clear) so if it happens just don't realign the read
        return originalRead;
    }
    final Cigar swCigar = consolidateCigar(swPairwiseAlignment.getCigar());
    // since we're modifying the read we need to clone it
    final GATKRead read = originalRead.copy();
    // only informative reads are given the haplotype tag to enhance visualization
    if (isInformative) {
        read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());
    }
    // compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
    final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
    final int readStartOnHaplotype = calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
    final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
    read.setPosition(read.getContig(), readStartOnReference);
    // compute the read -> ref alignment by mapping read -> hap -> ref from the
    // SW of read -> hap mapped through the given by hap -> ref
    final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
    final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef);
    final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw);
    final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(), originalRead.getBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
    read.setCigar(readToRefCigar);
    if (readToRefCigar.getReadLength() != read.getLength()) {
        throw new GATKException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength() + " != read length " + read.getLength() + " for read " + read.toString() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength() + "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());
    }
    return read;
}
Also used : Cigar(htsjdk.samtools.Cigar) SWPairwiseAlignment(org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 17 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class AlignmentUtils method calcAlignmentByteArrayOffset.

/**
     * Calculate the index into the read's bases of the beginning of the encompassing cigar element for a given cigar and offset
     *
     * @param cigar            the read's CIGAR -- cannot be null
     * @param offset           the offset to use for the calculation or -1 if in the middle of a deletion
     * @param isDeletion       are we in the middle of a deletion?
     * @param alignmentStart   the alignment start of the read
     * @param refLocus         the reference position of the offset
     * @return a non-negative int index
     */
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isDeletion, final int alignmentStart, final int refLocus) {
    if (cigar == null)
        throw new IllegalArgumentException("attempting to find the alignment position from a CIGAR that is null");
    if (offset < -1)
        throw new IllegalArgumentException("attempting to find the alignment position with an offset that is negative (and not -1)");
    if (alignmentStart < 0)
        throw new IllegalArgumentException("attempting to find the alignment position from an alignment start that is negative");
    if (refLocus < 0)
        throw new IllegalArgumentException("attempting to find the alignment position from a reference position that is negative");
    if (offset >= cigar.getReadLength())
        throw new IllegalArgumentException("attempting to find the alignment position of an offset than is larger than the read length");
    int pileupOffset = offset;
    // Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases
    if (isDeletion) {
        pileupOffset = refLocus - alignmentStart;
        final CigarElement ce = cigar.getCigarElement(0);
        if (ce.getOperator() == CigarOperator.S) {
            pileupOffset += ce.getLength();
        }
    }
    int pos = 0;
    int alignmentPos = 0;
    for (int iii = 0; iii < cigar.numCigarElements(); iii++) {
        final CigarElement ce = cigar.getCigarElement(iii);
        final int elementLength = ce.getLength();
        switch(ce.getOperator()) {
            case I:
            case // TODO -- I don't think that soft clips should be treated the same as inserted bases here. Investigation needed.
            S:
                pos += elementLength;
                if (pos >= pileupOffset) {
                    return alignmentPos;
                }
                break;
            case D:
                if (!isDeletion) {
                    alignmentPos += elementLength;
                } else {
                    if (pos + elementLength - 1 >= pileupOffset) {
                        return alignmentPos + (pileupOffset - pos);
                    } else {
                        pos += elementLength;
                        alignmentPos += elementLength;
                    }
                }
                break;
            case M:
            case EQ:
            case X:
                if (pos + elementLength - 1 >= pileupOffset) {
                    return alignmentPos + (pileupOffset - pos);
                } else {
                    pos += elementLength;
                    alignmentPos += elementLength;
                }
                break;
            case H:
            case P:
            case N:
                break;
            default:
                throw new GATKException("Unsupported cigar operator: " + ce.getOperator());
        }
    }
    return alignmentPos;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 18 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class AlignmentUtils method isInsideDeletion.

/**
     * Is the offset inside a deletion?
     *
     * @param cigar         the read's CIGAR -- cannot be null
     * @param offset        the offset into the CIGAR
     * @return true if the offset is inside a deletion, false otherwise
     */
public static boolean isInsideDeletion(final Cigar cigar, final int offset) {
    Utils.nonNull(cigar);
    if (offset < 0)
        return false;
    // pos counts read bases
    int pos = 0;
    int prevPos = 0;
    for (final CigarElement ce : cigar.getCigarElements()) {
        switch(ce.getOperator()) {
            case I:
            case S:
            case D:
            case M:
            case EQ:
            case X:
                prevPos = pos;
                pos += ce.getLength();
                break;
            case H:
            case P:
            case N:
                break;
            default:
                throw new GATKException("Unsupported cigar operator: " + ce.getOperator());
        }
        // Is the offset inside a deletion?
        if (prevPos < offset && pos >= offset && ce.getOperator() == CigarOperator.D) {
            return true;
        }
    }
    return false;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 19 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class FindBadGenomicKmersSpark method processFasta.

@VisibleForTesting
static List<SVKmer> processFasta(final int kSize, final int maxDUSTScore, final String fastaFilename, final PipelineOptions options) {
    try (BufferedReader rdr = new BufferedReader(new InputStreamReader(BucketUtils.openFile(fastaFilename)))) {
        final List<SVKmer> kmers = new ArrayList<>((int) BucketUtils.fileSize(fastaFilename));
        String line;
        final StringBuilder sb = new StringBuilder();
        final SVKmer kmerSeed = new SVKmerLong();
        while ((line = rdr.readLine()) != null) {
            if (line.charAt(0) != '>')
                sb.append(line);
            else if (sb.length() > 0) {
                SVDUSTFilteredKmerizer.stream(sb, kSize, maxDUSTScore, kmerSeed).map(kmer -> kmer.canonical(kSize)).forEach(kmers::add);
                sb.setLength(0);
            }
        }
        if (sb.length() > 0) {
            SVDUSTFilteredKmerizer.stream(sb, kSize, maxDUSTScore, kmerSeed).map(kmer -> kmer.canonical(kSize)).forEach(kmers::add);
        }
        return kmers;
    } catch (IOException ioe) {
        throw new GATKException("Can't read high copy kmers fasta file " + fastaFilename, ioe);
    }
}
Also used : Output(com.esotericsoftware.kryo.io.Output) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) Argument(org.broadinstitute.barclay.argparser.Argument) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) SAMFileHeader(htsjdk.samtools.SAMFileHeader) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) Kryo(com.esotericsoftware.kryo.Kryo) BucketUtils(org.broadinstitute.hellbender.utils.gcs.BucketUtils) HopscotchMap(org.broadinstitute.hellbender.tools.spark.utils.HopscotchMap) Input(com.esotericsoftware.kryo.io.Input) HopscotchSet(org.broadinstitute.hellbender.tools.spark.utils.HopscotchSet) JavaRDD(org.apache.spark.api.java.JavaRDD) DefaultSerializer(com.esotericsoftware.kryo.DefaultSerializer) HashPartitioner(org.apache.spark.HashPartitioner) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) GATKSparkTool(org.broadinstitute.hellbender.engine.spark.GATKSparkTool) IOException(java.io.IOException) Tuple2(scala.Tuple2) InputStreamReader(java.io.InputStreamReader) StructuralVariationSparkProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariationSparkProgramGroup) PipelineOptions(com.google.cloud.dataflow.sdk.options.PipelineOptions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) BufferedReader(java.io.BufferedReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 20 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class FindBreakpointEvidenceSpark method getKmerIntervals.

/** find kmers for each interval */
@VisibleForTesting
static Tuple2<List<AlignedAssemblyOrExcuse>, List<KmerAndInterval>> getKmerIntervals(final Params params, final JavaSparkContext ctx, final HopscotchUniqueMultiMap<String, Integer, QNameAndInterval> qNamesMultiMap, final int nIntervals, final Set<SVKmer> kmerKillSet, final JavaRDD<GATKRead> reads, final Locations locations) {
    final Broadcast<Set<SVKmer>> broadcastKmerKillSet = ctx.broadcast(kmerKillSet);
    final Broadcast<HopscotchUniqueMultiMap<String, Integer, QNameAndInterval>> broadcastQNameAndIntervalsMultiMap = ctx.broadcast(qNamesMultiMap);
    // given a set of template names with interval IDs and a kill set of ubiquitous kmers,
    // produce a set of interesting kmers for each interval ID
    final int kmersPerPartitionGuess = params.cleanerKmersPerPartitionGuess;
    final int minKmers = params.cleanerMinKmerCount;
    final int maxKmers = params.cleanerMaxKmerCount;
    final int maxIntervals = params.cleanerMaxIntervals;
    final int kSize = params.kSize;
    final int maxDUSTScore = params.maxDUSTScore;
    final List<KmerAndInterval> kmerIntervals = reads.mapPartitionsToPair(readItr -> new MapPartitioner<>(readItr, new QNameKmerizer(broadcastQNameAndIntervalsMultiMap.value(), broadcastKmerKillSet.value(), kSize, maxDUSTScore)).iterator(), false).reduceByKey(Integer::sum).mapPartitions(itr -> new KmerCleaner(itr, kmersPerPartitionGuess, minKmers, maxKmers, maxIntervals).iterator()).collect();
    broadcastQNameAndIntervalsMultiMap.destroy();
    broadcastKmerKillSet.destroy();
    final int[] intervalKmerCounts = new int[nIntervals];
    for (final KmerAndInterval kmerAndInterval : kmerIntervals) {
        intervalKmerCounts[kmerAndInterval.getIntervalId()] += 1;
    }
    final Set<Integer> intervalsToKill = new HashSet<>();
    final List<AlignedAssemblyOrExcuse> intervalDispositions = new ArrayList<>();
    for (int idx = 0; idx != nIntervals; ++idx) {
        if (intervalKmerCounts[idx] < params.minKmersPerInterval) {
            intervalsToKill.add(idx);
            intervalDispositions.add(new AlignedAssemblyOrExcuse(idx, "FASTQ not written -- too few kmers"));
        }
    }
    qNamesMultiMap.removeIf(qNameAndInterval -> intervalsToKill.contains(qNameAndInterval.getIntervalId()));
    final List<KmerAndInterval> filteredKmerIntervals = kmerIntervals.stream().filter(kmerAndInterval -> !intervalsToKill.contains(kmerAndInterval.getIntervalId())).collect(SVUtils.arrayListCollector(kmerIntervals.size()));
    // record the kmers with their interval IDs
    if (locations.kmerFile != null) {
        try (final OutputStreamWriter writer = new OutputStreamWriter(new BufferedOutputStream(BucketUtils.createFile(locations.kmerFile)))) {
            for (final KmerAndInterval kmerAndInterval : filteredKmerIntervals) {
                writer.write(kmerAndInterval.toString(kSize) + " " + kmerAndInterval.getIntervalId() + "\n");
            }
        } catch (final IOException ioe) {
            throw new GATKException("Can't write kmer intervals file " + locations.kmerFile, ioe);
        }
    }
    return new Tuple2<>(intervalDispositions, filteredKmerIntervals);
}
Also used : BwaMemIndexSingleton(org.broadinstitute.hellbender.utils.bwa.BwaMemIndexSingleton) IntStream(java.util.stream.IntStream) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) Argument(org.broadinstitute.barclay.argparser.Argument) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) ArgumentCollection(org.broadinstitute.barclay.argparser.ArgumentCollection) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) BwaMemAlignment(org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment) Function(java.util.function.Function) BwaMemAligner(org.broadinstitute.hellbender.utils.bwa.BwaMemAligner) HopscotchUniqueMultiMap(org.broadinstitute.hellbender.tools.spark.utils.HopscotchUniqueMultiMap) FermiLiteAssembly(org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly) FermiLiteAssembler(org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembler) BucketUtils(org.broadinstitute.hellbender.utils.gcs.BucketUtils) JavaRDD(org.apache.spark.api.java.JavaRDD) Broadcast(org.apache.spark.broadcast.Broadcast) HashPartitioner(org.apache.spark.HashPartitioner) GATKSparkTool(org.broadinstitute.hellbender.engine.spark.GATKSparkTool) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) StructuralVariationSparkProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariationSparkProgramGroup) FindBreakpointEvidenceSparkArgumentCollection(org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.FindBreakpointEvidenceSparkArgumentCollection) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) Utils(org.broadinstitute.hellbender.utils.Utils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) htsjdk.samtools(htsjdk.samtools) MapPartitioner(org.broadinstitute.hellbender.tools.spark.utils.MapPartitioner) LogManager(org.apache.logging.log4j.LogManager) HopscotchUniqueMultiMap(org.broadinstitute.hellbender.tools.spark.utils.HopscotchUniqueMultiMap) Tuple2(scala.Tuple2) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

GATKException (org.broadinstitute.hellbender.exceptions.GATKException)96 IOException (java.io.IOException)19 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 CigarElement (htsjdk.samtools.CigarElement)12 ArrayList (java.util.ArrayList)10 UserException (org.broadinstitute.hellbender.exceptions.UserException)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 Cigar (htsjdk.samtools.Cigar)7 File (java.io.File)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 OutputStream (java.io.OutputStream)5 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)5 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 Utils (org.broadinstitute.hellbender.utils.Utils)4 Tuple2 (scala.Tuple2)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 BufferedOutputStream (java.io.BufferedOutputStream)3 InputStream (java.io.InputStream)3 BigInteger (java.math.BigInteger)3 java.util (java.util)3