Search in sources :

Example 36 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class GenomeLocParser method createGenomeLocAtStart.

/**
     * Creates a loc to the left (starting at the loc start + 1) of maxBasePairs size.
     * @param loc The original loc
     * @param maxBasePairs The maximum number of basePairs
     * @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
     */
public GenomeLoc createGenomeLocAtStart(final GenomeLoc loc, final int maxBasePairs) {
    if (GenomeLoc.isUnmapped(loc))
        return null;
    final String contigName = loc.getContig();
    final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
    final int contigIndex = contig.getSequenceIndex();
    int start = loc.getStart() - maxBasePairs;
    int stop = loc.getStart() - 1;
    if (start < 1)
        start = 1;
    if (stop < 1)
        return null;
    return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 37 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SparkUtils method writeBAMHeaderToStream.

/**
     * Private helper method for {@link #convertHeaderlessHadoopBamShardToBam} that takes a SAMFileHeader and writes it
     * to the provided `OutputStream`, correctly encoded for the BAM format and preceded by the BAM magic bytes.
     *
     * @param samFileHeader SAM header to write
     * @param outputStream stream to write the SAM header to
     */
private static void writeBAMHeaderToStream(final SAMFileHeader samFileHeader, final OutputStream outputStream) {
    final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
    final BinaryCodec outputBinaryCodec = new BinaryCodec(new DataOutputStream(blockCompressedOutputStream));
    final String headerString;
    final Writer stringWriter = new StringWriter();
    new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true);
    headerString = stringWriter.toString();
    outputBinaryCodec.writeBytes(ReadUtils.BAM_MAGIC);
    // calculate and write the length of the SAM file header text and the header text
    outputBinaryCodec.writeString(headerString, true, false);
    // write the sequences binarily.  This is redundant with the text header
    outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size());
    for (final SAMSequenceRecord sequenceRecord : samFileHeader.getSequenceDictionary().getSequences()) {
        outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
        outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
    }
    try {
        blockCompressedOutputStream.flush();
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
Also used : BinaryCodec(htsjdk.samtools.util.BinaryCodec) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException)

Example 38 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SparkSharder method computePartitionReadExtents.

/**
     * For each partition, find the interval that spans it.
     */
static <L extends Locatable> List<PartitionLocatable<SimpleInterval>> computePartitionReadExtents(JavaRDD<L> locatables, SAMSequenceDictionary sequenceDictionary, int maxLocatableLength) {
    // Find the first locatable in each partition. This is very efficient since only the first record in each partition is read.
    // If a partition is empty then set the locatable to null
    List<PartitionLocatable<L>> allSplitPoints = locatables.mapPartitions((FlatMapFunction<Iterator<L>, PartitionLocatable<L>>) it -> ImmutableList.of(new PartitionLocatable<>(-1, it.hasNext() ? it.next() : null)).iterator()).collect();
    // fill in index and remove nulls (empty partitions)
    List<PartitionLocatable<L>> splitPoints = new ArrayList<>();
    for (int i = 0; i < allSplitPoints.size(); i++) {
        L locatable = allSplitPoints.get(i).getLocatable();
        if (locatable != null) {
            splitPoints.add(new PartitionLocatable<L>(i, locatable));
        }
    }
    List<PartitionLocatable<SimpleInterval>> extents = new ArrayList<>();
    for (int i = 0; i < splitPoints.size(); i++) {
        PartitionLocatable<L> splitPoint = splitPoints.get(i);
        int partitionIndex = splitPoint.getPartitionIndex();
        Locatable current = splitPoint.getLocatable();
        int intervalContigIndex = sequenceDictionary.getSequenceIndex(current.getContig());
        final Locatable next;
        final int nextContigIndex;
        if (i < splitPoints.size() - 1) {
            next = splitPoints.get(i + 1);
            nextContigIndex = sequenceDictionary.getSequenceIndex(next.getContig());
        } else {
            next = null;
            nextContigIndex = sequenceDictionary.getSequences().size();
        }
        if (intervalContigIndex == nextContigIndex) {
            // same contig
            addPartitionReadExtent(extents, partitionIndex, current.getContig(), current.getStart(), next.getStart() + maxLocatableLength);
        } else {
            // complete current contig
            int contigEnd = sequenceDictionary.getSequence(current.getContig()).getSequenceLength();
            addPartitionReadExtent(extents, partitionIndex, current.getContig(), current.getStart(), contigEnd);
            // add any whole contigs up to next (exclusive)
            for (int contigIndex = intervalContigIndex + 1; contigIndex < nextContigIndex; contigIndex++) {
                SAMSequenceRecord sequence = sequenceDictionary.getSequence(contigIndex);
                addPartitionReadExtent(extents, partitionIndex, sequence.getSequenceName(), 1, sequence.getSequenceLength());
            }
            // add start of next contig
            if (next != null) {
                addPartitionReadExtent(extents, partitionIndex, next.getContig(), 1, next.getStart() + maxLocatableLength);
            }
        }
    }
    return extents;
}
Also used : FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Locatable(htsjdk.samtools.util.Locatable)

Example 39 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SimpleInterval method expandWithinContig.

/**
      * Returns a new SimpleInterval that represents this interval as expanded by the specified amount in both
      * directions, bounded by the contig start/stop if necessary.
      *
      * @param padding amount to expand this interval
      * @param sequenceDictionary dictionary to use to determine the length of this interval's contig
      * @return a new SimpleInterval that represents this interval as expanded by the specified amount in both
      *         directions, bounded by the contig start/stop if necessary.
      */
public SimpleInterval expandWithinContig(final int padding, final SAMSequenceDictionary sequenceDictionary) {
    Utils.nonNull(sequenceDictionary);
    final SAMSequenceRecord contigRecord = sequenceDictionary.getSequence(contig);
    Utils.nonNull(contigRecord, () -> "Contig " + contig + " not found in provided dictionary");
    return expandWithinContig(padding, contigRecord.getSequenceLength());
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 40 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class MRUCachingSAMSequenceDictionary method updateCache.

/**
     * The key algorithm.  Given a new record, update the last used record, contig
     * name, and index.
     *
     * @param contig the contig we want to look up.  If null, index is used instead
     * @param index the contig index we want to look up.  Only used if contig is null
     * @throws GATKException if index isn't present in the dictionary
     * @return the SAMSequenceRecord for contig / index
     */
private SAMSequenceRecord updateCache(final String contig, int index) {
    SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
    if (rec == null) {
        throw new GATKException("BUG: requested unknown contig=" + contig + " index=" + index);
    } else {
        lastSSR = rec;
        lastContig = rec.getSequenceName();
        lastIndex = rec.getSequenceIndex();
        return rec;
    }
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Aggregations

SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)72 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)35 Test (org.testng.annotations.Test)26 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)24 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 File (java.io.File)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 DataProvider (org.testng.annotations.DataProvider)8 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)7 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 QueryInterval (htsjdk.samtools.QueryInterval)5 Allele (htsjdk.variant.variantcontext.Allele)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4