Use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
The class GenomeLocParser, method createGenomeLocAtStart.
/**
 * Creates a loc to the left of the given loc (ending at loc start - 1) of up to maxBasePairs size.
 * @param loc The original loc
 * @param maxBasePairs The maximum number of base pairs in the new loc
 * @return The contiguous loc of up to maxBasePairs length, or null if the loc is unmapped or already at the start of the contig.
 */
public GenomeLoc createGenomeLocAtStart(final GenomeLoc loc, final int maxBasePairs) {
    if (GenomeLoc.isUnmapped(loc))
        return null;
    final String contigName = loc.getContig();
    final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
    final int contigIndex = contig.getSequenceIndex();

    int start = loc.getStart() - maxBasePairs;
    int stop = loc.getStart() - 1;
    if (start < 1)
        start = 1;
    if (stop < 1)
        return null;
    return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
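A minimal usage sketch (not taken from the GATK sources) showing the clipping and null behavior; it assumes GATK4's org.broadinstitute.hellbender.utils package for GenomeLoc and GenomeLocParser and builds a throwaway one-contig dictionary:

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.broadinstitute.hellbender.utils.GenomeLoc;
import org.broadinstitute.hellbender.utils.GenomeLocParser;

public class GenomeLocAtStartExample {
    public static void main(String[] args) {
        // Throwaway dictionary with a single 1000 bp contig named "chr1".
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        dict.addSequence(new SAMSequenceRecord("chr1", 1000));
        final GenomeLocParser parser = new GenomeLocParser(dict);

        // Up to 50 bases immediately to the left of chr1:100-200 -> chr1:50-99.
        final GenomeLoc loc = parser.createGenomeLoc("chr1", 100, 200);
        System.out.println(parser.createGenomeLocAtStart(loc, 50));

        // A loc that already begins at position 1 has nothing to its left -> null.
        final GenomeLoc atStart = parser.createGenomeLoc("chr1", 1, 10);
        System.out.println(parser.createGenomeLocAtStart(atStart, 50));
    }
}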
Use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
The class SparkUtils, method writeBAMHeaderToStream.
/**
 * Private helper method for {@link #convertHeaderlessHadoopBamShardToBam} that takes a SAMFileHeader and writes it
 * to the provided OutputStream, correctly encoded for the BAM format and preceded by the BAM magic bytes.
 *
 * @param samFileHeader SAM header to write
 * @param outputStream stream to write the SAM header to
 */
private static void writeBAMHeaderToStream(final SAMFileHeader samFileHeader, final OutputStream outputStream) {
    final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
    final BinaryCodec outputBinaryCodec = new BinaryCodec(new DataOutputStream(blockCompressedOutputStream));

    final String headerString;
    final Writer stringWriter = new StringWriter();
    new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true);
    headerString = stringWriter.toString();

    outputBinaryCodec.writeBytes(ReadUtils.BAM_MAGIC);

    // calculate and write the length of the SAM file header text, followed by the header text itself
    outputBinaryCodec.writeString(headerString, true, false);

    // write the sequence dictionary in binary form; this is redundant with the text header
    outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size());
    for (final SAMSequenceRecord sequenceRecord : samFileHeader.getSequenceDictionary().getSequences()) {
        outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
        outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
    }

    try {
        blockCompressedOutputStream.flush();
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
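The method above is private; callers go through the public convertHeaderlessHadoopBamShardToBam. The sketch below is a hypothetical invocation, assuming that method takes (File bamShard, SAMFileHeader header, File destination) and that SparkUtils lives in org.broadinstitute.hellbender.utils.spark; the file paths are placeholders:

import java.io.File;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SamReaderFactory;
import org.broadinstitute.hellbender.utils.spark.SparkUtils;

public class BamShardConversionExample {
    public static void main(String[] args) {
        // Placeholder paths: a headerless shard written by a Hadoop-BAM writer, and the desired output.
        final File shard = new File("reads-part-r-00000.bam");
        final File output = new File("reads-with-header.bam");

        // Reuse the header of the file the shard was derived from.
        final SAMFileHeader header =
                SamReaderFactory.makeDefault().getFileHeader(new File("original-input.bam"));

        // Prepends the BAM magic, the text header, and the binary sequence dictionary
        // (via the private writeBAMHeaderToStream shown above), then appends the shard's contents.
        SparkUtils.convertHeaderlessHadoopBamShardToBam(shard, header, output);
    }
}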
Use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
The class SparkSharder, method computePartitionReadExtents.
/**
 * For each partition, find the interval that spans it.
 */
static <L extends Locatable> List<PartitionLocatable<SimpleInterval>> computePartitionReadExtents(JavaRDD<L> locatables, SAMSequenceDictionary sequenceDictionary, int maxLocatableLength) {
    // Find the first locatable in each partition. This is very efficient since only the first record in each partition is read.
    // If a partition is empty then set the locatable to null.
    List<PartitionLocatable<L>> allSplitPoints = locatables.mapPartitions(
            (FlatMapFunction<Iterator<L>, PartitionLocatable<L>>) it ->
                    ImmutableList.of(new PartitionLocatable<>(-1, it.hasNext() ? it.next() : null)).iterator()
    ).collect();

    // fill in the partition index and remove nulls (empty partitions)
    List<PartitionLocatable<L>> splitPoints = new ArrayList<>();
    for (int i = 0; i < allSplitPoints.size(); i++) {
        L locatable = allSplitPoints.get(i).getLocatable();
        if (locatable != null) {
            splitPoints.add(new PartitionLocatable<L>(i, locatable));
        }
    }

    List<PartitionLocatable<SimpleInterval>> extents = new ArrayList<>();
    for (int i = 0; i < splitPoints.size(); i++) {
        PartitionLocatable<L> splitPoint = splitPoints.get(i);
        int partitionIndex = splitPoint.getPartitionIndex();
        Locatable current = splitPoint.getLocatable();
        int intervalContigIndex = sequenceDictionary.getSequenceIndex(current.getContig());

        final Locatable next;
        final int nextContigIndex;
        if (i < splitPoints.size() - 1) {
            next = splitPoints.get(i + 1);
            nextContigIndex = sequenceDictionary.getSequenceIndex(next.getContig());
        } else {
            next = null;
            nextContigIndex = sequenceDictionary.getSequences().size();
        }

        if (intervalContigIndex == nextContigIndex) {
            // same contig
            addPartitionReadExtent(extents, partitionIndex, current.getContig(), current.getStart(), next.getStart() + maxLocatableLength);
        } else {
            // complete the current contig
            int contigEnd = sequenceDictionary.getSequence(current.getContig()).getSequenceLength();
            addPartitionReadExtent(extents, partitionIndex, current.getContig(), current.getStart(), contigEnd);
            // add any whole contigs up to next (exclusive)
            for (int contigIndex = intervalContigIndex + 1; contigIndex < nextContigIndex; contigIndex++) {
                SAMSequenceRecord sequence = sequenceDictionary.getSequence(contigIndex);
                addPartitionReadExtent(extents, partitionIndex, sequence.getSequenceName(), 1, sequence.getSequenceLength());
            }
            // add the start of the next contig
            if (next != null) {
                addPartitionReadExtent(extents, partitionIndex, next.getContig(), 1, next.getStart() + maxLocatableLength);
            }
        }
    }
    return extents;
}
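To make the extent rules concrete without a Spark cluster, here is a small self-contained sketch (not GATK code) that applies the same same-contig / contig-boundary rules to a hard-coded list of per-partition first positions. It omits the partition-index bookkeeping and the loop over intervening whole contigs, which is vacuous in this example:

import java.util.ArrayList;
import java.util.List;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;

public class PartitionExtentSketch {
    public static void main(String[] args) {
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        dict.addSequence(new SAMSequenceRecord("chr1", 10000));
        dict.addSequence(new SAMSequenceRecord("chr2", 8000));

        // First locatable (contig, start) of each non-empty partition, in partition order.
        final String[] contigs = { "chr1", "chr1", "chr2" };
        final int[] starts = { 100, 5000, 300 };
        final int maxLocatableLength = 150;

        final List<String> extents = new ArrayList<>();
        for (int i = 0; i < contigs.length; i++) {
            final int contigIndex = dict.getSequenceIndex(contigs[i]);
            final int nextContigIndex = (i < contigs.length - 1)
                    ? dict.getSequenceIndex(contigs[i + 1])
                    : dict.getSequences().size();
            if (contigIndex == nextContigIndex) {
                // Same contig: read up to the next partition's first start, padded by the maximum record length.
                extents.add(contigs[i] + ":" + starts[i] + "-" + (starts[i + 1] + maxLocatableLength));
            } else {
                // Contig boundary: finish the current contig ...
                extents.add(contigs[i] + ":" + starts[i] + "-" + dict.getSequence(contigs[i]).getSequenceLength());
                // ... and, if there is a next partition, also cover the start of its contig.
                if (i < contigs.length - 1) {
                    extents.add(contigs[i + 1] + ":1-" + (starts[i + 1] + maxLocatableLength));
                }
            }
        }
        // Prints chr1:100-5150, chr1:5000-10000, chr2:1-450, chr2:300-8000
        extents.forEach(System.out::println);
    }
}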
Use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
The class SimpleInterval, method expandWithinContig.
/**
 * Returns a new SimpleInterval that represents this interval as expanded by the specified amount in both
 * directions, bounded by the contig start/stop if necessary.
 *
 * @param padding amount to expand this interval
 * @param sequenceDictionary dictionary to use to determine the length of this interval's contig
 * @return a new SimpleInterval that represents this interval as expanded by the specified amount in both
 *         directions, bounded by the contig start/stop if necessary.
 */
public SimpleInterval expandWithinContig(final int padding, final SAMSequenceDictionary sequenceDictionary) {
    Utils.nonNull(sequenceDictionary);
    final SAMSequenceRecord contigRecord = sequenceDictionary.getSequence(contig);
    Utils.nonNull(contigRecord, () -> "Contig " + contig + " not found in provided dictionary");
    return expandWithinContig(padding, contigRecord.getSequenceLength());
}
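A short usage sketch (assuming GATK4's org.broadinstitute.hellbender.utils.SimpleInterval) showing how the expansion is clipped against the contig length taken from the dictionary:

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.broadinstitute.hellbender.utils.SimpleInterval;

public class ExpandWithinContigExample {
    public static void main(String[] args) {
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        dict.addSequence(new SAMSequenceRecord("chr1", 1000));

        // Padding chr1:10-980 by 50 would reach -40 and 1030; both ends are clipped
        // to the contig bounds from the dictionary, giving chr1:1-1000.
        final SimpleInterval interval = new SimpleInterval("chr1", 10, 980);
        System.out.println(interval.expandWithinContig(50, dict));
    }
}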
Use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
The class MRUCachingSAMSequenceDictionary, method updateCache.
/**
 * The core cache-update routine. Given a contig name or index, look up the corresponding
 * record and update the cached record, contig name, and index.
 *
 * @param contig the contig we want to look up; if null, index is used instead
 * @param index the contig index we want to look up; only used if contig is null
 * @return the SAMSequenceRecord for contig / index
 * @throws GATKException if the contig / index isn't present in the dictionary
 */
private SAMSequenceRecord updateCache(final String contig, int index) {
    SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
    if (rec == null) {
        throw new GATKException("BUG: requested unknown contig=" + contig + " index=" + index);
    } else {
        lastSSR = rec;
        lastContig = rec.getSequenceName();
        lastIndex = rec.getSequenceIndex();
        return rec;
    }
}
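A hedged usage sketch: it assumes MRUCachingSAMSequenceDictionary is constructed around a SAMSequenceDictionary and exposes getSequence(String) / getSequence(int) lookups that fall back to updateCache on a cache miss, which is how the private method above is exercised.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.broadinstitute.hellbender.utils.MRUCachingSAMSequenceDictionary;

public class MRUCacheExample {
    public static void main(String[] args) {
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        dict.addSequence(new SAMSequenceRecord("chr1", 1000));
        dict.addSequence(new SAMSequenceRecord("chr2", 2000));

        final MRUCachingSAMSequenceDictionary cached = new MRUCachingSAMSequenceDictionary(dict);

        // The first chr1 lookup misses and calls updateCache; the second is served from the cached record.
        System.out.println(cached.getSequence("chr1").getSequenceLength());  // 1000
        System.out.println(cached.getSequence("chr1").getSequenceLength());  // 1000 (cache hit)

        // Switching contigs calls updateCache again and refreshes lastSSR/lastContig/lastIndex.
        System.out.println(cached.getSequence("chr2").getSequenceLength());  // 2000

        // Requesting a contig that is not in the dictionary makes updateCache throw a GATKException.
    }
}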