use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class ReorderSam method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
IOUtil.assertFileIsWritable(OUTPUT);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SAMSequenceDictionary refDict = reference.getSequenceDictionary();
if (refDict == null) {
CloserUtil.close(in);
throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
}
printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
printDictionary("Reference", refDict);
Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
// has to be after we create the newOrder
SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
outHeader.setSequenceDictionary(refDict);
logger.info("Writing reads...");
if (in.hasIndex()) {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
// write the reads in contig order
for (final SAMSequenceRecord contig : refDict.getSequences()) {
final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
writeReads(out, it, newOrder, contig.getSequenceName());
}
// don't forget the unmapped reads
writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
}
} else {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
writeReads(out, in.iterator(), newOrder, "All reads");
}
}
// cleanup
CloserUtil.close(in);
return null;
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class UpdateVCFSequenceDictionary method apply.
@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) {
// Validate each variant against the source dictionary manually
SAMSequenceRecord samSeqRec = sourceDictionary.getSequence(vc.getContig());
if (samSeqRec == null) {
throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that is not present in the provided dictionary", vc.getID(), vc.getContig()));
} else if (vc.getEnd() > samSeqRec.getSequenceLength()) {
throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that ends at a position (%d) that exceeds the length of that sequence (%d) in the provided dictionary", vc.getID(), vc.getContig(), vc.getEnd(), samSeqRec.getSequenceLength()));
}
vcfWriter.add(vc);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class IndexUtils method getSamSequenceDictionaryFromIndex.
private static SAMSequenceDictionary getSamSequenceDictionaryFromIndex(final Index index) {
final List<String> seqNames = index.getSequenceNames();
if (seqNames == null || seqNames.isEmpty()) {
return null;
}
final SAMSequenceDictionary dict = new SAMSequenceDictionary();
//use UNKNOWN_SEQUENCE_LENGTH to indicate contigs that will not be compared by length (see SequenceDictionaryUtils.sequenceRecordsAreEquivalent)
seqNames.forEach(seqName -> dict.addSequence(new SAMSequenceRecord(seqName, SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH)));
return dict;
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class GenomeLocParser method createGenomeLocAtStart.
/**
* Creates a loc to the left (starting at the loc start + 1) of maxBasePairs size.
* @param loc The original loc
* @param maxBasePairs The maximum number of basePairs
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
*/
public GenomeLoc createGenomeLocAtStart(final GenomeLoc loc, final int maxBasePairs) {
if (GenomeLoc.isUnmapped(loc))
return null;
final String contigName = loc.getContig();
final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
final int contigIndex = contig.getSequenceIndex();
int start = loc.getStart() - maxBasePairs;
int stop = loc.getStart() - 1;
if (start < 1)
start = 1;
if (stop < 1)
return null;
return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class SparkUtils method writeBAMHeaderToStream.
/**
* Private helper method for {@link #convertHeaderlessHadoopBamShardToBam} that takes a SAMFileHeader and writes it
* to the provided `OutputStream`, correctly encoded for the BAM format and preceded by the BAM magic bytes.
*
* @param samFileHeader SAM header to write
* @param outputStream stream to write the SAM header to
*/
private static void writeBAMHeaderToStream(final SAMFileHeader samFileHeader, final OutputStream outputStream) {
final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
final BinaryCodec outputBinaryCodec = new BinaryCodec(new DataOutputStream(blockCompressedOutputStream));
final String headerString;
final Writer stringWriter = new StringWriter();
new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true);
headerString = stringWriter.toString();
outputBinaryCodec.writeBytes(ReadUtils.BAM_MAGIC);
// calculate and write the length of the SAM file header text and the header text
outputBinaryCodec.writeString(headerString, true, false);
// write the sequences binarily. This is redundant with the text header
outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size());
for (final SAMSequenceRecord sequenceRecord : samFileHeader.getSequenceDictionary().getSequences()) {
outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
}
try {
blockCompressedOutputStream.flush();
} catch (final IOException ioe) {
throw new RuntimeIOException(ioe);
}
}
Aggregations