Search in sources :

Example 6 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class ReorderSam method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SAMSequenceDictionary refDict = reference.getSequenceDictionary();
    if (refDict == null) {
        CloserUtil.close(in);
        throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
    }
    printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
    printDictionary("Reference", refDict);
    Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
    // has to be after we create the newOrder
    SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
    outHeader.setSequenceDictionary(refDict);
    logger.info("Writing reads...");
    if (in.hasIndex()) {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
            // write the reads in contig order
            for (final SAMSequenceRecord contig : refDict.getSequences()) {
                final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
                writeReads(out, it, newOrder, contig.getSequenceName());
            }
            // don't forget the unmapped reads
            writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
        }
    } else {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
            writeReads(out, in.iterator(), newOrder, "All reads");
        }
    }
    // cleanup
    CloserUtil.close(in);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 7 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class UpdateVCFSequenceDictionary method apply.

@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) {
    // Validate each variant against the source dictionary manually
    SAMSequenceRecord samSeqRec = sourceDictionary.getSequence(vc.getContig());
    if (samSeqRec == null) {
        throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that is not present in the provided dictionary", vc.getID(), vc.getContig()));
    } else if (vc.getEnd() > samSeqRec.getSequenceLength()) {
        throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that ends at a position (%d) that exceeds the length of that sequence (%d) in the provided dictionary", vc.getID(), vc.getContig(), vc.getEnd(), samSeqRec.getSequenceLength()));
    }
    vcfWriter.add(vc);
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException)

Example 8 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class IndexUtils method getSamSequenceDictionaryFromIndex.

private static SAMSequenceDictionary getSamSequenceDictionaryFromIndex(final Index index) {
    final List<String> seqNames = index.getSequenceNames();
    if (seqNames == null || seqNames.isEmpty()) {
        return null;
    }
    final SAMSequenceDictionary dict = new SAMSequenceDictionary();
    //use UNKNOWN_SEQUENCE_LENGTH to indicate contigs that will not be compared by length (see SequenceDictionaryUtils.sequenceRecordsAreEquivalent)
    seqNames.forEach(seqName -> dict.addSequence(new SAMSequenceRecord(seqName, SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH)));
    return dict;
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 9 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class GenomeLocParser method createGenomeLocAtStart.

/**
     * Creates a loc to the left (starting at the loc start + 1) of maxBasePairs size.
     * @param loc The original loc
     * @param maxBasePairs The maximum number of basePairs
     * @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
     */
public GenomeLoc createGenomeLocAtStart(final GenomeLoc loc, final int maxBasePairs) {
    if (GenomeLoc.isUnmapped(loc))
        return null;
    final String contigName = loc.getContig();
    final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
    final int contigIndex = contig.getSequenceIndex();
    int start = loc.getStart() - maxBasePairs;
    int stop = loc.getStart() - 1;
    if (start < 1)
        start = 1;
    if (stop < 1)
        return null;
    return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)

Example 10 with SAMSequenceRecord

use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.

the class SparkUtils method writeBAMHeaderToStream.

/**
     * Private helper method for {@link #convertHeaderlessHadoopBamShardToBam} that takes a SAMFileHeader and writes it
     * to the provided `OutputStream`, correctly encoded for the BAM format and preceded by the BAM magic bytes.
     *
     * @param samFileHeader SAM header to write
     * @param outputStream stream to write the SAM header to
     */
private static void writeBAMHeaderToStream(final SAMFileHeader samFileHeader, final OutputStream outputStream) {
    final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
    final BinaryCodec outputBinaryCodec = new BinaryCodec(new DataOutputStream(blockCompressedOutputStream));
    final String headerString;
    final Writer stringWriter = new StringWriter();
    new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true);
    headerString = stringWriter.toString();
    outputBinaryCodec.writeBytes(ReadUtils.BAM_MAGIC);
    // calculate and write the length of the SAM file header text and the header text
    outputBinaryCodec.writeString(headerString, true, false);
    // write the sequences binarily.  This is redundant with the text header
    outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size());
    for (final SAMSequenceRecord sequenceRecord : samFileHeader.getSequenceDictionary().getSequences()) {
        outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
        outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
    }
    try {
        blockCompressedOutputStream.flush();
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
Also used : BinaryCodec(htsjdk.samtools.util.BinaryCodec) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException)

Aggregations

SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)72 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)35 Test (org.testng.annotations.Test)26 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)24 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 File (java.io.File)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 DataProvider (org.testng.annotations.DataProvider)8 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)7 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 QueryInterval (htsjdk.samtools.QueryInterval)5 Allele (htsjdk.variant.variantcontext.Allele)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4