Search in sources :

Example 1 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class FindBadGenomicKmersSpark method runTool.

/**
 * Finds the high copy number kmers in the reference and writes them to the output file.
 *
 * <p>The sequence dictionary handed to the kmer search comes from the reads header and is
 * {@code null} when no reads header is available. When {@code highCopyFastaFilename} is set,
 * the kmers produced by {@code processFasta} for that file are combined with the reference
 * kmers through {@code uniquify} before anything is written.
 *
 * @param ctx the Spark context used to run the kmer search
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final SAMFileHeader readsHeader = getHeaderForReads();
    // A missing reads header simply means there is no dictionary to pass along.
    final SAMSequenceDictionary sequenceDictionary =
            readsHeader == null ? null : readsHeader.getSequenceDictionary();
    final PipelineOptions gcsOptions = getAuthenticatedGCSOptions();
    final ReferenceMultiSource reference = getReference();

    final Collection<SVKmer> referenceKmers =
            findBadGenomicKmers(ctx, kSize, maxDUSTScore, reference, gcsOptions, sequenceDictionary);

    final Collection<SVKmer> kmersToWrite;
    if (highCopyFastaFilename == null) {
        kmersToWrite = referenceKmers;
    } else {
        // Fold in the kmers from the user-supplied high-copy FASTA.
        kmersToWrite = uniquify(referenceKmers,
                processFasta(kSize, maxDUSTScore, highCopyFastaFilename, gcsOptions));
    }

    SVUtils.writeKmersFile(kSize, outputFile, kmersToWrite);
}
Also used : ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) PipelineOptions(com.google.cloud.dataflow.sdk.options.PipelineOptions) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 2 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class StructuralVariationDiscoveryPipelineSpark method runTool.

/**
 * Runs the pipeline stages in order: gather evidence / assemble / align, parse the
 * resulting contig alignments, then discover variants and write them to a VCF.
 *
 * <p>The method returns early, without writing a VCF, when either the assembly stage or the
 * alignment-parsing stage produces nothing.
 *
 * @param ctx the Spark context used by every stage
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final SAMFileHeader readsHeader = getHeaderForReads();
    final PipelineOptions gcsOptions = getAuthenticatedGCSOptions();

    // Stage 1: gather evidence, run assembly, and align.
    final List<AlignedAssemblyOrExcuse> assemblies =
            FindBreakpointEvidenceSpark.gatherEvidenceAndWriteContigSamFile(
                    ctx, evidenceAndAssemblyArgs, readsHeader, getUnfilteredReads(), outputSAM, localLogger);
    if (assemblies.isEmpty()) {
        return;
    }

    // Stage 2: parse the contig alignments and extract the necessary information.
    @SuppressWarnings("unchecked") final JavaRDD<AlignedContig> alignedContigs =
            new InMemoryAlignmentParser(ctx, assemblies, readsHeader, localLogger).getAlignedContigs();
    if (alignedContigs.isEmpty()) {
        return;
    }

    // Stage 3: discover variants and write them to the VCF.
    DiscoverVariantsFromContigAlignmentsSAMSpark.discoverVariantsAndWriteVCF(
            alignedContigs,
            discoverStageArgs.fastaReference,
            ctx.broadcast(getReference()),
            gcsOptions,
            vcfOutputFileName,
            localLogger);
}
Also used : PipelineOptions(com.google.cloud.dataflow.sdk.options.PipelineOptions) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 3 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class MarkDuplicatesSparkUtils method generateMetrics.

/**
 * Computes one {@link DuplicationMetrics} record per library from the given reads.
 *
 * <p>The computation has three stages: each primary read is turned into a single-read
 * metrics record keyed by its library name, the records are summed per library, and the
 * pair-based counters are then halved and the derived metrics calculated on a copy.
 *
 * @param header header used to resolve each read's read group to a library name
 * @param reads  the reads to summarise; secondary and supplementary alignments are ignored
 * @return a pair RDD mapping library name to the finished metrics for that library
 */
static JavaPairRDD<String, DuplicationMetrics> generateMetrics(final SAMFileHeader header, final JavaRDD<GATKRead> reads) {
    // Only primary alignments contribute to the metrics.
    final JavaRDD<GATKRead> primaryReads =
            reads.filter(r -> !(r.isSecondaryAlignment() || r.isSupplementaryAlignment()));

    // Stage 1: one metrics record per read, keyed by library.
    final JavaPairRDD<String, DuplicationMetrics> perReadMetrics = primaryReads.mapToPair(r -> {
        final String libraryName = LibraryIdGenerator.getLibraryName(header, r.getReadGroup());
        final DuplicationMetrics single = new DuplicationMetrics();
        single.LIBRARY = libraryName;

        if (r.isUnmapped()) {
            single.UNMAPPED_READS++;
        } else if (r.isPaired() && !r.mateIsUnmapped()) {
            single.READ_PAIRS_EXAMINED++;
        } else {
            single.UNPAIRED_READS_EXAMINED++;
        }

        if (r.isDuplicate()) {
            if (r.isPaired() && !r.mateIsUnmapped()) {
                single.READ_PAIR_DUPLICATES++;
            } else {
                single.UNPAIRED_READ_DUPLICATES++;
            }
        }

        if (r.hasAttribute(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME)) {
            single.READ_PAIR_OPTICAL_DUPLICATES += r.getAttributeAsInteger(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME);
        }
        return new Tuple2<>(libraryName, single);
    });

    // Stage 2: sum the per-read records into one running total per library.
    final JavaPairRDD<String, DuplicationMetrics> perLibraryTotals =
            perReadMetrics.foldByKey(new DuplicationMetrics(), (total, next) -> {
                // The fold starts from an empty record, so adopt the first library name seen.
                if (total.LIBRARY == null) {
                    total.LIBRARY = next.LIBRARY;
                }
                // Records are keyed by library, so a mismatch here is not expected to occur.
                if (!total.LIBRARY.equals(next.LIBRARY)) {
                    throw new GATKException("Two different libraries encountered while summing metrics: " + total.LIBRARY + " and " + next.LIBRARY);
                }
                total.UNMAPPED_READS += next.UNMAPPED_READS;
                total.UNPAIRED_READS_EXAMINED += next.UNPAIRED_READS_EXAMINED;
                total.READ_PAIRS_EXAMINED += next.READ_PAIRS_EXAMINED;
                total.UNPAIRED_READ_DUPLICATES += next.UNPAIRED_READ_DUPLICATES;
                total.READ_PAIR_DUPLICATES += next.READ_PAIR_DUPLICATES;
                total.READ_PAIR_OPTICAL_DUPLICATES += next.READ_PAIR_OPTICAL_DUPLICATES;
                return total;
            });

    // Stage 3: finish each library's record on a copy.
    return perLibraryTotals.mapValues(summed -> {
        final DuplicationMetrics finished = summed.copy();
        // Stage 1 adds one to the pair counters for every paired read, so halve them here
        // (presumably to turn per-read counts into per-pair counts).
        finished.READ_PAIRS_EXAMINED = summed.READ_PAIRS_EXAMINED / 2;
        finished.READ_PAIR_DUPLICATES = summed.READ_PAIR_DUPLICATES / 2;
        finished.calculateDerivedMetrics();
        // Replace a missing library-size estimate with zero.
        if (finished.ESTIMATED_LIBRARY_SIZE == null) {
            finished.ESTIMATED_LIBRARY_SIZE = 0L;
        }
        return finished;
    });
}
Also used : java.util(java.util) ReadCoordinateComparator(org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator) MetricsFile(htsjdk.samtools.metrics.MetricsFile) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Tuple2(scala.Tuple2) SAMFileHeader(htsjdk.samtools.SAMFileHeader) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) Collectors(java.util.stream.Collectors) AuthHolder(org.broadinstitute.hellbender.engine.AuthHolder) Serializable(java.io.Serializable) ReadUtils(org.broadinstitute.hellbender.utils.read.ReadUtils) MetricsUtils(org.broadinstitute.hellbender.metrics.MetricsUtils) org.broadinstitute.hellbender.utils.read.markduplicates(org.broadinstitute.hellbender.utils.read.markduplicates) Utils(org.broadinstitute.hellbender.utils.Utils) StreamSupport(java.util.stream.StreamSupport) com.google.common.collect(com.google.common.collect) JavaRDD(org.apache.spark.api.java.JavaRDD) Tuple2(scala.Tuple2) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 4 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReferenceUtils method loadFastaDictionary.

/**
 * Reads a sequence dictionary from a stream positioned on a fasta dictionary.
 *
 * <p>The stream's contents are decoded as a SAM text header and that header's sequence
 * dictionary is returned. The stream is left open; closing it is the caller's job.
 *
 * @param fastaDictionaryStream InputStream connected to a fasta dictionary
 * @return the SAMSequenceDictionary decoded from {@code fastaDictionaryStream}
 */
public static SAMSequenceDictionary loadFastaDictionary(final InputStream fastaDictionaryStream) {
    // Deliberately not closed: closing this reader would close the caller's InputStream too.
    final BufferedLineReader dictionaryReader = new BufferedLineReader(fastaDictionaryStream);
    // The stream's toString() is passed as the second argument to decode().
    final SAMFileHeader decodedHeader =
            new SAMTextHeaderCodec().decode(dictionaryReader, fastaDictionaryStream.toString());
    return decodedHeader.getSequenceDictionary();
}
Also used : SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) BufferedLineReader(htsjdk.samtools.util.BufferedLineReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 5 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class AddCommentsToBamIntegrationTest method testAddCommentsToBam.

/**
 * Runs the tool on {@code BAM_FILE} with {@code commentList} and checks that the comments
 * in the output header equal the input comments, each prefixed with
 * {@code SAMTextHeaderCodec.COMMENT_PREFIX}.
 */
@Test
public void testAddCommentsToBam() throws Exception {
    final File commentedBam = BaseTest.createTempFile("addCommentsToBamTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    runIt(BAM_FILE, commentedBam, commentList);

    // Comments are massaged when they are added to the header, so build the expected
    // list by applying the same prefix to each original comment.
    final List<String> expectedComments = new LinkedList<>();
    for (final String rawComment : commentList) {
        expectedComments.add(SAMTextHeaderCodec.COMMENT_PREFIX + rawComment);
    }

    final SAMFileHeader writtenHeader = SamReaderFactory.makeDefault().getFileHeader(commentedBam);
    Assert.assertEquals(writtenHeader.getComments(), expectedComments);
}
Also used : SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) LinkedList(java.util.LinkedList) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Aggregations

SAMFileHeader (htsjdk.samtools.SAMFileHeader)148 Test (org.testng.annotations.Test)89 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)85 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)71 File (java.io.File)23 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)22 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)17 DataProvider (org.testng.annotations.DataProvider)17 java.util (java.util)15 UserException (org.broadinstitute.hellbender.exceptions.UserException)15 ArrayList (java.util.ArrayList)14 List (java.util.List)12 Collectors (java.util.stream.Collectors)12 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)12 SAMRecord (htsjdk.samtools.SAMRecord)11 Locatable (htsjdk.samtools.util.Locatable)11 BeforeClass (org.testng.annotations.BeforeClass)11 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)10