Example usage of htsjdk.samtools.SAMFileHeader in the Broad Institute's gatk project: the runTool method of the FindBadGenomicKmersSpark class.
/** Get the list of high copy number kmers in the reference, and write them to a file. */
@Override
protected void runTool(final JavaSparkContext ctx) {
    // Use the reads header's sequence dictionary when a header is available.
    final SAMFileHeader hdr = getHeaderForReads();
    final SAMSequenceDictionary dict = (hdr == null) ? null : hdr.getSequenceDictionary();

    final PipelineOptions options = getAuthenticatedGCSOptions();
    final ReferenceMultiSource referenceMultiSource = getReference();

    // Collect kmers that occur at high copy number in the reference.
    Collection<SVKmer> killList =
            findBadGenomicKmers(ctx, kSize, maxDUSTScore, referenceMultiSource, options, dict);

    // Optionally merge in kmers taken from a user-supplied high-copy-number fasta.
    if (highCopyFastaFilename != null) {
        killList = uniquify(killList, processFasta(kSize, maxDUSTScore, highCopyFastaFilename, options));
    }

    SVUtils.writeKmersFile(kSize, outputFile, killList);
}
Example usage of htsjdk.samtools.SAMFileHeader in the Broad Institute's gatk project: the runTool method of the StructuralVariationDiscoveryPipelineSpark class.
/**
 * Runs the structural-variation discovery pipeline: gather breakpoint evidence,
 * assemble and align contigs, parse the alignments, then call variants to a VCF.
 * Exits early if either assembly or alignment parsing yields nothing.
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final SAMFileHeader readsHeader = getHeaderForReads();
    final PipelineOptions gcsOptions = getAuthenticatedGCSOptions();

    // Gather evidence, run local assembly, and align the assembled contigs.
    final List<AlignedAssemblyOrExcuse> assemblies =
            FindBreakpointEvidenceSpark.gatherEvidenceAndWriteContigSamFile(
                    ctx, evidenceAndAssemblyArgs, readsHeader, getUnfilteredReads(), outputSAM, localLogger);
    if (assemblies.isEmpty()) {
        return;
    }

    // Parse the contig alignments and extract the information needed for discovery.
    @SuppressWarnings("unchecked")
    final JavaRDD<AlignedContig> parsedAlignments =
            new InMemoryAlignmentParser(ctx, assemblies, readsHeader, localLogger).getAlignedContigs();
    if (parsedAlignments.isEmpty()) {
        return;
    }

    // Discover variants and write them to the output VCF.
    DiscoverVariantsFromContigAlignmentsSAMSpark.discoverVariantsAndWriteVCF(
            parsedAlignments, discoverStageArgs.fastaReference, ctx.broadcast(getReference()),
            gcsOptions, vcfOutputFileName, localLogger);
}
Example usage of htsjdk.samtools.SAMFileHeader in the Broad Institute's gatk project: the generateMetrics method of the MarkDuplicatesSparkUtils class.
/**
 * Computes per-library duplication metrics from an RDD of reads.
 *
 * Secondary and supplementary alignments are filtered out first; each remaining
 * read contributes one partially-filled DuplicationMetrics keyed by its library
 * name, and the per-read records are then summed per library and finalized.
 *
 * @param header the SAM header used to resolve read groups to library names
 * @param reads  the reads to tally (all reads, including duplicates)
 * @return an RDD pairing each library name with its finalized DuplicationMetrics
 */
static JavaPairRDD<String, DuplicationMetrics> generateMetrics(final SAMFileHeader header, final JavaRDD<GATKRead> reads) {
// Exclude secondary/supplementary alignments so each read is counted at most once.
return reads.filter(read -> !read.isSecondaryAlignment() && !read.isSupplementaryAlignment()).mapToPair(read -> {
final String library = LibraryIdGenerator.getLibraryName(header, read.getReadGroup());
// Build a single-read metrics record; exactly one of the three examined
// counters below is incremented for this read.
DuplicationMetrics metrics = new DuplicationMetrics();
metrics.LIBRARY = library;
if (read.isUnmapped()) {
++metrics.UNMAPPED_READS;
} else if (!read.isPaired() || read.mateIsUnmapped()) {
++metrics.UNPAIRED_READS_EXAMINED;
} else {
// Counted per read here; halved to per-pair in the final mapValues step.
++metrics.READ_PAIRS_EXAMINED;
}
if (read.isDuplicate()) {
if (!read.isPaired() || read.mateIsUnmapped()) {
++metrics.UNPAIRED_READ_DUPLICATES;
} else {
// Also counted per read and halved later.
++metrics.READ_PAIR_DUPLICATES;
}
}
// Optical-duplicate counts arrive as a per-read attribute set upstream.
if (read.hasAttribute(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME)) {
metrics.READ_PAIR_OPTICAL_DUPLICATES += read.getAttributeAsInteger(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME);
}
return new Tuple2<>(library, metrics);
}).foldByKey(new DuplicationMetrics(), (metricsSum, m) -> {
// The zero-value accumulator starts with a null LIBRARY; adopt it from the
// first record folded in.
if (metricsSum.LIBRARY == null) {
metricsSum.LIBRARY = m.LIBRARY;
}
// This should never happen, as we grouped by key using library as the key.
if (!metricsSum.LIBRARY.equals(m.LIBRARY)) {
throw new GATKException("Two different libraries encountered while summing metrics: " + metricsSum.LIBRARY + " and " + m.LIBRARY);
}
metricsSum.UNMAPPED_READS += m.UNMAPPED_READS;
metricsSum.UNPAIRED_READS_EXAMINED += m.UNPAIRED_READS_EXAMINED;
metricsSum.READ_PAIRS_EXAMINED += m.READ_PAIRS_EXAMINED;
metricsSum.UNPAIRED_READ_DUPLICATES += m.UNPAIRED_READ_DUPLICATES;
metricsSum.READ_PAIR_DUPLICATES += m.READ_PAIR_DUPLICATES;
metricsSum.READ_PAIR_OPTICAL_DUPLICATES += m.READ_PAIR_OPTICAL_DUPLICATES;
return metricsSum;
}).mapValues(metrics -> {
// Work on a copy so the folded accumulator is not mutated.
DuplicationMetrics copy = metrics.copy();
// Pair counters were incremented once per read; convert to per-pair counts.
copy.READ_PAIRS_EXAMINED = metrics.READ_PAIRS_EXAMINED / 2;
copy.READ_PAIR_DUPLICATES = metrics.READ_PAIR_DUPLICATES / 2;
copy.calculateDerivedMetrics();
if (copy.ESTIMATED_LIBRARY_SIZE == null) {
copy.ESTIMATED_LIBRARY_SIZE = 0L;
}
return copy;
});
}
Example usage of htsjdk.samtools.SAMFileHeader in the Broad Institute's gatk project: the loadFastaDictionary method of the ReferenceUtils class.
/**
 * Reads a fasta sequence-dictionary (.dict) from the given InputStream and
 * returns the SAMSequenceDictionary it describes.
 *
 * Note: the stream is NOT closed by this method — the caller retains ownership.
 *
 * @param fastaDictionaryStream InputStream connected to a fasta dictionary
 * @return the SAMSequenceDictionary parsed from the stream
 */
public static SAMSequenceDictionary loadFastaDictionary(final InputStream fastaDictionaryStream) {
    // Wrap rather than close: we must not close the client's InputStream for them.
    final BufferedLineReader lineReader = new BufferedLineReader(fastaDictionaryStream);
    // The second argument to decode() is just a source label used in error messages.
    final SAMFileHeader parsedHeader = new SAMTextHeaderCodec().decode(lineReader, fastaDictionaryStream.toString());
    return parsedHeader.getSequenceDictionary();
}
Example usage of htsjdk.samtools.SAMFileHeader in the Broad Institute's gatk project: the testAddCommentsToBam method of the AddCommentsToBamIntegrationTest class.
/** Verifies that comments passed to the tool appear (prefixed) in the output BAM's header. */
@Test
public void testAddCommentsToBam() throws Exception {
    final File outputFile = BaseTest.createTempFile("addCommentsToBamTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    runIt(BAM_FILE, outputFile, commentList);

    final SAMFileHeader newHeader = SamReaderFactory.makeDefault().getFileHeader(outputFile);

    // Comments are massaged (prefixed) when added to the header; apply the same
    // massaging to the originals before comparing the two lists.
    final List<String> expectedComments = new LinkedList<>();
    commentList.forEach(comment -> expectedComments.add(SAMTextHeaderCodec.COMMENT_PREFIX + comment));

    Assert.assertEquals(newHeader.getComments(), expectedComments);
}
Aggregations