use of htsjdk.samtools.metrics.MetricsFile in project gatk by broadinstitute.
the class CollectTargetedMetrics method doWork.
/**
* Asserts that files are readable and writable and then fires off an
* HsMetricsCalculator instance to do the real work.
*/
@Override
protected Object doWork() {
for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (PER_TARGET_COVERAGE != null)
IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
// Validate that the targets and baits have the same references as the reads file
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());
ReferenceSequenceFile ref = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
}
final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord record : reader) {
collector.acceptRecord(record, null);
progress.record(record);
}
// Write the output file
final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
collector.finish();
collector.addAllLevelsToFile(metrics);
metrics.write(OUTPUT);
CloserUtil.close(reader);
return null;
}
use of htsjdk.samtools.metrics.MetricsFile in project gatk by broadinstitute.
the class CollectAlignmentSummaryMetricsTest method testNoReference.
@Test
public void testNoReference() throws IOException {
final File input = new File(TEST_DATA_DIR, "summary_alignment_stats_test.sam");
final File outfile = BaseTest.createTempFile("alignmentMetrics", ".txt");
final String[] args = new String[] { "--input", input.getAbsolutePath(), "--output", outfile.getAbsolutePath() };
runCommandLine(args);
final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(outfile));
for (final AlignmentSummaryMetrics metrics : output.getMetrics()) {
Assert.assertEquals(metrics.MEAN_READ_LENGTH, 101.0);
switch(metrics.CATEGORY) {
case FIRST_OF_PAIR:
Assert.assertEquals(metrics.TOTAL_READS, 9);
Assert.assertEquals(metrics.PF_READS, 7);
Assert.assertEquals(metrics.PF_NOISE_READS, 1);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 0);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 0);
Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 0.0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 0);
Assert.assertEquals(metrics.PF_MISMATCH_RATE, 0.0);
Assert.assertEquals(metrics.BAD_CYCLES, 19);
break;
case SECOND_OF_PAIR:
Assert.assertEquals(metrics.TOTAL_READS, 9);
Assert.assertEquals(metrics.PF_READS, 9);
Assert.assertEquals(metrics.PF_NOISE_READS, 1);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 0);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 0);
Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 0.0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 0);
Assert.assertEquals(metrics.PF_MISMATCH_RATE, 0.0);
Assert.assertEquals(metrics.BAD_CYCLES, 3);
break;
case PAIR:
Assert.assertEquals(metrics.TOTAL_READS, 18);
Assert.assertEquals(metrics.PF_READS, 16);
Assert.assertEquals(metrics.PF_NOISE_READS, 2);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 0);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 0);
Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 0.0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 0);
Assert.assertEquals(metrics.PF_MISMATCH_RATE, 0.0);
Assert.assertEquals(metrics.BAD_CYCLES, 22);
break;
case UNPAIRED:
default:
Assert.fail("Data does not contain this category: " + metrics.CATEGORY);
}
}
}
use of htsjdk.samtools.metrics.MetricsFile in project gatk by broadinstitute.
the class CollectAlignmentSummaryMetricsTest method test.
@Test(dataProvider = "metricsfiles")
public void test(final String inputFile, final String referenceFile) throws IOException {
final File input = new File(TEST_DATA_DIR, inputFile);
final File reference = new File(TEST_DATA_DIR, referenceFile);
final File outfile = BaseTest.createTempFile("alignmentMetrics", ".txt");
final String[] args = new String[] { "--input", input.getAbsolutePath(), "--output", outfile.getAbsolutePath(), "--reference", reference.getAbsolutePath() };
runCommandLine(args);
final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(outfile));
for (final AlignmentSummaryMetrics metrics : output.getMetrics()) {
Assert.assertEquals(metrics.MEAN_READ_LENGTH, 101.0);
switch(metrics.CATEGORY) {
case FIRST_OF_PAIR:
Assert.assertEquals(metrics.TOTAL_READS, 9);
Assert.assertEquals(metrics.PF_READS, 7);
Assert.assertEquals(metrics.PF_NOISE_READS, 1);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 3);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 59);
Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 19.0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 303);
Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*58D/303D*/
0.191419);
Assert.assertEquals(metrics.BAD_CYCLES, 19);
break;
case SECOND_OF_PAIR:
Assert.assertEquals(metrics.TOTAL_READS, 9);
Assert.assertEquals(metrics.PF_READS, 9);
Assert.assertEquals(metrics.PF_NOISE_READS, 1);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 7);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 239);
Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 707);
Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*19D/707D*/
0.026874);
Assert.assertEquals(metrics.BAD_CYCLES, 3);
break;
case PAIR:
Assert.assertEquals(metrics.TOTAL_READS, 18);
Assert.assertEquals(metrics.PF_READS, 16);
Assert.assertEquals(metrics.PF_NOISE_READS, 2);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 10);
Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 298);
Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 1010);
Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*77D/1010D*/
0.076238);
Assert.assertEquals(metrics.BAD_CYCLES, 22);
break;
case UNPAIRED:
default:
Assert.fail("Data does not contain this category: " + metrics.CATEGORY);
}
}
}
use of htsjdk.samtools.metrics.MetricsFile in project gatk by broadinstitute.
the class CollectRnaSeqMetricsTest method basic.
@Test
public void basic() throws Exception {
final String sequence = "chr1";
final String ignoredSequence = "chrM";
// Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
// Set seed so that strandedness is consistent among runs.
builder.setRandomSeed(0);
final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);
builder.addPair("pair1", sequenceIndex, 45, 475);
builder.addPair("pair2", sequenceIndex, 90, 225);
builder.addPair("pair3", sequenceIndex, 120, 600);
builder.addFrag("frag1", sequenceIndex, 150, true);
builder.addFrag("frag2", sequenceIndex, 450, true);
builder.addFrag("frag3", sequenceIndex, 225, false);
builder.addPair("rrnaPair", sequenceIndex, 400, 500);
builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);
final File samFile = BaseTest.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
try (final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) {
for (final SAMRecord rec : builder.getRecords()) samWriter.addAlignment(rec);
}
// Create an interval list with one ribosomal interval.
final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
rRnaIntervalList.add(rRnaInterval);
final File rRnaIntervalsFile = BaseTest.createTempFile("tmp.rRna.", ".interval_list");
rRnaIntervalList.write(rRnaIntervalsFile);
// Generate the metrics.
final File metricsFile = BaseTest.createTempFile("tmp.", ".rna_metrics");
final String[] args = new String[] { "--input", samFile.getAbsolutePath(), "--output", metricsFile.getAbsolutePath(), "--REF_FLAT", getRefFlatFile(sequence).getAbsolutePath(), "--RIBOSOMAL_INTERVALS", rRnaIntervalsFile.getAbsolutePath(), "--STRAND_SPECIFICITY", "SECOND_READ_TRANSCRIPTION_STRAND", "--IGNORE_SEQUENCE", ignoredSequence };
runCommandLine(args);
final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(metricsFile));
final RnaSeqMetrics metrics = output.getMetrics().get(0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 396);
Assert.assertEquals(metrics.PF_BASES, 432);
Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 108L);
Assert.assertEquals(metrics.CODING_BASES, 136);
Assert.assertEquals(metrics.UTR_BASES, 51);
Assert.assertEquals(metrics.INTRONIC_BASES, 50);
Assert.assertEquals(metrics.INTERGENIC_BASES, 51);
Assert.assertEquals(metrics.CORRECT_STRAND_READS, 3);
Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 4);
Assert.assertEquals(metrics.IGNORED_READS, 1);
}
use of htsjdk.samtools.metrics.MetricsFile in project gatk by broadinstitute.
the class CollectRrbsMetricsTest method getSummaryFile.
private MetricsFile<RrbsSummaryMetrics, ?> getSummaryFile(final String input, final String reference, final String prefix, final List<String> sequences) throws Exception {
final List<String> argList = new ArrayList<>();
argList.add("--input");
argList.add(input);
argList.add("--METRICS_FILE_PREFIX");
argList.add(prefix);
argList.add("--reference");
argList.add(reference);
for (final String sequence : sequences) {
argList.add("--SEQUENCE_NAMES");
argList.add(sequence);
}
runCommandLine(argList);
final MetricsFile<RrbsSummaryMetrics, ?> retVal = new MetricsFile<RrbsSummaryMetrics, Integer>();
retVal.read(new FileReader(prefix + ".rrbs_summary_metrics"));
return retVal;
}
Aggregations