use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class CollectRnaSeqMetricsTest method basic.
@Test
public void basic() throws Exception {
final String sequence = "chr1";
final String ignoredSequence = "chrM";
// Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
// Set seed so that strandedness is consistent among runs.
builder.setRandomSeed(0);
final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);
builder.addPair("pair1", sequenceIndex, 45, 475);
builder.addPair("pair2", sequenceIndex, 90, 225);
builder.addPair("pair3", sequenceIndex, 120, 600);
builder.addFrag("frag1", sequenceIndex, 150, true);
builder.addFrag("frag2", sequenceIndex, 450, true);
builder.addFrag("frag3", sequenceIndex, 225, false);
builder.addPair("rrnaPair", sequenceIndex, 400, 500);
builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);
final File samFile = BaseTest.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
try (final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) {
for (final SAMRecord rec : builder.getRecords()) samWriter.addAlignment(rec);
}
// Create an interval list with one ribosomal interval.
final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
rRnaIntervalList.add(rRnaInterval);
final File rRnaIntervalsFile = BaseTest.createTempFile("tmp.rRna.", ".interval_list");
rRnaIntervalList.write(rRnaIntervalsFile);
// Generate the metrics.
final File metricsFile = BaseTest.createTempFile("tmp.", ".rna_metrics");
final String[] args = new String[] { "--input", samFile.getAbsolutePath(), "--output", metricsFile.getAbsolutePath(), "--REF_FLAT", getRefFlatFile(sequence).getAbsolutePath(), "--RIBOSOMAL_INTERVALS", rRnaIntervalsFile.getAbsolutePath(), "--STRAND_SPECIFICITY", "SECOND_READ_TRANSCRIPTION_STRAND", "--IGNORE_SEQUENCE", ignoredSequence };
runCommandLine(args);
final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(metricsFile));
final RnaSeqMetrics metrics = output.getMetrics().get(0);
Assert.assertEquals(metrics.PF_ALIGNED_BASES, 396);
Assert.assertEquals(metrics.PF_BASES, 432);
Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 108L);
Assert.assertEquals(metrics.CODING_BASES, 136);
Assert.assertEquals(metrics.UTR_BASES, 51);
Assert.assertEquals(metrics.INTRONIC_BASES, 50);
Assert.assertEquals(metrics.INTERGENIC_BASES, 51);
Assert.assertEquals(metrics.CORRECT_STRAND_READS, 3);
Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 4);
Assert.assertEquals(metrics.IGNORED_READS, 1);
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class ReadsSparkSinkUnitTest method assertReadsAreSorted.
private static void assertReadsAreSorted(SAMFileHeader header, List<GATKRead> writtenReads) {
final SAMRecordCoordinateComparator comparator = new SAMRecordCoordinateComparator();
// Assert that the reads are sorted.
final int size = writtenReads.size();
for (int i = 0; i < size - 1; ++i) {
final SAMRecord smaller = writtenReads.get(i).convertToSAMRecord(header);
final SAMRecord larger = writtenReads.get(i + 1).convertToSAMRecord(header);
final int compare = comparator.compare(smaller, larger);
Assert.assertTrue(compare < 0, "Reads are out of order (compare=" + compare + "): " + smaller.getSAMString() + " and " + larger.getSAMString());
}
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class MultiLevelCollectorTest method multilevelCollectorTest.
@Test(dataProvider = "variedAccumulationLevels")
public void multilevelCollectorTest(final Set<MetricAccumulationLevel> accumulationLevels) {
final SamReader in = SamReaderFactory.makeDefault().open(TESTFILE);
final RecordCountMultiLevelCollector collector = new RecordCountMultiLevelCollector(accumulationLevels, in.getFileHeader().getReadGroups());
for (final SAMRecord rec : in) {
collector.acceptRecord(rec, null);
}
collector.finish();
int totalProcessed = 0;
int totalMetrics = 0;
for (final MetricAccumulationLevel level : accumulationLevels) {
final Map<String, Integer> keyToMetrics = accumulationLevelToPerUnitReads.get(level);
for (final Map.Entry<String, Integer> entry : keyToMetrics.entrySet()) {
final TotalNumberMetric metric = collector.getUnitsToMetrics().get(entry.getKey());
Assert.assertEquals(entry.getValue(), metric.TALLY);
Assert.assertTrue(metric.FINISHED);
totalProcessed += metric.TALLY;
totalMetrics += 1;
}
}
Assert.assertEquals(collector.getUnitsToMetrics().size(), totalMetrics);
Assert.assertEquals(totalProcessed, collector.getNumProcessed());
CloserUtil.close(in);
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class MultiLevelReducibleCollectorUnitTest method multilevelCollectorTest.
@Test(dataProvider = "variedAccumulationLevels")
public void multilevelCollectorTest(final Set<MetricAccumulationLevel> accumulationLevels) {
final SamReader in = SamReaderFactory.makeDefault().open(TESTFILE);
final RecordCountMultiLevelCollector collector1 = new RecordCountMultiLevelCollector(accumulationLevels, in.getFileHeader().getReadGroups());
final RecordCountMultiLevelCollector collector2 = new RecordCountMultiLevelCollector(accumulationLevels, in.getFileHeader().getReadGroups());
//distribute the reads across the two collectors
int count = 1;
for (final SAMRecord rec : in) {
if (count % 2 == 0) {
collector1.acceptRecord(rec, null);
} else {
collector2.acceptRecord(rec, null);
}
count++;
}
collector1.finish();
collector2.finish();
// combine the results into collector1
collector1.combine(collector2);
int totalProcessed = 0;
int totalMetrics = 0;
for (final MetricAccumulationLevel level : accumulationLevels) {
final Map<String, Integer> keyToMetrics = accumulationLevelToPerUnitReads.get(level);
for (final Map.Entry<String, Integer> entry : keyToMetrics.entrySet()) {
final TotalNumberMetric metric = collector1.getUnitsToMetrics().get(entry.getKey());
Assert.assertEquals(entry.getValue(), metric.TALLY);
Assert.assertTrue(metric.FINISHED);
totalProcessed += metric.TALLY;
totalMetrics += 1;
}
}
Assert.assertEquals(collector1.getUnitsToMetrics().size(), totalMetrics);
Assert.assertEquals(totalProcessed, collector1.getNumProcessed());
CloserUtil.close(in);
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class MostDistantPrimaryAlignmentSelectionStrategy method pickPrimaryAlignment.
@Override
public void pickPrimaryAlignment(final HitsForInsert hitsForInsert) {
final BestEndAlignmentsAccumulator firstEndBest = new BestEndAlignmentsAccumulator();
final BestEndAlignmentsAccumulator secondEndBest = new BestEndAlignmentsAccumulator();
final CollectionUtil.MultiMap<Integer, SAMRecord> firstEndBySequence = new CollectionUtil.MultiMap<>();
final BestPairAlignmentsAccumulator pairBest = new BestPairAlignmentsAccumulator();
for (final SAMRecord rec : hitsForInsert.firstOfPairOrFragment) {
if (rec.getReadUnmappedFlag())
throw new IllegalStateException();
firstEndBest.considerBest(rec);
firstEndBySequence.append(rec.getReferenceIndex(), rec);
}
for (final SAMRecord secondEnd : hitsForInsert.secondOfPair) {
if (secondEnd.getReadUnmappedFlag())
throw new IllegalStateException();
secondEndBest.considerBest(secondEnd);
final Collection<SAMRecord> firstEnds = firstEndBySequence.get(secondEnd.getReferenceIndex());
if (firstEnds != null) {
for (final SAMRecord firstEnd : firstEnds) {
pairBest.considerBest(firstEnd, secondEnd);
}
}
}
final SAMRecord bestFirstEnd;
final SAMRecord bestSecondEnd;
if (pairBest.hasBest()) {
final Map.Entry<SAMRecord, SAMRecord> pairEntry = pickRandomlyFromList(pairBest.bestAlignmentPairs);
bestFirstEnd = pairEntry.getKey();
bestSecondEnd = pairEntry.getValue();
} else {
if (firstEndBest.hasBest()) {
bestFirstEnd = pickRandomlyFromList(firstEndBest.bestAlignments);
} else {
bestFirstEnd = null;
}
if (secondEndBest.hasBest()) {
bestSecondEnd = pickRandomlyFromList(secondEndBest.bestAlignments);
} else {
bestSecondEnd = null;
}
}
if (hitsForInsert.firstOfPairOrFragment.isEmpty() != (bestFirstEnd == null)) {
throw new IllegalStateException("Should not happen");
}
if (hitsForInsert.secondOfPair.isEmpty() != (bestSecondEnd == null)) {
throw new IllegalStateException("Should not happen");
}
if (bestFirstEnd != null) {
moveToHead(hitsForInsert.firstOfPairOrFragment, bestFirstEnd);
}
if (bestSecondEnd != null) {
moveToHead(hitsForInsert.secondOfPair, bestSecondEnd);
}
hitsForInsert.setPrimaryAlignment(0);
// No non-primary alignments for one of the ends so nothing to do.
if (hitsForInsert.firstOfPairOrFragment.size() <= 1 || hitsForInsert.secondOfPair.size() <= 1)
return;
final int amountToSlide = hitsForInsert.firstOfPairOrFragment.size() - 1;
for (int i = 0; i < amountToSlide; ++i) {
hitsForInsert.secondOfPair.add(1, null);
}
}
Aggregations