use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class ArtificialBAMBuilderUnitTest method testBamProvider.
@Test(dataProvider = "ArtificialBAMBuilderUnitTestProvider")
public void testBamProvider(final ArtificialBAMBuilder bamBuilder, int readLength, int skips, int start, int nSamples, int nReadsPerLocus, int nLoci) throws IOException {
Assert.assertEquals(bamBuilder.getReadLength(), readLength);
Assert.assertEquals(bamBuilder.getSkipNLoci(), skips);
Assert.assertEquals(bamBuilder.getAlignmentStart(), start);
Assert.assertEquals(bamBuilder.getAlignmentEnd(), start + nLoci * (skips + 1) + readLength);
Assert.assertEquals(bamBuilder.getNSamples(), nSamples);
Assert.assertEquals(bamBuilder.getnReadsPerLocus(), nReadsPerLocus);
Assert.assertEquals(bamBuilder.getnLoci(), nLoci);
Assert.assertEquals(bamBuilder.getSamples().size(), bamBuilder.getNSamples());
Assert.assertNull(bamBuilder.getReference());
final List<GATKRead> reads = bamBuilder.makeReads();
Assert.assertEquals(reads.size(), bamBuilder.expectedNumberOfReads());
for (final GATKRead read : reads) {
assertGoodRead(read.convertToSAMRecord(bamBuilder.getHeader()), bamBuilder);
}
final File bam = bamBuilder.makeTemporaryBAMFile();
final SamReader reader = SamReaderFactory.makeDefault().open(bam);
Assert.assertTrue(reader.hasIndex());
final Iterator<SAMRecord> bamIt = reader.iterator();
int nReadsFromBam = 0;
int lastStart = -1;
while (bamIt.hasNext()) {
final SAMRecord read = bamIt.next();
assertGoodRead(read, bamBuilder);
nReadsFromBam++;
Assert.assertTrue(read.getAlignmentStart() >= lastStart);
lastStart = read.getAlignmentStart();
}
Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads());
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class ReadUtilsUnitTest method readHasNoAssignedPositionTestData.
@DataProvider(name = "ReadHasNoAssignedPositionTestData")
public Object[][] readHasNoAssignedPositionTestData() {
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
// To test the "null contig" cases, we need to use a Google Genomics Read, since SAMRecord doesn't allow it
final Read unmappedGoogleReadWithNullContigSetStart = new Read();
unmappedGoogleReadWithNullContigSetStart.setAlignment(new LinearAlignment());
unmappedGoogleReadWithNullContigSetStart.getAlignment().setPosition(new Position());
unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setReferenceName(null);
unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setPosition(10l);
final GATKRead unmappedReadWithNullContigSetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigSetStart);
final Read unmappedGoogleReadWithNullContigUnsetStart = new Read();
unmappedGoogleReadWithNullContigUnsetStart.setAlignment(new LinearAlignment());
unmappedGoogleReadWithNullContigUnsetStart.getAlignment().setPosition(new Position());
unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setReferenceName(null);
unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setPosition(Long.valueOf(ReadConstants.UNSET_POSITION));
final GATKRead unmappedReadWithNullContigUnsetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigUnsetStart);
// We'll also test the improbable case of a SAMRecord marked as mapped, but with an unset contig/start
final SAMRecord mappedSAMWithUnsetContigSetStart = new SAMRecord(header);
mappedSAMWithUnsetContigSetStart.setReferenceName(ReadConstants.UNSET_CONTIG);
mappedSAMWithUnsetContigSetStart.setAlignmentStart(10);
mappedSAMWithUnsetContigSetStart.setReadUnmappedFlag(false);
final GATKRead mappedReadWithUnsetContigSetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithUnsetContigSetStart);
final SAMRecord mappedSAMWithSetContigUnsetStart = new SAMRecord(header);
mappedSAMWithSetContigUnsetStart.setReferenceName("1");
mappedSAMWithSetContigUnsetStart.setAlignmentStart(ReadConstants.UNSET_POSITION);
mappedSAMWithSetContigUnsetStart.setReadUnmappedFlag(false);
final GATKRead mappedReadWithSetContigUnsetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithSetContigUnsetStart);
return new Object[][] { // Mapped read with position
{ ArtificialReadUtils.createArtificialRead(header, "foo", 0, 5, 10), false }, // Basic unmapped read with no position
{ ArtificialReadUtils.createArtificialUnmappedRead(header, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set position (contig and start)
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", 10, new byte[] { 'A' }, new byte[] { 30 }), false }, // Unmapped read with null contig, set start
{ unmappedReadWithNullContigSetStart, true }, // Unmapped read with "*" contig, set start
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, 10, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set contig, unset start
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with null contig, unset start
{ unmappedReadWithNullContigUnsetStart, true }, // Unmapped read with "*" contig, unset start
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // "Mapped" read with unset contig, set start
{ mappedReadWithUnsetContigSetStart, true }, // "Mapped" read with set contig, unset start
{ mappedReadWithSetContigUnsetStart, true } };
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class ReadUtilsUnitTest method testCreateSAMWriter.
@Test(dataProvider = "createSAMWriter")
public void testCreateSAMWriter(final File bamFile, final boolean preSorted, final boolean createIndex, final boolean createMD5, final boolean expectIndex) throws Exception {
final File outputFile = createTempFile("samWriterTest", ".bam");
try (final SamReader samReader = SamReaderFactory.makeDefault().open(bamFile)) {
final SAMFileHeader header = samReader.getFileHeader();
if (expectIndex) {
// ensure test condition
Assert.assertEquals(expectIndex, header.getSortOrder() == SAMFileHeader.SortOrder.coordinate);
}
try (final SAMFileWriter samWriter = ReadUtils.createCommonSAMWriter(outputFile, null, samReader.getFileHeader(), preSorted, createIndex, createMD5)) {
final Iterator<SAMRecord> samRecIt = samReader.iterator();
while (samRecIt.hasNext()) {
samWriter.addAlignment(samRecIt.next());
}
}
}
final File md5File = new File(outputFile.getAbsolutePath() + ".md5");
if (md5File.exists()) {
md5File.deleteOnExit();
}
Assert.assertEquals(expectIndex, null != SamFiles.findIndex(outputFile));
Assert.assertEquals(createMD5, md5File.exists());
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class SinglePassSamProgram method makeItSo.
public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<SinglePassSamProgram> programs) {
// Setup the standard inputs
IOUtil.assertFileIsReadable(input);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
// Optionally load up the reference sequence and double check sequence dictionaries
final ReferenceSequenceFileWalker walker;
if (referenceSequence == null) {
walker = null;
} else {
IOUtil.assertFileIsReadable(referenceSequence);
walker = new ReferenceSequenceFileWalker(referenceSequence);
if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), walker.getSequenceDictionary());
}
}
// Check on the sort order of the BAM file
{
final SortOrder sort = in.getFileHeader().getSortOrder();
if (sort != SortOrder.coordinate) {
if (assumeSorted) {
logger.warn("File reports sort order '" + sort + "', assuming it's coordinate sorted anyway.");
} else {
throw new UserException("File " + input.getAbsolutePath() + " should be coordinate sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be coordinate sorted you may pass ASSUME_SORTED=true");
}
}
}
// Call the abstract setup method!
boolean anyUseNoRefReads = false;
for (final SinglePassSamProgram program : programs) {
program.setup(in.getFileHeader(), input);
anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
}
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord rec : in) {
final ReferenceSequence ref;
if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
ref = null;
} else {
ref = walker.get(rec.getReferenceIndex());
}
for (final SinglePassSamProgram program : programs) {
program.acceptRead(rec, ref);
}
progress.record(rec);
// See if we need to terminate early?
if (stopAfter > 0 && progress.getCount() >= stopAfter) {
break;
}
// And see if we're into the unmapped reads at the end
if (!anyUseNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
break;
}
}
CloserUtil.close(in);
for (final SinglePassSamProgram program : programs) {
program.finish();
}
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class CollectJumpingLibraryMetrics method doWork.
/**
* Calculates the detailed statistics about the jumping library and then generates the results.
*/
@Override
protected Object doWork() {
for (File f : INPUT) {
IOUtil.assertFileIsReadable(f);
}
IOUtil.assertFileIsWritable(OUTPUT);
Histogram<Integer> innieHistogram = new Histogram<>();
Histogram<Integer> outieHistogram = new Histogram<>();
int fragments = 0;
int innies = 0;
int outies = 0;
int innieDupes = 0;
int outieDupes = 0;
int crossChromPairs = 0;
int superSized = 0;
int tandemPairs = 0;
double chimeraSizeMinimum = Math.max(getOutieMode(), (double) CHIMERA_KB_MIN);
for (File f : INPUT) {
SamReader reader = SamReaderFactory.makeDefault().open(f);
if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new UserException("SAM file must " + f.getName() + " must be sorted in coordinate order");
}
for (SAMRecord sam : reader) {
// We're getting all our info from the first of each pair.
if (!sam.getFirstOfPairFlag()) {
continue;
}
// Ignore unmapped read pairs
if (sam.getReadUnmappedFlag()) {
if (!sam.getMateUnmappedFlag()) {
fragments++;
continue;
}
// If both ends are unmapped and we've hit unaligned reads we're done
if (sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
break;
}
continue;
}
if (sam.getMateUnmappedFlag()) {
fragments++;
continue;
}
// Ignore low-quality reads. If we don't have the mate mapping quality, assume it's OK
if ((sam.getAttribute(SAMTag.MQ.name()) != null && sam.getIntegerAttribute(SAMTag.MQ.name()) < MINIMUM_MAPPING_QUALITY) || sam.getMappingQuality() < MINIMUM_MAPPING_QUALITY) {
continue;
}
final int absInsertSize = Math.abs(sam.getInferredInsertSize());
if (absInsertSize > chimeraSizeMinimum) {
superSized++;
} else if (sam.getMateNegativeStrandFlag() == sam.getReadNegativeStrandFlag()) {
tandemPairs++;
} else if (!sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
crossChromPairs++;
} else {
final PairOrientation pairOrientation = SamPairUtil.getPairOrientation(sam);
if (pairOrientation == PairOrientation.RF) {
outieHistogram.increment(absInsertSize);
outies++;
if (sam.getDuplicateReadFlag()) {
outieDupes++;
}
} else if (pairOrientation == PairOrientation.FR) {
innieHistogram.increment(absInsertSize);
innies++;
if (sam.getDuplicateReadFlag()) {
innieDupes++;
}
} else {
throw new IllegalStateException("This should never happen");
}
}
}
CloserUtil.close(reader);
}
MetricsFile<JumpingLibraryMetrics, Integer> metricsFile = getMetricsFile();
JumpingLibraryMetrics metrics = new JumpingLibraryMetrics();
metrics.JUMP_PAIRS = outies;
metrics.JUMP_DUPLICATE_PAIRS = outieDupes;
metrics.JUMP_DUPLICATE_PCT = outies != 0 ? outieDupes / (double) outies : 0;
metrics.JUMP_LIBRARY_SIZE = (outies > 0 && outieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(outies, outies - outieDupes) : 0;
outieHistogram.trimByTailLimit(TAIL_LIMIT);
metrics.JUMP_MEAN_INSERT_SIZE = outieHistogram.getMean();
metrics.JUMP_STDEV_INSERT_SIZE = outieHistogram.getStandardDeviation();
metrics.NONJUMP_PAIRS = innies;
metrics.NONJUMP_DUPLICATE_PAIRS = innieDupes;
metrics.NONJUMP_DUPLICATE_PCT = innies != 0 ? innieDupes / (double) innies : 0;
metrics.NONJUMP_LIBRARY_SIZE = (innies > 0 && innieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(innies, innies - innieDupes) : 0;
innieHistogram.trimByTailLimit(TAIL_LIMIT);
metrics.NONJUMP_MEAN_INSERT_SIZE = innieHistogram.getMean();
metrics.NONJUMP_STDEV_INSERT_SIZE = innieHistogram.getStandardDeviation();
metrics.CHIMERIC_PAIRS = crossChromPairs + superSized + tandemPairs;
metrics.FRAGMENTS = fragments;
double totalPairs = outies + innies + metrics.CHIMERIC_PAIRS;
metrics.PCT_JUMPS = totalPairs != 0 ? outies / totalPairs : 0;
metrics.PCT_NONJUMPS = totalPairs != 0 ? innies / totalPairs : 0;
metrics.PCT_CHIMERAS = totalPairs != 0 ? metrics.CHIMERIC_PAIRS / totalPairs : 0;
metricsFile.addMetric(metrics);
metricsFile.write(OUTPUT);
return null;
}
Aggregations