Search in sources :

Example 16 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class ArtificialBAMBuilderUnitTest method testBamProvider.

@Test(dataProvider = "ArtificialBAMBuilderUnitTestProvider")
public void testBamProvider(final ArtificialBAMBuilder bamBuilder, int readLength, int skips, int start, int nSamples, int nReadsPerLocus, int nLoci) throws IOException {
    Assert.assertEquals(bamBuilder.getReadLength(), readLength);
    Assert.assertEquals(bamBuilder.getSkipNLoci(), skips);
    Assert.assertEquals(bamBuilder.getAlignmentStart(), start);
    Assert.assertEquals(bamBuilder.getAlignmentEnd(), start + nLoci * (skips + 1) + readLength);
    Assert.assertEquals(bamBuilder.getNSamples(), nSamples);
    Assert.assertEquals(bamBuilder.getnReadsPerLocus(), nReadsPerLocus);
    Assert.assertEquals(bamBuilder.getnLoci(), nLoci);
    Assert.assertEquals(bamBuilder.getSamples().size(), bamBuilder.getNSamples());
    Assert.assertNull(bamBuilder.getReference());
    final List<GATKRead> reads = bamBuilder.makeReads();
    Assert.assertEquals(reads.size(), bamBuilder.expectedNumberOfReads());
    for (final GATKRead read : reads) {
        assertGoodRead(read.convertToSAMRecord(bamBuilder.getHeader()), bamBuilder);
    }
    final File bam = bamBuilder.makeTemporaryBAMFile();
    final SamReader reader = SamReaderFactory.makeDefault().open(bam);
    Assert.assertTrue(reader.hasIndex());
    final Iterator<SAMRecord> bamIt = reader.iterator();
    int nReadsFromBam = 0;
    int lastStart = -1;
    while (bamIt.hasNext()) {
        final SAMRecord read = bamIt.next();
        assertGoodRead(read, bamBuilder);
        nReadsFromBam++;
        Assert.assertTrue(read.getAlignmentStart() >= lastStart);
        lastStart = read.getAlignmentStart();
    }
    Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads());
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 17 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class ReadUtilsUnitTest method readHasNoAssignedPositionTestData.

@DataProvider(name = "ReadHasNoAssignedPositionTestData")
public Object[][] readHasNoAssignedPositionTestData() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    // To test the "null contig" cases, we need to use a Google Genomics Read, since SAMRecord doesn't allow it
    final Read unmappedGoogleReadWithNullContigSetStart = new Read();
    unmappedGoogleReadWithNullContigSetStart.setAlignment(new LinearAlignment());
    unmappedGoogleReadWithNullContigSetStart.getAlignment().setPosition(new Position());
    unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setReferenceName(null);
    unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setPosition(10l);
    final GATKRead unmappedReadWithNullContigSetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigSetStart);
    final Read unmappedGoogleReadWithNullContigUnsetStart = new Read();
    unmappedGoogleReadWithNullContigUnsetStart.setAlignment(new LinearAlignment());
    unmappedGoogleReadWithNullContigUnsetStart.getAlignment().setPosition(new Position());
    unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setReferenceName(null);
    unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setPosition(Long.valueOf(ReadConstants.UNSET_POSITION));
    final GATKRead unmappedReadWithNullContigUnsetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigUnsetStart);
    // We'll also test the improbable case of a SAMRecord marked as mapped, but with an unset contig/start
    final SAMRecord mappedSAMWithUnsetContigSetStart = new SAMRecord(header);
    mappedSAMWithUnsetContigSetStart.setReferenceName(ReadConstants.UNSET_CONTIG);
    mappedSAMWithUnsetContigSetStart.setAlignmentStart(10);
    mappedSAMWithUnsetContigSetStart.setReadUnmappedFlag(false);
    final GATKRead mappedReadWithUnsetContigSetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithUnsetContigSetStart);
    final SAMRecord mappedSAMWithSetContigUnsetStart = new SAMRecord(header);
    mappedSAMWithSetContigUnsetStart.setReferenceName("1");
    mappedSAMWithSetContigUnsetStart.setAlignmentStart(ReadConstants.UNSET_POSITION);
    mappedSAMWithSetContigUnsetStart.setReadUnmappedFlag(false);
    final GATKRead mappedReadWithSetContigUnsetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithSetContigUnsetStart);
    return new Object[][] { // Mapped read with position
    { ArtificialReadUtils.createArtificialRead(header, "foo", 0, 5, 10), false }, // Basic unmapped read with no position
    { ArtificialReadUtils.createArtificialUnmappedRead(header, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set position (contig and start)
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", 10, new byte[] { 'A' }, new byte[] { 30 }), false }, // Unmapped read with null contig, set start
    { unmappedReadWithNullContigSetStart, true }, // Unmapped read with "*" contig, set start
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, 10, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set contig, unset start
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with null contig, unset start
    { unmappedReadWithNullContigUnsetStart, true }, // Unmapped read with "*" contig, unset start
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // "Mapped" read with unset contig, set start
    { mappedReadWithUnsetContigSetStart, true }, // "Mapped" read with set contig, unset start
    { mappedReadWithSetContigUnsetStart, true } };
}
Also used : Read(com.google.api.services.genomics.model.Read) LinearAlignment(com.google.api.services.genomics.model.LinearAlignment) Position(com.google.api.services.genomics.model.Position) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DataProvider(org.testng.annotations.DataProvider)

Example 18 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class ReadUtilsUnitTest method testCreateSAMWriter.

@Test(dataProvider = "createSAMWriter")
public void testCreateSAMWriter(final File bamFile, final boolean preSorted, final boolean createIndex, final boolean createMD5, final boolean expectIndex) throws Exception {
    final File outputFile = createTempFile("samWriterTest", ".bam");
    try (final SamReader samReader = SamReaderFactory.makeDefault().open(bamFile)) {
        final SAMFileHeader header = samReader.getFileHeader();
        if (expectIndex) {
            // ensure test condition
            Assert.assertEquals(expectIndex, header.getSortOrder() == SAMFileHeader.SortOrder.coordinate);
        }
        try (final SAMFileWriter samWriter = ReadUtils.createCommonSAMWriter(outputFile, null, samReader.getFileHeader(), preSorted, createIndex, createMD5)) {
            final Iterator<SAMRecord> samRecIt = samReader.iterator();
            while (samRecIt.hasNext()) {
                samWriter.addAlignment(samRecIt.next());
            }
        }
    }
    final File md5File = new File(outputFile.getAbsolutePath() + ".md5");
    if (md5File.exists()) {
        md5File.deleteOnExit();
    }
    Assert.assertEquals(expectIndex, null != SamFiles.findIndex(outputFile));
    Assert.assertEquals(createMD5, md5File.exists());
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 19 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class SinglePassSamProgram method makeItSo.

public static void makeItSo(final File input, final File referenceSequence, final boolean assumeSorted, final long stopAfter, final Collection<SinglePassSamProgram> programs) {
    // Setup the standard inputs
    IOUtil.assertFileIsReadable(input);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    // Optionally load up the reference sequence and double check sequence dictionaries
    final ReferenceSequenceFileWalker walker;
    if (referenceSequence == null) {
        walker = null;
    } else {
        IOUtil.assertFileIsReadable(referenceSequence);
        walker = new ReferenceSequenceFileWalker(referenceSequence);
        if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
            SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), walker.getSequenceDictionary());
        }
    }
    // Check on the sort order of the BAM file
    {
        final SortOrder sort = in.getFileHeader().getSortOrder();
        if (sort != SortOrder.coordinate) {
            if (assumeSorted) {
                logger.warn("File reports sort order '" + sort + "', assuming it's coordinate sorted anyway.");
            } else {
                throw new UserException("File " + input.getAbsolutePath() + " should be coordinate sorted but " + "the header says the sort order is " + sort + ". If you believe the file " + "to be coordinate sorted you may pass ASSUME_SORTED=true");
            }
        }
    }
    // Call the abstract setup method!
    boolean anyUseNoRefReads = false;
    for (final SinglePassSamProgram program : programs) {
        program.setup(in.getFileHeader(), input);
        anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
    }
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord rec : in) {
        final ReferenceSequence ref;
        if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
            ref = null;
        } else {
            ref = walker.get(rec.getReferenceIndex());
        }
        for (final SinglePassSamProgram program : programs) {
            program.acceptRead(rec, ref);
        }
        progress.record(rec);
        // See if we need to terminate early?
        if (stopAfter > 0 && progress.getCount() >= stopAfter) {
            break;
        }
        // And see if we're into the unmapped reads at the end
        if (!anyUseNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
            break;
        }
    }
    CloserUtil.close(in);
    for (final SinglePassSamProgram program : programs) {
        program.finish();
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker)

Example 20 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class CollectJumpingLibraryMetrics method doWork.

/**
     * Calculates the detailed statistics about the jumping library and then generates the results.
     */
@Override
protected Object doWork() {
    for (File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
    }
    IOUtil.assertFileIsWritable(OUTPUT);
    Histogram<Integer> innieHistogram = new Histogram<>();
    Histogram<Integer> outieHistogram = new Histogram<>();
    int fragments = 0;
    int innies = 0;
    int outies = 0;
    int innieDupes = 0;
    int outieDupes = 0;
    int crossChromPairs = 0;
    int superSized = 0;
    int tandemPairs = 0;
    double chimeraSizeMinimum = Math.max(getOutieMode(), (double) CHIMERA_KB_MIN);
    for (File f : INPUT) {
        SamReader reader = SamReaderFactory.makeDefault().open(f);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException("SAM file must " + f.getName() + " must be sorted in coordinate order");
        }
        for (SAMRecord sam : reader) {
            // We're getting all our info from the first of each pair.
            if (!sam.getFirstOfPairFlag()) {
                continue;
            }
            // Ignore unmapped read pairs
            if (sam.getReadUnmappedFlag()) {
                if (!sam.getMateUnmappedFlag()) {
                    fragments++;
                    continue;
                }
                // If both ends are unmapped and we've hit unaligned reads we're done
                if (sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    break;
                }
                continue;
            }
            if (sam.getMateUnmappedFlag()) {
                fragments++;
                continue;
            }
            // Ignore low-quality reads.  If we don't have the mate mapping quality, assume it's OK
            if ((sam.getAttribute(SAMTag.MQ.name()) != null && sam.getIntegerAttribute(SAMTag.MQ.name()) < MINIMUM_MAPPING_QUALITY) || sam.getMappingQuality() < MINIMUM_MAPPING_QUALITY) {
                continue;
            }
            final int absInsertSize = Math.abs(sam.getInferredInsertSize());
            if (absInsertSize > chimeraSizeMinimum) {
                superSized++;
            } else if (sam.getMateNegativeStrandFlag() == sam.getReadNegativeStrandFlag()) {
                tandemPairs++;
            } else if (!sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                crossChromPairs++;
            } else {
                final PairOrientation pairOrientation = SamPairUtil.getPairOrientation(sam);
                if (pairOrientation == PairOrientation.RF) {
                    outieHistogram.increment(absInsertSize);
                    outies++;
                    if (sam.getDuplicateReadFlag()) {
                        outieDupes++;
                    }
                } else if (pairOrientation == PairOrientation.FR) {
                    innieHistogram.increment(absInsertSize);
                    innies++;
                    if (sam.getDuplicateReadFlag()) {
                        innieDupes++;
                    }
                } else {
                    throw new IllegalStateException("This should never happen");
                }
            }
        }
        CloserUtil.close(reader);
    }
    MetricsFile<JumpingLibraryMetrics, Integer> metricsFile = getMetricsFile();
    JumpingLibraryMetrics metrics = new JumpingLibraryMetrics();
    metrics.JUMP_PAIRS = outies;
    metrics.JUMP_DUPLICATE_PAIRS = outieDupes;
    metrics.JUMP_DUPLICATE_PCT = outies != 0 ? outieDupes / (double) outies : 0;
    metrics.JUMP_LIBRARY_SIZE = (outies > 0 && outieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(outies, outies - outieDupes) : 0;
    outieHistogram.trimByTailLimit(TAIL_LIMIT);
    metrics.JUMP_MEAN_INSERT_SIZE = outieHistogram.getMean();
    metrics.JUMP_STDEV_INSERT_SIZE = outieHistogram.getStandardDeviation();
    metrics.NONJUMP_PAIRS = innies;
    metrics.NONJUMP_DUPLICATE_PAIRS = innieDupes;
    metrics.NONJUMP_DUPLICATE_PCT = innies != 0 ? innieDupes / (double) innies : 0;
    metrics.NONJUMP_LIBRARY_SIZE = (innies > 0 && innieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(innies, innies - innieDupes) : 0;
    innieHistogram.trimByTailLimit(TAIL_LIMIT);
    metrics.NONJUMP_MEAN_INSERT_SIZE = innieHistogram.getMean();
    metrics.NONJUMP_STDEV_INSERT_SIZE = innieHistogram.getStandardDeviation();
    metrics.CHIMERIC_PAIRS = crossChromPairs + superSized + tandemPairs;
    metrics.FRAGMENTS = fragments;
    double totalPairs = outies + innies + metrics.CHIMERIC_PAIRS;
    metrics.PCT_JUMPS = totalPairs != 0 ? outies / totalPairs : 0;
    metrics.PCT_NONJUMPS = totalPairs != 0 ? innies / totalPairs : 0;
    metrics.PCT_CHIMERAS = totalPairs != 0 ? metrics.CHIMERIC_PAIRS / totalPairs : 0;
    metricsFile.addMetric(metrics);
    metricsFile.write(OUTPUT);
    return null;
}
Also used : Histogram(htsjdk.samtools.util.Histogram) PairOrientation(htsjdk.samtools.SamPairUtil.PairOrientation) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Aggregations

SAMRecord (htsjdk.samtools.SAMRecord)53 SamReader (htsjdk.samtools.SamReader)31 File (java.io.File)20 Test (org.testng.annotations.Test)15 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)12 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)11 UserException (org.broadinstitute.hellbender.exceptions.UserException)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 SAMFileWriter (htsjdk.samtools.SAMFileWriter)9 MetricsFile (htsjdk.samtools.metrics.MetricsFile)9 ArrayList (java.util.ArrayList)8 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)6 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)4 SamReaderFactory (htsjdk.samtools.SamReaderFactory)4 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)4 SAMRecordToGATKReadAdapter (org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter)4 BAMRecordCodec (htsjdk.samtools.BAMRecordCodec)3 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)3 SAMRecordQueryNameComparator (htsjdk.samtools.SAMRecordQueryNameComparator)3 Histogram (htsjdk.samtools.util.Histogram)3