Search in sources :

Example 26 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadPileupUnitTest method testGetPileupForSample.

@Test
public void testGetPileupForSample() {
    // read groups and header
    final SAMReadGroupRecord[] readGroups = new SAMReadGroupRecord[5];
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    int s = 0;
    // readGroups[4] is left null intentionally
    for (final String sample : Arrays.asList("sample1", "sample1", "sample2", null)) {
        readGroups[s] = new SAMReadGroupRecord("rg" + s);
        readGroups[s].setSample(sample);
        header.addReadGroup(readGroups[s++]);
    }
    // reads
    final int rgCoverage = 4;
    final List<GATKRead> reads = new ArrayList<>(rgCoverage * readGroups.length);
    for (int i = 0; i < rgCoverage; i++) {
        for (final SAMReadGroupRecord rg : readGroups) {
            final GATKRead r = ArtificialReadUtils.createArtificialRead(header, (rg == null) ? "null" : rg.getReadGroupId() + "_" + rg.getSample() + "_" + i, 0, 1, 10);
            if (rg != null) {
                r.setReadGroup(rg.getId());
            }
            reads.add(r);
        }
    }
    // pileup
    final ReadPileup pileup = new ReadPileup(loc, reads, 1);
    // sample1
    final ReadPileup pileupSample1 = pileup.getPileupForSample("sample1", header);
    Assert.assertEquals(pileupSample1.size(), rgCoverage * 2, "Wrong number of elements for sample1");
    Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg0"), "Pileup for sample1 should contain rg0");
    Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg1"), "Pileup for sample1 should contain rg1");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg2"), "Pileup for sample1 shouldn't contain rg2");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg3"), "Pileup for sample1 shouldn't contain rg3");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains(null), "Pileup for sample1 shouldn't contain null read group");
    // sample2
    final ReadPileup pileupSample2 = pileup.getPileupForSample("sample2", header);
    Assert.assertEquals(pileupSample2.size(), rgCoverage, "Wrong number of elements for sample2");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg0"), "Pileup for sample2 shouldn't contain rg0");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg1"), "Pileup for sample2 shouldn't contain rg1");
    Assert.assertTrue(pileupSample2.getReadGroupIDs().contains("rg2"), "Pileup for sample2 should contain rg2");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg3"), "Pileup for sample2 shouldn't contain rg3");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains(null), "Pileup for sample2 shouldn't contain null read group");
    // null sample
    final ReadPileup pileupNull = pileup.getPileupForSample(null, header);
    Assert.assertEquals(pileupNull.size(), rgCoverage * 2, "Wrong number of elements for null sample");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg0"), "Pileup for null sample shouldn't contain rg0");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg1"), "Pileup for null sample shouldn't contain rg1");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg2"), "Pileup for null sample shouldn't contain rg2");
    Assert.assertTrue(pileupNull.getReadGroupIDs().contains("rg3"), "Pileup for null sample should contain rg3");
    Assert.assertTrue(pileupNull.getReadGroupIDs().contains(null), "Pileup for null sample should contain null read group");
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 27 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class RecalibrationReportUnitTest method testOutput.

@Test
public void testOutput() {
    final int length = 100;
    List<Byte> quals = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
    List<Long> counts = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
    for (int i = 0; i <= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
        quals.add((byte) i);
        counts.add(1L);
    }
    final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts);
    final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    quantizationInfo.noQuantization();
    final String readGroupID = "id";
    final StandardCovariateList covariateList = new StandardCovariateList(RAC, Collections.singletonList(readGroupID));
    final SAMReadGroupRecord rg = new SAMReadGroupRecord(readGroupID);
    rg.setPlatform("illumina");
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);
    final GATKRead read = ArtificialReadUtils.createRandomRead(header, length, false);
    read.setReadGroup(rg.getReadGroupId());
    final byte[] readQuals = new byte[length];
    for (int i = 0; i < length; i++) readQuals[i] = 20;
    read.setBaseQualities(readQuals);
    final int expectedKeys = expectedNumberOfKeys(length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
    // keep track of how many keys were produced
    int nKeys = 0;
    final ReadCovariates rc = RecalUtils.computeCovariates(read, header, covariateList, true, new CovariateKeyCache());
    final RecalibrationTables recalibrationTables = new RecalibrationTables(covariateList);
    final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getReadGroupTable();
    final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
    for (int offset = 0; offset < length; offset++) {
        for (EventType errorMode : EventType.values()) {
            final int[] covariates = rc.getKeySet(offset, errorMode);
            final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
            rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
            qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
            nKeys += 2;
            for (NestedIntegerArray<RecalDatum> covTable : recalibrationTables.getAdditionalTables()) {
                Covariate cov = recalibrationTables.getCovariateForTable(covTable);
                final int covValue = covariates[covariateList.indexByClass(cov.getClass())];
                if (covValue >= 0) {
                    covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
                    nKeys++;
                }
            }
        }
    }
    Assert.assertEquals(nKeys, expectedKeys);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 28 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadUtilsUnitTest method makeHasWellDefinedFragmentSizeData.

@DataProvider(name = "HasWellDefinedFragmentSizeData")
public Object[][] makeHasWellDefinedFragmentSizeData() throws Exception {
    final List<Object[]> tests = new LinkedList<>();
    // setup a basic read that will work
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 10, 10);
    read.setIsPaired(true);
    read.setIsProperlyPaired(true);
    read.setPosition(read.getContig(), 100);
    read.setCigar("50M");
    read.setMatePosition(read.getContig(), 130);
    read.setFragmentLength(80);
    read.setIsFirstOfPair();
    read.setIsReverseStrand(false);
    read.setMateIsReverseStrand(true);
    tests.add(new Object[] { "basic case", read.copy(), true });
    {
        final GATKRead bad1 = read.copy();
        bad1.setIsPaired(false);
        tests.add(new Object[] { "not paired", bad1, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setIsProperlyPaired(false);
        // we currently don't require the proper pair flag to be set
        tests.add(new Object[] { "not proper pair", bad, true });
    //            tests.add( new Object[]{ "not proper pair", bad, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setIsUnmapped();
        tests.add(new Object[] { "read is unmapped", bad, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setMateIsUnmapped();
        tests.add(new Object[] { "mate is unmapped", bad, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setMateIsReverseStrand(false);
        tests.add(new Object[] { "read and mate both on positive strand", bad, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setIsReverseStrand(true);
        tests.add(new Object[] { "read and mate both on negative strand", bad, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setFragmentLength(0);
        tests.add(new Object[] { "insert size is 0", bad, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setPosition(bad.getContig(), 1000);
        tests.add(new Object[] { "positve read starts after mate end", bad, false });
    }
    {
        final GATKRead bad = read.copy();
        bad.setIsReverseStrand(true);
        bad.setMateIsReverseStrand(false);
        bad.setMatePosition(bad.getMateContig(), 1000);
        tests.add(new Object[] { "negative strand read ends before mate starts", bad, false });
    }
    return tests.toArray(new Object[][] {});
}
Also used : SAMFileHeader(htsjdk.samtools.SAMFileHeader) DataProvider(org.testng.annotations.DataProvider)

Example 29 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadUtilsUnitTest method testGetAssignedReferenceIndex.

@Test
public void testGetAssignedReferenceIndex() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead mappedRead = ArtificialReadUtils.createArtificialRead(header, "foo", 0, 5, 10);
    final GATKRead unmappedRead = ArtificialReadUtils.createArtificialUnmappedRead(header, new byte[] { 'A' }, new byte[] { 30 });
    final GATKRead unmappedReadWithAssignedPosition = ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "2", 10, new byte[] { 'A' }, new byte[] { 30 });
    Assert.assertEquals(ReadUtils.getAssignedReferenceIndex(mappedRead, header), 0);
    Assert.assertEquals(ReadUtils.getAssignedReferenceIndex(unmappedRead, header), -1);
    Assert.assertEquals(ReadUtils.getAssignedReferenceIndex(unmappedReadWithAssignedPosition, header), 1);
}
Also used : SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 30 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadUtilsUnitTest method readHasNoAssignedPositionTestData.

@DataProvider(name = "ReadHasNoAssignedPositionTestData")
public Object[][] readHasNoAssignedPositionTestData() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    // To test the "null contig" cases, we need to use a Google Genomics Read, since SAMRecord doesn't allow it
    final Read unmappedGoogleReadWithNullContigSetStart = new Read();
    unmappedGoogleReadWithNullContigSetStart.setAlignment(new LinearAlignment());
    unmappedGoogleReadWithNullContigSetStart.getAlignment().setPosition(new Position());
    unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setReferenceName(null);
    unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setPosition(10l);
    final GATKRead unmappedReadWithNullContigSetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigSetStart);
    final Read unmappedGoogleReadWithNullContigUnsetStart = new Read();
    unmappedGoogleReadWithNullContigUnsetStart.setAlignment(new LinearAlignment());
    unmappedGoogleReadWithNullContigUnsetStart.getAlignment().setPosition(new Position());
    unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setReferenceName(null);
    unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setPosition(Long.valueOf(ReadConstants.UNSET_POSITION));
    final GATKRead unmappedReadWithNullContigUnsetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigUnsetStart);
    // We'll also test the improbable case of a SAMRecord marked as mapped, but with an unset contig/start
    final SAMRecord mappedSAMWithUnsetContigSetStart = new SAMRecord(header);
    mappedSAMWithUnsetContigSetStart.setReferenceName(ReadConstants.UNSET_CONTIG);
    mappedSAMWithUnsetContigSetStart.setAlignmentStart(10);
    mappedSAMWithUnsetContigSetStart.setReadUnmappedFlag(false);
    final GATKRead mappedReadWithUnsetContigSetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithUnsetContigSetStart);
    final SAMRecord mappedSAMWithSetContigUnsetStart = new SAMRecord(header);
    mappedSAMWithSetContigUnsetStart.setReferenceName("1");
    mappedSAMWithSetContigUnsetStart.setAlignmentStart(ReadConstants.UNSET_POSITION);
    mappedSAMWithSetContigUnsetStart.setReadUnmappedFlag(false);
    final GATKRead mappedReadWithSetContigUnsetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithSetContigUnsetStart);
    return new Object[][] { // Mapped read with position
    { ArtificialReadUtils.createArtificialRead(header, "foo", 0, 5, 10), false }, // Basic unmapped read with no position
    { ArtificialReadUtils.createArtificialUnmappedRead(header, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set position (contig and start)
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", 10, new byte[] { 'A' }, new byte[] { 30 }), false }, // Unmapped read with null contig, set start
    { unmappedReadWithNullContigSetStart, true }, // Unmapped read with "*" contig, set start
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, 10, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set contig, unset start
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with null contig, unset start
    { unmappedReadWithNullContigUnsetStart, true }, // Unmapped read with "*" contig, unset start
    { ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // "Mapped" read with unset contig, set start
    { mappedReadWithUnsetContigSetStart, true }, // "Mapped" read with set contig, unset start
    { mappedReadWithSetContigUnsetStart, true } };
}
Also used : Read(com.google.api.services.genomics.model.Read) LinearAlignment(com.google.api.services.genomics.model.LinearAlignment) Position(com.google.api.services.genomics.model.Position) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DataProvider(org.testng.annotations.DataProvider)

Aggregations

SAMFileHeader (htsjdk.samtools.SAMFileHeader)148 Test (org.testng.annotations.Test)89 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)85 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)71 File (java.io.File)23 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)22 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)17 DataProvider (org.testng.annotations.DataProvider)17 java.util (java.util)15 UserException (org.broadinstitute.hellbender.exceptions.UserException)15 ArrayList (java.util.ArrayList)14 List (java.util.List)12 Collectors (java.util.stream.Collectors)12 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)12 SAMRecord (htsjdk.samtools.SAMRecord)11 Locatable (htsjdk.samtools.util.Locatable)11 BeforeClass (org.testng.annotations.BeforeClass)11 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)10