Search in sources :

Example 66 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

From the class ReadClassifierTest, method restOfFragmentSizeTest:

/**
 * Mutates a single artificial read step by step (CIGAR, mate mapping, strands, mate position,
 * mate contig) and, after each mutation, checks the evidence that the {@code ReadClassifier}
 * reports for it. The steps build on one another, so their order matters.
 */
@Test(groups = "spark")
void restOfFragmentSizeTest() {
    // --- fixture: header, fragment statistics, metadata ---
    final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeaderWithGroups(3, 1, 10000000, 1);
    final String readGroupId = samHeader.getReadGroups().get(0).getReadGroupId();
    final int readLen = 151;
    final int fragmentSize = 400;
    final ReadMetadata.ReadGroupFragmentStatistics fragmentStats = new ReadMetadata.ReadGroupFragmentStatistics(fragmentSize, 175, 20);
    final Set<Integer> ignoredContigs = new HashSet<>(3);
    ignoredContigs.add(2);
    final ReadMetadata metadata = new ReadMetadata(ignoredContigs, samHeader, fragmentStats, 1, 2L, 2L, 1);

    // --- fixture: a read pair; only the first read of the pair is classified ---
    final String queryName = "xyzzy";
    final int leftPos = 1010101;
    final int rightPos = leftPos + fragmentSize - readLen;
    final List<GATKRead> pair = ArtificialReadUtils.createPair(samHeader, queryName, readLen, leftPos, rightPos, true, false);
    final GATKRead leftRead = pair.get(0);
    leftRead.setReadGroup(readGroupId);
    final ReadClassifier readClassifier = new ReadClassifier(metadata);

    // untouched read: no evidence expected
    checkClassification(readClassifier, leftRead, Collections.emptyList());

    // soft clip of MIN_SOFT_CLIP_LEN bases at the start of the CIGAR
    final String leadingClipCigar = ReadClassifier.MIN_SOFT_CLIP_LEN + "S" + (readLen - ReadClassifier.MIN_SOFT_CLIP_LEN) + "M";
    leftRead.setCigar(leadingClipCigar);
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.SplitRead(leftRead, metadata, true)));

    // soft clip of MIN_SOFT_CLIP_LEN bases at the end of the CIGAR
    final String trailingClipCigar = (readLen - ReadClassifier.MIN_SOFT_CLIP_LEN) + "M" + ReadClassifier.MIN_SOFT_CLIP_LEN + "S";
    leftRead.setCigar(trailingClipCigar);
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.SplitRead(leftRead, metadata, false)));

    // deletion of MIN_INDEL_LEN bases in the middle of the read
    final String deletionCigar = (readLen / 2) + "M" + ReadClassifier.MIN_INDEL_LEN + "D" + ((readLen + 1) / 2) + "M";
    leftRead.setCigar(deletionCigar);
    final int indelLocus = leftPos + readLen / 2 + ReadClassifier.MIN_INDEL_LEN / 2;
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.LargeIndel(leftRead, metadata, indelLocus)));

    // plain full-length match, but the mate is flagged as unmapped
    final String fullMatchCigar = readLen + "M";
    leftRead.setCigar(fullMatchCigar);
    leftRead.setMateIsUnmapped();
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.MateUnmapped(leftRead, metadata)));

    // mate placed on the read's own contig; read flipped to the reverse strand
    leftRead.setMatePosition(leftRead.getContig(), rightPos);
    leftRead.setIsReverseStrand(true);
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.SameStrandPair(leftRead, metadata)));

    // both read and mate on the forward strand
    leftRead.setIsReverseStrand(false);
    leftRead.setMateIsReverseStrand(false);
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.SameStrandPair(leftRead, metadata)));

    // read reverse, mate forward
    leftRead.setIsReverseStrand(true);
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.OutiesPair(leftRead, metadata)));

    // mate start only 2 bases past the read start: no evidence expected
    leftRead.setMatePosition(leftRead.getContig(), leftRead.getStart() + 2);
    checkClassification(readClassifier, leftRead, Collections.emptyList());

    // mate start pushed further by ALLOWED_SHORT_FRAGMENT_OVERHANG
    leftRead.setMatePosition(leftRead.getContig(), leftRead.getStart() + 2 + ReadClassifier.ALLOWED_SHORT_FRAGMENT_OVERHANG);
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.OutiesPair(leftRead, metadata)));

    // read forward, mate reverse, mate start 2 bases before the read start: no evidence expected
    leftRead.setIsReverseStrand(false);
    leftRead.setMateIsReverseStrand(true);
    leftRead.setMatePosition(leftRead.getContig(), leftRead.getStart() - 2);
    checkClassification(readClassifier, leftRead, Collections.emptyList());

    // mate moved to the sequence at dictionary index 1
    leftRead.setIsReverseStrand(false);
    final String contigAtIndex1 = samHeader.getSequenceDictionary().getSequence(1).getSequenceName();
    leftRead.setMatePosition(contigAtIndex1, rightPos);
    checkClassification(readClassifier, leftRead, Collections.singletonList(new BreakpointEvidence.InterContigPair(leftRead, metadata)));

    // mate moved to the sequence at dictionary index 2, the index added to the ignore set above:
    // no evidence expected
    final String contigAtIndex2 = samHeader.getSequenceDictionary().getSequence(2).getSequenceName();
    leftRead.setMatePosition(contigAtIndex2, rightPos);
    checkClassification(readClassifier, leftRead, Collections.emptyList());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 67 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

From the class ReadPileupUnitTest, method testNullConstructorParametersOffsets:

/**
 * Constructing a {@code ReadPileup} from a location, a list of reads and a {@code null} third
 * argument is expected to fail with an {@link IllegalArgumentException}.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testNullConstructorParametersOffsets() {
    final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final List<GATKRead> singleRead = Arrays.asList(ArtificialReadUtils.createArtificialRead(samHeader, "read1", 0, 1, 10));
    // the constructor call itself is the test; TestNG checks the thrown exception type
    new ReadPileup(loc, singleRead, null);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 68 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

From the class ReadPileupUnitTest, method testSimplePileup:

/**
 * Builds a pileup of two artificial reads at a single shared offset and checks the basic
 * {@code ReadPileup} accessors (pileup string, bases, qualities, counts, offsets, reads and
 * filtered views) against the values the reads were created with.
 */
@Test
public void testSimplePileup() {
    final int readlength = 10;
    // read1: all 'A', base quality 10
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    // read2: all 'C', base quality 20
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "5M3I2M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    // each read gets its own name
    read2.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    // single offset used both to build the pileup and to compute the expected values
    final int offset = 1;
    final ReadPileup pu = new ReadPileup(loc, reads, offset);
    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s",
            // the position
            loc.getContig(), loc.getStart(),
            // the bases
            (char) read1.getBase(offset), (char) read2.getBase(offset),
            // base quality in FASTQ format
            SAMUtils.phredToFastq(read1.getBaseQuality(offset)), SAMUtils.phredToFastq(read2.getBaseQuality(offset))));
    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
    // only read2 (quality 20) passes the base-quality threshold of 12
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> r.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
    Assert.assertEquals(pu.getOffsets(), Arrays.asList(offset, offset), "getOffsets");
    // no read group was set on either read
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getMappingQual() >= 10).isEmpty());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Locatable(htsjdk.samtools.util.Locatable) java.util(java.util) SAMUtils(htsjdk.samtools.SAMUtils) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) CigarElement(htsjdk.samtools.CigarElement) BeforeClass(org.testng.annotations.BeforeClass) CigarOperator(htsjdk.samtools.CigarOperator) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) NO_MAPPING_QUALITY(org.broadinstitute.hellbender.utils.read.ReadConstants.NO_MAPPING_QUALITY) Utils(org.broadinstitute.hellbender.utils.Utils) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 69 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

From the class ReadPileupUnitTest, method testSimplePileupWithOffset:

/**
 * Same checks as the simple pileup test, but with two fully matching reads piled up at a
 * non-trivial shared offset, plus forward/reverse strand filtering.
 */
@Test
public void testSimplePileupWithOffset() {
    final int readlength = 10;
    // read1: all 'A', base quality 10
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    // read2: all 'C', base quality 20
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "10M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    // each read gets its own name
    read2.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    // single offset used both to build the pileup and to compute the expected values
    final int off = 6;
    final ReadPileup pu = new ReadPileup(loc, reads, off);
    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s",
            // the position
            loc.getContig(), loc.getStart(),
            // the bases
            (char) read1.getBase(off), (char) read2.getBase(off),
            // base quality in FASTQ format
            SAMUtils.phredToFastq(read1.getBaseQuality(off)), SAMUtils.phredToFastq(read2.getBaseQuality(off))));
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
    // neither read was flagged as reverse strand
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getRead().isReverseStrand()).isEmpty(), "getReverseStrandPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> !r.getRead().isReverseStrand()).size(), 2, "getForwardStrandPileup");
    // only read2 (quality 20) passes the base-quality threshold of 12
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
    Assert.assertEquals(pu.getOffsets(), Arrays.asList(off, off), "getOffsets");
    // no read group was set on either read
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(p -> p.getMappingQual() >= 10).isEmpty());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Locatable(htsjdk.samtools.util.Locatable) java.util(java.util) SAMUtils(htsjdk.samtools.SAMUtils) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) CigarElement(htsjdk.samtools.CigarElement) BeforeClass(org.testng.annotations.BeforeClass) CigarOperator(htsjdk.samtools.CigarOperator) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) NO_MAPPING_QUALITY(org.broadinstitute.hellbender.utils.read.ReadConstants.NO_MAPPING_QUALITY) Utils(org.broadinstitute.hellbender.utils.Utils) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 70 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

From the class ReadPileupUnitTest, method testGetPileupForSample:

/**
 * Creates reads spread over four read groups (two for "sample1", one for "sample2", one with a
 * null sample) plus reads with no read group at all, then checks that
 * {@code getPileupForSample} returns exactly the reads belonging to each requested sample.
 */
@Test
public void testGetPileupForSample() {
    // Five slots, but only the first four are filled; the last stays null on purpose so that
    // reads without any read group are generated below.
    final SAMReadGroupRecord[] groups = new SAMReadGroupRecord[5];
    final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final List<String> sampleNames = Arrays.asList("sample1", "sample1", "sample2", null);
    for (int idx = 0; idx < sampleNames.size(); idx++) {
        final SAMReadGroupRecord group = new SAMReadGroupRecord("rg" + idx);
        group.setSample(sampleNames.get(idx));
        groups[idx] = group;
        samHeader.addReadGroup(group);
    }

    // One read per slot (including the null slot), repeated coveragePerGroup times.
    final int coveragePerGroup = 4;
    final List<GATKRead> pileupReads = new ArrayList<>(coveragePerGroup * groups.length);
    for (int round = 0; round < coveragePerGroup; round++) {
        for (final SAMReadGroupRecord group : groups) {
            final String readName;
            if (group == null) {
                readName = "null";
            } else {
                readName = group.getReadGroupId() + "_" + group.getSample() + "_" + round;
            }
            final GATKRead read = ArtificialReadUtils.createArtificialRead(samHeader, readName, 0, 1, 10);
            if (group != null) {
                read.setReadGroup(group.getId());
            }
            pileupReads.add(read);
        }
    }

    final ReadPileup fullPileup = new ReadPileup(loc, pileupReads, 1);

    // "sample1" owns rg0 and rg1
    final ReadPileup forSample1 = fullPileup.getPileupForSample("sample1", samHeader);
    Assert.assertEquals(forSample1.size(), coveragePerGroup * 2, "Wrong number of elements for sample1");
    final Collection<?> idsSample1 = forSample1.getReadGroupIDs();
    Assert.assertTrue(idsSample1.contains("rg0"), "Pileup for sample1 should contain rg0");
    Assert.assertTrue(idsSample1.contains("rg1"), "Pileup for sample1 should contain rg1");
    Assert.assertFalse(idsSample1.contains("rg2"), "Pileup for sample1 shouldn't contain rg2");
    Assert.assertFalse(idsSample1.contains("rg3"), "Pileup for sample1 shouldn't contain rg3");
    Assert.assertFalse(idsSample1.contains(null), "Pileup for sample1 shouldn't contain null read group");

    // "sample2" owns rg2 only
    final ReadPileup forSample2 = fullPileup.getPileupForSample("sample2", samHeader);
    Assert.assertEquals(forSample2.size(), coveragePerGroup, "Wrong number of elements for sample2");
    final Collection<?> idsSample2 = forSample2.getReadGroupIDs();
    Assert.assertFalse(idsSample2.contains("rg0"), "Pileup for sample2 shouldn't contain rg0");
    Assert.assertFalse(idsSample2.contains("rg1"), "Pileup for sample2 shouldn't contain rg1");
    Assert.assertTrue(idsSample2.contains("rg2"), "Pileup for sample2 should contain rg2");
    Assert.assertFalse(idsSample2.contains("rg3"), "Pileup for sample2 shouldn't contain rg3");
    Assert.assertFalse(idsSample2.contains(null), "Pileup for sample2 shouldn't contain null read group");

    // the null sample covers rg3 (sample set to null) and the reads with no read group
    final ReadPileup forNullSample = fullPileup.getPileupForSample(null, samHeader);
    Assert.assertEquals(forNullSample.size(), coveragePerGroup * 2, "Wrong number of elements for null sample");
    final Collection<?> idsNullSample = forNullSample.getReadGroupIDs();
    Assert.assertFalse(idsNullSample.contains("rg0"), "Pileup for null sample shouldn't contain rg0");
    Assert.assertFalse(idsNullSample.contains("rg1"), "Pileup for null sample shouldn't contain rg1");
    Assert.assertFalse(idsNullSample.contains("rg2"), "Pileup for null sample shouldn't contain rg2");
    Assert.assertTrue(idsNullSample.contains("rg3"), "Pileup for null sample should contain rg3");
    Assert.assertTrue(idsNullSample.contains(null), "Pileup for null sample should contain null read group");
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SAMFileHeader (htsjdk.samtools.SAMFileHeader): 148
Test (org.testng.annotations.Test): 89
GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 85
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 71
File (java.io.File): 23
SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord): 22
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 17
DataProvider (org.testng.annotations.DataProvider): 17
java.util (java.util): 15
UserException (org.broadinstitute.hellbender.exceptions.UserException): 15
ArrayList (java.util.ArrayList): 14
List (java.util.List): 12
Collectors (java.util.stream.Collectors): 12
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 12
SAMRecord (htsjdk.samtools.SAMRecord): 11
Locatable (htsjdk.samtools.util.Locatable): 11
BeforeClass (org.testng.annotations.BeforeClass): 11
SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 10
SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord): 10
ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup): 10