Search in sources:

Example 21 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class MarkDuplicatesSparkUtilsUnitTest method testSpanReadsByKeyWithAlternatingGroups.

/**
 * Checks that {@code MarkDuplicatesSparkUtils.spanReadsByKey} gathers reads that share a key
 * into one group even when reads with different keys alternate in the input.
 *
 * Four reads with the same name are assigned alternately to read group 0 and read group 1,
 * placed in a single-partition RDD, and the grouped result is compared with the expected
 * (key, reads) pairs: reads 1 and 3 under the first key, reads 2 and 4 under the second.
 */
@Test(groups = "spark")
public void testSpanReadsByKeyWithAlternatingGroups() {
    SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithGroups(1, 1, 1000, 2);
    GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 1, 20);
    read1.setReadGroup(getReadGroupId(header, 0));
    GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 2, 20);
    read2.setReadGroup(getReadGroupId(header, 1));
    GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 3, 20);
    read3.setReadGroup(getReadGroupId(header, 0));
    GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 4, 20);
    read4.setReadGroup(getReadGroupId(header, 1));

    String key1 = ReadsKey.keyForRead(header, read1);
    String key2 = ReadsKey.keyForRead(header, read2);
    String key3 = ReadsKey.keyForRead(header, read3);
    String key4 = ReadsKey.keyForRead(header, read4);

    // TestNG's assertEquals takes (actual, expected); keeping that order makes a failure
    // report the computed key as "found" and the literal as "expected".
    Assert.assertEquals(key1, "ReadGroup0|N");
    Assert.assertEquals(key2, "ReadGroup1|N");
    Assert.assertEquals(key3, "ReadGroup0|N");
    Assert.assertEquals(key4, "ReadGroup1|N");

    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    // A single partition keeps all four reads together in their original (alternating) order.
    JavaRDD<GATKRead> reads = ctx.parallelize(ImmutableList.of(read1, read2, read3, read4), 1);
    JavaPairRDD<String, Iterable<GATKRead>> groupedReads = MarkDuplicatesSparkUtils.spanReadsByKey(header, reads);
    Assert.assertEquals(groupedReads.collect(), ImmutableList.of(pairIterable(key1, read1, read3), pairIterable(key2, read2, read4)));
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 22 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadClassifierTest method restOfFragmentSizeTest.

/**
 * Walks one artificial read through a series of CIGAR, mate-mapping, strand and mate-position
 * changes and, after each change, uses {@code checkClassification} to compare the
 * {@code ReadClassifier}'s output with the expected list of {@code BreakpointEvidence}.
 *
 * The same read object is mutated in place throughout, so each step builds on the state left
 * by the preceding ones; the statement order is significant.
 */
@Test(groups = "spark")
void restOfFragmentSizeTest() {
    // NOTE(review): the arguments are presumably (contig count, first contig, contig length, read-group count);
    // the test later looks up sequence indices 1 and 2 in this header's dictionary — confirm against ArtificialReadUtils.
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithGroups(3, 1, 10000000, 1);
    final String groupName = header.getReadGroups().get(0).getReadGroupId();
    final int readSize = 151;
    final int fragmentLen = 400;
    final ReadMetadata.ReadGroupFragmentStatistics groupStats = new ReadMetadata.ReadGroupFragmentStatistics(fragmentLen, 175, 20);
    // The value 2 placed in this set matches the sequence index used for the final
    // "no evidence expected" check at the end of the test.
    final Set<Integer> crossContigIgnoreSet = new HashSet<>(3);
    crossContigIgnoreSet.add(2);
    final ReadMetadata readMetadata = new ReadMetadata(crossContigIgnoreSet, header, groupStats, 1, 2L, 2L, 1);
    final String templateName = "xyzzy";
    final int leftStart = 1010101;
    // Mate start chosen so that leftStart..(rightStart + readSize) covers exactly fragmentLen bases.
    final int rightStart = leftStart + fragmentLen - readSize;
    // NOTE(review): meaning of the two trailing boolean flags is not visible here — confirm against ArtificialReadUtils.createPair.
    final List<GATKRead> readPair = ArtificialReadUtils.createPair(header, templateName, readSize, leftStart, rightStart, true, false);
    // Only the first read of the pair is exercised; it is mutated in place below.
    final GATKRead read = readPair.get(0);
    read.setReadGroup(groupName);
    final ReadClassifier classifier = new ReadClassifier(readMetadata);
    // Freshly created pair member: no evidence expected.
    checkClassification(classifier, read, Collections.emptyList());
    // CIGAR with MIN_SOFT_CLIP_LEN soft-clipped bases at the front: SplitRead built with 'true' expected.
    read.setCigar(ReadClassifier.MIN_SOFT_CLIP_LEN + "S" + (readSize - ReadClassifier.MIN_SOFT_CLIP_LEN) + "M");
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SplitRead(read, readMetadata, true)));
    // Same soft clip moved to the end of the CIGAR: SplitRead built with 'false' expected.
    read.setCigar((readSize - ReadClassifier.MIN_SOFT_CLIP_LEN) + "M" + ReadClassifier.MIN_SOFT_CLIP_LEN + "S");
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SplitRead(read, readMetadata, false)));
    // Deletion of MIN_INDEL_LEN placed between two match blocks that together cover readSize bases.
    read.setCigar((readSize / 2) + "M" + ReadClassifier.MIN_INDEL_LEN + "D" + ((readSize + 1) / 2) + "M");
    // Expected locus: leftStart plus the leading match length plus half of the deletion length.
    final int locus = leftStart + readSize / 2 + ReadClassifier.MIN_INDEL_LEN / 2;
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.LargeIndel(read, readMetadata, locus)));
    // Back to an all-match CIGAR, then flag the mate as unmapped: MateUnmapped expected.
    read.setCigar(readSize + "M");
    read.setMateIsUnmapped();
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.MateUnmapped(read, readMetadata)));
    // Give the mate a position on the read's own contig again and put the read on the
    // reverse strand: SameStrandPair expected.
    read.setMatePosition(read.getContig(), rightStart);
    read.setIsReverseStrand(true);
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SameStrandPair(read, readMetadata)));
    // Read and mate both explicitly on the forward strand: SameStrandPair expected.
    read.setIsReverseStrand(false);
    read.setMateIsReverseStrand(false);
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SameStrandPair(read, readMetadata)));
    // Read reverse, mate forward (mate flag unchanged from the previous step): OutiesPair expected.
    read.setIsReverseStrand(true);
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.OutiesPair(read, readMetadata)));
    // Mate start moved to just 2 bases past the read start: no evidence expected.
    read.setMatePosition(read.getContig(), read.getStart() + 2);
    checkClassification(classifier, read, Collections.emptyList());
    // Mate start pushed out by a further ALLOWED_SHORT_FRAGMENT_OVERHANG: OutiesPair expected again.
    read.setMatePosition(read.getContig(), read.getStart() + 2 + ReadClassifier.ALLOWED_SHORT_FRAGMENT_OVERHANG);
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.OutiesPair(read, readMetadata)));
    // Read forward, mate reverse, mate start 2 bases before the read start: no evidence expected.
    read.setIsReverseStrand(false);
    read.setMateIsReverseStrand(true);
    read.setMatePosition(read.getContig(), read.getStart() - 2);
    checkClassification(classifier, read, Collections.emptyList());
    // The read was already set to the forward strand a few statements above; this call repeats that setting.
    read.setIsReverseStrand(false);
    // Mate placed on the sequence at dictionary index 1: InterContigPair expected.
    read.setMatePosition(header.getSequenceDictionary().getSequence(1).getSequenceName(), rightStart);
    checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.InterContigPair(read, readMetadata)));
    // Mate placed on the sequence at dictionary index 2 (the value added to crossContigIgnoreSet): no evidence expected.
    read.setMatePosition(header.getSequenceDictionary().getSequence(2).getSequenceName(), rightStart);
    checkClassification(classifier, read, Collections.emptyList());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 23 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadPileupUnitTest method testNullConstructorParametersOffsets.

/**
 * Constructing a {@code ReadPileup} from a location, a one-read list and a {@code null}
 * third argument is expected to fail with an {@link IllegalArgumentException}.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testNullConstructorParametersOffsets() {
    final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final List<GATKRead> singleRead = Arrays.asList(
            ArtificialReadUtils.createArtificialRead(samHeader, "read1", 0, 1, 10));
    // The null third argument is what should trigger the expected exception.
    new ReadPileup(loc, singleRead, null);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 24 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadPileupUnitTest method testSimplePileup.

/**
 * Builds a two-read pileup (ten 'A' bases at quality 10, ten 'C' bases at quality 20), both at
 * offset 1, and checks the basic {@code ReadPileup} accessors, counters and filters against it.
 */
@Test
public void testSimplePileup() {
    final int readlength = 10;
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "5M3I2M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    // Name the second read (previously read1 was renamed a second time by mistake).
    read2.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    final ReadPileup pu = new ReadPileup(loc, reads, 1);

    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());

    // Each read repeats a single base and quality, so index 0 yields the same values as the
    // pileup offset used above.
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s",
            // the position
            loc.getContig(), loc.getStart(),
            // the bases
            (char) read1.getBase(0), (char) read2.getBase(0),
            // base quality in FASTQ format
            SAMUtils.phredToFastq(read1.getBaseQuality(0)), SAMUtils.phredToFastq(read2.getBaseQuality(0))));

    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });

    // Only the second read (quality 20) passes the base-quality threshold of 12.
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> r.getQual() >= 12).size(), 1, "getBaseFilteredPileup");

    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);

    Assert.assertEquals(pu.getOffsets(), Arrays.asList(1, 1), "getOffsets");
    // No read group was assigned to either read, so a single null entry is expected here
    // and for the samples below.
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getMappingQual() >= 10).isEmpty());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Locatable(htsjdk.samtools.util.Locatable) java.util(java.util) SAMUtils(htsjdk.samtools.SAMUtils) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) CigarElement(htsjdk.samtools.CigarElement) BeforeClass(org.testng.annotations.BeforeClass) CigarOperator(htsjdk.samtools.CigarOperator) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) NO_MAPPING_QUALITY(org.broadinstitute.hellbender.utils.read.ReadConstants.NO_MAPPING_QUALITY) Utils(org.broadinstitute.hellbender.utils.Utils) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 25 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadPileupUnitTest method testSimplePileupWithOffset.

/**
 * Same scenario as a simple two-read pileup (ten 'A' bases at quality 10, ten 'C' bases at
 * quality 20), but built at offset 6 with all-match CIGARs; checks the {@code ReadPileup}
 * accessors, strand filters, counters and reported offsets.
 */
@Test
public void testSimplePileupWithOffset() {
    final int readlength = 10;
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "10M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    // Name the second read (previously read1 was renamed a second time by mistake).
    read2.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    final int off = 6;
    final ReadPileup pu = new ReadPileup(loc, reads, off);
    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });

    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());

    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s",
            // the position
            loc.getContig(), loc.getStart(),
            // the bases
            (char) read1.getBase(off), (char) read2.getBase(off),
            // base quality in FASTQ format
            SAMUtils.phredToFastq(read1.getBaseQuality(off)), SAMUtils.phredToFastq(read2.getBaseQuality(off))));

    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });

    // Neither read is expected to be on the reverse strand.
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getRead().isReverseStrand()).isEmpty(), "getReverseStrandPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> !r.getRead().isReverseStrand()).size(), 2, "getForwardStrandPileup");

    // Only the second read (quality 20) passes the base-quality threshold of 12.
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).size(), 1, "getBaseFilteredPileup");

    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);

    Assert.assertEquals(pu.getOffsets(), Arrays.asList(off, off), "getOffsets");
    // No read group was assigned to either read, so a single null entry is expected here
    // and for the samples below.
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(p -> p.getMappingQual() >= 10).isEmpty());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Locatable(htsjdk.samtools.util.Locatable) java.util(java.util) SAMUtils(htsjdk.samtools.SAMUtils) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) CigarElement(htsjdk.samtools.CigarElement) BeforeClass(org.testng.annotations.BeforeClass) CigarOperator(htsjdk.samtools.CigarOperator) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) NO_MAPPING_QUALITY(org.broadinstitute.hellbender.utils.read.ReadConstants.NO_MAPPING_QUALITY) Utils(org.broadinstitute.hellbender.utils.Utils) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SAMFileHeader (htsjdk.samtools.SAMFileHeader)148 Test (org.testng.annotations.Test)89 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)85 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)71 File (java.io.File)23 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)22 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)17 DataProvider (org.testng.annotations.DataProvider)17 java.util (java.util)15 UserException (org.broadinstitute.hellbender.exceptions.UserException)15 ArrayList (java.util.ArrayList)14 List (java.util.List)12 Collectors (java.util.stream.Collectors)12 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)12 SAMRecord (htsjdk.samtools.SAMRecord)11 Locatable (htsjdk.samtools.util.Locatable)11 BeforeClass (org.testng.annotations.BeforeClass)11 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)10