Example of using htsjdk.samtools.SAMFileHeader in the Broad Institute's GATK project: the testSpanReadsByKeyWithAlternatingGroups method of the MarkDuplicatesSparkUtilsUnitTest class.
@Test(groups = "spark")
public void testSpanReadsByKeyWithAlternatingGroups() {
    // Header over a single contig with two read groups (last helper arg);
    // the reads below alternate between group 0 and group 1.
    SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithGroups(1, 1, 1000, 2);
    // Four reads at consecutive start positions 1..4, alternating read groups 0/1.
    GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 1, 20);
    read1.setReadGroup(getReadGroupId(header, 0));
    GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 2, 20);
    read2.setReadGroup(getReadGroupId(header, 1));
    GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 3, 20);
    read3.setReadGroup(getReadGroupId(header, 0));
    GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "N", 0, 4, 20);
    read4.setReadGroup(getReadGroupId(header, 1));
    String key1 = ReadsKey.keyForRead(header, read1);
    String key2 = ReadsKey.keyForRead(header, read2);
    String key3 = ReadsKey.keyForRead(header, read3);
    String key4 = ReadsKey.keyForRead(header, read4);
    // FIX: TestNG's Assert.assertEquals signature is (actual, expected); the original
    // passed the expected literal first, which swaps the values reported in failure
    // messages. The equality check itself is symmetric, so passing behavior is unchanged.
    Assert.assertEquals(key1, "ReadGroup0|N");
    Assert.assertEquals(key2, "ReadGroup1|N");
    Assert.assertEquals(key3, "ReadGroup0|N");
    Assert.assertEquals(key4, "ReadGroup1|N");
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    JavaRDD<GATKRead> reads = ctx.parallelize(ImmutableList.of(read1, read2, read3, read4), 1);
    JavaPairRDD<String, Iterable<GATKRead>> groupedReads = MarkDuplicatesSparkUtils.spanReadsByKey(header, reads);
    // Reads sharing a key must be spanned together: (read1, read3) under key1
    // and (read2, read4) under key2, in that exact order.
    Assert.assertEquals(groupedReads.collect(), ImmutableList.of(pairIterable(key1, read1, read3), pairIterable(key2, read2, read4)));
}
Example of using htsjdk.samtools.SAMFileHeader in the Broad Institute's GATK project: the restOfFragmentSizeTest method of the ReadClassifierTest class.
// Exercises ReadClassifier against a single read pair, mutating the read step by step
// (cigar, strand, mate position) and checking the breakpoint evidence produced at each step.
// NOTE: the steps are strictly order-dependent — each mutation builds on the previous state.
@Test(groups = "spark")
void restOfFragmentSizeTest() {
// Header with 3 sequences of length 10,000,000 and a single read group.
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithGroups(3, 1, 10000000, 1);
final String groupName = header.getReadGroups().get(0).getReadGroupId();
final int readSize = 151;
final int fragmentLen = 400;
// Fragment statistics for the read group: median fragment length 400.
// NOTE(review): exact meaning of the (175, 20) spread parameters depends on
// ReadGroupFragmentStatistics — confirm against its definition.
final ReadMetadata.ReadGroupFragmentStatistics groupStats = new ReadMetadata.ReadGroupFragmentStatistics(fragmentLen, 175, 20);
// Contig index 2 is placed in the cross-contig ignore set; mates placed there
// should yield no inter-contig evidence (verified at the end of the test).
final Set<Integer> crossContigIgnoreSet = new HashSet<>(3);
crossContigIgnoreSet.add(2);
final ReadMetadata readMetadata = new ReadMetadata(crossContigIgnoreSet, header, groupStats, 1, 2L, 2L, 1);
final String templateName = "xyzzy";
final int leftStart = 1010101;
// Position the mate so the pair spans exactly fragmentLen bases.
final int rightStart = leftStart + fragmentLen - readSize;
final List<GATKRead> readPair = ArtificialReadUtils.createPair(header, templateName, readSize, leftStart, rightStart, true, false);
final GATKRead read = readPair.get(0);
read.setReadGroup(groupName);
final ReadClassifier classifier = new ReadClassifier(readMetadata);
// Baseline: a well-formed pair produces no evidence.
checkClassification(classifier, read, Collections.emptyList());
// Leading soft clip of MIN_SOFT_CLIP_LEN -> SplitRead evidence (at the read's start).
read.setCigar(ReadClassifier.MIN_SOFT_CLIP_LEN + "S" + (readSize - ReadClassifier.MIN_SOFT_CLIP_LEN) + "M");
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SplitRead(read, readMetadata, true)));
// Trailing soft clip -> SplitRead evidence (at the read's end).
read.setCigar((readSize - ReadClassifier.MIN_SOFT_CLIP_LEN) + "M" + ReadClassifier.MIN_SOFT_CLIP_LEN + "S");
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SplitRead(read, readMetadata, false)));
// Deletion of MIN_INDEL_LEN in the middle of the read -> LargeIndel evidence
// at a locus centered on the deletion.
read.setCigar((readSize / 2) + "M" + ReadClassifier.MIN_INDEL_LEN + "D" + ((readSize + 1) / 2) + "M");
final int locus = leftStart + readSize / 2 + ReadClassifier.MIN_INDEL_LEN / 2;
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.LargeIndel(read, readMetadata, locus)));
// Restore a plain cigar; an unmapped mate -> MateUnmapped evidence.
read.setCigar(readSize + "M");
read.setMateIsUnmapped();
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.MateUnmapped(read, readMetadata)));
// Re-map the mate; both reads now on the same (reverse) strand -> SameStrandPair.
read.setMatePosition(read.getContig(), rightStart);
read.setIsReverseStrand(true);
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SameStrandPair(read, readMetadata)));
// Both reads on the forward strand -> SameStrandPair again.
read.setIsReverseStrand(false);
read.setMateIsReverseStrand(false);
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.SameStrandPair(read, readMetadata)));
// Reverse read with a forward mate downstream -> OutiesPair evidence.
read.setIsReverseStrand(true);
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.OutiesPair(read, readMetadata)));
// Mate only 2 bases away: within tolerance, so no evidence.
read.setMatePosition(read.getContig(), read.getStart() + 2);
checkClassification(classifier, read, Collections.emptyList());
// Past ALLOWED_SHORT_FRAGMENT_OVERHANG -> OutiesPair evidence again.
read.setMatePosition(read.getContig(), read.getStart() + 2 + ReadClassifier.ALLOWED_SHORT_FRAGMENT_OVERHANG);
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.OutiesPair(read, readMetadata)));
// Forward read with a reverse mate slightly upstream: within tolerance, no evidence.
read.setIsReverseStrand(false);
read.setMateIsReverseStrand(true);
read.setMatePosition(read.getContig(), read.getStart() - 2);
checkClassification(classifier, read, Collections.emptyList());
// Mate on a different contig (index 1, not in the ignore set) -> InterContigPair evidence.
read.setIsReverseStrand(false);
read.setMatePosition(header.getSequenceDictionary().getSequence(1).getSequenceName(), rightStart);
checkClassification(classifier, read, Collections.singletonList(new BreakpointEvidence.InterContigPair(read, readMetadata)));
// Mate on contig index 2, which IS in the cross-contig ignore set -> no evidence.
read.setMatePosition(header.getSequenceDictionary().getSequence(2).getSequenceName(), rightStart);
checkClassification(classifier, read, Collections.emptyList());
}
Example of using htsjdk.samtools.SAMFileHeader in the Broad Institute's GATK project: the testNullConstructorParametersOffsets method of the ReadPileupUnitTest class.
@Test(expectedExceptions = IllegalArgumentException.class)
public void testNullConstructorParametersOffsets() {
    // Passing a null offsets list to the ReadPileup constructor must be rejected
    // with an IllegalArgumentException.
    final SAMFileHeader hdr = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final GATKRead onlyRead = ArtificialReadUtils.createArtificialRead(hdr, "read1", 0, 1, 10);
    new ReadPileup(loc, Collections.singletonList(onlyRead), null);
}
Example of using htsjdk.samtools.SAMFileHeader in the Broad Institute's GATK project: the testSimplePileup method of the ReadPileupUnitTest class.
@Test
public void testSimplePileup() {
    // Two artificial reads over the same locus: read1 all 'A' at qual 10 with a
    // simple 10M cigar, read2 all 'C' at qual 20 with an insertion-bearing cigar.
    final int readlength = 10;
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "5M3I2M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    // BUG FIX: original called read1.setName("read2"), renaming the first read a
    // second time and leaving read2 with its default name.
    read2.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    // Pileup over both reads at offset 1.
    final ReadPileup pu = new ReadPileup(loc, reads, 1);
    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s", // the position
    loc.getContig(), // the position
    loc.getStart(), // the bases
    (char) read1.getBase(0), // the bases
    (char) read2.getBase(0), // base quality in FASTQ format
    SAMUtils.phredToFastq(read1.getBaseQuality(0)), // base quality in FASTQ format
    SAMUtils.phredToFastq(read2.getBaseQuality(0))));
    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
    // Only read2 (qual 20) survives the qual >= 12 filter.
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> r.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
    Assert.assertEquals(pu.getOffsets(), Arrays.asList(1, 1), "getOffsets");
    // Artificial reads carry no read group / sample, so both collapse to a single null entry.
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getMappingQual() >= 10).isEmpty());
}
Example of using htsjdk.samtools.SAMFileHeader in the Broad Institute's GATK project: the testSimplePileupWithOffset method of the ReadPileupUnitTest class.
@Test
public void testSimplePileupWithOffset() {
    // Same setup as testSimplePileup but both reads use a plain 10M cigar and
    // the pileup is built at a non-zero offset into the reads.
    final int readlength = 10;
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "10M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    // BUG FIX: original called read1.setName("read2"), renaming the first read a
    // second time and leaving read2 with its default name.
    read2.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    final int off = 6;
    final ReadPileup pu = new ReadPileup(loc, reads, off);
    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s", // the position
    loc.getContig(), // the position
    loc.getStart(), // the bases
    (char) read1.getBase(off), // the bases
    (char) read2.getBase(off), // base quality in FASTQ format
    SAMUtils.phredToFastq(read1.getBaseQuality(off)), // base quality in FASTQ format
    SAMUtils.phredToFastq(read2.getBaseQuality(off))));
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
    // Both artificial reads are on the forward strand.
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getRead().isReverseStrand()).isEmpty(), "getReverseStrandPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> !r.getRead().isReverseStrand()).size(), 2, "getForwardStrandPileup");
    // Only read2 (qual 20) survives the qual >= 12 filter.
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
    Assert.assertEquals(pu.getOffsets(), Arrays.asList(off, off), "getOffsets");
    // Artificial reads carry no read group / sample, so both collapse to a single null entry.
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(p -> p.getMappingQual() >= 10).isEmpty());
}
End of aggregated htsjdk.samtools.SAMFileHeader usage examples.