use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadPileupUnitTest method testGetPileupForSample.
@Test
public void testGetPileupForSample() {
// read groups and header
final SAMReadGroupRecord[] readGroups = new SAMReadGroupRecord[5];
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
int s = 0;
// readGroups[4] is left null intentionally
for (final String sample : Arrays.asList("sample1", "sample1", "sample2", null)) {
readGroups[s] = new SAMReadGroupRecord("rg" + s);
readGroups[s].setSample(sample);
header.addReadGroup(readGroups[s++]);
}
// reads
final int rgCoverage = 4;
final List<GATKRead> reads = new ArrayList<>(rgCoverage * readGroups.length);
for (int i = 0; i < rgCoverage; i++) {
for (final SAMReadGroupRecord rg : readGroups) {
final GATKRead r = ArtificialReadUtils.createArtificialRead(header, (rg == null) ? "null" : rg.getReadGroupId() + "_" + rg.getSample() + "_" + i, 0, 1, 10);
if (rg != null) {
r.setReadGroup(rg.getId());
}
reads.add(r);
}
}
// pileup
final ReadPileup pileup = new ReadPileup(loc, reads, 1);
// sample1
final ReadPileup pileupSample1 = pileup.getPileupForSample("sample1", header);
Assert.assertEquals(pileupSample1.size(), rgCoverage * 2, "Wrong number of elements for sample1");
Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg0"), "Pileup for sample1 should contain rg0");
Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg1"), "Pileup for sample1 should contain rg1");
Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg2"), "Pileup for sample1 shouldn't contain rg2");
Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg3"), "Pileup for sample1 shouldn't contain rg3");
Assert.assertFalse(pileupSample1.getReadGroupIDs().contains(null), "Pileup for sample1 shouldn't contain null read group");
// sample2
final ReadPileup pileupSample2 = pileup.getPileupForSample("sample2", header);
Assert.assertEquals(pileupSample2.size(), rgCoverage, "Wrong number of elements for sample2");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg0"), "Pileup for sample2 shouldn't contain rg0");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg1"), "Pileup for sample2 shouldn't contain rg1");
Assert.assertTrue(pileupSample2.getReadGroupIDs().contains("rg2"), "Pileup for sample2 should contain rg2");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg3"), "Pileup for sample2 shouldn't contain rg3");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains(null), "Pileup for sample2 shouldn't contain null read group");
// null sample
final ReadPileup pileupNull = pileup.getPileupForSample(null, header);
Assert.assertEquals(pileupNull.size(), rgCoverage * 2, "Wrong number of elements for null sample");
Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg0"), "Pileup for null sample shouldn't contain rg0");
Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg1"), "Pileup for null sample shouldn't contain rg1");
Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg2"), "Pileup for null sample shouldn't contain rg2");
Assert.assertTrue(pileupNull.getReadGroupIDs().contains("rg3"), "Pileup for null sample should contain rg3");
Assert.assertTrue(pileupNull.getReadGroupIDs().contains(null), "Pileup for null sample should contain null read group");
}
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class RecalibrationReportUnitTest method testOutput.
@Test
public void testOutput() {
final int length = 100;
List<Byte> quals = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
List<Long> counts = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
for (int i = 0; i <= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
quals.add((byte) i);
counts.add(1L);
}
final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts);
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
quantizationInfo.noQuantization();
final String readGroupID = "id";
final StandardCovariateList covariateList = new StandardCovariateList(RAC, Collections.singletonList(readGroupID));
final SAMReadGroupRecord rg = new SAMReadGroupRecord(readGroupID);
rg.setPlatform("illumina");
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);
final GATKRead read = ArtificialReadUtils.createRandomRead(header, length, false);
read.setReadGroup(rg.getReadGroupId());
final byte[] readQuals = new byte[length];
for (int i = 0; i < length; i++) readQuals[i] = 20;
read.setBaseQualities(readQuals);
final int expectedKeys = expectedNumberOfKeys(length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
// keep track of how many keys were produced
int nKeys = 0;
final ReadCovariates rc = RecalUtils.computeCovariates(read, header, covariateList, true, new CovariateKeyCache());
final RecalibrationTables recalibrationTables = new RecalibrationTables(covariateList);
final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
for (int offset = 0; offset < length; offset++) {
for (EventType errorMode : EventType.values()) {
final int[] covariates = rc.getKeySet(offset, errorMode);
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
nKeys += 2;
for (NestedIntegerArray<RecalDatum> covTable : recalibrationTables.getAdditionalTables()) {
Covariate cov = recalibrationTables.getCovariateForTable(covTable);
final int covValue = covariates[covariateList.indexByClass(cov.getClass())];
if (covValue >= 0) {
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
nKeys++;
}
}
}
}
Assert.assertEquals(nKeys, expectedKeys);
}
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadUtilsUnitTest method makeHasWellDefinedFragmentSizeData.
@DataProvider(name = "HasWellDefinedFragmentSizeData")
public Object[][] makeHasWellDefinedFragmentSizeData() throws Exception {
final List<Object[]> tests = new LinkedList<>();
// setup a basic read that will work
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 10, 10);
read.setIsPaired(true);
read.setIsProperlyPaired(true);
read.setPosition(read.getContig(), 100);
read.setCigar("50M");
read.setMatePosition(read.getContig(), 130);
read.setFragmentLength(80);
read.setIsFirstOfPair();
read.setIsReverseStrand(false);
read.setMateIsReverseStrand(true);
tests.add(new Object[] { "basic case", read.copy(), true });
{
final GATKRead bad1 = read.copy();
bad1.setIsPaired(false);
tests.add(new Object[] { "not paired", bad1, false });
}
{
final GATKRead bad = read.copy();
bad.setIsProperlyPaired(false);
// we currently don't require the proper pair flag to be set
tests.add(new Object[] { "not proper pair", bad, true });
// tests.add( new Object[]{ "not proper pair", bad, false });
}
{
final GATKRead bad = read.copy();
bad.setIsUnmapped();
tests.add(new Object[] { "read is unmapped", bad, false });
}
{
final GATKRead bad = read.copy();
bad.setMateIsUnmapped();
tests.add(new Object[] { "mate is unmapped", bad, false });
}
{
final GATKRead bad = read.copy();
bad.setMateIsReverseStrand(false);
tests.add(new Object[] { "read and mate both on positive strand", bad, false });
}
{
final GATKRead bad = read.copy();
bad.setIsReverseStrand(true);
tests.add(new Object[] { "read and mate both on negative strand", bad, false });
}
{
final GATKRead bad = read.copy();
bad.setFragmentLength(0);
tests.add(new Object[] { "insert size is 0", bad, false });
}
{
final GATKRead bad = read.copy();
bad.setPosition(bad.getContig(), 1000);
tests.add(new Object[] { "positve read starts after mate end", bad, false });
}
{
final GATKRead bad = read.copy();
bad.setIsReverseStrand(true);
bad.setMateIsReverseStrand(false);
bad.setMatePosition(bad.getMateContig(), 1000);
tests.add(new Object[] { "negative strand read ends before mate starts", bad, false });
}
return tests.toArray(new Object[][] {});
}
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadUtilsUnitTest method testGetAssignedReferenceIndex.
@Test
public void testGetAssignedReferenceIndex() {
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
final GATKRead mappedRead = ArtificialReadUtils.createArtificialRead(header, "foo", 0, 5, 10);
final GATKRead unmappedRead = ArtificialReadUtils.createArtificialUnmappedRead(header, new byte[] { 'A' }, new byte[] { 30 });
final GATKRead unmappedReadWithAssignedPosition = ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "2", 10, new byte[] { 'A' }, new byte[] { 30 });
Assert.assertEquals(ReadUtils.getAssignedReferenceIndex(mappedRead, header), 0);
Assert.assertEquals(ReadUtils.getAssignedReferenceIndex(unmappedRead, header), -1);
Assert.assertEquals(ReadUtils.getAssignedReferenceIndex(unmappedReadWithAssignedPosition, header), 1);
}
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadUtilsUnitTest method readHasNoAssignedPositionTestData.
@DataProvider(name = "ReadHasNoAssignedPositionTestData")
public Object[][] readHasNoAssignedPositionTestData() {
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
// To test the "null contig" cases, we need to use a Google Genomics Read, since SAMRecord doesn't allow it
final Read unmappedGoogleReadWithNullContigSetStart = new Read();
unmappedGoogleReadWithNullContigSetStart.setAlignment(new LinearAlignment());
unmappedGoogleReadWithNullContigSetStart.getAlignment().setPosition(new Position());
unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setReferenceName(null);
unmappedGoogleReadWithNullContigSetStart.getAlignment().getPosition().setPosition(10l);
final GATKRead unmappedReadWithNullContigSetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigSetStart);
final Read unmappedGoogleReadWithNullContigUnsetStart = new Read();
unmappedGoogleReadWithNullContigUnsetStart.setAlignment(new LinearAlignment());
unmappedGoogleReadWithNullContigUnsetStart.getAlignment().setPosition(new Position());
unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setReferenceName(null);
unmappedGoogleReadWithNullContigUnsetStart.getAlignment().getPosition().setPosition(Long.valueOf(ReadConstants.UNSET_POSITION));
final GATKRead unmappedReadWithNullContigUnsetStart = new GoogleGenomicsReadToGATKReadAdapter(unmappedGoogleReadWithNullContigUnsetStart);
// We'll also test the improbable case of a SAMRecord marked as mapped, but with an unset contig/start
final SAMRecord mappedSAMWithUnsetContigSetStart = new SAMRecord(header);
mappedSAMWithUnsetContigSetStart.setReferenceName(ReadConstants.UNSET_CONTIG);
mappedSAMWithUnsetContigSetStart.setAlignmentStart(10);
mappedSAMWithUnsetContigSetStart.setReadUnmappedFlag(false);
final GATKRead mappedReadWithUnsetContigSetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithUnsetContigSetStart);
final SAMRecord mappedSAMWithSetContigUnsetStart = new SAMRecord(header);
mappedSAMWithSetContigUnsetStart.setReferenceName("1");
mappedSAMWithSetContigUnsetStart.setAlignmentStart(ReadConstants.UNSET_POSITION);
mappedSAMWithSetContigUnsetStart.setReadUnmappedFlag(false);
final GATKRead mappedReadWithSetContigUnsetStart = new SAMRecordToGATKReadAdapter(mappedSAMWithSetContigUnsetStart);
return new Object[][] { // Mapped read with position
{ ArtificialReadUtils.createArtificialRead(header, "foo", 0, 5, 10), false }, // Basic unmapped read with no position
{ ArtificialReadUtils.createArtificialUnmappedRead(header, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set position (contig and start)
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", 10, new byte[] { 'A' }, new byte[] { 30 }), false }, // Unmapped read with null contig, set start
{ unmappedReadWithNullContigSetStart, true }, // Unmapped read with "*" contig, set start
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, 10, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with set contig, unset start
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // Unmapped read with null contig, unset start
{ unmappedReadWithNullContigUnsetStart, true }, // Unmapped read with "*" contig, unset start
{ ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, ReadConstants.UNSET_CONTIG, ReadConstants.UNSET_POSITION, new byte[] { 'A' }, new byte[] { 30 }), true }, // "Mapped" read with unset contig, set start
{ mappedReadWithUnsetContigSetStart, true }, // "Mapped" read with set contig, unset start
{ mappedReadWithSetContigUnsetStart, true } };
}
Aggregations