use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.
the class ReadPileupUnitTest method testSimplePileup.
@Test
public void testSimplePileup() {
final int readlength = 10;
final byte[] bases1 = Utils.repeatChars('A', readlength);
final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
final String cigar1 = "10M";
final byte[] bases2 = Utils.repeatChars('C', readlength);
final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
final String cigar2 = "5M3I2M";
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
read1.setName("read1");
final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
read1.setName("read2");
final List<GATKRead> reads = Arrays.asList(read1, read2);
final ReadPileup pu = new ReadPileup(loc, reads, 1);
//checking non-blowup. We're not making any claims about the format of toString
Assert.assertNotNull(pu.toString());
Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s", // the position
loc.getContig(), // the position
loc.getStart(), // the bases
(char) read1.getBase(0), // the bases
(char) read2.getBase(0), // base quality in FASTQ format
SAMUtils.phredToFastq(read1.getBaseQuality(0)), // base quality in FASTQ format
SAMUtils.phredToFastq(read2.getBaseQuality(0))));
Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
Assert.assertFalse(pu.isEmpty());
Assert.assertEquals(pu.size(), 2, "size");
Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
Assert.assertEquals(pu.getLocation(), loc);
Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
Assert.assertEquals(pu.makeFilteredPileup(r -> r.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
Assert.assertEquals(pu.getOffsets(), Arrays.asList(1, 1), "getOffsets");
Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
Assert.assertTrue(pu.makeFilteredPileup(r -> r.getMappingQual() >= 10).isEmpty());
}
use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.
the class ReadPileupUnitTest method testSimplePileupWithOffset.
@Test
public void testSimplePileupWithOffset() {
final int readlength = 10;
final byte[] bases1 = Utils.repeatChars('A', readlength);
final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
final String cigar1 = "10M";
final byte[] bases2 = Utils.repeatChars('C', readlength);
final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
final String cigar2 = "10M";
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
read1.setName("read1");
final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
read1.setName("read2");
final List<GATKRead> reads = Arrays.asList(read1, read2);
final int off = 6;
final ReadPileup pu = new ReadPileup(loc, reads, off);
Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
//checking non-blowup. We're not making any claims about the format of toString
Assert.assertNotNull(pu.toString());
Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s", // the position
loc.getContig(), // the position
loc.getStart(), // the bases
(char) read1.getBase(off), // the bases
(char) read2.getBase(off), // base quality in FASTQ format
SAMUtils.phredToFastq(read1.getBaseQuality(off)), // base quality in FASTQ format
SAMUtils.phredToFastq(read2.getBaseQuality(off))));
Assert.assertFalse(pu.isEmpty());
Assert.assertEquals(pu.size(), 2, "size");
Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
Assert.assertEquals(pu.getLocation(), loc);
Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
Assert.assertTrue(pu.makeFilteredPileup(r -> r.getRead().isReverseStrand()).isEmpty(), "getReverseStrandPileup");
Assert.assertEquals(pu.makeFilteredPileup(r -> !r.getRead().isReverseStrand()).size(), 2, "getForwardStrandPileup");
Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
Assert.assertEquals(pu.getOffsets(), Arrays.asList(off, off), "getOffsets");
Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
Assert.assertTrue(pu.makeFilteredPileup(p -> p.getMappingQual() >= 10).isEmpty());
}
use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.
the class ReadPileupUnitTest method testGetPileupForSample.
@Test
public void testGetPileupForSample() {
// read groups and header
final SAMReadGroupRecord[] readGroups = new SAMReadGroupRecord[5];
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
int s = 0;
// readGroups[4] is left null intentionally
for (final String sample : Arrays.asList("sample1", "sample1", "sample2", null)) {
readGroups[s] = new SAMReadGroupRecord("rg" + s);
readGroups[s].setSample(sample);
header.addReadGroup(readGroups[s++]);
}
// reads
final int rgCoverage = 4;
final List<GATKRead> reads = new ArrayList<>(rgCoverage * readGroups.length);
for (int i = 0; i < rgCoverage; i++) {
for (final SAMReadGroupRecord rg : readGroups) {
final GATKRead r = ArtificialReadUtils.createArtificialRead(header, (rg == null) ? "null" : rg.getReadGroupId() + "_" + rg.getSample() + "_" + i, 0, 1, 10);
if (rg != null) {
r.setReadGroup(rg.getId());
}
reads.add(r);
}
}
// pileup
final ReadPileup pileup = new ReadPileup(loc, reads, 1);
// sample1
final ReadPileup pileupSample1 = pileup.getPileupForSample("sample1", header);
Assert.assertEquals(pileupSample1.size(), rgCoverage * 2, "Wrong number of elements for sample1");
Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg0"), "Pileup for sample1 should contain rg0");
Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg1"), "Pileup for sample1 should contain rg1");
Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg2"), "Pileup for sample1 shouldn't contain rg2");
Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg3"), "Pileup for sample1 shouldn't contain rg3");
Assert.assertFalse(pileupSample1.getReadGroupIDs().contains(null), "Pileup for sample1 shouldn't contain null read group");
// sample2
final ReadPileup pileupSample2 = pileup.getPileupForSample("sample2", header);
Assert.assertEquals(pileupSample2.size(), rgCoverage, "Wrong number of elements for sample2");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg0"), "Pileup for sample2 shouldn't contain rg0");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg1"), "Pileup for sample2 shouldn't contain rg1");
Assert.assertTrue(pileupSample2.getReadGroupIDs().contains("rg2"), "Pileup for sample2 should contain rg2");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg3"), "Pileup for sample2 shouldn't contain rg3");
Assert.assertFalse(pileupSample2.getReadGroupIDs().contains(null), "Pileup for sample2 shouldn't contain null read group");
// null sample
final ReadPileup pileupNull = pileup.getPileupForSample(null, header);
Assert.assertEquals(pileupNull.size(), rgCoverage * 2, "Wrong number of elements for null sample");
Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg0"), "Pileup for null sample shouldn't contain rg0");
Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg1"), "Pileup for null sample shouldn't contain rg1");
Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg2"), "Pileup for null sample shouldn't contain rg2");
Assert.assertTrue(pileupNull.getReadGroupIDs().contains("rg3"), "Pileup for null sample should contain rg3");
Assert.assertTrue(pileupNull.getReadGroupIDs().contains(null), "Pileup for null sample should contain null read group");
}
use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.
the class RecalibrationReportUnitTest method testOutput.
@Test
public void testOutput() {
final int length = 100;
List<Byte> quals = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
List<Long> counts = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
for (int i = 0; i <= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
quals.add((byte) i);
counts.add(1L);
}
final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts);
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
quantizationInfo.noQuantization();
final String readGroupID = "id";
final StandardCovariateList covariateList = new StandardCovariateList(RAC, Collections.singletonList(readGroupID));
final SAMReadGroupRecord rg = new SAMReadGroupRecord(readGroupID);
rg.setPlatform("illumina");
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);
final GATKRead read = ArtificialReadUtils.createRandomRead(header, length, false);
read.setReadGroup(rg.getReadGroupId());
final byte[] readQuals = new byte[length];
for (int i = 0; i < length; i++) readQuals[i] = 20;
read.setBaseQualities(readQuals);
final int expectedKeys = expectedNumberOfKeys(length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
// keep track of how many keys were produced
int nKeys = 0;
final ReadCovariates rc = RecalUtils.computeCovariates(read, header, covariateList, true, new CovariateKeyCache());
final RecalibrationTables recalibrationTables = new RecalibrationTables(covariateList);
final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
for (int offset = 0; offset < length; offset++) {
for (EventType errorMode : EventType.values()) {
final int[] covariates = rc.getKeySet(offset, errorMode);
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
nKeys += 2;
for (NestedIntegerArray<RecalDatum> covTable : recalibrationTables.getAdditionalTables()) {
Covariate cov = recalibrationTables.getCovariateForTable(covTable);
final int covValue = covariates[covariateList.indexByClass(cov.getClass())];
if (covValue >= 0) {
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
nKeys++;
}
}
}
}
Assert.assertEquals(nKeys, expectedKeys);
}
use of htsjdk.samtools.SAMReadGroupRecord in project polyGembler by c-zhou.
the class SamFileExtract method run.
@Override
public void run() {
// TODO Auto-generated method stub
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(bam_in));
final SAMFileHeader header = inputSam.getFileHeader();
final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
final SAMFileHeader header_out = new SAMFileHeader();
final SAMSequenceDictionary seqdic_out = new SAMSequenceDictionary();
SAMRecordIterator iter = inputSam.iterator();
File bed_file = new File(bed_in);
final Set<String> extract = new HashSet<String>();
try (BufferedReader br = new BufferedReader(new FileReader(bed_file))) {
String line;
while ((line = br.readLine()) != null) extract.add(line.split("\\s+")[0]);
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
header_out.setAttribute("VN", header.getAttribute("VN"));
header_out.setAttribute("SO", header.getAttribute("SO"));
List<SAMSequenceRecord> seqs = seqdic.getSequences();
for (SAMSequenceRecord seq : seqs) if (extract.contains(seq.getSequenceName()))
seqdic_out.addSequence(seq);
header_out.setSequenceDictionary(seqdic_out);
for (SAMReadGroupRecord rg : header.getReadGroups()) header_out.addReadGroup(rg);
for (SAMProgramRecord pg : header.getProgramRecords()) header_out.addProgramRecord(pg);
final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_out, true, new File(bam_out));
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (extract.contains(rec.getReferenceName()))
outputSam.addAlignment(rec);
}
iter.close();
try {
inputSam.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
outputSam.close();
System.err.println(bam_in + " return true");
}
Aggregations