Search in sources :

Example 16 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class ReadPileupUnitTest method testSimplePileup.

@Test
public void testSimplePileup() {
    final int readlength = 10;
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "5M3I2M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    read1.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    final ReadPileup pu = new ReadPileup(loc, reads, 1);
    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s", // the position
    loc.getContig(), // the position
    loc.getStart(), // the bases
    (char) read1.getBase(0), // the bases
    (char) read2.getBase(0), // base quality in FASTQ format
    SAMUtils.phredToFastq(read1.getBaseQuality(0)), // base quality in FASTQ format
    SAMUtils.phredToFastq(read2.getBaseQuality(0))));
    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> r.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
    Assert.assertEquals(pu.getOffsets(), Arrays.asList(1, 1), "getOffsets");
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getMappingQual() >= 10).isEmpty());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Locatable(htsjdk.samtools.util.Locatable) java.util(java.util) SAMUtils(htsjdk.samtools.SAMUtils) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) CigarElement(htsjdk.samtools.CigarElement) BeforeClass(org.testng.annotations.BeforeClass) CigarOperator(htsjdk.samtools.CigarOperator) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) NO_MAPPING_QUALITY(org.broadinstitute.hellbender.utils.read.ReadConstants.NO_MAPPING_QUALITY) Utils(org.broadinstitute.hellbender.utils.Utils) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 17 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class ReadPileupUnitTest method testSimplePileupWithOffset.

@Test
public void testSimplePileupWithOffset() {
    final int readlength = 10;
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";
    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "10M";
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    read1.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    final int off = 6;
    final ReadPileup pu = new ReadPileup(loc, reads, off);
    Assert.assertEquals(pu.getBases(), new byte[] { (byte) 'A', (byte) 'C' });
    //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertNotNull(pu.toString());
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s", // the position
    loc.getContig(), // the position
    loc.getStart(), // the bases
    (char) read1.getBase(off), // the bases
    (char) read2.getBase(off), // base quality in FASTQ format
    SAMUtils.phredToFastq(read1.getBaseQuality(off)), // base quality in FASTQ format
    SAMUtils.phredToFastq(read2.getBaseQuality(off))));
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[] { 1, 1, 0, 0 });
    Assert.assertEquals(pu.getBaseQuals(), new byte[] { 10, 20 });
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[] { NO_MAPPING_QUALITY, NO_MAPPING_QUALITY });
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getRead().isReverseStrand()).isEmpty(), "getReverseStrandPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> !r.getRead().isReverseStrand()).size(), 2, "getForwardStrandPileup");
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
    Assert.assertEquals(pu.getOffsets(), Arrays.asList(off, off), "getOffsets");
    Assert.assertEquals(pu.getReadGroupIDs(), Arrays.asList(new SAMReadGroupRecord[] { null }), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[] { null }), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(p -> p.getMappingQual() >= 10).isEmpty());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Locatable(htsjdk.samtools.util.Locatable) java.util(java.util) SAMUtils(htsjdk.samtools.SAMUtils) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) CigarElement(htsjdk.samtools.CigarElement) BeforeClass(org.testng.annotations.BeforeClass) CigarOperator(htsjdk.samtools.CigarOperator) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) Test(org.testng.annotations.Test) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) ArtificialReadUtils(org.broadinstitute.hellbender.utils.read.ArtificialReadUtils) Assert(org.testng.Assert) NO_MAPPING_QUALITY(org.broadinstitute.hellbender.utils.read.ReadConstants.NO_MAPPING_QUALITY) Utils(org.broadinstitute.hellbender.utils.Utils) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 18 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class ReadPileupUnitTest method testGetPileupForSample.

@Test
public void testGetPileupForSample() {
    // read groups and header
    final SAMReadGroupRecord[] readGroups = new SAMReadGroupRecord[5];
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    int s = 0;
    // readGroups[4] is left null intentionally
    for (final String sample : Arrays.asList("sample1", "sample1", "sample2", null)) {
        readGroups[s] = new SAMReadGroupRecord("rg" + s);
        readGroups[s].setSample(sample);
        header.addReadGroup(readGroups[s++]);
    }
    // reads
    final int rgCoverage = 4;
    final List<GATKRead> reads = new ArrayList<>(rgCoverage * readGroups.length);
    for (int i = 0; i < rgCoverage; i++) {
        for (final SAMReadGroupRecord rg : readGroups) {
            final GATKRead r = ArtificialReadUtils.createArtificialRead(header, (rg == null) ? "null" : rg.getReadGroupId() + "_" + rg.getSample() + "_" + i, 0, 1, 10);
            if (rg != null) {
                r.setReadGroup(rg.getId());
            }
            reads.add(r);
        }
    }
    // pileup
    final ReadPileup pileup = new ReadPileup(loc, reads, 1);
    // sample1
    final ReadPileup pileupSample1 = pileup.getPileupForSample("sample1", header);
    Assert.assertEquals(pileupSample1.size(), rgCoverage * 2, "Wrong number of elements for sample1");
    Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg0"), "Pileup for sample1 should contain rg0");
    Assert.assertTrue(pileupSample1.getReadGroupIDs().contains("rg1"), "Pileup for sample1 should contain rg1");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg2"), "Pileup for sample1 shouldn't contain rg2");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg3"), "Pileup for sample1 shouldn't contain rg3");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains(null), "Pileup for sample1 shouldn't contain null read group");
    // sample2
    final ReadPileup pileupSample2 = pileup.getPileupForSample("sample2", header);
    Assert.assertEquals(pileupSample2.size(), rgCoverage, "Wrong number of elements for sample2");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg0"), "Pileup for sample2 shouldn't contain rg0");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg1"), "Pileup for sample2 shouldn't contain rg1");
    Assert.assertTrue(pileupSample2.getReadGroupIDs().contains("rg2"), "Pileup for sample2 should contain rg2");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg3"), "Pileup for sample2 shouldn't contain rg3");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains(null), "Pileup for sample2 shouldn't contain null read group");
    // null sample
    final ReadPileup pileupNull = pileup.getPileupForSample(null, header);
    Assert.assertEquals(pileupNull.size(), rgCoverage * 2, "Wrong number of elements for null sample");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg0"), "Pileup for null sample shouldn't contain rg0");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg1"), "Pileup for null sample shouldn't contain rg1");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg2"), "Pileup for null sample shouldn't contain rg2");
    Assert.assertTrue(pileupNull.getReadGroupIDs().contains("rg3"), "Pileup for null sample should contain rg3");
    Assert.assertTrue(pileupNull.getReadGroupIDs().contains(null), "Pileup for null sample should contain null read group");
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 19 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class RecalibrationReportUnitTest method testOutput.

@Test
public void testOutput() {
    final int length = 100;
    List<Byte> quals = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
    List<Long> counts = new ArrayList<>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
    for (int i = 0; i <= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
        quals.add((byte) i);
        counts.add(1L);
    }
    final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts);
    final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    quantizationInfo.noQuantization();
    final String readGroupID = "id";
    final StandardCovariateList covariateList = new StandardCovariateList(RAC, Collections.singletonList(readGroupID));
    final SAMReadGroupRecord rg = new SAMReadGroupRecord(readGroupID);
    rg.setPlatform("illumina");
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);
    final GATKRead read = ArtificialReadUtils.createRandomRead(header, length, false);
    read.setReadGroup(rg.getReadGroupId());
    final byte[] readQuals = new byte[length];
    for (int i = 0; i < length; i++) readQuals[i] = 20;
    read.setBaseQualities(readQuals);
    final int expectedKeys = expectedNumberOfKeys(length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
    // keep track of how many keys were produced
    int nKeys = 0;
    final ReadCovariates rc = RecalUtils.computeCovariates(read, header, covariateList, true, new CovariateKeyCache());
    final RecalibrationTables recalibrationTables = new RecalibrationTables(covariateList);
    final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getReadGroupTable();
    final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
    for (int offset = 0; offset < length; offset++) {
        for (EventType errorMode : EventType.values()) {
            final int[] covariates = rc.getKeySet(offset, errorMode);
            final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
            rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
            qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
            nKeys += 2;
            for (NestedIntegerArray<RecalDatum> covTable : recalibrationTables.getAdditionalTables()) {
                Covariate cov = recalibrationTables.getCovariateForTable(covTable);
                final int covValue = covariates[covariateList.indexByClass(cov.getClass())];
                if (covValue >= 0) {
                    covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
                    nKeys++;
                }
            }
        }
    }
    Assert.assertEquals(nKeys, expectedKeys);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 20 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project polyGembler by c-zhou.

the class SamFileExtract method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(bam_in));
    final SAMFileHeader header = inputSam.getFileHeader();
    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
    final SAMFileHeader header_out = new SAMFileHeader();
    final SAMSequenceDictionary seqdic_out = new SAMSequenceDictionary();
    SAMRecordIterator iter = inputSam.iterator();
    File bed_file = new File(bed_in);
    final Set<String> extract = new HashSet<String>();
    try (BufferedReader br = new BufferedReader(new FileReader(bed_file))) {
        String line;
        while ((line = br.readLine()) != null) extract.add(line.split("\\s+")[0]);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    header_out.setAttribute("VN", header.getAttribute("VN"));
    header_out.setAttribute("SO", header.getAttribute("SO"));
    List<SAMSequenceRecord> seqs = seqdic.getSequences();
    for (SAMSequenceRecord seq : seqs) if (extract.contains(seq.getSequenceName()))
        seqdic_out.addSequence(seq);
    header_out.setSequenceDictionary(seqdic_out);
    for (SAMReadGroupRecord rg : header.getReadGroups()) header_out.addReadGroup(rg);
    for (SAMProgramRecord pg : header.getProgramRecords()) header_out.addProgramRecord(pg);
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_out, true, new File(bam_out));
    while (iter.hasNext()) {
        SAMRecord rec = iter.next();
        if (extract.contains(rec.getReferenceName()))
            outputSam.addAlignment(rec);
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    outputSam.close();
    System.err.println(bam_in + " return true");
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) HashSet(java.util.HashSet)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)81 SAMFileHeader (htsjdk.samtools.SAMFileHeader)48 SAMRecord (htsjdk.samtools.SAMRecord)33 Test (org.testng.annotations.Test)31 SamReader (htsjdk.samtools.SamReader)29 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)26 File (java.io.File)23 ArrayList (java.util.ArrayList)22 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)20 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)20 HashMap (java.util.HashMap)18 CigarElement (htsjdk.samtools.CigarElement)17 Cigar (htsjdk.samtools.Cigar)16 HashSet (java.util.HashSet)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 CigarOperator (htsjdk.samtools.CigarOperator)14 IOException (java.io.IOException)14 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)13 List (java.util.List)12