Search in sources:

Example 31 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project gatk by broadinstitute.

the class GATKToolUnitTest method testReadsHeader.

/**
 * Checks that the header a {@code GATKTool} reports for its reads input equals the header
 * obtained by opening the same BAM file directly with a {@code SamReaderFactory}.
 *
 * @throws Exception if argument parsing, tool startup or reading the BAM fails
 */
@Test
public void testReadsHeader() throws Exception {
    final GATKTool toolUnderTest = new TestGATKToolWithReads();
    final CommandLineParser parser = new CommandLineArgumentParser(toolUnderTest);
    final File inputBam = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
    parser.parseArguments(System.out, new String[] { "-I", inputBam.getCanonicalPath() });
    toolUnderTest.onStartup();

    final SAMFileHeader headerFromTool = toolUnderTest.getHeaderForReads();

    // Read the file directly (silent validation) and compare the two headers.
    try (SamReader directReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(inputBam)) {
        Assert.assertEquals(headerFromTool, directReader.getFileHeader());
    }

    toolUnderTest.doWork();
    toolUnderTest.onShutdown();
}
Also used : SamReader(htsjdk.samtools.SamReader) CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) SamReaderFactory(htsjdk.samtools.SamReaderFactory) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 32 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project gatk by broadinstitute.

the class PrintReadsSparkIntegrationTest method testReadFilters.

/**
 * Runs the tool on {@code input} with the given extra arguments and checks that the output file
 * contains exactly {@code expectedCount} records.
 *
 * @param input         input file name, resolved against {@code TEST_DATA_DIR}
 * @param reference     reference file name resolved against {@code TEST_DATA_DIR}, or {@code null} for none
 * @param extOut        extension passed to {@code createTempFile} for the output file
 * @param inputArgs     additional command-line arguments, appended in order
 * @param expectedCount number of records expected in the output
 * @throws IOException if closing the output reader fails
 */
@Test(dataProvider = "readFilterTestData", groups = "spark")
public void testReadFilters(final String input, final String reference, final String extOut, final List<String> inputArgs, final int expectedCount) throws IOException {
    final File outFile = createTempFile("testReadFilter", extOut);
    final boolean hasReference = reference != null;

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.add("-I");
    args.add(new File(TEST_DATA_DIR, input).getAbsolutePath());
    args.add("-O");
    args.add(outFile.getAbsolutePath());
    if (hasReference) {
        args.add("-R");
        args.add(new File(TEST_DATA_DIR, reference).getAbsolutePath());
    }
    for (final String extraArg : inputArgs) {
        args.add(extraArg);
    }
    runCommandLine(args);

    // The output is read back with the same reference (if any) that was given to the tool.
    final SamReaderFactory readerFactory = hasReference
            ? SamReaderFactory.makeDefault().referenceSequence(new File(TEST_DATA_DIR, reference))
            : SamReaderFactory.makeDefault();

    int observedCount = 0;
    try (final SamReader outputReader = readerFactory.open(outFile)) {
        for (final SAMRecord ignored : outputReader) {
            observedCount++;
        }
    }
    Assert.assertEquals(observedCount, expectedCount);
}
Also used : SamReader(htsjdk.samtools.SamReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecord(htsjdk.samtools.SAMRecord) ArgumentsBuilder(org.broadinstitute.hellbender.utils.test.ArgumentsBuilder) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 33 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project hmftools by hartwigmedical.

the class CountSupplier method fromBam.

/**
 * Produces merged reference/tumor counts from a pair of BAM files.
 *
 * <p>Chromosome lengths are taken from the tumor BAM header, written to the chromosome length
 * file, and then used for both the tumor and the reference read counts.
 *
 * @param referenceBam path of the reference BAM
 * @param tumorBam     path of the tumor BAM
 * @return the result of {@code CobaltCountFactory.merge} on the reference and tumor read counts
 */
@NotNull
public Multimap<Chromosome, CobaltCount> fromBam(@NotNull final String referenceBam, @NotNull final String tumorBam) throws IOException, ExecutionException, InterruptedException {
    final File tumorInput = new File(tumorBam);
    final File referenceInput = new File(referenceBam);
    final SamReaderFactory factory = SamReaderFactory.make();
    final String lengthsFileName = ChromosomeLengthFile.generateFilename(outputDirectory, tumor);

    // Only the tumor header is consulted for chromosome lengths.
    final List<ChromosomeLength> chromosomeLengths;
    try (SamReader headerReader = factory.open(tumorInput)) {
        chromosomeLengths = ChromosomeLengthFactory.create(headerReader.getFileHeader());
    }
    ChromosomeLengthFile.write(lengthsFileName, chromosomeLengths);

    // Both sets of futures are created before either set is collected.
    LOGGER.info("Calculating Read Count from {}", tumorInput.toString());
    final List<Future<ChromosomeReadCount>> pendingTumor = createFutures(factory, tumorInput, chromosomeLengths);
    LOGGER.info("Calculating Read Count from {}", referenceInput.toString());
    final List<Future<ChromosomeReadCount>> pendingReference = createFutures(factory, referenceInput, chromosomeLengths);

    final Multimap<String, ReadCount> tumorReadCounts = fromFutures(pendingTumor);
    final Multimap<String, ReadCount> referenceReadCounts = fromFutures(pendingReference);
    LOGGER.info("Read Count Complete");

    return CobaltCountFactory.merge(referenceReadCounts, tumorReadCounts);
}
Also used : SamReader(htsjdk.samtools.SamReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Future(java.util.concurrent.Future) File(java.io.File) CobaltCountFile(com.hartwig.hmftools.common.cobalt.CobaltCountFile) ChromosomeLengthFile(com.hartwig.hmftools.common.chromosome.ChromosomeLengthFile) CobaltRatioFile(com.hartwig.hmftools.common.cobalt.CobaltRatioFile) ReadCountFile(com.hartwig.hmftools.common.cobalt.ReadCountFile) ChromosomeLength(com.hartwig.hmftools.common.chromosome.ChromosomeLength) ReadCount(com.hartwig.hmftools.common.cobalt.ReadCount) NotNull(org.jetbrains.annotations.NotNull)

Example 34 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project ASCIIGenome by dariober.

the class Utils method getAlignedReadCount.

/**
 * Sums the aligned-record counts that the BAM index reports for every sequence in the header's
 * sequence dictionary.
 *
 * <p>When {@code bam} is accepted by {@link UrlValidator} it is opened as a URL, with the index
 * taken from {@code bam + ".bai"}. Otherwise it is opened as a local file with validation
 * stringency {@code SILENT}.
 *
 * <p>The reader is closed on every exit path, including when reading the header or index throws.
 *
 * @param bam path or URL of the BAM file
 * @return total aligned record count over all sequences in the dictionary
 * @throws IOException if the URL is malformed or closing the reader fails
 */
public static long getAlignedReadCount(String bam) throws IOException {
    UrlValidator urlValidator = new UrlValidator();

    // Open the reader from a remote URL or a local file.
    final SamReader opened;
    if (urlValidator.isValid(bam)) {
        opened = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(bam)).index(new URL(bam + ".bai")));
    } else {
        SamReaderFactory srf = SamReaderFactory.make();
        srf.validationStringency(ValidationStringency.SILENT);
        opened = srf.open(new File(bam));
    }

    // try-with-resources: previously an exception below skipped close() and leaked the reader.
    try (SamReader samReader = opened) {
        List<SAMSequenceRecord> sequences = samReader.getFileHeader().getSequenceDictionary().getSequences();
        long alnCount = 0;
        for (SAMSequenceRecord x : sequences) {
            alnCount += samReader.indexing().getIndex().getMetaData(x.getSequenceIndex()).getAlignedRecordCount();
        }
        return alnCount;
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) UrlValidator(org.apache.commons.validator.routines.UrlValidator) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) URL(java.net.URL)

Example 35 with SamReaderFactory

use of htsjdk.samtools.SamReaderFactory in project ASCIIGenome by dariober.

the class Utils method initRegionFromFile.

/**
 * Derive an initial region string from the input file {@code x}. As you add support for more
 * filetypes you should update this function. This method is very dirty and shouldn't be trusted 100%.
 *
 * <p>Behaviour by track format (as returned by {@code Utils.getFileTypeFromName}):
 * <ul>
 *   <li>BAM: {@code contig:alignmentStart} of the first record, or the name of the first sequence
 *       in the header when the file has no records.</li>
 *   <li>bigWig / bigBed: delegated to {@code initRegionFromBigWig} / {@code initRegionFromBigBed}
 *       for local files; URLs are refused.</li>
 *   <li>TDF: the first chromosome name that is not {@code "All"}.</li>
 *   <li>Anything else: treated as a (possibly gzipped) text interval file. Lines that are empty,
 *       start with {@code #} or start with {@code "track "} are skipped. The first remaining line
 *       gives {@code chrom:start}. If there is no such line the result is
 *       {@code "Undefined_contig"}, or, for VCF, the first contig in the header's sequence
 *       dictionary when one is present.</li>
 * </ul>
 *
 * <p>All readers opened here are closed before returning, including when the input has no
 * records or when parsing throws.
 *
 * @param x path or URL of the input file
 * @return a contig name or a {@code contig:position} string
 * @throws IOException if opening, reading or closing the input fails
 * @throws InvalidGenomicCoordsException if {@code x} is a bigWig or bigBed URL
 * @throws RuntimeException if a TDF file has no chromosome name other than {@code "All"}
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws InvalidCommandLineException
 * @throws ClassNotFoundException
 */
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
    UrlValidator urlValidator = new UrlValidator();
    String region = "";
    TrackFormat fmt = Utils.getFileTypeFromName(x);
    final boolean isUrl = urlValidator.isValid(x);
    if (fmt.equals(TrackFormat.BAM)) {
        final SamReader opened;
        if (isUrl) {
            opened = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
        } else {
            SamReaderFactory srf = SamReaderFactory.make();
            srf.validationStringency(ValidationStringency.SILENT);
            opened = srf.open(new File(x));
        }
        // Close the reader on every path; it used to be closed only when a record was found.
        try (SamReader samReader = opened) {
            // Default: Start from the first contig in dictionary
            region = samReader.getFileHeader().getSequence(0).getSequenceName();
            try (SAMRecordIterator iter = samReader.iterator()) {
                if (iter.hasNext()) {
                    // If there are records in this BAM, init from first record
                    SAMRecord rec = iter.next();
                    region = rec.getContig() + ":" + rec.getAlignmentStart();
                }
            }
        }
        return region;
    } else if (fmt.equals(TrackFormat.BIGWIG) && !isUrl) {
        // Loading from URL is painfully slow so do not initialize from URL
        return initRegionFromBigWig(x);
    } else if (fmt.equals(TrackFormat.BIGBED) && !isUrl) {
        // Loading from URL is painfully slow so do not initialize from URL
        return initRegionFromBigBed(x);
    } else if (isUrl && (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED))) {
        System.err.println("Refusing to initialize from URL");
        throw new InvalidGenomicCoordsException();
    } else if (fmt.equals(TrackFormat.TDF)) {
        Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
        while (iter.hasNext()) {
            region = iter.next();
            if (!region.equals("All")) {
                return region;
            }
        }
        System.err.println("Cannot initialize from " + x);
        throw new RuntimeException();
    } else {
        // Input file appears to be a generic interval file. We expect chrom to be in column 1
        // VCF files are also included here since they are either gzip or plain ASCII.
        BufferedReader br;
        if (x.toLowerCase().endsWith(".gz") || x.toLowerCase().endsWith(".bgz")) {
            InputStream rawStream = isUrl ? new URL(x).openStream() : new FileInputStream(x);
            try {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(rawStream), "UTF-8"));
            } catch (IOException e) {
                // E.g. the file is not really gzipped: do not leak the underlying stream.
                rawStream.close();
                throw e;
            }
        } else if (isUrl) {
            br = new BufferedReader(new InputStreamReader(new URL(x).openStream(), "UTF-8"));
        } else {
            br = new BufferedReader(new FileReader(x));
        }
        try (BufferedReader reader = br) {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
                    continue;
                }
                // First data line decides the region.
                if (fmt.equals(TrackFormat.VCF)) {
                    String[] columns = line.split("\t");
                    return columns[0] + ":" + columns[1];
                }
                IntervalFeature feature = new IntervalFeature(line, fmt, null);
                return feature.getChrom() + ":" + feature.getFrom();
            }
        }
        // Reaching here means the input has no records
        region = "Undefined_contig";
        if (fmt.equals(TrackFormat.VCF)) {
            SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
            if (seqdict != null) {
                Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
                if (iter.hasNext()) {
                    region = iter.next().getSequenceName();
                }
            }
        }
        return region;
    }
}
Also used : TrackFormat(tracks.TrackFormat) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) InputStreamReader(java.io.InputStreamReader) GZIPInputStream(java.util.zip.GZIPInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Reader(java.io.Reader) AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) TabixReader(htsjdk.tribble.readers.TabixReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) TDFReader(org.broad.igv.tdf.TDFReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) URL(java.net.URL) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UrlValidator(org.apache.commons.validator.routines.UrlValidator) InvalidGenomicCoordsException(exceptions.InvalidGenomicCoordsException) BufferedReader(java.io.BufferedReader) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) BBFileReader(org.broad.igv.bbfile.BBFileReader) FileReader(java.io.FileReader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IntervalFeature(tracks.IntervalFeature)

Aggregations

SamReaderFactory (htsjdk.samtools.SamReaderFactory)57 SamReader (htsjdk.samtools.SamReader)51 File (java.io.File)43 SAMRecord (htsjdk.samtools.SAMRecord)27 IOException (java.io.IOException)26 SAMFileHeader (htsjdk.samtools.SAMFileHeader)18 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)17 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)14 ArrayList (java.util.ArrayList)13 List (java.util.List)11 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)10 HashSet (java.util.HashSet)10 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)9 SAMFileWriter (htsjdk.samtools.SAMFileWriter)8 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 URL (java.net.URL)8 HashMap (java.util.HashMap)8 BufferedReader (java.io.BufferedReader)7 PrintWriter (java.io.PrintWriter)7