use of htsjdk.samtools.SamReaderFactory in project gatk by broadinstitute.
the class GATKToolUnitTest method testReadsHeader.
@Test
public void testReadsHeader() throws Exception {
final GATKTool tool = new TestGATKToolWithReads();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final File bamFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
final String[] args = { "-I", bamFile.getCanonicalPath() };
clp.parseArguments(System.out, args);
tool.onStartup();
final SAMFileHeader headerForReads = tool.getHeaderForReads();
final SamReaderFactory factory = //read the file directly and compare headers
SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
try (SamReader samReader = factory.open(bamFile)) {
final SAMFileHeader samFileHeader = samReader.getFileHeader();
Assert.assertEquals(headerForReads, samFileHeader);
}
tool.doWork();
tool.onShutdown();
}
use of htsjdk.samtools.SamReaderFactory in project gatk by broadinstitute.
the class PrintReadsSparkIntegrationTest method testReadFilters.
@Test(dataProvider = "readFilterTestData", groups = "spark")
public void testReadFilters(final String input, final String reference, final String extOut, final List<String> inputArgs, final int expectedCount) throws IOException {
final File outFile = createTempFile("testReadFilter", extOut);
final ArgumentsBuilder args = new ArgumentsBuilder();
args.add("-I");
args.add(new File(TEST_DATA_DIR, input).getAbsolutePath());
args.add("-O");
args.add(outFile.getAbsolutePath());
if (reference != null) {
args.add("-R");
args.add(new File(TEST_DATA_DIR, reference).getAbsolutePath());
}
for (final String filter : inputArgs) {
args.add(filter);
}
runCommandLine(args);
SamReaderFactory factory = SamReaderFactory.makeDefault();
if (reference != null) {
factory = factory.referenceSequence(new File(TEST_DATA_DIR, reference));
}
int count = 0;
try (final SamReader reader = factory.open(outFile)) {
Iterator<SAMRecord> it = reader.iterator();
while (it.hasNext()) {
SAMRecord rec = it.next();
count++;
}
}
Assert.assertEquals(count, expectedCount);
}
use of htsjdk.samtools.SamReaderFactory in project hmftools by hartwigmedical.
the class CountSupplier method fromBam.
@NotNull
public Multimap<Chromosome, CobaltCount> fromBam(@NotNull final String referenceBam, @NotNull final String tumorBam) throws IOException, ExecutionException, InterruptedException {
final File tumorFile = new File(tumorBam);
final File referenceFile = new File(referenceBam);
final SamReaderFactory readerFactory = SamReaderFactory.make();
final String chromosomeLengthFileName = ChromosomeLengthFile.generateFilename(outputDirectory, tumor);
final List<ChromosomeLength> lengths;
try (SamReader reader = readerFactory.open(tumorFile)) {
lengths = ChromosomeLengthFactory.create(reader.getFileHeader());
}
ChromosomeLengthFile.write(chromosomeLengthFileName, lengths);
LOGGER.info("Calculating Read Count from {}", tumorFile.toString());
final List<Future<ChromosomeReadCount>> tumorFutures = createFutures(readerFactory, tumorFile, lengths);
LOGGER.info("Calculating Read Count from {}", referenceFile.toString());
final List<Future<ChromosomeReadCount>> referenceFutures = createFutures(readerFactory, referenceFile, lengths);
final Multimap<String, ReadCount> tumorCounts = fromFutures(tumorFutures);
final Multimap<String, ReadCount> referenceCounts = fromFutures(referenceFutures);
LOGGER.info("Read Count Complete");
return CobaltCountFactory.merge(referenceCounts, tumorCounts);
}
use of htsjdk.samtools.SamReaderFactory in project ASCIIGenome by dariober.
the class Utils method getAlignedReadCount.
public static long getAlignedReadCount(String bam) throws IOException {
/* ------------------------------------------------------ */
/* This chunk prepares SamReader from local bam or URL bam */
UrlValidator urlValidator = new UrlValidator();
SamReaderFactory srf = SamReaderFactory.make();
srf.validationStringency(ValidationStringency.SILENT);
SamReader samReader;
if (urlValidator.isValid(bam)) {
samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(bam)).index(new URL(bam + ".bai")));
} else {
samReader = srf.open(new File(bam));
}
/* ------------------------------------------------------ */
List<SAMSequenceRecord> sequences = samReader.getFileHeader().getSequenceDictionary().getSequences();
long alnCount = 0;
for (SAMSequenceRecord x : sequences) {
alnCount += samReader.indexing().getIndex().getMetaData(x.getSequenceIndex()).getAlignedRecordCount();
}
samReader.close();
return alnCount;
}
use of htsjdk.samtools.SamReaderFactory in project ASCIIGenome by dariober.
the class Utils method initRegionFromFile.
/**
* Get the first chrom string from first line of input file. As you add support for more filetypes you should update
* this function. This method is very dirty and shouldn't be trusted 100%
* @throws InvalidGenomicCoordsException
* @throws SQLException
* @throws InvalidRecordException
* @throws InvalidCommandLineException
* @throws ClassNotFoundException
*/
@SuppressWarnings("unused")
public static String initRegionFromFile(String x) throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidCommandLineException, InvalidRecordException, SQLException {
UrlValidator urlValidator = new UrlValidator();
String region = "";
TrackFormat fmt = Utils.getFileTypeFromName(x);
if (fmt.equals(TrackFormat.BAM)) {
SamReader samReader;
if (urlValidator.isValid(x)) {
samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(new URL(x)));
} else {
SamReaderFactory srf = SamReaderFactory.make();
srf.validationStringency(ValidationStringency.SILENT);
samReader = srf.open(new File(x));
}
// Default: Start from the first contig in dictionary
region = samReader.getFileHeader().getSequence(0).getSequenceName();
SAMRecordIterator iter = samReader.iterator();
if (iter.hasNext()) {
// If there are records in this BAM, init from first record
SAMRecord rec = iter.next();
region = rec.getContig() + ":" + rec.getAlignmentStart();
samReader.close();
}
return region;
} else if (fmt.equals(TrackFormat.BIGWIG) && !urlValidator.isValid(x)) {
// Loading from URL is painfully slow so do not initialize from URL
return initRegionFromBigWig(x);
} else if (fmt.equals(TrackFormat.BIGBED) && !urlValidator.isValid(x)) {
// Loading from URL is painfully slow so do not initialize from URL
return initRegionFromBigBed(x);
} else if (urlValidator.isValid(x) && (fmt.equals(TrackFormat.BIGWIG) || fmt.equals(TrackFormat.BIGBED))) {
System.err.println("Refusing to initialize from URL");
throw new InvalidGenomicCoordsException();
} else if (fmt.equals(TrackFormat.TDF)) {
Iterator<String> iter = TDFReader.getReader(x).getChromosomeNames().iterator();
while (iter.hasNext()) {
region = iter.next();
if (!region.equals("All")) {
return region;
}
}
System.err.println("Cannot initialize from " + x);
throw new RuntimeException();
} else {
// Input file appears to be a generic interval file. We expect chrom to be in column 1
// VCF files are also included here since they are either gzip or plain ASCII.
BufferedReader br;
GZIPInputStream gzipStream;
if (x.toLowerCase().endsWith(".gz") || x.toLowerCase().endsWith(".bgz")) {
if (urlValidator.isValid(x)) {
gzipStream = new GZIPInputStream(new URL(x).openStream());
} else {
InputStream fileStream = new FileInputStream(x);
gzipStream = new GZIPInputStream(fileStream);
}
Reader decoder = new InputStreamReader(gzipStream, "UTF-8");
br = new BufferedReader(decoder);
} else {
if (urlValidator.isValid(x)) {
InputStream instream = new URL(x).openStream();
Reader decoder = new InputStreamReader(instream, "UTF-8");
br = new BufferedReader(decoder);
} else {
br = new BufferedReader(new FileReader(x));
}
}
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
if (line.startsWith("#") || line.isEmpty() || line.startsWith("track ")) {
continue;
}
if (fmt.equals(TrackFormat.VCF)) {
region = line.split("\t")[0] + ":" + line.split("\t")[1];
} else {
IntervalFeature feature = new IntervalFeature(line, fmt, null);
region = feature.getChrom() + ":" + feature.getFrom();
}
br.close();
return region;
}
if (line == null) {
// This means the input has no records
region = "Undefined_contig";
if (fmt.equals(TrackFormat.VCF)) {
SAMSequenceDictionary seqdict = getVCFHeader(x).getSequenceDictionary();
if (seqdict != null) {
Iterator<SAMSequenceRecord> iter = seqdict.getSequences().iterator();
if (iter.hasNext()) {
region = iter.next().getSequenceName();
}
}
}
return region;
}
}
System.err.println("Cannot initialize from " + x);
throw new RuntimeException();
}
Aggregations