use of htsjdk.samtools.SAMException in project gatk by broadinstitute.
the class BedToIntervalListTest method doTest.
private void doTest(final String inputBed, final String header) throws IOException {
final File outputFile = BaseTest.createTempFile("bed_to_interval_list_test.", ".interval_list");
final BedToIntervalList program = new BedToIntervalList();
final File inputBedFile = new File(TEST_DATA_DIR, inputBed);
program.INPUT = inputBedFile;
program.SEQUENCE_DICTIONARY = new File(TEST_DATA_DIR, header);
program.OUTPUT = outputFile;
program.doWork();
// Assert they are equal
SAMException unexpectedException = null;
try {
IOUtil.assertFilesEqual(new File(inputBedFile.getAbsolutePath() + ".interval_list"), outputFile);
} catch (final SAMException e) {
unexpectedException = e;
}
Assert.assertEquals(unexpectedException, null);
}
use of htsjdk.samtools.SAMException in project gatk by broadinstitute.
the class BuildBamIndex method doWork.
/**
* Main method for the program. Checks that all input files are present and
* readable and that the output file can be written to. Then iterates through
* all the records generating a BAM Index, then writes the bai file.
*/
@Override
protected Object doWork() {
try {
inputUrl = new URL(INPUT);
} catch (MalformedURLException e) {
inputFile = new File(INPUT);
}
// set default output file - input-file.bai
if (OUTPUT == null) {
final String baseFileName;
if (inputUrl != null) {
final String path = inputUrl.getPath();
final int lastSlash = path.lastIndexOf('/');
baseFileName = path.substring(lastSlash + 1, path.length());
} else {
baseFileName = inputFile.getAbsolutePath();
}
if (baseFileName.endsWith(BamFileIoUtils.BAM_FILE_EXTENSION)) {
final int index = baseFileName.lastIndexOf('.');
OUTPUT = new File(baseFileName.substring(0, index) + BAMIndex.BAMIndexSuffix);
} else {
OUTPUT = new File(baseFileName + BAMIndex.BAMIndexSuffix);
}
}
IOUtil.assertFileIsWritable(OUTPUT);
final SamReader bam;
if (inputUrl != null) {
// remote input
bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).disable(SamReaderFactory.Option.EAGERLY_DECODE).enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS).open(SamInputResource.of(inputUrl));
} else {
// input from a normal file
IOUtil.assertFileIsReadable(inputFile);
bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS).open(inputFile);
}
if (bam.type() != SamReader.Type.BAM_TYPE) {
throw new SAMException("Input file must be bam file, not sam file.");
}
if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
throw new SAMException("Input bam file must be sorted by coordinates");
}
BAMIndexer.createIndex(bam, OUTPUT);
logger.info("Successfully wrote bam index file " + OUTPUT);
CloserUtil.close(bam);
return null;
}
use of htsjdk.samtools.SAMException in project gatk by broadinstitute.
the class CachingIndexedFastaSequenceFile method getSubsequenceAt.
/**
* Gets the subsequence of the contig in the range [start,stop]
*
* Uses the sequence cache if possible, or updates the cache to handle the request. If the range
* is larger than the cache itself, just loads the sequence directly, not changing the cache at all
*
* @param contig Contig whose subsequence to retrieve.
* @param start inclusive, 1-based start of region.
* @param stop inclusive, 1-based stop of region.
* @return The partial reference sequence associated with this range. If preserveCase is false, then
* all of the bases in the ReferenceSequence returned by this method will be upper cased.
*/
@Override
public ReferenceSequence getSubsequenceAt(final String contig, long start, final long stop) {
final ReferenceSequence result;
if ((stop - start) >= cacheSize) {
cacheMisses++;
result = super.getSubsequenceAt(contig, start, stop);
if (!preserveCase)
StringUtil.toUpperCase(result.getBases());
if (!preserveIUPAC)
BaseUtils.convertIUPACtoN(result.getBases(), true, start < 1);
} else {
// todo -- potential optimization is to check if contig.name == contig, as this in general will be true
SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
if (contigInfo == null) {
throw new UserException.MissingContigInSequenceDictionary(contig, super.getSequenceDictionary());
}
if (stop > contigInfo.getSequenceLength())
throw new SAMException("Query asks for data past end of contig. Query contig " + contig + " start:" + start + " stop:" + stop + " contigLength:" + contigInfo.getSequenceLength());
if (start < cache.start || stop > cache.stop || cache.seq == null || cache.seq.getContigIndex() != contigInfo.getSequenceIndex()) {
cacheMisses++;
cache.start = Math.max(start - cacheMissBackup, 0);
cache.stop = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength());
cache.seq = super.getSubsequenceAt(contig, cache.start, cache.stop);
// convert all of the bases in the sequence to upper case if we aren't preserving cases
if (!preserveCase)
StringUtil.toUpperCase(cache.seq.getBases());
if (!preserveIUPAC)
BaseUtils.convertIUPACtoN(cache.seq.getBases(), true, cache.start == 0);
} else {
cacheHits++;
}
// at this point we determine where in the cache we want to extract the requested subsequence
final int cacheOffsetStart = (int) (start - cache.start);
final int cacheOffsetStop = (int) (stop - start + cacheOffsetStart + 1);
try {
result = new ReferenceSequence(cache.seq.getName(), cache.seq.getContigIndex(), Arrays.copyOfRange(cache.seq.getBases(), cacheOffsetStart, cacheOffsetStop));
} catch (ArrayIndexOutOfBoundsException e) {
throw new GATKException(String.format("BUG: bad array indexing. Cache start %d and end %d, request start %d end %d, offset start %d and end %d, base size %d", cache.start, cache.stop, start, stop, cacheOffsetStart, cacheOffsetStop, cache.seq.getBases().length), e);
}
}
// for debugging -- print out our efficiency if requested
if (PRINT_EFFICIENCY && (getCacheHits() + getCacheMisses()) % PRINT_FREQUENCY == 0)
printEfficiency(Level.INFO);
return result;
}
Aggregations