Use of htsjdk.samtools.BAMIndex in project gatk by broadinstitute.
The class NioBam, method getChunksBalanced.
/**
 * Splits one reference sequence into query intervals that each cover a similar
 * amount of indexed data, as measured by {@code chunksLength(getChunks(...))}.
 *
 * <p>The sequence is probed at a fixed step (about 1/(100 * retCount) of its length,
 * at least 1). Whenever the data between the current interval start and the probe
 * position reaches the current target, an interval is emitted and the target is
 * recomputed from the data that is still unassigned.
 *
 * <p>NOTE(review): positions between the last emitted interval and the end of the
 * sequence are not covered by any returned interval, and consecutive intervals share
 * their boundary positions ({@code start = j} while the previous interval ends at
 * {@code j + 1}). Both behaviors are inherited unchanged; confirm with callers
 * whether they are intended.
 *
 * @param bam           reader providing the header and the index
 * @param sequenceIndex index of the reference sequence to split
 * @param retCount      desired number of intervals; 0 causes an
 *                      {@link ArithmeticException} when the sequence has indexed data
 * @return the emitted intervals, possibly empty (always empty when the sequence has
 *         no indexed data)
 */
private static List<QueryInterval> getChunksBalanced(SamReader bam, int sequenceIndex, int retCount) {
    List<QueryInterval> ret = new ArrayList<>();
    BAMIndex index = bam.indexing().getIndex();
    SAMFileHeader header = bam.getFileHeader();
    SAMSequenceRecord s = header.getSequence(sequenceIndex);
    int end = s.getSequenceLength();
    long totalLength = chunksLength(getChunks(index, sequenceIndex, 1, end + 1));
    if (totalLength == 0) {
        return ret;
    }
    // Accumulate in long: sizes are long, and "int += long" would silently narrow,
    // corrupting the remaining-data computation below once the sum exceeds int range.
    long sofar = 0;
    long targetLength = totalLength / retCount;
    // Compute the divisor in long so that 100 * retCount cannot overflow int.
    int step = (int) Math.max(1L, end / (100L * retCount));
    int start = 1;
    for (int j = step; j < end; j += step) {
        long size = chunksLength(getChunks(index, sequenceIndex, start, j));
        if (size < targetLength) {
            // not big enough yet
            continue;
        }
        // TODO: when size > targetLength * 2 the candidate is too large;
        // search for a better separation point instead of emitting as-is.

        // good, emit.
        ret.add(new QueryInterval(sequenceIndex, start, j + 1));
        start = j;
        sofar += size;
        if (ret.size() < retCount) {
            // Spread the data that is still unassigned over the intervals left to produce.
            targetLength = (totalLength - sofar) / (retCount - ret.size());
        } else {
            targetLength = totalLength / retCount;
        }
    }
    return ret;
}
Use of htsjdk.samtools.BAMIndex in project hmftools by hartwigmedical.
The class BamSlicerApplication, method sliceFromURLs.
/**
 * Slices a remote BAM into a local BAM file whose path is taken from the
 * {@code OUTPUT} command-line option.
 *
 * <p>The index is downloaded to a local file (deleted on JVM exit), the query
 * intervals and the optional unmapped chunk are resolved against that index, and the
 * matching records are then read through a caching reader and written to the output.
 *
 * <p>All readers, the writer and the index are managed with try-with-resources so
 * they are released even when slicing fails part-way through.
 *
 * @param indexUrl location of the BAM index to download
 * @param bamUrl   location of the BAM to slice
 * @param cmd      parsed command line supplying the output path and slicing options
 * @throws IOException if downloading, reading or closing a resource fails
 */
private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    try (final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
         final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
                 .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));
         final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false)) {
        final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
        final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
        final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
        // The caching reader is opened only after the chunks to fetch are known.
        try (final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks)) {
            queryIntervalsAndSpan.ifPresent(pair -> {
                LOGGER.info("Slicing bam on bed regions...");
                final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
                // NOTE(review): assumes writeToSlice closes the iterator - confirm.
                writeToSlice(writer, bedIterator);
                LOGGER.info("Done writing bed slices.");
            });
            unmappedChunk.ifPresent(chunk -> {
                LOGGER.info("Slicing unmapped reads...");
                final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
                writeToSlice(writer, unmappedIterator);
                LOGGER.info("Done writing unmapped reads.");
            });
        }
    }
}
Aggregations