Use of htsjdk.samtools.Chunk in the gatk project by broadinstitute.
From the class NioBam, method getChunksBalanced.
/**
 * Splits one reference sequence into query intervals that each cover a similar amount of
 * indexed BAM data.
 *
 * <p>The sequence is probed in fixed-size steps. Whenever the chunks overlapping the current
 * candidate interval reach the running target length, the interval is emitted and the target
 * is recomputed from the data that is left. The final probe is clamped to the sequence end
 * and whatever remains there is emitted, so the tail of the sequence is not lost.
 *
 * @param bam           reader whose index and header are consulted
 * @param sequenceIndex index of the reference sequence in the header
 * @param retCount      desired number of intervals; a value of zero raises
 *                      {@link ArithmeticException} when the sequence has indexed data
 * @return the emitted intervals in order of increasing start; empty when the chunks found for
 *         the whole sequence have a total length of zero
 */
private static List<QueryInterval> getChunksBalanced(SamReader bam, int sequenceIndex, int retCount) {
    List<QueryInterval> ret = new ArrayList<>();
    BAMIndex index = bam.indexing().getIndex();
    SAMFileHeader header = bam.getFileHeader();
    SAMSequenceRecord s = header.getSequence(sequenceIndex);
    int end = s.getSequenceLength();
    long totalLength = chunksLength(getChunks(index, sequenceIndex, 1, end + 1));
    if (totalLength == 0) {
        return ret;
    }
    // Kept as long: the lengths being summed are long, and an int accumulator would be
    // narrowed silently by "+=" and could overflow.
    long sofar = 0;
    long targetLength = totalLength / retCount;
    // Roughly 100 probes per requested interval; long arithmetic keeps 100 * retCount from overflowing.
    int step = (int) Math.max(1L, end / (100L * retCount));
    int start = 1;
    int j = 0;
    while (j < end) {
        // Advance by one step, clamping the last probe to the sequence end
        // (written as a subtraction so that j + step cannot overflow).
        j = (end - j > step) ? j + step : end;
        long size = chunksLength(getChunks(index, sequenceIndex, start, j));
        if (j == end) {
            // Last probe: emit the remainder, even if it is below the target, unless it is empty.
            if (size > 0) {
                ret.add(new QueryInterval(sequenceIndex, start, end));
            }
            break;
        }
        if (size < targetLength) {
            // not big enough yet
            continue;
        }
        if (size > targetLength * 2) {
            // too large, search for a good separation point
            // TODO
        }
        // good, emit.
        // NOTE(review): the next interval starts at j while this one ends at j + 1, so
        // consecutive intervals overlap slightly - confirm callers tolerate that.
        ret.add(new QueryInterval(sequenceIndex, start, j + 1));
        start = j;
        sofar += size;
        if (ret.size() < retCount) {
            targetLength = (totalLength - sofar) / (retCount - ret.size());
        } else {
            targetLength = totalLength / retCount;
        }
    }
    return ret;
}
Use of htsjdk.samtools.Chunk in the hmftools project by hartwigmedical.
From the class BamSlicerApplication, method sliceFromURLs.
/**
 * Slices a remote BAM into a local BAM file, using an index file obtained through
 * {@code downloadIndex}.
 *
 * <p>The output path is read from the {@code OUTPUT} command line option. Records for the
 * requested intervals (when present) are written first, followed by the unmapped reads (when
 * an unmapped chunk is present). All readers, the writer and the index are released through
 * try-with-resources, so they are closed even when slicing fails part-way.
 *
 * @param indexUrl location of the BAM index
 * @param bamUrl   location of the BAM file
 * @param cmd      parsed command line holding the slicing options
 * @throws IOException if a helper or the closing of a reader fails with an I/O error
 */
private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    try (final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
         final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
                 .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));
         final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false)) {
        final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
        final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
        final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
        // The caching reader is created only once the chunks to fetch are known.
        try (final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks)) {
            queryIntervalsAndSpan.ifPresent(pair -> {
                LOGGER.info("Slicing bam on bed regions...");
                final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
                writeToSlice(writer, bedIterator);
                LOGGER.info("Done writing bed slices.");
            });
            unmappedChunk.ifPresent(chunk -> {
                LOGGER.info("Slicing unmapped reads...");
                final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
                writeToSlice(writer, unmappedIterator);
                LOGGER.info("Done writing unmapped reads.");
            });
        }
    }
}
Use of htsjdk.samtools.Chunk in the hmftools project by hartwigmedical.
From the class BamSlicerApplication, method expandChunks.
/**
 * Builds a new list in which every chunk keeps its start but has its end moved forward.
 *
 * <p>For each chunk, the block address of the chunk end is advanced by
 * {@code BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE}, capped at
 * {@code MAX_BLOCK_ADDRESS}, and shifted left by 16 bits to form the new end value.
 *
 * @param chunks chunks to expand; the list and its elements are not modified
 * @return a new list of expanded chunks, in the same order as the input
 */
@NotNull
private static List<Chunk> expandChunks(@NotNull final List<Chunk> chunks) {
    final List<Chunk> expanded = Lists.newArrayList();
    chunks.forEach(original -> {
        final long endBlock = BlockCompressedFilePointerUtil.getBlockAddress(original.getChunkEnd());
        // Push the end out by one maximum-size block, but never past the largest allowed address.
        final long cappedEndBlock =
                Math.min(endBlock + BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE, MAX_BLOCK_ADDRESS);
        // NOTE(review): the 16-bit shift presumably rebuilds a virtual file pointer with a
        // zero in-block offset - confirm against BlockCompressedFilePointerUtil.
        expanded.add(new Chunk(original.getChunkStart(), cappedEndBlock << 16));
    });
    return expanded;
}
Use of htsjdk.samtools.Chunk in the jvarkit project by lindenb.
From the class Biostar172515, method doWork.
/**
 * Writes an XML description of the BAM index of every input file to the configured output.
 *
 * <p>The document root is {@code bai-list}. It holds one {@code bam} element per input, and
 * inside it one {@code reference} element per sequence of the dictionary, listing its bins
 * and the chunks of each bin. Inputs without an index, or without a browseable index, get a
 * {@code bam} element with no references.
 *
 * @param inputFiles input names, expanded through {@code IOUtils.unrollFiles}
 * @return 0 on success, -1 when any exception was raised while writing
 */
@Override
public int doWork(final List<String> inputFiles) {
    final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, Boolean.TRUE).validationStringency(ValidationStringency.LENIENT);
    OutputStream stream = null;
    final Set<String> args = IOUtils.unrollFiles(inputFiles);
    try {
        stream = super.openFileOrStdoutAsStream(this.outputFile);
        final XMLOutputFactory xof = XMLOutputFactory.newFactory();
        // Encode explicitly so the bytes written match the encoding announced in the XML declaration.
        this.w = xof.createXMLStreamWriter(stream, "UTF-8");
        this.w.writeStartDocument("UTF-8", "1.0");
        this.w.writeStartElement("bai-list");
        for (final String filename : args) {
            this.w.writeStartElement("bam");
            this.w.writeAttribute("bam", filename);
            // try-with-resources closes the reader on every exit path: normal, continue, or exception.
            try (final SamReader samReader = samReaderFactory.open(SamInputResource.of(filename))) {
                this.w.writeAttribute("has-index", String.valueOf(samReader.hasIndex()));
                if (!samReader.hasIndex()) {
                    this.w.writeEndElement();
                    continue;
                }
                final SamReader.Indexing indexing = samReader.indexing();
                if (!indexing.hasBrowseableIndex()) {
                    this.w.writeComment("no browseable index");
                    this.w.writeEndElement();
                    continue;
                }
                final SAMSequenceDictionary dict = samReader.getFileHeader().getSequenceDictionary();
                this.w.writeAttribute("n_ref", String.valueOf(dict.size()));
                final BrowseableBAMIndex baiFile;
                try {
                    baiFile = indexing.getBrowseableIndex();
                } catch (final Exception err) {
                    // The index was announced as browseable but could not be obtained: report and move on.
                    this.w.writeComment("no browseable index");
                    this.w.writeEndElement();
                    continue;
                }
                for (int tid = 0; tid < dict.size(); ++tid) {
                    final SAMSequenceRecord ssr = dict.getSequence(tid);
                    final BAMIndexMetaData baiMetaData = baiFile.getMetaData(tid);
                    this.w.writeStartElement("reference");
                    this.w.writeAttribute("ref-id", String.valueOf(tid));
                    this.w.writeAttribute("ref-name", ssr.getSequenceName());
                    this.w.writeAttribute("ref-length", String.valueOf(ssr.getSequenceLength()));
                    this.w.writeAttribute("n_aligned", String.valueOf(baiMetaData.getAlignedRecordCount()));
                    final BinList binList = baiFile.getBinsOverlapping(tid, 1, ssr.getSequenceLength());
                    // The bin list is only iterated here, so count the bins with a first pass.
                    int n_bin = 0;
                    for (@SuppressWarnings("unused") final Bin binItem : binList) {
                        n_bin++;
                    }
                    this.w.writeAttribute("n_bin", String.valueOf(n_bin));
                    this.w.writeAttribute("n_no_coor", String.valueOf(baiMetaData.getUnalignedRecordCount()));
                    for (final Bin binItem : binList) {
                        this.w.writeStartElement("bin");
                        this.w.writeAttribute("first-locus", String.valueOf(baiFile.getFirstLocusInBin(binItem)));
                        this.w.writeAttribute("last-locus", String.valueOf(baiFile.getLastLocusInBin(binItem)));
                        this.w.writeAttribute("level", String.valueOf(baiFile.getLevelForBin(binItem)));
                        final BAMFileSpan span = baiFile.getSpanOverlapping(binItem);
                        if (span == null) {
                            // NOTE(review): htsjdk appears to return no span for a bin of a reference
                            // without index content; report an empty bin instead of failing the dump.
                            this.w.writeAttribute("n_chunk", "0");
                        } else {
                            this.w.writeAttribute("first-offset", String.valueOf(span.getFirstOffset()));
                            final List<Chunk> chunks = span.getChunks();
                            this.w.writeAttribute("n_chunk", String.valueOf(chunks.size()));
                            for (final Chunk chunk : chunks) {
                                this.w.writeEmptyElement("chunk");
                                this.w.writeAttribute("chunk_beg", String.valueOf(chunk.getChunkStart()));
                                this.w.writeAttribute("chunk_end", String.valueOf(chunk.getChunkEnd()));
                            }
                        }
                        this.w.writeEndElement();
                    }
                    this.w.writeEndElement();
                }
                this.w.writeEndElement();
            }
        }
        this.w.writeEndElement();
        this.w.flush();
        this.w.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.w);
        CloserUtil.close(stream);
        this.w = null;
    }
}
Aggregations