Search in sources :

Example 1 with BAMIndex

use of htsjdk.samtools.BAMIndex in project gatk by broadinstitute.

The method getChunksBalanced of the class NioBam.

/**
 * Splits one reference sequence into query intervals whose indexed data
 * sizes are roughly equal, aiming for about {@code retCount} intervals.
 *
 * <p>The sequence is scanned in fixed steps of
 * {@code sequenceLength / (100 * retCount)} positions (at least 1). An interval
 * is emitted as soon as the chunks covering {@code [start, j]} reach the
 * current target size; the target is then recomputed from the data that is
 * still unassigned.
 *
 * <p>Each emitted interval ends at {@code j + 1} while the next one starts at
 * {@code j}, so consecutive intervals overlap at their boundary.
 *
 * <p>NOTE(review): the scan stops before the end of the sequence and nothing is
 * emitted for the region after the last emitted interval. Confirm with callers
 * whether a trailing interval is expected before changing this.
 *
 * @param bam           reader providing the file header and the BAM index
 * @param sequenceIndex index of the reference sequence in the header
 * @param retCount      desired number of intervals; used as a divisor
 * @return the intervals found, or an empty list when {@code chunksLength}
 *         reports 0 for the whole sequence
 * @throws ArithmeticException if {@code retCount} is 0 and the sequence has data
 */
private static List<QueryInterval> getChunksBalanced(SamReader bam, int sequenceIndex, int retCount) {
    List<QueryInterval> ret = new ArrayList<>();
    BAMIndex index = bam.indexing().getIndex();
    SAMFileHeader header = bam.getFileHeader();
    SAMSequenceRecord s = header.getSequence(sequenceIndex);
    long totalLength = chunksLength(getChunks(index, sequenceIndex, 1, s.getSequenceLength() + 1));
    if (totalLength == 0) {
        return ret;
    }
    // Must be long: sizes are accumulated from long values, and an int
    // accumulator would be silently narrowed by "+=" once it exceeds 2^31 - 1.
    long sofar = 0;
    long targetLength = totalLength / retCount;
    int end = s.getSequenceLength();
    int step = s.getSequenceLength() / (100 * retCount);
    if (step < 1) {
        step = 1;
    }
    int start = 1;
    // j is always < end inside the loop, so no clamping of j is needed here.
    for (int j = step; j < end; j += step) {
        List<Chunk> candidate = getChunks(index, sequenceIndex, start, j);
        long size = chunksLength(candidate);
        if (size < targetLength) {
            // not big enough yet
            continue;
        }
        if (size > targetLength * 2) {
            // too large, search for a good separation point
            // TODO
        }
        // good, emit.
        ret.add(new QueryInterval(sequenceIndex, start, j + 1));
        start = j;
        sofar += size;
        if (ret.size() < retCount) {
            // Spread what is left evenly over the intervals still to be produced.
            targetLength = (totalLength - sofar) / (retCount - ret.size());
        } else {
            // Already produced retCount intervals; fall back to the initial target.
            targetLength = totalLength / retCount;
        }
    }
    return ret;
}
Also used : ArrayList(java.util.ArrayList) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BAMIndex(htsjdk.samtools.BAMIndex) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Chunk(htsjdk.samtools.Chunk)

Example 2 with BAMIndex

use of htsjdk.samtools.BAMIndex in project hmftools by hartwigmedical.

The method sliceFromURLs of the class BamSlicerApplication.

/**
 * Slices a remote BAM into a local BAM file using a downloaded index.
 *
 * <p>The index is fetched with {@code downloadIndex} and marked for deletion on
 * JVM exit. The query intervals and the optional unmapped chunk are resolved
 * from the index, a caching reader is created for the resulting chunks, and the
 * matching records are written to the file named by the {@code OUTPUT} option.
 *
 * <p>The readers and the writer are managed by try-with-resources so they are
 * closed (and the output flushed) even when slicing fails part-way through.
 *
 * @param indexUrl URL of the BAM index
 * @param bamUrl   URL of the BAM file
 * @param cmd      parsed command line; supplies the output path and is passed
 *                 on to the helper methods
 * @throws IOException if a helper or closing one of the readers fails with an I/O error
 */
private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    try (final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
         final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)))) {
        final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
        final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
        final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
        final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
        // Opened last because it needs the chunk list computed above.
        try (final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks)) {
            queryIntervalsAndSpan.ifPresent(pair -> {
                LOGGER.info("Slicing bam on bed regions...");
                final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
                writeToSlice(writer, bedIterator);
                LOGGER.info("Done writing bed slices.");
            });
            unmappedChunk.ifPresent(chunk -> {
                LOGGER.info("Slicing unmapped reads...");
                final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
                writeToSlice(writer, unmappedIterator);
                LOGGER.info("Done writing unmapped reads.");
            });
        }
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) QueryInterval(htsjdk.samtools.QueryInterval) Chunk(htsjdk.samtools.Chunk) SamReader(htsjdk.samtools.SamReader) DiskBasedBAMFileIndex(htsjdk.samtools.DiskBasedBAMFileIndex) SAMRecord(htsjdk.samtools.SAMRecord) BAMIndex(htsjdk.samtools.BAMIndex) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)

Aggregations

BAMIndex (htsjdk.samtools.BAMIndex)2 Chunk (htsjdk.samtools.Chunk)2 QueryInterval (htsjdk.samtools.QueryInterval)2 DiskBasedBAMFileIndex (htsjdk.samtools.DiskBasedBAMFileIndex)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMFileWriter (htsjdk.samtools.SAMFileWriter)1 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 SamReader (htsjdk.samtools.SamReader)1 File (java.io.File)1 ArrayList (java.util.ArrayList)1 Pair (org.apache.commons.lang3.tuple.Pair)1