Search in sources:

Example 1 with Chunk

use of htsjdk.samtools.Chunk in project gatk by broadinstitute.

the class NioBam method getChunksBalanced.

/**
 * Splits one reference sequence into query intervals whose indexed chunk
 * lengths are roughly equal.
 *
 * <p>The sequence is walked in fixed steps of
 * {@code sequenceLength / (100 * retCount)} positions (at least 1). Each time
 * the chunks covering {@code [start, j]} reach the current target length, an
 * interval is emitted and the target is recomputed from the length that is
 * still unassigned.
 *
 * <p>NOTE(review): positions after the last emitted interval are not covered
 * by any returned interval, and consecutive intervals share boundary
 * positions ({@code start = j} while the interval ends at {@code j + 1}).
 * Confirm with the callers whether that is intended.
 *
 * @param bam           reader providing the index and the file header
 * @param sequenceIndex index of the reference sequence to split
 * @param retCount      desired number of intervals; used as a divisor
 * @return the emitted intervals, or an empty list when the total chunk
 *         length for the sequence is zero
 */
private static List<QueryInterval> getChunksBalanced(SamReader bam, int sequenceIndex, int retCount) {
    List<QueryInterval> ret = new ArrayList<>();
    BAMIndex index = bam.indexing().getIndex();
    SAMFileHeader header = bam.getFileHeader();
    SAMSequenceRecord s = header.getSequence(sequenceIndex);
    long totalLength = chunksLength(getChunks(index, sequenceIndex, 1, s.getSequenceLength() + 1));
    if (totalLength == 0) {
        return ret;
    }
    // Must be a long: sizes are longs, and "int += long" narrows silently,
    // which would corrupt the remaining-length computation below.
    long sofar = 0;
    long targetLength = totalLength / retCount;
    int end = s.getSequenceLength();
    int step = s.getSequenceLength() / (100 * retCount);
    if (step < 1) {
        step = 1;
    }
    int start = 1;
    // The loop guard already keeps j below end, so no clamping is needed.
    for (int j = step; j < end; j += step) {
        List<Chunk> candidate = getChunks(index, sequenceIndex, start, j);
        long size = chunksLength(candidate);
        if (size < targetLength) {
            // not big enough yet
            continue;
        }
        // TODO: when size > targetLength * 2 the candidate is too large;
        // search for a better separation point instead of emitting as-is.

        // good, emit.
        ret.add(new QueryInterval(sequenceIndex, start, j + 1));
        start = j;
        sofar += size;
        if (ret.size() < retCount) {
            // Spread what is left evenly over the intervals still to emit.
            targetLength = (totalLength - sofar) / (retCount - ret.size());
        } else {
            targetLength = totalLength / retCount;
        }
    }
    return ret;
}
Also used : ArrayList(java.util.ArrayList) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BAMIndex(htsjdk.samtools.BAMIndex) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Chunk(htsjdk.samtools.Chunk)

Example 2 with Chunk

use of htsjdk.samtools.Chunk in project hmftools by hartwigmedical.

the class BamSlicerApplication method sliceFromURLs.

/**
 * Slices a remote BAM, addressed by URL, into the output file named by the
 * {@code OUTPUT} command line option.
 *
 * <p>The index is downloaded to a local file that is marked for deletion on
 * JVM exit. The query intervals and the optional unmapped chunk are resolved
 * from that index, a caching reader is created for the resulting chunks, and
 * the matching records are written to the output BAM.
 *
 * <p>Both readers and the writer are managed by try-with-resources, so they
 * are closed even when slicing fails part-way. They are closed in reverse
 * order of opening: caching reader, then writer, then reader.
 *
 * @param indexUrl location of the BAM index
 * @param bamUrl   location of the BAM file
 * @param cmd      parsed command line; supplies the output path and is passed
 *                 on to the helper methods
 * @throws IOException if a helper or closing a reader fails with an I/O error
 */
private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    try (final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
         final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)))) {
        final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
        final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
        final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
        final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
        try (final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks)) {
            queryIntervalsAndSpan.ifPresent(pair -> {
                LOGGER.info("Slicing bam on bed regions...");
                final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
                writeToSlice(writer, bedIterator);
                LOGGER.info("Done writing bed slices.");
            });
            unmappedChunk.ifPresent(chunk -> {
                LOGGER.info("Slicing unmapped reads...");
                final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
                writeToSlice(writer, unmappedIterator);
                LOGGER.info("Done writing unmapped reads.");
            });
        }
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) QueryInterval(htsjdk.samtools.QueryInterval) Chunk(htsjdk.samtools.Chunk) SamReader(htsjdk.samtools.SamReader) DiskBasedBAMFileIndex(htsjdk.samtools.DiskBasedBAMFileIndex) SAMRecord(htsjdk.samtools.SAMRecord) BAMIndex(htsjdk.samtools.BAMIndex) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)

Example 3 with Chunk

use of htsjdk.samtools.Chunk in project hmftools by hartwigmedical.

the class BamSlicerApplication method expandChunks.

/**
 * Builds a new list in which every chunk keeps its start but has its end
 * moved forward by one maximum compressed block size.
 *
 * <p>For each input chunk, the block address of its end is taken from
 * {@code BlockCompressedFilePointerUtil.getBlockAddress}, increased by
 * {@code BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE}, capped at
 * {@code MAX_BLOCK_ADDRESS} and shifted left by 16 bits to form the new end.
 * The input list and its chunks are not modified.
 *
 * @param chunks chunks to expand
 * @return a new list with one expanded chunk per input chunk, in input order
 */
@NotNull
private static List<Chunk> expandChunks(@NotNull final List<Chunk> chunks) {
    final List<Chunk> expanded = Lists.newArrayList();
    for (final Chunk source : chunks) {
        final long endBlock = BlockCompressedFilePointerUtil.getBlockAddress(source.getChunkEnd());
        // Never move the end past the largest permitted block address.
        final long cappedEndBlock = Math.min(endBlock + BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE, MAX_BLOCK_ADDRESS);
        // Presumably rebuilds a virtual file pointer with a zero in-block
        // offset (block address in the upper bits) - confirm against htsjdk.
        expanded.add(new Chunk(source.getChunkStart(), cappedEndBlock << 16));
    }
    return expanded;
}
Also used : Chunk(htsjdk.samtools.Chunk) NotNull(org.jetbrains.annotations.NotNull)

Example 4 with Chunk

use of htsjdk.samtools.Chunk in project jvarkit by lindenb.

the class Biostar172515 method doWork.

/**
 * Dumps the content of the BAM index of each input file as an XML document.
 *
 * <p>The document has a {@code bai-list} root with one {@code bam} element
 * per input. For an input with a browseable index, each reference sequence of
 * the dictionary gets a {@code reference} element holding one {@code bin}
 * element per overlapping bin, which in turn lists its {@code chunk}s. Inputs
 * without an index, or without a browseable index, get an otherwise empty
 * {@code bam} element.
 *
 * @param inputFiles input paths, expanded through {@code IOUtils.unrollFiles}
 * @return 0 on success, -1 if any exception was thrown (it is logged)
 */
@Override
public int doWork(final List<String> inputFiles) {
    final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, Boolean.TRUE).validationStringency(ValidationStringency.LENIENT);
    OutputStream stream = null;
    SamReader samReader = null;
    Set<String> args = IOUtils.unrollFiles(inputFiles);
    try {
        stream = super.openFileOrStdoutAsStream(this.outputFile);
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        // Name the encoding explicitly so the bytes written agree with the
        // UTF-8 declaration emitted by writeStartDocument below.
        this.w = xof.createXMLStreamWriter(stream, "UTF-8");
        this.w.writeStartDocument("UTF-8", "1.0");
        this.w.writeStartElement("bai-list");
        for (final String filename : args) {
            this.w.writeStartElement("bam");
            this.w.writeAttribute("bam", filename);
            samReader = samReaderFactory.open(SamInputResource.of(filename));
            this.w.writeAttribute("has-index", String.valueOf(samReader.hasIndex()));
            if (!samReader.hasIndex()) {
                // Nothing to dump: close the <bam> element and move on.
                this.w.writeEndElement();
                samReader.close();
                continue;
            }
            final SamReader.Indexing indexing = samReader.indexing();
            if (!indexing.hasBrowseableIndex()) {
                this.w.writeComment("no browseable index");
                this.w.writeEndElement();
                samReader.close();
                continue;
            }
            final SAMSequenceDictionary dict = samReader.getFileHeader().getSequenceDictionary();
            this.w.writeAttribute("n_ref", String.valueOf(dict.size()));
            final BrowseableBAMIndex baiFile;
            try {
                baiFile = indexing.getBrowseableIndex();
            } catch (Exception err) {
                // Treated the same as an index that reports itself as not browseable.
                this.w.writeComment("no browseable index");
                this.w.writeEndElement();
                samReader.close();
                continue;
            }
            for (int tid = 0; tid < dict.size(); ++tid) {
                final SAMSequenceRecord ssr = dict.getSequence(tid);
                final BAMIndexMetaData baiMetaData = baiFile.getMetaData(tid);
                this.w.writeStartElement("reference");
                this.w.writeAttribute("ref-id", String.valueOf(tid));
                this.w.writeAttribute("ref-name", ssr.getSequenceName());
                this.w.writeAttribute("ref-length", String.valueOf(ssr.getSequenceLength()));
                this.w.writeAttribute("n_aligned", String.valueOf(baiMetaData.getAlignedRecordCount()));
                BinList binList = baiFile.getBinsOverlapping(tid, 1, ssr.getSequenceLength());
                // First pass only counts the bins, because the count is written
                // as an attribute before any child <bin> element.
                int n_bin = 0;
                for (@SuppressWarnings("unused") final Bin binItem : binList) {
                    n_bin++;
                }
                this.w.writeAttribute("n_bin", String.valueOf(n_bin));
                this.w.writeAttribute("n_no_coor", String.valueOf(baiMetaData.getUnalignedRecordCount()));
                for (final Bin binItem : binList) {
                    this.w.writeStartElement("bin");
                    this.w.writeAttribute("first-locus", String.valueOf(baiFile.getFirstLocusInBin(binItem)));
                    this.w.writeAttribute("last-locus", String.valueOf(baiFile.getLastLocusInBin(binItem)));
                    this.w.writeAttribute("level", String.valueOf(baiFile.getLevelForBin(binItem)));
                    final BAMFileSpan span = baiFile.getSpanOverlapping(binItem);
                    this.w.writeAttribute("first-offset", String.valueOf(span.getFirstOffset()));
                    final List<Chunk> chunks = span.getChunks();
                    this.w.writeAttribute("n_chunk", String.valueOf(chunks.size()));
                    for (final Chunk chunk : chunks) {
                        this.w.writeEmptyElement("chunk");
                        this.w.writeAttribute("chunk_beg", String.valueOf(chunk.getChunkStart()));
                        this.w.writeAttribute("chunk_end", String.valueOf(chunk.getChunkEnd()));
                    }
                    // closes <bin>
                    this.w.writeEndElement();
                }
                // closes <reference>
                this.w.writeEndElement();
            }
            // closes <bam>
            this.w.writeEndElement();
            samReader.close();
        }
        // closes <bai-list>
        this.w.writeEndElement();
        this.w.flush();
        this.w.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // Covers whatever is still open after a failure part-way through.
        CloserUtil.close(this.w);
        CloserUtil.close(stream);
        CloserUtil.close(samReader);
        this.w = null;
    }
}
Also used : BrowseableBAMIndex(htsjdk.samtools.BrowseableBAMIndex) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) BAMIndexMetaData(htsjdk.samtools.BAMIndexMetaData) Bin(htsjdk.samtools.Bin) OutputStream(java.io.OutputStream) BAMFileSpan(htsjdk.samtools.BAMFileSpan) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Chunk(htsjdk.samtools.Chunk) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) BinList(htsjdk.samtools.BinList)

Aggregations

Chunk (htsjdk.samtools.Chunk): 4, BAMIndex (htsjdk.samtools.BAMIndex): 2, QueryInterval (htsjdk.samtools.QueryInterval): 2, SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord): 2, SamReader (htsjdk.samtools.SamReader): 2, BAMFileSpan (htsjdk.samtools.BAMFileSpan): 1, BAMIndexMetaData (htsjdk.samtools.BAMIndexMetaData): 1, Bin (htsjdk.samtools.Bin): 1, BinList (htsjdk.samtools.BinList): 1, BrowseableBAMIndex (htsjdk.samtools.BrowseableBAMIndex): 1, DiskBasedBAMFileIndex (htsjdk.samtools.DiskBasedBAMFileIndex): 1, SAMFileHeader (htsjdk.samtools.SAMFileHeader): 1, SAMFileWriter (htsjdk.samtools.SAMFileWriter): 1, SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory): 1, SAMRecord (htsjdk.samtools.SAMRecord): 1, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 1, SamReaderFactory (htsjdk.samtools.SamReaderFactory): 1, File (java.io.File): 1, OutputStream (java.io.OutputStream): 1, ArrayList (java.util.ArrayList): 1