
Example 1 with RuntimeIOException

Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.

In class SplitIntervals, method onTraversalStart:

@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }
    // In general the sequence dictionary comes from the reference, but taking it from a BAM (-I reads.bam) or a VCF (-F variants.vcf) is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary) : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
Also used : IntStream(java.util.stream.IntStream) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) IntervalListScatterer(org.broadinstitute.hellbender.tools.picard.interval.IntervalListScatterer) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) DecimalFormat(java.text.DecimalFormat) IntervalList(htsjdk.samtools.util.IntervalList) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) IntervalArgumentCollection(org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) GATKTool(org.broadinstitute.hellbender.engine.GATKTool) Interval(htsjdk.samtools.util.Interval) List(java.util.List) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException)
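
The scatter-and-write pattern above can be reproduced outside the GATK tool class with plain htsjdk types. The sketch below is a minimal, self-contained example under stated assumptions: the contig name, length, output directory, and the manual two-way split are made up for illustration, and the hand-rolled split stands in for GATK's internal IntervalListScatterer, which is not an htsjdk class.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.File;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.List;

public class IntervalScatterSketch {
    public static void main(final String[] args) {
        // Hypothetical output directory; the mkdir check mirrors the idiom in SplitIntervals above.
        final File outputDir = new File("scatter-output");
        if (!outputDir.exists() && !outputDir.mkdir()) {
            throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
        }
        // A toy dictionary with one contig; the real tool takes it from the reference, a BAM, or a VCF.
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        dict.addSequence(new SAMSequenceRecord("chr1", 1_000_000));
        // Split chr1 into two halves by hand instead of using GATK's IntervalListScatterer.
        final IntervalList first = new IntervalList(dict);
        first.add(new Interval("chr1", 1, 500_000));
        final IntervalList second = new IntervalList(dict);
        second.add(new Interval("chr1", 500_001, 1_000_000));
        final List<IntervalList> scattered = Arrays.asList(first, second);
        // Name each shard NNNN-scattered.intervals, exactly as SplitIntervals does.
        final DecimalFormat formatter = new DecimalFormat("0000");
        for (int n = 0; n < scattered.size(); n++) {
            scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals"));
        }
    }
}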

Example 2 with RuntimeIOException

Use of htsjdk.samtools.util.RuntimeIOException in project gatk-protected by broadinstitute.

In class SplitIntervals, method onTraversalStart (the gatk-protected copy of the same tool, identical to Example 1):

@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }
    // In general the sequence dictionary comes from the reference, but taking it from a BAM (-I reads.bam) or a VCF (-F variants.vcf) is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary) : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
Also used : IntStream(java.util.stream.IntStream) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) IntervalListScatterer(org.broadinstitute.hellbender.tools.picard.interval.IntervalListScatterer) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) DecimalFormat(java.text.DecimalFormat) IntervalList(htsjdk.samtools.util.IntervalList) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) IntervalArgumentCollection(org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) GATKTool(org.broadinstitute.hellbender.engine.GATKTool) Interval(htsjdk.samtools.util.Interval) List(java.util.List) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException)

Example 3 with RuntimeIOException

Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.

In class GatherVcfs, method gatherWithBlockCopying:

/**
     * Assumes that all inputs and outputs are block compressed VCF files and copies them without decompressing and parsing
     * most of the gzip blocks. Will decompress and parse blocks up to the one containing the end of the header in each file
     * (often the first block) and re-compress any data remaining in that block into a new block in the output file. Subsequent
     * blocks (excluding a terminator block if present) are copied directly from input to output.
     */
private static void gatherWithBlockCopying(final List<File> vcfs, final File output) {
    try (final FileOutputStream out = new FileOutputStream(output)) {
        boolean isFirstFile = true;
        for (final File f : vcfs) {
            log.info("Gathering " + f.getAbsolutePath());
            try (final FileInputStream in = new FileInputStream(f)) {
                // a) It's good to check that the end of the file is valid and b) we need to know if there's a terminator block and not copy it
                final BlockCompressedInputStream.FileTermination term = BlockCompressedInputStream.checkTermination(f);
                if (term == BlockCompressedInputStream.FileTermination.DEFECTIVE)
                    throw new UserException.MalformedFile(f.getAbsolutePath() + " does not have a valid GZIP block at the end of the file.");
                if (!isFirstFile) {
                    final BlockCompressedInputStream blockIn = new BlockCompressedInputStream(in, false);
                    boolean lastByteNewline = true;
                    while (in.available() > 0) {
                        // Read a block - blockIn.available() is guaranteed to return the bytes remaining in the block that has been
                        // read, and since we haven't consumed any yet, that is the block size.
                        final int blockLength = blockIn.available();
                        final byte[] blockContents = new byte[blockLength];
                        final int read = blockIn.read(blockContents);
                        Utils.validate(blockLength > 0 && read == blockLength, "Could not read available bytes from BlockCompressedInputStream.");
                        // Scan forward within the block to see if we can find the end of the header within this block
                        int firstNonHeaderByteIndex = -1;
                        for (int i = 0; i < read; ++i) {
                            final byte b = blockContents[i];
                            final boolean thisByteNewline = (b == '\n' || b == '\r');
                            if (lastByteNewline && !thisByteNewline && b != '#') {
                                // Aha!  Found first byte of non-header data in file!
                                firstNonHeaderByteIndex = i;
                                break;
                            }
                            lastByteNewline = thisByteNewline;
                        }
                        // If we found the end of the header, write the remainder of this block out as a new gzip block and then break out of the while loop
                        if (firstNonHeaderByteIndex >= 0) {
                            final BlockCompressedOutputStream blockOut = new BlockCompressedOutputStream(out, null);
                            blockOut.write(blockContents, firstNonHeaderByteIndex, blockContents.length - firstNonHeaderByteIndex);
                            blockOut.flush();
                            // Don't close blockOut because closing underlying stream would break everything
                            break;
                        }
                    }
                }
                // Copy remainder of input stream into output stream
                final long currentPos = in.getChannel().position();
                final long length = f.length();
                final long skipLast = (term == BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) ? BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length : 0;
                final long bytesToWrite = length - skipLast - currentPos;
                IOUtil.transferByStream(in, out, bytesToWrite);
                isFirstFile = false;
            }
        }
        // And lastly add the Terminator block and close up
        out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
Also used : RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) UserException(org.broadinstitute.hellbender.exceptions.UserException) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream)
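
The termination check that guards the block copy is useful on its own. Below is a minimal sketch, under the assumption of a standalone helper (ensureTerminated is a hypothetical name, not a GATK or htsjdk method), that validates BGZF termination and appends the standard empty-GZIP terminator block when it is missing, rethrowing any IOException as RuntimeIOException just as gatherWithBlockCopying does.

import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.BlockCompressedStreamConstants;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

public class BgzfTerminatorCheck {
    // Hypothetical helper: verify a block-compressed (BGZF) file ends cleanly and
    // append the empty-GZIP terminator block if it is absent.
    public static void ensureTerminated(final File bgzf) {
        try {
            final BlockCompressedInputStream.FileTermination term =
                    BlockCompressedInputStream.checkTermination(bgzf);
            if (term == BlockCompressedInputStream.FileTermination.DEFECTIVE) {
                throw new RuntimeIOException(bgzf.getAbsolutePath() + " does not have a valid GZIP block at the end of the file.");
            }
            if (term == BlockCompressedInputStream.FileTermination.HAS_HEALTHY_LAST_BLOCK) {
                // The last block is healthy but the explicit terminator is missing: append one.
                try (final FileOutputStream out = new FileOutputStream(bgzf, true)) {
                    out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
                }
            }
        } catch (final IOException ioe) {
            // Same convention as the example above: wrap the checked exception in htsjdk's unchecked RuntimeIOException.
            throw new RuntimeIOException(ioe);
        }
    }
}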

Example 4 with RuntimeIOException

Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.

In class FixMateInformation, method doWork:

@Override
protected Object doWork() {
    // Open up the input
    boolean allQueryNameSorted = true;
    final List<SamReader> readers = new ArrayList<>();
    for (final File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
        final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(f);
        readers.add(reader);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.queryname)
            allQueryNameSorted = false;
    }
    // Decide where to write the output: into the specified OUTPUT file, or into a temporary file that will overwrite the INPUT file eventually
    if (OUTPUT != null)
        OUTPUT = OUTPUT.getAbsoluteFile();
    final boolean differentOutputSpecified = OUTPUT != null;
    if (differentOutputSpecified) {
        IOUtil.assertFileIsWritable(OUTPUT);
    } else if (INPUT.size() != 1) {
        throw new UserException("Must specify either an explicit OUTPUT file or a single INPUT file to be overridden.");
    } else {
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File dir = soleInput.getParentFile().getAbsoluteFile();
        try {
            IOUtil.assertFileIsWritable(soleInput);
            IOUtil.assertDirectoryIsWritable(dir);
            OUTPUT = File.createTempFile(soleInput.getName() + ".being_fixed.", BamFileIoUtils.BAM_FILE_EXTENSION, dir);
        } catch (final IOException ioe) {
            throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath());
        }
    }
    // Get the input records merged and sorted by query name as needed
    final PeekableIterator<SAMRecord> iterator;
    final SAMFileHeader header;
    {
        // Deal with merging if necessary
        final Iterator<SAMRecord> tmp;
        if (INPUT.size() > 1) {
            final List<SAMFileHeader> headers = new ArrayList<>(readers.size());
            for (final SamReader reader : readers) {
                headers.add(reader.getFileHeader());
            }
            final SAMFileHeader.SortOrder sortOrder = (allQueryNameSorted ? SAMFileHeader.SortOrder.queryname : SAMFileHeader.SortOrder.unsorted);
            final SamFileHeaderMerger merger = new SamFileHeaderMerger(sortOrder, headers, false);
            tmp = new MergingSamRecordIterator(merger, readers, false);
            header = merger.getMergedHeader();
        } else {
            tmp = readers.get(0).iterator();
            header = readers.get(0).getFileHeader();
        }
        // And now deal with re-sorting if necessary
        if (ASSUME_SORTED || allQueryNameSorted) {
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<>(tmp), ADD_MATE_CIGAR);
        } else {
            logger.info("Sorting input into queryname order.");
            final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
            while (tmp.hasNext()) {
                sorter.add(tmp.next());
            }
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(sorter.iterator()) {

                @Override
                public void close() {
                    super.close();
                    sorter.cleanup();
                }
            }, ADD_MATE_CIGAR);
            logger.info("Sorting by queryname complete.");
        }
        // Deal with the various sorting complications
        final SAMFileHeader.SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER;
        logger.info("Output will be sorted by " + outputSortOrder);
        header.setSortOrder(outputSortOrder);
    }
    if (CREATE_INDEX && header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("Can't CREATE_INDEX unless sort order is coordinate");
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, header.getSortOrder() == SAMFileHeader.SortOrder.queryname)) {
        logger.info("Traversing query name sorted records and fixing up mate pair information.");
        final ProgressLogger progress = new ProgressLogger(logger);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        iterator.close();
        if (header.getSortOrder() == SAMFileHeader.SortOrder.queryname) {
            logger.info("Closing output file.");
        } else {
            logger.info("Finished processing reads; re-sorting output file.");
        }
    }
    // TODO throw appropriate exceptions instead of writing to log.error and returning
    if (!differentOutputSpecified) {
        logger.info("Replacing input file with fixed file.");
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File old = new File(soleInput.getParentFile(), soleInput.getName() + ".old");
        if (!old.exists() && soleInput.renameTo(old)) {
            if (OUTPUT.renameTo(soleInput)) {
                if (!old.delete()) {
                    logger.warn("Could not delete old file: " + old.getAbsolutePath());
                    return null;
                }
                if (CREATE_INDEX) {
                    final File newIndex = new File(OUTPUT.getParent(), OUTPUT.getName().substring(0, OUTPUT.getName().length() - 4) + ".bai");
                    final File oldIndex = new File(soleInput.getParent(), soleInput.getName().substring(0, soleInput.getName().length() - 4) + ".bai");
                    if (!newIndex.renameTo(oldIndex)) {
                        logger.warn("Could not overwrite index file: " + oldIndex.getAbsolutePath());
                    }
                }
            } else {
                logger.error("Could not move new file to " + soleInput.getAbsolutePath());
                logger.error("Input file preserved as: " + old.getAbsolutePath());
                logger.error("New file preserved as: " + OUTPUT.getAbsolutePath());
                return null;
            }
        } else {
            logger.error("Could not move input file out of the way: " + soleInput.getAbsolutePath());
            if (!OUTPUT.delete()) {
                logger.error("Could not delete temporary file: " + OUTPUT.getAbsolutePath());
            }
            return null;
        }
    }
    CloserUtil.close(readers);
    return null;
}
Also used : MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) ArrayList(java.util.ArrayList) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) PeekableIterator(htsjdk.samtools.util.PeekableIterator) Iterator(java.util.Iterator) List(java.util.List) UserException(org.broadinstitute.hellbender.exceptions.UserException) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) IOException(java.io.IOException) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMRecord(htsjdk.samtools.SAMRecord) SAMRecordQueryNameComparator(htsjdk.samtools.SAMRecordQueryNameComparator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)
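
The temp-file step of doWork (write to a sibling temporary file, then later rename it over the input) can be isolated as a small helper. This is a sketch only; createSiblingTempFile is a hypothetical name, and unlike the quoted code it passes the underlying IOException to RuntimeIOException as the cause rather than dropping it.

import htsjdk.samtools.util.RuntimeIOException;

import java.io.File;
import java.io.IOException;

public class InPlaceRewriteSketch {
    // Hypothetical helper: create a temporary file next to the input so the eventual
    // rename stays on the same filesystem, as FixMateInformation does when no OUTPUT is given.
    public static File createSiblingTempFile(final File input, final String extension) {
        final File dir = input.getAbsoluteFile().getParentFile();
        try {
            return File.createTempFile(input.getName() + ".being_fixed.", extension, dir);
        } catch (final IOException ioe) {
            // Keep the cause so callers can still see why temp-file creation failed.
            throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath(), ioe);
        }
    }
}

A caller would write the fixed records to the returned file and, on success, rename it over the original input, mirroring the rename logic at the end of doWork.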

Example 5 with RuntimeIOException

Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.

In class SparkUtils, method writeBAMHeaderToStream:

/**
     * Private helper method for {@link #convertHeaderlessHadoopBamShardToBam} that takes a SAMFileHeader and writes it
     * to the provided `OutputStream`, correctly encoded for the BAM format and preceded by the BAM magic bytes.
     *
     * @param samFileHeader SAM header to write
     * @param outputStream stream to write the SAM header to
     */
private static void writeBAMHeaderToStream(final SAMFileHeader samFileHeader, final OutputStream outputStream) {
    final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
    final BinaryCodec outputBinaryCodec = new BinaryCodec(new DataOutputStream(blockCompressedOutputStream));
    final String headerString;
    final Writer stringWriter = new StringWriter();
    new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true);
    headerString = stringWriter.toString();
    outputBinaryCodec.writeBytes(ReadUtils.BAM_MAGIC);
    // calculate and write the length of the SAM file header text and the header text
    outputBinaryCodec.writeString(headerString, true, false);
    // write the sequence dictionary in binary form; this is redundant with the text header
    outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size());
    for (final SAMSequenceRecord sequenceRecord : samFileHeader.getSequenceDictionary().getSequences()) {
        outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
        outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
    }
    try {
        blockCompressedOutputStream.flush();
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
Also used : BinaryCodec(htsjdk.samtools.util.BinaryCodec) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord)
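
Because RuntimeIOException is unchecked, callers of methods like writeBAMHeaderToStream do not have to declare or catch it, but the original IOException remains reachable through the cause chain. A minimal sketch of that caller-side handling follows; doSomeHtsjdkIo is a stand-in for real work, not an actual GATK or htsjdk method.

import htsjdk.samtools.util.RuntimeIOException;

import java.io.IOException;

public class RuntimeIOExceptionHandling {
    public static void main(final String[] args) {
        try {
            doSomeHtsjdkIo();
        } catch (final RuntimeIOException e) {
            // htsjdk wraps checked I/O failures in this unchecked type; the IOException, if any, is the cause.
            final Throwable cause = e.getCause();
            if (cause instanceof IOException) {
                System.err.println("I/O failure: " + cause.getMessage());
            } else {
                System.err.println("I/O failure: " + e.getMessage());
            }
        }
    }

    // Stand-in for code that throws the same exception type as the examples on this page.
    private static void doSomeHtsjdkIo() {
        throw new RuntimeIOException(new IOException("disk full (simulated)"));
    }
}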

Aggregations

RuntimeIOException (htsjdk.samtools.util.RuntimeIOException): 21 usages
IOException (java.io.IOException): 11 usages
File (java.io.File): 6 usages
List (java.util.List): 5 usages
VariantContext (htsjdk.variant.variantcontext.VariantContext): 4 usages
JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException): 3 usages
SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 3 usages
VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder): 3 usages
VCFHeader (htsjdk.variant.vcf.VCFHeader): 3 usages
VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine): 3 usages
InputStream (java.io.InputStream): 3 usages
ArrayList (java.util.ArrayList): 3 usages
Parameter (com.beust.jcommander.Parameter): 2 usages
TeeInputStream (com.github.lindenb.jvarkit.io.TeeInputStream): 2 usages
Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher): 2 usages
Program (com.github.lindenb.jvarkit.util.jcommander.Program): 2 usages
Logger (com.github.lindenb.jvarkit.util.log.Logger): 2 usages
SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress): 2 usages
VCFUtils (com.github.lindenb.jvarkit.util.vcf.VCFUtils): 2 usages
VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator): 2 usages