Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.
The class SplitIntervals, method onTraversalStart.
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }

    // In general the sequence dictionary will come from the reference, but taking it
    // from a bam (-I reads.bam) or a vcf (-F variants.vcf) is also supported.
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();

    final List<SimpleInterval> intervals = hasIntervals()
            ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);

    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);

    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);

    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size())
            .forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
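The zero-padded DecimalFormat("0000") prefix is what lets downstream scatter steps pick the shards up in order. A minimal sketch of collecting them (the directory name and the -L usage are assumptions for illustration, not part of the tool):

import java.io.File;
import java.util.Arrays;

public class ListScatteredShards {
    public static void main(final String[] args) {
        // Hypothetical directory that SplitIntervals wrote its shards into.
        final File outputDir = new File("scatter");
        // SplitIntervals names its outputs 0000-scattered.intervals, 0001-scattered.intervals, ...
        final File[] shards = outputDir.listFiles((dir, name) -> name.endsWith("-scattered.intervals"));
        if (shards == null) {
            throw new IllegalStateException("Not a readable directory: " + outputDir);
        }
        // Zero-padded names make lexicographic order equal shard order.
        Arrays.sort(shards);
        for (final File shard : shards) {
            System.out.println("would launch one scattered job with -L " + shard.getPath());
        }
    }
}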
Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.
The class GatherVcfs, method gatherWithBlockCopying.
/**
 * Assumes that all inputs and outputs are block compressed VCF files and copies them without decompressing and parsing
 * most of the gzip blocks. Will decompress and parse blocks up to the one containing the end of the header in each file
 * (often the first block) and re-compress any data remaining in that block into a new block in the output file. Subsequent
 * blocks (excluding a terminator block if present) are copied directly from input to output.
 */
private static void gatherWithBlockCopying(final List<File> vcfs, final File output) {
    try (final FileOutputStream out = new FileOutputStream(output)) {
        boolean isFirstFile = true;
        for (final File f : vcfs) {
            log.info("Gathering " + f.getAbsolutePath());
            try (final FileInputStream in = new FileInputStream(f)) {
                // a) It's good to check that the end of the file is valid, and b) we need to know
                // whether there's a terminator block so that we don't copy it.
                final BlockCompressedInputStream.FileTermination term = BlockCompressedInputStream.checkTermination(f);
                if (term == BlockCompressedInputStream.FileTermination.DEFECTIVE) {
                    throw new UserException.MalformedFile(f.getAbsolutePath() + " does not have a valid GZIP block at the end of the file.");
                }

                if (!isFirstFile) {
                    final BlockCompressedInputStream blockIn = new BlockCompressedInputStream(in, false);
                    boolean lastByteNewline = true;
                    while (in.available() > 0) {
                        // Read a block - blockIn.available() is guaranteed to return the bytes remaining in the block that has been
                        // read, and since we haven't consumed any yet, that is the block size.
                        final int blockLength = blockIn.available();
                        final byte[] blockContents = new byte[blockLength];
                        final int read = blockIn.read(blockContents);
                        Utils.validate(blockLength > 0 && read == blockLength, "Could not read available bytes from BlockCompressedInputStream.");

                        // Scan forward within the block to see if we can find the end of the header within this block
                        int firstNonHeaderByteIndex = -1;
                        for (int i = 0; i < read; ++i) {
                            final byte b = blockContents[i];
                            final boolean thisByteNewline = (b == '\n' || b == '\r');
                            if (lastByteNewline && !thisByteNewline && b != '#') {
                                // Aha! Found the first byte of non-header data in the file!
                                firstNonHeaderByteIndex = i;
                                break;
                            }
                            lastByteNewline = thisByteNewline;
                        }

                        // If we found the end of the header, write the remainder of this block out as a
                        // new gzip block and then break out of the while loop
                        if (firstNonHeaderByteIndex >= 0) {
                            final BlockCompressedOutputStream blockOut = new BlockCompressedOutputStream(out, null);
                            blockOut.write(blockContents, firstNonHeaderByteIndex, blockContents.length - firstNonHeaderByteIndex);
                            blockOut.flush();
                            // Don't close blockOut because closing the underlying stream would break everything
                            break;
                        }
                    }
                }

                // Copy the remainder of the input stream into the output stream
                final long currentPos = in.getChannel().position();
                final long length = f.length();
                final long skipLast = (term == BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK)
                        ? BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length
                        : 0;
                final long bytesToWrite = length - skipLast - currentPos;

                IOUtil.transferByStream(in, out, bytesToWrite);
                isFirstFile = false;
            }
        }

        // And lastly add the terminator block and close up
        out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
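The pre-flight termination check used above is handy on its own. A minimal sketch, assuming htsjdk on the classpath and hypothetical file paths passed as arguments, that reports each file's BGZF termination state without decompressing it:

import htsjdk.samtools.util.BlockCompressedInputStream;
import java.io.File;
import java.io.IOException;

public class CheckBgzfTermination {
    public static void main(final String[] args) throws IOException {
        for (final String path : args) {
            final File f = new File(path);
            // checkTermination reads only the tail of the file and reports
            // HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, or DEFECTIVE.
            final BlockCompressedInputStream.FileTermination term =
                    BlockCompressedInputStream.checkTermination(f);
            System.out.println(f.getName() + ": " + term);
        }
    }
}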
Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.
The class FixMateInformation, method doWork.
@Override
protected Object doWork() {
    // Open up the inputs
    boolean allQueryNameSorted = true;
    final List<SamReader> readers = new ArrayList<>();
    for (final File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
        final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(f);
        readers.add(reader);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.queryname) {
            allQueryNameSorted = false;
        }
    }

    // Decide where to write the fixed file: into the specified OUTPUT file,
    // or into a temporary file that will overwrite the INPUT file eventually
    if (OUTPUT != null) {
        OUTPUT = OUTPUT.getAbsoluteFile();
    }
    final boolean differentOutputSpecified = OUTPUT != null;
    if (differentOutputSpecified) {
        IOUtil.assertFileIsWritable(OUTPUT);
    } else if (INPUT.size() != 1) {
        throw new UserException("Must specify either an explicit OUTPUT file or a single INPUT file to be overridden.");
    } else {
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File dir = soleInput.getParentFile().getAbsoluteFile();
        try {
            IOUtil.assertFileIsWritable(soleInput);
            IOUtil.assertDirectoryIsWritable(dir);
            OUTPUT = File.createTempFile(soleInput.getName() + ".being_fixed.", BamFileIoUtils.BAM_FILE_EXTENSION, dir);
        } catch (final IOException ioe) {
            throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath());
        }
    }

    // Get the input records merged and sorted by query name as needed
    final PeekableIterator<SAMRecord> iterator;
    final SAMFileHeader header;
    {
        // Deal with merging if necessary
        final Iterator<SAMRecord> tmp;
        if (INPUT.size() > 1) {
            final List<SAMFileHeader> headers = new ArrayList<>(readers.size());
            for (final SamReader reader : readers) {
                headers.add(reader.getFileHeader());
            }
            final SAMFileHeader.SortOrder sortOrder = (allQueryNameSorted ? SAMFileHeader.SortOrder.queryname : SAMFileHeader.SortOrder.unsorted);
            final SamFileHeaderMerger merger = new SamFileHeaderMerger(sortOrder, headers, false);
            tmp = new MergingSamRecordIterator(merger, readers, false);
            header = merger.getMergedHeader();
        } else {
            tmp = readers.get(0).iterator();
            header = readers.get(0).getFileHeader();
        }

        // And now deal with re-sorting if necessary
        if (ASSUME_SORTED || allQueryNameSorted) {
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<>(tmp), ADD_MATE_CIGAR);
        } else {
            logger.info("Sorting input into queryname order.");
            final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
            while (tmp.hasNext()) {
                sorter.add(tmp.next());
            }
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(sorter.iterator()) {
                @Override
                public void close() {
                    super.close();
                    sorter.cleanup();
                }
            }, ADD_MATE_CIGAR);
            logger.info("Sorting by queryname complete.");
        }

        // Deal with the various sorting complications
        final SAMFileHeader.SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER;
        logger.info("Output will be sorted by " + outputSortOrder);
        header.setSortOrder(outputSortOrder);
    }

    if (CREATE_INDEX && header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, header.getSortOrder() == SAMFileHeader.SortOrder.queryname)) {
        logger.info("Traversing query name sorted records and fixing up mate pair information.");
        final ProgressLogger progress = new ProgressLogger(logger);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        iterator.close();
        if (header.getSortOrder() == SAMFileHeader.SortOrder.queryname) {
            logger.info("Closing output file.");
        } else {
            logger.info("Finished processing reads; re-sorting output file.");
        }
    }

    // TODO throw appropriate exceptions instead of writing to log.error and returning
    if (!differentOutputSpecified) {
        logger.info("Replacing input file with fixed file.");
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File old = new File(soleInput.getParentFile(), soleInput.getName() + ".old");
        if (!old.exists() && soleInput.renameTo(old)) {
            if (OUTPUT.renameTo(soleInput)) {
                if (!old.delete()) {
                    logger.warn("Could not delete old file: " + old.getAbsolutePath());
                    return null;
                }
                if (CREATE_INDEX) {
                    final File newIndex = new File(OUTPUT.getParent(), OUTPUT.getName().substring(0, OUTPUT.getName().length() - 4) + ".bai");
                    final File oldIndex = new File(soleInput.getParent(), soleInput.getName().substring(0, soleInput.getName().length() - 4) + ".bai");
                    if (!newIndex.renameTo(oldIndex)) {
                        logger.warn("Could not overwrite index file: " + oldIndex.getAbsolutePath());
                    }
                }
            } else {
                logger.error("Could not move new file to " + soleInput.getAbsolutePath());
                logger.error("Input file preserved as: " + old.getAbsolutePath());
                logger.error("New file preserved as: " + OUTPUT.getAbsolutePath());
                return null;
            }
        } else {
            logger.error("Could not move input file out of the way: " + soleInput.getAbsolutePath());
            if (!OUTPUT.delete()) {
                logger.error("Could not delete temporary file: " + OUTPUT.getAbsolutePath());
            }
            return null;
        }
    }

    CloserUtil.close(readers);
    return null;
}
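The replace-in-place dance at the end (rename INPUT to .old, move the temp file in, delete the backup) is a reusable pattern in its own right. A minimal sketch of just that pattern, with hypothetical names and plain IOExceptions in place of the tool's log-and-return handling:

import java.io.File;
import java.io.IOException;

public class ReplaceInPlace {
    /** Moves fixed over original, keeping a .old backup until the swap succeeds. */
    static void replace(final File original, final File fixed) throws IOException {
        final File backup = new File(original.getParentFile(), original.getName() + ".old");
        if (backup.exists() || !original.renameTo(backup)) {
            throw new IOException("Could not move input out of the way: " + original);
        }
        if (!fixed.renameTo(original)) {
            throw new IOException("Could not move fixed file into place; input preserved as: " + backup);
        }
        if (!backup.delete()) {
            // Non-fatal: the swap already succeeded, only the backup lingers.
            System.err.println("Warning: could not delete backup file: " + backup);
        }
    }
}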
Use of htsjdk.samtools.util.RuntimeIOException in project gatk by broadinstitute.
The class SparkUtils, method writeBAMHeaderToStream.
/**
 * Private helper method for {@link #convertHeaderlessHadoopBamShardToBam} that takes a SAMFileHeader and writes it
 * to the provided `OutputStream`, correctly encoded for the BAM format and preceded by the BAM magic bytes.
 *
 * @param samFileHeader SAM header to write
 * @param outputStream stream to write the SAM header to
 */
private static void writeBAMHeaderToStream(final SAMFileHeader samFileHeader, final OutputStream outputStream) {
    final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
    final BinaryCodec outputBinaryCodec = new BinaryCodec(new DataOutputStream(blockCompressedOutputStream));

    final String headerString;
    final Writer stringWriter = new StringWriter();
    new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true);
    headerString = stringWriter.toString();

    outputBinaryCodec.writeBytes(ReadUtils.BAM_MAGIC);

    // calculate and write the length of the SAM file header text, then the header text itself
    outputBinaryCodec.writeString(headerString, true, false);

    // write the sequence dictionary in binary form (this is redundant with the text header)
    outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size());
    for (final SAMSequenceRecord sequenceRecord : samFileHeader.getSequenceDictionary().getSequences()) {
        outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
        outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
    }

    try {
        blockCompressedOutputStream.flush();
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
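For comparison, a hedged sketch of producing the same on-disk layout (BAM magic, SAM-text header, binary sequence dictionary, BGZF terminator) through htsjdk's public writer instead of hand-rolled codecs; the contig and output file name here are made up for illustration:

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import java.io.File;
import java.util.Collections;

public class WriteEmptyBam {
    public static void main(final String[] args) {
        // Hypothetical one-contig dictionary, just enough to produce a parseable header.
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(new SAMSequenceDictionary(
                Collections.singletonList(new SAMSequenceRecord("chr1", 1000))));
        // The public writer emits the same prefix the helper above assembles by hand,
        // then closes with a BGZF terminator block, yielding a valid record-free BAM.
        try (final SAMFileWriter writer = new SAMFileWriterFactory()
                .makeBAMWriter(header, true, new File("empty.bam"))) {
            // no records added on purpose
        }
    }
}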