Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
From class FastqToSam, method doUnpaired:
/** Creates a simple SAM file from a single fastq file. */
protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    for (; freader.hasNext(); readCount++) {
        final FastqRecord frec = freader.next();
        final SAMRecord srec = createSamRecord(writer.getFileHeader(), getReadName(frec.getReadName(), false), frec, false);
        srec.setReadPairedFlag(false);
        writer.addAlignment(srec);
        progress.record(srec);
    }
    writer.close();
    return readCount;
}
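Before the remaining examples, a minimal sketch of the shared ProgressLogger pattern may help: the logger is wrapped once (optionally with a record interval and a verb/noun for the log message), and record(...) is then called once per item, either with the SAMRecord itself or with an explicit contig and position. The wrapper class below and the log4j Logger type are illustrative assumptions; only the constructors and record(...) overloads that appear in the snippets on this page are relied upon.

import htsjdk.samtools.SAMRecord;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.utils.runtime.ProgressLogger;

// Illustrative sketch only; the class name and the logger type are assumptions.
public final class ProgressLoggerSketch {
    private static final Logger logger = LogManager.getLogger(ProgressLoggerSketch.class);

    static void logReads(final Iterable<SAMRecord> reads) {
        // Report every 10,000 records, phrased as "processed ... reads" in the log output.
        final ProgressLogger progress = new ProgressLogger(logger, 10000, "processed", "reads");
        for (final SAMRecord read : reads) {
            progress.record(read); // contig and position are taken from the record itself
        }
    }

    static void logSites(final Iterable<VariantContext> sites) {
        // Default verb/noun, reporting every 10,000 records.
        final ProgressLogger progress = new ProgressLogger(logger, 10000);
        for (final VariantContext site : sites) {
            progress.record(site.getContig(), site.getStart()); // explicit contig and position
        }
    }
}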
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
From class FixMateInformation, method doWork:
@Override
protected Object doWork() {
    // Open up the input
    boolean allQueryNameSorted = true;
    final List<SamReader> readers = new ArrayList<>();
    for (final File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
        final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(f);
        readers.add(reader);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.queryname)
            allQueryNameSorted = false;
    }
    // Decide where the fixed records go: into the explicitly specified OUTPUT file,
    // or into a temporary file that will overwrite the INPUT file eventually
    if (OUTPUT != null)
        OUTPUT = OUTPUT.getAbsoluteFile();
    final boolean differentOutputSpecified = OUTPUT != null;
    if (differentOutputSpecified) {
        IOUtil.assertFileIsWritable(OUTPUT);
    } else if (INPUT.size() != 1) {
        throw new UserException("Must specify either an explicit OUTPUT file or a single INPUT file to be overridden.");
    } else {
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File dir = soleInput.getParentFile().getAbsoluteFile();
        try {
            IOUtil.assertFileIsWritable(soleInput);
            IOUtil.assertDirectoryIsWritable(dir);
            OUTPUT = File.createTempFile(soleInput.getName() + ".being_fixed.", BamFileIoUtils.BAM_FILE_EXTENSION, dir);
        } catch (final IOException ioe) {
            throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath());
        }
    }
    // Get the input records merged and sorted by query name as needed
    final PeekableIterator<SAMRecord> iterator;
    final SAMFileHeader header;
    {
        // Deal with merging if necessary
        final Iterator<SAMRecord> tmp;
        if (INPUT.size() > 1) {
            final List<SAMFileHeader> headers = new ArrayList<>(readers.size());
            for (final SamReader reader : readers) {
                headers.add(reader.getFileHeader());
            }
            final SAMFileHeader.SortOrder sortOrder = (allQueryNameSorted ? SAMFileHeader.SortOrder.queryname : SAMFileHeader.SortOrder.unsorted);
            final SamFileHeaderMerger merger = new SamFileHeaderMerger(sortOrder, headers, false);
            tmp = new MergingSamRecordIterator(merger, readers, false);
            header = merger.getMergedHeader();
        } else {
            tmp = readers.get(0).iterator();
            header = readers.get(0).getFileHeader();
        }
        // And now deal with re-sorting if necessary
        if (ASSUME_SORTED || allQueryNameSorted) {
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<>(tmp), ADD_MATE_CIGAR);
        } else {
            logger.info("Sorting input into queryname order.");
            final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
            while (tmp.hasNext()) {
                sorter.add(tmp.next());
            }
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(sorter.iterator()) {
                @Override
                public void close() {
                    super.close();
                    sorter.cleanup();
                }
            }, ADD_MATE_CIGAR);
            logger.info("Sorting by queryname complete.");
        }
        // Deal with the various sorting complications
        final SAMFileHeader.SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER;
        logger.info("Output will be sorted by " + outputSortOrder);
        header.setSortOrder(outputSortOrder);
    }
    if (CREATE_INDEX && header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("Can't CREATE_INDEX unless sort order is coordinate");
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, header.getSortOrder() == SAMFileHeader.SortOrder.queryname)) {
        logger.info("Traversing query name sorted records and fixing up mate pair information.");
        final ProgressLogger progress = new ProgressLogger(logger);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        iterator.close();
        if (header.getSortOrder() == SAMFileHeader.SortOrder.queryname) {
            logger.info("Closing output file.");
        } else {
            logger.info("Finished processing reads; re-sorting output file.");
        }
    }
    // TODO throw appropriate exceptions instead of writing to log.error and returning
    if (!differentOutputSpecified) {
        logger.info("Replacing input file with fixed file.");
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File old = new File(soleInput.getParentFile(), soleInput.getName() + ".old");
        if (!old.exists() && soleInput.renameTo(old)) {
            if (OUTPUT.renameTo(soleInput)) {
                if (!old.delete()) {
                    logger.warn("Could not delete old file: " + old.getAbsolutePath());
                    return null;
                }
                if (CREATE_INDEX) {
                    final File newIndex = new File(OUTPUT.getParent(), OUTPUT.getName().substring(0, OUTPUT.getName().length() - 4) + ".bai");
                    final File oldIndex = new File(soleInput.getParent(), soleInput.getName().substring(0, soleInput.getName().length() - 4) + ".bai");
                    if (!newIndex.renameTo(oldIndex)) {
                        logger.warn("Could not overwrite index file: " + oldIndex.getAbsolutePath());
                    }
                }
            } else {
                logger.error("Could not move new file to " + soleInput.getAbsolutePath());
                logger.error("Input file preserved as: " + old.getAbsolutePath());
                logger.error("New file preserved as: " + OUTPUT.getAbsolutePath());
                return null;
            }
        } else {
            logger.error("Could not move input file out of the way: " + soleInput.getAbsolutePath());
            if (!OUTPUT.delete()) {
                logger.error("Could not delete temporary file: " + OUTPUT.getAbsolutePath());
            }
            return null;
        }
    }
    CloserUtil.close(readers);
    return null;
}
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
From class MakeSitesOnlyVcf, method doWork:
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    // Set up the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter writer = builder.build()) {
        final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
        writer.writeHeader(header);
        // Go through the input, strip the records and write them to the output
        final CloseableIterator<VariantContext> iterator = reader.iterator();
        while (iterator.hasNext()) {
            final VariantContext full = iterator.next();
            final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
            writer.add(site);
            progress.record(site.getContig(), site.getStart());
        }
        CloserUtil.close(iterator);
        CloserUtil.close(reader);
    }
    return null;
}
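The helper subsetToSamplesWithOriginalAnnotations is not part of this excerpt. As a hedged sketch of what such a step might do with the htsjdk API, the hypothetical method below keeps the site-level fields of each record and retains only the genotypes of the requested samples; it is inferred from how the result is used above, not taken from the actual GATK implementation.

import java.util.Set;

import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

final class SampleSubsetSketch {
    // Hypothetical stand-in for the helper referenced above: copy the record,
    // then keep only the genotypes belonging to the requested samples.
    static VariantContext subsetToSamplesWithOriginalAnnotations(final VariantContext ctx, final Set<String> samples) {
        final VariantContextBuilder builder = new VariantContextBuilder(ctx);
        final GenotypesContext subsetGenotypes = ctx.getGenotypes().subsetToSamples(samples);
        return builder.genotypes(subsetGenotypes).make();
    }
}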
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
From class MergeVcfs, method doWork:
@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }
    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }
        if (sequenceDictionary == null)
            sequenceDictionary = fileHeader.getSequenceDictionary();
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
From class SortVcf, method sortInputs:
/**
 * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
 * <p/>
 * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
 * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 *
 * @param readers      a list of VCFFileReaders, one for each input VCF
 * @param outputHeader the merged header whose information we intend to use in the final output file
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(logger, 25000, "read", "records");
    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords.
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outputHeader), outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        logger.info("Reading entries from input file " + readerCount);
        for (final VariantContext variantContext : reader) {
            sorter.add(variantContext);
            readProgress.record(variantContext.getContig(), variantContext.getStart());
        }
        reader.close();
        readerCount++;
    }
    return sorter;
}
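The sorted collection returned by sortInputs still has to be drained into the output writer. The sketch below shows what that write loop might look like, mirroring the ProgressLogger and SortingCollection usage elsewhere on this page; the method name, the "wrote"/"records" wording, and the assumption that the caller supplies an already-open VariantContextWriter are illustrative, and `logger` is the same tool-level logger used by sortInputs above.

// Illustrative sketch (name and log wording assumed): drain the sorted records
// into the writer, tracking progress as we go.
private void writeSortedOutput(final SortingCollection<VariantContext> sorter, final VariantContextWriter writer) {
    final ProgressLogger writeProgress = new ProgressLogger(logger, 25000, "wrote", "records");
    final Iterator<VariantContext> iterator = sorter.iterator();
    while (iterator.hasNext()) {
        final VariantContext variantContext = iterator.next();
        writer.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    CloserUtil.close(iterator);
    sorter.cleanup(); // delete the temporary spill files created while sorting
}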