use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class DownsampleSam method doWork.
/**
 * Copies a random subset of the records in INPUT to OUTPUT.
 *
 * <p>Secondary and supplementary records are skipped outright: they are neither counted,
 * written, nor reported to the progress logger. For every other record a keep/discard
 * decision is drawn from the random generator the first time its read name is seen. When
 * that record is flagged as paired the decision is remembered under the read name, and the
 * next record carrying the same name consumes (and removes) the remembered decision, so it
 * receives the same outcome.
 *
 * <p>The generator is seeded with RANDOM_SEED when one is supplied, otherwise it is left
 * unseeded. The input reader is closed in all cases.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final Random random;
    if (RANDOM_SEED != null) {
        random = new Random(RANDOM_SEED);
    } else {
        random = new Random();
    }
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

    long examinedCount = 0;
    long keptCount = 0;
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, reader.getFileHeader(), true)) {
        // Decisions made for a paired record, waiting to be picked up by the next record with the same name.
        final Map<String, Boolean> pendingDecisions = new HashMap<>();
        final ProgressLogger progress = new ProgressLogger(logger, (int) 1e7, "Read");

        for (final SAMRecord record : reader) {
            if (record.isSecondaryOrSupplementary()) {
                continue;
            }
            examinedCount++;

            final String readName = record.getReadName();
            final Boolean rememberedDecision = pendingDecisions.remove(readName);
            final boolean keep;
            if (rememberedDecision != null) {
                // Same name seen before: reuse the earlier outcome.
                keep = rememberedDecision;
            } else {
                keep = random.nextDouble() <= PROBABILITY;
                if (record.getReadPairedFlag()) {
                    pendingDecisions.put(readName, keep);
                }
            }

            if (keep) {
                writer.addAlignment(record);
                keptCount++;
            }
            progress.record(record);
        }
    } finally {
        CloserUtil.close(reader);
    }

    logger.info("Finished! Kept " + keptCount + " out of " + examinedCount + " reads.");
    return null;
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class AddOrReplaceReadGroups method doWork.
/**
 * Rewrites INPUT to OUTPUT so that the output header contains exactly one read group,
 * built from the tool's read-group arguments, and every record's RG attribute is set to
 * that read group's ID.
 *
 * <p>The output header is a clone of the input header with its read groups replaced; its
 * sort order is overridden only when SORT_ORDER is supplied. The writer is told the records
 * are presorted when the output and input sort orders are the same. The input reader is
 * closed once the writer has been opened, whether or not the copy succeeds.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

    // Build the replacement read group: these fields are always set...
    final SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupId);
    readGroup.setLibrary(readGroupLibrary);
    readGroup.setPlatform(readGroupPlatform);
    readGroup.setSample(readGroupSampleName);
    readGroup.setPlatformUnit(readGroupPlatformUnit);
    // ...while these are set only when a value was provided.
    if (readGroupSequencingCenter != null) {
        readGroup.setSequencingCenter(readGroupSequencingCenter);
    }
    if (readGroupDescription != null) {
        readGroup.setDescription(readGroupDescription);
    }
    if (readGroupRunDate != null) {
        readGroup.setRunDate(readGroupRunDate);
    }
    if (readGroupPredictedInsertSize != null) {
        readGroup.setPredictedMedianInsertSize(readGroupPredictedInsertSize);
    }
    if (readGroupProgramGroup != null) {
        readGroup.setProgramGroup(readGroupProgramGroup);
    }
    if (readGroupPlatformModel != null) {
        readGroup.setPlatformModel(readGroupPlatformModel);
    }

    final String creationMessage = String.format("Created read group ID=%s PL=%s LB=%s SM=%s%n",
            readGroup.getId(), readGroup.getPlatform(), readGroup.getLibrary(), readGroup.getSample());
    logger.info(creationMessage);

    // Derive the output header from the input header, swapping in the new read group.
    final SAMFileHeader sourceHeader = reader.getFileHeader();
    final SAMFileHeader targetHeader = ReadUtils.cloneSAMFileHeader(sourceHeader);
    targetHeader.setReadGroups(Arrays.asList(readGroup));
    if (SORT_ORDER != null) {
        targetHeader.setSortOrder(SORT_ORDER);
    }

    // Records can be written straight through only when the sort order is unchanged.
    final boolean presorted = targetHeader.getSortOrder() == sourceHeader.getSortOrder();
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, targetHeader, presorted)) {
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord record : reader) {
            record.setAttribute(SAMTag.RG.name(), readGroupId);
            writer.addAlignment(record);
            progress.record(record);
        }
    } finally {
        CloserUtil.close(reader);
    }
    return null;
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class FastqToSam method doPaired.
/**
 * Reads two fastq inputs in lock-step and writes each pair of fastq records as two SAM
 * records sharing one base name, the first flagged first-of-pair and the second flagged
 * second-of-pair.
 *
 * <p>Reading stops as soon as either input is exhausted. The writer is then closed, and
 * only afterwards is it checked whether one of the inputs still has records left.
 *
 * @param freader1 source of the first-of-pair fastq records
 * @param freader2 source of the second-of-pair fastq records
 * @param writer   destination for the SAM records; closed by this method after the loop
 * @return the number of pairs processed (one per loop iteration, i.e. half the records written)
 * @throws UserException if one input still has records after the other is exhausted
 */
protected int doPaired(final FastqReader freader1, final FastqReader freader2, final SAMFileWriter writer) {
    final ProgressLogger progress = new ProgressLogger(LOG);
    int pairCount = 0;

    while (freader1.hasNext() && freader2.hasNext()) {
        final FastqRecord firstFastq = freader1.next();
        final FastqRecord secondFastq = freader2.next();

        final String firstName = getReadName(firstFastq.getReadName(), true);
        final String secondName = getReadName(secondFastq.getReadName(), true);
        final String sharedName = getBaseName(firstName, secondName, freader1, freader2);

        final SAMRecord firstSam = createSamRecord(writer.getFileHeader(), sharedName, firstFastq, true);
        firstSam.setFirstOfPairFlag(true);
        firstSam.setSecondOfPairFlag(false);
        writer.addAlignment(firstSam);
        progress.record(firstSam);

        final SAMRecord secondSam = createSamRecord(writer.getFileHeader(), sharedName, secondFastq, true);
        secondSam.setFirstOfPairFlag(false);
        secondSam.setSecondOfPairFlag(true);
        writer.addAlignment(secondSam);
        progress.record(secondSam);

        pairCount++;
    }

    writer.close();

    // Both inputs must run out on the same iteration; anything left over means unequal lengths.
    final boolean bothExhausted = !freader1.hasNext() && !freader2.hasNext();
    if (!bothExhausted) {
        throw new UserException("Input paired fastq files must be the same length");
    }
    return pairCount;
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class FilterReads method filterReads.
/**
 * Writes every record produced by {@code filteringIterator} to OUTPUT and logs how many
 * records were written.
 *
 * <p>The output header is read from INPUT; its sort order is overridden when SORT_ORDER is
 * supplied. The writer is told the records are presorted when the header's resulting sort
 * order equals the sort order originally read from INPUT.
 *
 * <p>This method takes responsibility for closing {@code filteringIterator}. The close is
 * performed in a {@code finally} clause so the iterator is released even when reading the
 * header, creating the writer, or copying a record throws. On the normal path the iterator
 * is closed after the output writer has been closed.
 *
 * @param filteringIterator source of the records to write; always closed by this method
 */
private void filterReads(final FilteringSamIterator filteringIterator) {
    try {
        // get OUTPUT header from INPUT and overwrite it if necessary
        final SAMFileHeader fileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
        final SAMFileHeader.SortOrder inputSortOrder = fileHeader.getSortOrder();
        if (SORT_ORDER != null) {
            fileHeader.setSortOrder(SORT_ORDER);
        }
        final boolean presorted = inputSortOrder.equals(fileHeader.getSortOrder());
        logger.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> output=" + OUTPUT.getName() + " [sortorder=" + fileHeader.getSortOrder().name() + "]");
        final ProgressLogger progress = new ProgressLogger(logger, (int) 1e6, "Written");
        // create OUTPUT file
        try (final SAMFileWriter outputWriter = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, fileHeader, presorted)) {
            while (filteringIterator.hasNext()) {
                final SAMRecord rec = filteringIterator.next();
                outputWriter.addAlignment(rec);
                progress.record(rec);
            }
        }
        // Reached only after the writer has been closed successfully.
        logger.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName());
    } finally {
        // Previously this ran only when the copy loop finished normally, leaking the iterator on any failure.
        filteringIterator.close();
    }
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class MergeSamFiles method doWork.
/**
 * Combines multiple SAM/BAM files into one.
 *
 * <p>Every file in INPUT is opened and its header collected. When an input's sequence
 * dictionary equals the first input's dictionary, the header is pointed at that first
 * instance so only one copy is retained. If every input already has sort order SORT_ORDER,
 * or SORT_ORDER is unsorted, or ASSUME_SORTED is set, records are merged and written on the
 * fly; otherwise the writer is created as not presorted so the output is sorted on write.
 *
 * <p>All readers opened by this method are closed in a {@code finally} clause, so they are
 * released even when a later input fails to open or when merging or writing throws. On the
 * normal path the readers are closed after the output writer has been closed.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    // Declared outside the try so the finally clause can close whatever was successfully opened.
    final List<SamReader> readers = new ArrayList<>();
    try {
        boolean matchedSortOrders = true;
        // Open the files for reading and writing
        final List<SAMFileHeader> headers = new ArrayList<>();
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }

        // If all the input sort orders match the output sort order then just merge them and
        // write on the fly, otherwise setup to merge and sort before writing out the final file
        IOUtil.assertFileIsWritable(OUTPUT);
        final boolean presorted;
        final SAMFileHeader.SortOrder headerMergerSortOrder;
        final boolean mergingSamRecordIteratorAssumeSorted;
        if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
            logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
            headerMergerSortOrder = SORT_ORDER;
            mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
            presorted = true;
        } else {
            logger.info("Sorting input files using temp directory " + TMP_DIR);
            headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
            mergingSamRecordIteratorAssumeSorted = false;
            presorted = false;
        }

        final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
        final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
        final SAMFileHeader header = headerMerger.getMergedHeader();
        for (final String comment : COMMENT) {
            header.addComment(comment);
        }
        header.setSortOrder(SORT_ORDER);

        // NOTE(review): this factory is configured but never handed to createSAMWriter below, so
        // within this method USE_THREADING does not appear to influence the writer — confirm
        // whether createSAMWriter should accept it or whether this can be removed.
        final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
        if (USE_THREADING) {
            samFileWriterFactory.setUseAsyncIo(true);
        }

        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
            // Lastly loop through and write out the records
            final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
            while (iterator.hasNext()) {
                final SAMRecord record = iterator.next();
                out.addAlignment(record);
                progress.record(record);
            }
            logger.info("Finished reading inputs.");
        }
    } finally {
        // Previously this ran only at the end of a successful write loop, leaking readers on any failure.
        CloserUtil.close(readers);
    }
    return null;
}
Aggregations