use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class MergeVcfs method doWork.
/**
 * Merges the VCFs listed in INPUT into OUTPUT using a {@link MergingIterator}.
 * <p>
 * Every input must have contig lines compatible with the first input's record comparator and
 * exactly the same ordered sample list as the first input. The sequence dictionary is taken from
 * SEQUENCE_DICTIONARY when that is set, otherwise from the first input header that supplies one.
 * <p>
 * All input readers and iterators opened here are released before the method returns, whether it
 * completes normally or throws.
 *
 * @return always {@code null}
 * @throws IllegalArgumentException if an input's contig lines or sample names do not match the other inputs
 * @throws UserException if CREATE_INDEX is set and no sequence dictionary is available
 */
@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFFileReader> openReaders = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    // Set to true once the merging iterator has been constructed; from then on the input iterators
    // are released by closing the merging iterator, exactly as the original code did.
    boolean iteratorsOwnedByMerger = false;

    try {
        if (SEQUENCE_DICTIONARY != null) {
            // NOTE(review): the reader opened here to fetch the dictionary is never closed — consider closing it.
            sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
        }

        for (final File file : INPUT) {
            IOUtil.assertFileIsReadable(file);
            final VCFFileReader fileReader = new VCFFileReader(file, false);
            // Track the reader immediately so it is closed even if validation below fails.
            openReaders.add(fileReader);
            final VCFHeader fileHeader = fileReader.getFileHeader();

            if (variantContextComparator == null) {
                // The first input defines the contig ordering every other input is checked against.
                variantContextComparator = fileHeader.getVCFRecordComparator();
            } else if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }

            if (sequenceDictionary == null) {
                sequenceDictionary = fileHeader.getSequenceDictionary();
            }

            if (sampleList.isEmpty()) {
                sampleList.addAll(fileHeader.getSampleNamesInOrder());
            } else if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }

            headers.add(fileHeader);
            iteratorCollection.add(fileReader.iterator());
        }

        if (CREATE_INDEX && sequenceDictionary == null) {
            throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
        }

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
        if (CREATE_INDEX) {
            builder.setOption(Options.INDEX_ON_THE_FLY);
        }

        try (final VariantContextWriter writer = builder.build()) {
            writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
            final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
            iteratorsOwnedByMerger = true;
            try {
                while (mergingIterator.hasNext()) {
                    final VariantContext context = mergingIterator.next();
                    writer.add(context);
                    progress.record(context.getContig(), context.getStart());
                }
            } finally {
                // Previously skipped when writing failed part-way through the merge.
                CloserUtil.close(mergingIterator);
            }
        }
    } finally {
        if (!iteratorsOwnedByMerger) {
            // Failure happened before the merging iterator took over: release each input iterator directly.
            for (final CloseableIterator<VariantContext> iterator : iteratorCollection) {
                CloserUtil.close(iterator);
            }
        }
        for (final VCFFileReader reader : openReaders) {
            CloserUtil.close(reader);
        }
    }
    return null;
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class SortVcf method sortInputs.
/**
 * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
 * <p/>
 * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
 * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 * <p/>
 * Each reader is closed as soon as its records have been consumed, including when reading it fails.
 *
 * @param readers - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 * @return the SortingCollection holding every VariantContext read from {@code readers}
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(logger, 25000, "read", "records");
    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outputHeader), outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        logger.info("Reading entries from input file " + readerCount);
        try {
            for (final VariantContext variantContext : reader) {
                sorter.add(variantContext);
                readProgress.record(variantContext.getContig(), variantContext.getStart());
            }
        } finally {
            // Close in finally so a failure while reading or sorting does not leave this input open.
            reader.close();
        }
        readerCount++;
    }
    return sorter;
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class SortVcf method writeSortedOutput.
/**
 * Writes the header followed by every record of {@code sortedOutput} to OUTPUT, in the
 * collection's iteration order. An on-the-fly index is requested when CREATE_INDEX is set.
 * The writer is closed when the method exits, including when writing fails.
 *
 * @param outputHeader the header to write; its sequence dictionary is also handed to the writer builder
 * @param sortedOutput the records to write
 */
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(logger, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    // try-with-resources guarantees the writer is closed even if writing a record throws.
    try (final VariantContextWriter out = new VariantContextWriterBuilder().setReferenceDictionary(outputHeader.getSequenceDictionary()).setOptions(options).setOutputFile(OUTPUT).build()) {
        out.writeHeader(outputHeader);
        for (final VariantContext variantContext : sortedOutput) {
            out.add(variantContext);
            writeProgress.record(variantContext.getContig(), variantContext.getStart());
        }
    }
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class CollectRrbsMetrics method doWork.
/**
 * Collects RRBS metrics from INPUT and writes a summary file and a detail file whose names are
 * built from METRICS_FILE_PREFIX; when PRODUCE_PLOT is set, an R script is run to produce a PDF
 * from those two files.
 * <p>
 * Side effect: a trailing "." is appended to METRICS_FILE_PREFIX if it does not already end with one.
 * <p>
 * The SAM reader and the reference walker are closed when the method exits, including on failure.
 *
 * @return always {@code null}
 * @throws UserException if ASSUME_SORTED is false and the input header's sort order is not coordinate
 */
@Override
protected Object doWork() {
    if (!METRICS_FILE_PREFIX.endsWith(".")) {
        METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
    }
    final File summaryOut = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
    final File detailsOut = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
    final File plotsOut = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
    assertIoFiles(summaryOut, detailsOut, plotsOut);

    final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
    try {
        if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
        }

        final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
        try {
            final ProgressLogger progressLogger = new ProgressLogger(logger);
            final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
            for (final SAMRecord samRecord : samReader) {
                progressLogger.record(samRecord);
                // Only mapped reads on sequences that are not filtered out are handed to the collector.
                if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
                    final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
                    metricsCollector.acceptRecord(samRecord, referenceSequence);
                }
            }
            metricsCollector.finish();

            final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
            metricsCollector.addAllLevelsToFile(rrbsMetrics);
            // Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
            // we get it out split it apart to the two separate MetricsFiles and write them to file
            final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
            final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
            for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
                summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
                for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
                    detailsFile.addMetric(detailMetric);
                }
            }
            summaryFile.write(summaryOut);
            detailsFile.write(detailsOut);

            if (PRODUCE_PLOT) {
                final RScriptExecutor executor = new RScriptExecutor();
                executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
                executor.addArgs(detailsOut.getAbsolutePath(), summaryOut.getAbsolutePath(), plotsOut.getAbsolutePath());
                executor.exec();
            }
        } finally {
            // The walker was never closed before; assumes it exposes close() — TODO confirm.
            CloserUtil.close(refWalker);
        }
    } finally {
        // Previously only reached on the success path, leaking the reader on any failure above.
        CloserUtil.close(samReader);
    }
    return null;
}
use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.
the class CollectTargetedMetrics method doWork.
/**
 * Asserts that the input files are readable and the output files writable, checks that the
 * sequence dictionaries of the target intervals, the probe intervals and (when given) the
 * reference agree with the reads file, then feeds every record of INPUT to the collector built
 * by {@code makeCollector} and writes the resulting metrics to OUTPUT.
 * <p>
 * The SAM reader and the reference sequence file (if one was opened) are closed when the method
 * exits, including on failure.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    for (final File targetInterval : TARGET_INTERVALS) {
        IOUtil.assertFileIsReadable(targetInterval);
    }
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) {
        IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
    }

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile ref = null;
    try {
        final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
        // Validate that the targets and baits have the same references as the reads file
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());

        if (REFERENCE_SEQUENCE != null) {
            IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
            ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
            SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
        }

        final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord record : reader) {
            // The collector is given no reference sequence per record; it received `ref` at construction.
            collector.acceptRecord(record, null);
            progress.record(record);
        }

        // Write the output file
        final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
        collector.finish();
        collector.addAllLevelsToFile(metrics);
        metrics.write(OUTPUT);
    } finally {
        if (ref != null) {
            // The reference file was never closed before; assumes the collector no longer needs it
            // once metrics are written or the run has failed — TODO confirm.
            CloserUtil.close(ref);
        }
        // Previously only reached on the success path, leaking the reader when validation or collection failed.
        CloserUtil.close(reader);
    }
    return null;
}
Aggregations