Search in sources :

Example 11 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class MergeVcfs method doWork.

@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }
    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }
        if (sequenceDictionary == null)
            sequenceDictionary = fileHeader.getSequenceDictionary();
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ArrayList(java.util.ArrayList) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 12 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class SortVcf method sortInputs.

/**
     * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
     * <p/>
     * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
     * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
     * MergeVcfs exists for simple merging of presorted inputs.
     *
     * @param readers      - a list of VCFFileReaders, one for each input VCF
     * @param outputHeader - The merged header whose information we intend to use in the final output file
     */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(logger, 25000, "read", "records");
    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outputHeader), outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        logger.info("Reading entries from input file " + readerCount);
        for (final VariantContext variantContext : reader) {
            sorter.add(variantContext);
            readProgress.record(variantContext.getContig(), variantContext.getStart());
        }
        reader.close();
        readerCount++;
    }
    return sorter;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec)

Example 13 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class SortVcf method writeSortedOutput.

private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(logger, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder().setReferenceDictionary(outputHeader.getSequenceDictionary()).setOptions(options).setOutputFile(OUTPUT).build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
Also used : Options(htsjdk.variant.variantcontext.writer.Options) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 14 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class CollectRrbsMetrics method doWork.

@Override
protected Object doWork() {
    if (!METRICS_FILE_PREFIX.endsWith(".")) {
        METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
    }
    final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
    final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
    final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
    assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
    final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final ProgressLogger progressLogger = new ProgressLogger(logger);
    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
    for (final SAMRecord samRecord : samReader) {
        progressLogger.record(samRecord);
        if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
            final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
            metricsCollector.acceptRecord(samRecord, referenceSequence);
        }
    }
    metricsCollector.finish();
    final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
    metricsCollector.addAllLevelsToFile(rrbsMetrics);
    // Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
    // we get it out split it apart to the two separate MetricsFiles and write them to file
    final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
    final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
    for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
        summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
        for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
            detailsFile.addMetric(detailMetric);
        }
    }
    summaryFile.write(SUMMARY_OUT);
    detailsFile.write(DETAILS_OUT);
    if (PRODUCE_PLOT) {
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
        executor.addArgs(DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
        executor.exec();
    }
    CloserUtil.close(samReader);
    return null;
}
Also used : Resource(org.broadinstitute.hellbender.utils.io.Resource) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 15 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class CollectTargetedMetrics method doWork.

/**
     * Asserts that files are readable and writable and then fires off an
     * HsMetricsCalculator instance to do the real work.
     */
@Override
protected Object doWork() {
    for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null)
        IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
    // Validate that the targets and baits have the same references as the reads file
    SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
    SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());
    ReferenceSequenceFile ref = null;
    if (REFERENCE_SEQUENCE != null) {
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
    }
    final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord record : reader) {
        collector.acceptRecord(record, null);
        progress.record(record);
    }
    // Write the output file
    final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
    collector.finish();
    collector.addAllLevelsToFile(metrics);
    metrics.write(OUTPUT);
    CloserUtil.close(reader);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) IntervalList(htsjdk.samtools.util.IntervalList) SAMRecord(htsjdk.samtools.SAMRecord) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile)

Aggregations

ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)31 SamReader (htsjdk.samtools.SamReader)13 SAMRecord (htsjdk.samtools.SAMRecord)12 UserException (org.broadinstitute.hellbender.exceptions.UserException)11 VariantContext (htsjdk.variant.variantcontext.VariantContext)7 File (java.io.File)7 ArrayList (java.util.ArrayList)7 SAMFileWriter (htsjdk.samtools.SAMFileWriter)6 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)6 VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)5 VCFHeader (htsjdk.variant.vcf.VCFHeader)5 HashMap (java.util.HashMap)5 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 MetricsFile (htsjdk.samtools.metrics.MetricsFile)4 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)4 BAMRecordCodec (htsjdk.samtools.BAMRecordCodec)3 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)3 SAMRecordQueryNameComparator (htsjdk.samtools.SAMRecordQueryNameComparator)3