Example 76 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class GatherVcfs method doWork.

@Override
protected Object doWork() {
    log.info("Checking inputs.");
    final List<Path> inputPaths = inputs.stream().map(IOUtils::getPath).collect(Collectors.toList());
    if (!ignoreSafetyChecks) {
        for (final Path f : inputPaths) {
            IOUtil.assertFileIsReadable(f);
        }
    }
    IOUtil.assertFileIsWritable(output);
    final SAMSequenceDictionary sequenceDictionary = getHeader(inputPaths.get(0)).getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("In order to index the resulting VCF input VCFs must contain ##contig lines.");
    }
    if (!ignoreSafetyChecks) {
        log.info("Checking file headers and first records to ensure compatibility.");
        assertSameSamplesAndValidOrdering(inputPaths);
    }
    if (!useConventionalGather && areAllBlockCompressed(inputPaths) && areAllBlockCompressed(CollectionUtil.makeList(output.toPath()))) {
        final List<File> inputFiles = inputs.stream().map(File::new).collect(Collectors.toList());
        log.info("Gathering by copying gzip blocks. Will not be able to validate position non-overlap of files.");
        if (CREATE_INDEX)
            log.warn("Index creation not currently supported when gathering block compressed VCFs.");
        gatherWithBlockCopying(inputFiles, output);
    } else {
        log.info("Gathering by conventional means.");
        gatherConventionally(sequenceDictionary, CREATE_INDEX, inputPaths, output, cloudPrefetchBuffer);
    }
    return null;
}
Also used : Path(java.nio.file.Path) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)
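
A minimal sketch of the same fail-fast pattern in isolation: validate all inputs up front and throw a UserException with an actionable message rather than letting a raw runtime error surface mid-run. The VcfInputChecks class and validate method below are illustrative names, not part of GATK; only UserException, IOUtil, and SAMSequenceDictionary come from the example above.

import java.nio.file.Path;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.hellbender.exceptions.UserException;

public final class VcfInputChecks {

    /** Asserts that every input is readable and that a dictionary is present when indexing is requested. */
    public static void validate(final Iterable<Path> inputs,
                                final SAMSequenceDictionary dictionary,
                                final boolean createIndex) {
        for (final Path p : inputs) {
            // Throws if the file is missing or unreadable.
            IOUtil.assertFileIsReadable(p);
        }
        if (createIndex && dictionary == null) {
            // Same condition the tool checks: indexing requires ##contig lines in the headers.
            throw new UserException("Cannot create an index: the input VCFs carry no sequence dictionary (##contig lines).");
        }
    }
}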

Example 77 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class GenotypeConcordance method doWork.

// TODO: add optimization if the samples are in the same file
// TODO: add option for auto-detect pairs based on same sample name
// TODO: allow multiple sample-pairs in one pass
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(TRUTH_VCF);
    IOUtil.assertFileIsReadable(CALL_VCF);
    final File summaryMetricsFile = new File(OUTPUT + SUMMARY_METRICS_FILE_EXTENSION);
    final File detailedMetricsFile = new File(OUTPUT + DETAILED_METRICS_FILE_EXTENSION);
    final File contingencyMetricsFile = new File(OUTPUT + CONTINGENCY_METRICS_FILE_EXTENSION);
    IOUtil.assertFileIsWritable(summaryMetricsFile);
    IOUtil.assertFileIsWritable(detailedMetricsFile);
    IOUtil.assertFileIsWritable(contingencyMetricsFile);
    final boolean usingIntervals = this.INTERVALS != null && !this.INTERVALS.isEmpty();
    IntervalList intervals = null;
    SAMSequenceDictionary intervalsSamSequenceDictionary = null;
    if (usingIntervals) {
        logger.info("Loading up region lists.");
        long genomeBaseCount = 0;
        for (final File f : INTERVALS) {
            IOUtil.assertFileIsReadable(f);
            final IntervalList tmpIntervalList = IntervalList.fromFile(f);
            if (genomeBaseCount == 0) {
                // Don't count the reference length more than once.
                intervalsSamSequenceDictionary = tmpIntervalList.getHeader().getSequenceDictionary();
                genomeBaseCount = intervalsSamSequenceDictionary.getReferenceLength();
            }
            if (intervals == null)
                intervals = tmpIntervalList;
            else if (INTERSECT_INTERVALS)
                intervals = IntervalList.intersection(intervals, tmpIntervalList);
            else
                intervals = IntervalList.union(intervals, tmpIntervalList);
        }
        if (intervals != null) {
            intervals = intervals.uniqued();
        }
        logger.info("Finished loading up region lists.");
    }
    final VCFFileReader truthReader = new VCFFileReader(TRUTH_VCF, USE_VCF_INDEX);
    final VCFFileReader callReader = new VCFFileReader(CALL_VCF, USE_VCF_INDEX);
    // Check that the samples actually exist in the files!
    if (!truthReader.getFileHeader().getGenotypeSamples().contains(TRUTH_SAMPLE)) {
        throw new UserException("File " + TRUTH_VCF.getAbsolutePath() + " does not contain genotypes for sample " + TRUTH_SAMPLE);
    }
    if (!callReader.getFileHeader().getGenotypeSamples().contains(CALL_SAMPLE)) {
        throw new UserException("File " + CALL_VCF.getAbsolutePath() + " does not contain genotypes for sample " + CALL_SAMPLE);
    }
    // Verify that both VCFs have the same Sequence Dictionary
    SequenceUtil.assertSequenceDictionariesEqual(truthReader.getFileHeader().getSequenceDictionary(), callReader.getFileHeader().getSequenceDictionary());
    if (usingIntervals) {
        // If using intervals, verify that the sequence dictionaries agree with those of the VCFs
        SequenceUtil.assertSequenceDictionariesEqual(intervalsSamSequenceDictionary, truthReader.getFileHeader().getSequenceDictionary());
    }
    // Build the pair of iterators over the regions of interest
    final Iterator<VariantContext> truthIterator, callIterator;
    if (usingIntervals) {
        truthIterator = new ByIntervalListVariantContextIterator(truthReader, intervals);
        callIterator = new ByIntervalListVariantContextIterator(callReader, intervals);
    } else {
        truthIterator = truthReader.iterator();
        callIterator = callReader.iterator();
    }
    // Now do the iteration and count things up
    final PairedVariantSubContextIterator pairedIterator = new PairedVariantSubContextIterator(truthIterator, TRUTH_SAMPLE, callIterator, CALL_SAMPLE, truthReader.getFileHeader().getSequenceDictionary());
    snpCounter = new GenotypeConcordanceCounts();
    indelCounter = new GenotypeConcordanceCounts();
    // A map to keep track of the count of Truth/Call States which we could not successfully classify
    final Map<String, Integer> unClassifiedStatesMap = new HashMap<>();
    logger.info("Starting iteration over variants.");
    while (pairedIterator.hasNext()) {
        final VcTuple tuple = pairedIterator.next();
        final VariantContext.Type truthVariantContextType = tuple.truthVariantContext != null ? tuple.truthVariantContext.getType() : NO_VARIATION;
        final VariantContext.Type callVariantContextType = tuple.callVariantContext != null ? tuple.callVariantContext.getType() : NO_VARIATION;
        // A flag to keep track of whether we have been able to successfully classify the Truth/Call States.
        // Unclassified include MIXED/MNP/Symbolic...
        boolean stateClassified = false;
        final TruthAndCallStates truthAndCallStates = determineState(tuple.truthVariantContext, TRUTH_SAMPLE, tuple.callVariantContext, CALL_SAMPLE, MIN_GQ, MIN_DP);
        if (truthVariantContextType == SNP) {
            if ((callVariantContextType == SNP) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
                // Note: if truth is SNP and call is MIXED, the event is logged in the snpCounter, with column = MIXED.
                snpCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        } else if (truthVariantContextType == INDEL) {
            // Note: if truth is INDEL and call is MIXED, the event is logged in the indelCounter, with column = MIXED.
            if ((callVariantContextType == INDEL) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
                indelCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        } else if (truthVariantContextType == MIXED) {
            if (callVariantContextType == SNP) {
                // Note: if truth is MIXED and call is SNP, the event is logged in the snpCounter, with row = MIXED.
                snpCounter.increment(truthAndCallStates);
                stateClassified = true;
            } else if (callVariantContextType == INDEL) {
                // Note: if truth is MIXED and call is INDEL, the event is logged in the indelCounter, with row = MIXED.
                indelCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        } else if (truthVariantContextType == NO_VARIATION) {
            if (callVariantContextType == SNP) {
                snpCounter.increment(truthAndCallStates);
                stateClassified = true;
            } else if (callVariantContextType == INDEL) {
                indelCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        }
        if (!stateClassified) {
            final String condition = truthVariantContextType + " " + callVariantContextType;
            // Equivalent to the get/null-check/put sequence, in a single call.
            unClassifiedStatesMap.merge(condition, 1, Integer::sum);
        }
        final VariantContext variantContextForLogging = tuple.truthVariantContext != null ? tuple.truthVariantContext : tuple.callVariantContext;
        progress.record(variantContextForLogging.getContig(), variantContextForLogging.getStart());
    }
    // Calculate and store the summary-level metrics
    final MetricsFile<GenotypeConcordanceSummaryMetrics, ?> genotypeConcordanceSummaryMetricsFile = getMetricsFile();
    GenotypeConcordanceSummaryMetrics summaryMetrics = new GenotypeConcordanceSummaryMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
    summaryMetrics = new GenotypeConcordanceSummaryMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
    genotypeConcordanceSummaryMetricsFile.write(summaryMetricsFile);
    // Calculate and store the detailed metrics for both SNP and indels
    final MetricsFile<GenotypeConcordanceDetailMetrics, ?> genotypeConcordanceDetailMetrics = getMetricsFile();
    outputDetailMetricsFile(SNP, genotypeConcordanceDetailMetrics, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    outputDetailMetricsFile(INDEL, genotypeConcordanceDetailMetrics, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceDetailMetrics.write(detailedMetricsFile);
    // Calculate and score the contingency metrics
    final MetricsFile<GenotypeConcordanceContingencyMetrics, ?> genotypeConcordanceContingencyMetricsFile = getMetricsFile();
    GenotypeConcordanceContingencyMetrics contingencyMetrics = new GenotypeConcordanceContingencyMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
    contingencyMetrics = new GenotypeConcordanceContingencyMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
    genotypeConcordanceContingencyMetricsFile.write(contingencyMetricsFile);
    for (final String condition : unClassifiedStatesMap.keySet()) {
        logger.info("Uncovered truth/call Variant Context Type Counts: " + condition + " " + unClassifiedStatesMap.get(condition));
    }
    return null;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IntervalList(htsjdk.samtools.util.IntervalList) UserException(org.broadinstitute.hellbender.exceptions.UserException) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)
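
The sample-existence check near the top of this method is a reusable guard. Below is a self-contained version of just that check, assuming an unindexed VCF is acceptable (the second VCFFileReader argument is false, since only the header is needed); the SampleCheckSketch class name is illustrative.

import java.io.File;

import htsjdk.variant.vcf.VCFFileReader;
import org.broadinstitute.hellbender.exceptions.UserException;

public final class SampleCheckSketch {

    /** Throws a UserException if the VCF carries no genotypes for the given sample. */
    public static void assertSamplePresent(final File vcf, final String sample) {
        // requireIndex = false: reading the header does not need random access.
        try (final VCFFileReader reader = new VCFFileReader(vcf, false)) {
            if (!reader.getFileHeader().getGenotypeSamples().contains(sample)) {
                throw new UserException("File " + vcf.getAbsolutePath()
                        + " does not contain genotypes for sample " + sample);
            }
        }
    }
}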

Example 78 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class SplitVcfs method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final VCFFileReader fileReader = new VCFFileReader(INPUT);
    final VCFHeader fileHeader = fileReader.getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = SEQUENCE_DICTIONARY != null ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary() : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
        final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build()) {
        snpWriter.writeHeader(fileHeader);
        indelWriter.writeHeader(fileHeader);
        int incorrectVariantCount = 0;
        final CloseableIterator<VariantContext> iterator = fileReader.iterator();
        while (iterator.hasNext()) {
            final VariantContext context = iterator.next();
            if (context.isIndel())
                indelWriter.add(context);
            else if (context.isSNP())
                snpWriter.add(context);
            else {
                if (STRICT)
                    throw new IllegalStateException("Found a record with type " + context.getType().name());
                else
                    incorrectVariantCount++;
            }
            progress.record(context.getContig(), context.getStart());
        }
        if (incorrectVariantCount > 0) {
            logger.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
        }
        CloserUtil.close(iterator);
        CloserUtil.close(fileReader);
    }
    return null;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)
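
The writer setup in this example generalizes into a small helper. The sketch below mirrors the tool's CREATE_INDEX guard by enabling on-the-fly indexing only when a sequence dictionary is available; the WriterSketch class and open method are illustrative names, and all htsjdk calls are the same ones used above.

import java.io.File;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFHeader;

public final class WriterSketch {

    /** Opens a VCF writer, indexing on the fly only when a sequence dictionary is available. */
    public static VariantContextWriter open(final File output,
                                            final VCFHeader header,
                                            final SAMSequenceDictionary dictionary) {
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setReferenceDictionary(dictionary)
                .clearOptions()                      // start from no options, as SplitVcfs does
                .setOutputFile(output);
        if (dictionary != null) {
            builder.setOption(Options.INDEX_ON_THE_FLY);  // indexing requires a dictionary
        }
        final VariantContextWriter writer = builder.build();
        writer.writeHeader(header);
        return writer;
    }
}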

Example 79 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class SparkUtils method convertHeaderlessHadoopBamShardToBam.

/**
 * Converts a headerless Hadoop bam shard (e.g., a part0000, part0001, etc. file produced by
 * {@link org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSink}) into a readable bam file
 * by adding a header and a BGZF terminator.
 *
 * This method is not intended for use with Hadoop bam shards that already have a header -- these shards are
 * already readable using samtools. Currently {@link ReadsSparkSink} saves the "shards" with a header for the
 * {@link ReadsWriteFormat#SHARDED} case, and without a header for the {@link ReadsWriteFormat#SINGLE} case.
 *
 * @param bamShard the headerless Hadoop bam shard to convert
 * @param header the header for the BAM file to be created
 * @param destination the path to which to write the new BAM file
 */
public static void convertHeaderlessHadoopBamShardToBam(final File bamShard, final SAMFileHeader header, final File destination) {
    try (FileOutputStream outStream = new FileOutputStream(destination)) {
        writeBAMHeaderToStream(header, outStream);
        FileUtils.copyFile(bamShard, outStream);
        outStream.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
    } catch (IOException e) {
        throw new UserException("Error writing to " + destination.getAbsolutePath(), e);
    }
}
Also used : RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)
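
A hedged usage sketch for the converter above. The SparkUtils package path and the command-line wiring are assumptions for illustration; in practice the header would be taken from the original input BAM rather than constructed empty.

import java.io.File;

import htsjdk.samtools.SAMFileHeader;
import org.broadinstitute.hellbender.utils.spark.SparkUtils;  // assumed package path

public final class ShardToBamSketch {

    public static void main(final String[] args) {
        final File shard = new File(args[0]);        // a headerless shard produced by ReadsSparkSink
        final File destination = new File(args[1]);  // the readable BAM to create
        // Placeholder header; a real caller would reuse the header of the original input.
        final SAMFileHeader header = new SAMFileHeader();
        SparkUtils.convertHeaderlessHadoopBamShardToBam(shard, header, destination);
    }
}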

Example 80 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class SparkUtils method pathExists.

/**
 * Determine if the <code>targetPath</code> exists.
 * @param ctx JavaSparkContext
 * @param targetPath the <code>org.apache.hadoop.fs.Path</code> object to check
 * @return true if the targetPath exists, otherwise false
 */
public static boolean pathExists(final JavaSparkContext ctx, final Path targetPath) {
    Utils.nonNull(ctx);
    Utils.nonNull(targetPath);
    try {
        final FileSystem fs = targetPath.getFileSystem(ctx.hadoopConfiguration());
        return fs.exists(targetPath);
    } catch (IOException e) {
        throw new UserException("Error validating existence of path " + targetPath + ": " + e.getMessage());
    }
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)
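
A hedged usage sketch for pathExists. The local-mode Spark context and the SparkUtils package path are assumptions for illustration; the method itself resolves whichever filesystem the Hadoop configuration maps the path to (hdfs://, file://, and so on).

import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.hellbender.utils.spark.SparkUtils;  // assumed package path

public final class PathExistsSketch {

    public static void main(final String[] args) {
        final SparkConf conf = new SparkConf().setAppName("pathExistsSketch").setMaster("local[1]");
        final JavaSparkContext ctx = new JavaSparkContext(conf);
        try {
            final boolean exists = SparkUtils.pathExists(ctx, new Path(args[0]));
            System.out.println(args[0] + (exists ? " exists" : " does not exist"));
        } finally {
            ctx.stop();  // release the local Spark context
        }
    }
}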

Aggregations

UserException (org.broadinstitute.hellbender.exceptions.UserException): 100
File (java.io.File): 30
IOException (java.io.IOException): 30
SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 14
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 11
SamReader (htsjdk.samtools.SamReader): 10
VariantContext (htsjdk.variant.variantcontext.VariantContext): 10
ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger): 10
SAMRecord (htsjdk.samtools.SAMRecord): 9
IntervalList (htsjdk.samtools.util.IntervalList): 9
List (java.util.List): 9
SAMFileHeader (htsjdk.samtools.SAMFileHeader): 8
ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker): 8
SamLocusIterator (htsjdk.samtools.util.SamLocusIterator): 8
LogManager (org.apache.logging.log4j.LogManager): 8
Logger (org.apache.logging.log4j.Logger): 8
GATKException (org.broadinstitute.hellbender.exceptions.GATKException): 7
MetricsFile (htsjdk.samtools.metrics.MetricsFile): 6
SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter): 5
FileNotFoundException (java.io.FileNotFoundException): 5