Search in sources :

Example 51 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class GenomeLocParser method parseGenomeLoc.

// --------------------------------------------------------------------------------------------------------------
//
// Parsing genome locs
//
// --------------------------------------------------------------------------------------------------------------
/**
     * parse a genome interval, from a location string
     *
     * Performs interval-style validation:
     *
     * contig is valid; start and stop less than the end; start <= stop, and start/stop are on the contig
     * @param str the string to parse
     *
     * @return a GenomeLoc representing the String
     *
     */
public GenomeLoc parseGenomeLoc(final String str) {
    try {
        if (isUnmappedGenomeLocString(str)) {
            return GenomeLoc.UNMAPPED;
        }
        final Locatable locatable = new SimpleInterval(str);
        final String contig = locatable.getContig();
        final int start = locatable.getStart();
        int stop = locatable.getEnd();
        if (!contigIsInDictionary(contig)) {
            throw new UserException.MalformedGenomeLoc("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
        }
        if (stop == Integer.MAX_VALUE) {
            // lookup the actual stop position!
            stop = getContigInfo(contig).getSequenceLength();
        }
        return createGenomeLoc(contig, getContigIndex(contig), start, stop, true);
    } catch (UserException.MalformedGenomeLoc e) {
        throw e;
    } catch (IllegalArgumentException | UserException e) {
        throw new UserException.MalformedGenomeLoc("Failed to parse Genome Location string: " + str + ": " + e.getMessage(), e);
    }
}
Also used : UserException(org.broadinstitute.hellbender.exceptions.UserException) Locatable(htsjdk.samtools.util.Locatable)

Example 52 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ReadsSparkSource method setHadoopBAMConfigurationProperties.

/**
     * Propagate any values that need to be passed to Hadoop-BAM through configuration properties:
     *
     *   - the validation stringency property is always set using the current value of the
     *     validationStringency field
     *   - if the input file is a CRAM file, the reference value will also be set, and must be a URI
     *     which includes a scheme. if no scheme is provided a "file://" scheme will be used. for
     *     non-CRAM input the reference may be null.
     *   - if the input file is not CRAM, the reference property is *unset* to prevent Hadoop-BAM
     *     from passing a stale value through to htsjdk when multiple read calls are made serially
     *     with different inputs but the same Spark context
     */
private void setHadoopBAMConfigurationProperties(final String inputName, final String referenceName) {
    // use the Hadoop configuration attached to the Spark context to maintain cumulative settings
    final Configuration conf = ctx.hadoopConfiguration();
    conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, validationStringency.name());
    if (!IOUtils.isCramFileName(inputName)) {
        // only set the reference for CRAM input
        conf.unset(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY);
    } else {
        if (null == referenceName) {
            throw new UserException.MissingReference("A reference is required for CRAM input");
        } else {
            if (ReferenceTwoBitSource.isTwoBit(referenceName)) {
                // htsjdk can't handle 2bit reference files
                throw new UserException("A 2bit file cannot be used as a CRAM file reference");
            } else {
                // Hadoop-BAM requires the reference to be a URI, including scheme
                final Path refPath = new Path(referenceName);
                if (!SparkUtils.pathExists(ctx, refPath)) {
                    throw new UserException.MissingReference("The specified fasta file (" + referenceName + ") does not exist.");
                }
                final String referenceURI = null == refPath.toUri().getScheme() ? "file://" + new File(referenceName).getAbsolutePath() : referenceName;
                conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, referenceURI);
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File)

Example 53 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class SplitReads method onTraversalStart.

@Override
public void onTraversalStart() {
    IOUtil.assertDirectoryIsWritable(OUTPUT_DIRECTORY);
    if (readArguments.getReadFiles().size() != 1) {
        throw new UserException("This tool only accepts a single SAM/BAM/CRAM as input");
    }
    if (SAMPLE) {
        splitters.add(new SampleNameSplitter());
    }
    if (READ_GROUP) {
        splitters.add(new ReadGroupIdSplitter());
    }
    if (LIBRARY_NAME) {
        splitters.add(new LibraryNameSplitter());
    }
    outs = createWriters(splitters);
}
Also used : LibraryNameSplitter(org.broadinstitute.hellbender.tools.readersplitters.LibraryNameSplitter) SampleNameSplitter(org.broadinstitute.hellbender.tools.readersplitters.SampleNameSplitter) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReadGroupIdSplitter(org.broadinstitute.hellbender.tools.readersplitters.ReadGroupIdSplitter)

Example 54 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class VariantRecalibrator method onTraversalSuccess.

//---------------------------------------------------------------------------------------------------------------
//
// on traversal success
//
//---------------------------------------------------------------------------------------------------------------
@Override
public Object onTraversalSuccess() {
    // finish processing any queued variants
    consumeQueuedVariants();
    for (int i = 1; i <= max_attempts; i++) {
        try {
            dataManager.setData(reduceSum);
            // Each data point is now (x - mean) / standard deviation
            dataManager.normalizeData();
            // Generate the positive model using the training data and evaluate each variant
            final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
            final GaussianMixtureModel goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
            engine.evaluateData(dataManager.getData(), goodModel, false);
            // Generate the negative model using the worst performing data and evaluate each variant contrastively
            final List<VariantDatum> negativeTrainingData = dataManager.selectWorstVariants();
            final GaussianMixtureModel badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
            // Don't need the aggregate data anymore so let's free up the memory
            dataManager.dropAggregateData();
            engine.evaluateData(dataManager.getData(), badModel, true);
            if (badModel.failedToConverge || goodModel.failedToConverge) {
                throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
            }
            if (outputModel) {
                GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
                try (final PrintStream modelReportStream = new PrintStream(modelReport)) {
                    report.print(modelReportStream);
                } catch (FileNotFoundException e) {
                    throw new UserException.CouldNotCreateOutputFile("File: (" + modelReport + ")", e);
                }
            }
            engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
            // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
            final int nCallsAtTruth = TrancheManager.countCallsAtTruth(dataManager.getData(), Double.NEGATIVE_INFINITY);
            final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric(nCallsAtTruth);
            final List<Tranche> tranches = TrancheManager.findTranches(dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE);
            tranchesStream.print(Tranche.tranchesString(tranches));
            logger.info("Writing out recalibration table...");
            dataManager.writeOutRecalibrationTable(recalWriter, getBestAvailableSequenceDictionary());
            if (RSCRIPT_FILE != null) {
                logger.info("Writing out visualization Rscript file...");
                createVisualizationScript(dataManager.getRandomDataForPlotting(1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData()), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
            }
            if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
                // Print out an info message to make it clear why the tranches plot is not generated
                logger.info("Tranches plot will not be generated since we are running in INDEL mode");
            } else {
                // Execute the RScript command to plot the table of truth values
                RScriptExecutor executor = new RScriptExecutor();
                executor.addScript(new Resource(PLOT_TRANCHES_RSCRIPT, VariantRecalibrator.class));
                executor.addArgs(new File(TRANCHES_FILE).getAbsoluteFile(), TARGET_TITV);
                // Print out the command line to make it clear to the user what is being executed and how one might modify it
                logger.info("Executing: " + executor.getApproximateCommandLine());
                executor.exec();
            }
            return true;
        } catch (Exception e) {
            // IllegalArgumentException.
            if (i == max_attempts) {
                throw e;
            } else {
                logger.info(String.format("Exception occurred on attempt %d of %d. Trying again. Message was: '%s'", i, max_attempts, e.getMessage()));
            }
        }
    }
    return false;
}
Also used : GATKReport(org.broadinstitute.hellbender.utils.report.GATKReport) PrintStream(java.io.PrintStream) FileNotFoundException(java.io.FileNotFoundException) Resource(org.broadinstitute.hellbender.utils.io.Resource) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File)

Example 55 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class CollectRrbsMetrics method doWork.

@Override
protected Object doWork() {
    if (!METRICS_FILE_PREFIX.endsWith(".")) {
        METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
    }
    final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
    final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
    final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
    assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
    final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final ProgressLogger progressLogger = new ProgressLogger(logger);
    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
    for (final SAMRecord samRecord : samReader) {
        progressLogger.record(samRecord);
        if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
            final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
            metricsCollector.acceptRecord(samRecord, referenceSequence);
        }
    }
    metricsCollector.finish();
    final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
    metricsCollector.addAllLevelsToFile(rrbsMetrics);
    // Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
    // we get it out split it apart to the two separate MetricsFiles and write them to file
    final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
    final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
    for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
        summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
        for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
            detailsFile.addMetric(detailMetric);
        }
    }
    summaryFile.write(SUMMARY_OUT);
    detailsFile.write(DETAILS_OUT);
    if (PRODUCE_PLOT) {
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
        executor.addArgs(DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
        executor.exec();
    }
    CloserUtil.close(samReader);
    return null;
}
Also used : Resource(org.broadinstitute.hellbender.utils.io.Resource) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Aggregations

UserException (org.broadinstitute.hellbender.exceptions.UserException)100 File (java.io.File)30 IOException (java.io.IOException)30 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)11 SamReader (htsjdk.samtools.SamReader)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)10 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)10 SAMRecord (htsjdk.samtools.SAMRecord)9 IntervalList (htsjdk.samtools.util.IntervalList)9 List (java.util.List)9 SAMFileHeader (htsjdk.samtools.SAMFileHeader)8 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)8 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)8 LogManager (org.apache.logging.log4j.LogManager)8 Logger (org.apache.logging.log4j.Logger)8 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 MetricsFile (htsjdk.samtools.metrics.MetricsFile)6 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)5 FileNotFoundException (java.io.FileNotFoundException)5