Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
The class GenomeLocParser, method parseGenomeLoc.
// --------------------------------------------------------------------------------------------------------------
//
// Parsing genome locs
//
// --------------------------------------------------------------------------------------------------------------
/**
 * Parse a genome interval from a location string.
 *
 * Performs interval-style validation:
 * the contig is valid; start <= stop; and both start and stop fall within the contig's bounds.
 *
 * @param str the string to parse
 * @return a GenomeLoc representing the String
 */
public GenomeLoc parseGenomeLoc(final String str) {
    try {
        if (isUnmappedGenomeLocString(str)) {
            return GenomeLoc.UNMAPPED;
        }
        final Locatable locatable = new SimpleInterval(str);
        final String contig = locatable.getContig();
        final int start = locatable.getStart();
        int stop = locatable.getEnd();
        if (!contigIsInDictionary(contig)) {
            throw new UserException.MalformedGenomeLoc("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
        }
        if (stop == Integer.MAX_VALUE) {
            // lookup the actual stop position!
            stop = getContigInfo(contig).getSequenceLength();
        }
        return createGenomeLoc(contig, getContigIndex(contig), start, stop, true);
    } catch (UserException.MalformedGenomeLoc e) {
        throw e;
    } catch (IllegalArgumentException | UserException e) {
        throw new UserException.MalformedGenomeLoc("Failed to parse Genome Location string: " + str + ": " + e.getMessage(), e);
    }
}
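Since parseGenomeLoc delegates the raw string parsing to SimpleInterval, a minimal standalone sketch of that parsing behavior may help. This assumes the GATK artifact is on the classpath; the trailing-"+" behavior is inferred from the Integer.MAX_VALUE check above, so treat it as an assumption rather than a guarantee.

import org.broadinstitute.hellbender.utils.SimpleInterval;

public final class ParseIntervalDemo {
    public static void main(final String[] args) {
        // "contig:start-stop" parses into its three components
        final SimpleInterval interval = new SimpleInterval("chr1:100-200");
        System.out.println(interval.getContig()); // chr1
        System.out.println(interval.getStart());  // 100
        System.out.println(interval.getEnd());    // 200

        // A trailing "+" means "to the end of the contig"; SimpleInterval
        // appears to encode that as Integer.MAX_VALUE, which is why
        // parseGenomeLoc replaces it with the real contig length.
        final SimpleInterval openEnded = new SimpleInterval("chr1:100+");
        System.out.println(openEnded.getEnd() == Integer.MAX_VALUE); // true
    }
}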
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
The class ReadsSparkSource, method setHadoopBAMConfigurationProperties.
/**
 * Propagate any values that need to be passed to Hadoop-BAM through configuration properties:
 *
 * - the validation stringency property is always set using the current value of the
 *   validationStringency field
 * - if the input file is a CRAM file, the reference value will also be set, and must be a URI
 *   which includes a scheme; if no scheme is provided, a "file://" scheme is assumed. For
 *   non-CRAM input the reference may be null.
 * - if the input file is not CRAM, the reference property is *unset* to prevent Hadoop-BAM
 *   from passing a stale value through to htsjdk when multiple read calls are made serially
 *   with different inputs but the same Spark context
 */
private void setHadoopBAMConfigurationProperties(final String inputName, final String referenceName) {
    // use the Hadoop configuration attached to the Spark context to maintain cumulative settings
    final Configuration conf = ctx.hadoopConfiguration();
    conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, validationStringency.name());
    if (!IOUtils.isCramFileName(inputName)) {
        // for non-CRAM input, unset the reference so a stale value can't leak through
        conf.unset(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY);
    } else {
        if (null == referenceName) {
            throw new UserException.MissingReference("A reference is required for CRAM input");
        } else {
            if (ReferenceTwoBitSource.isTwoBit(referenceName)) {
                // htsjdk can't handle 2bit reference files
                throw new UserException("A 2bit file cannot be used as a CRAM file reference");
            } else {
                // Hadoop-BAM requires the reference to be a URI, including scheme
                final Path refPath = new Path(referenceName);
                if (!SparkUtils.pathExists(ctx, refPath)) {
                    throw new UserException.MissingReference("The specified fasta file (" + referenceName + ") does not exist.");
                }
                final String referenceURI = null == refPath.toUri().getScheme()
                        ? "file://" + new File(referenceName).getAbsolutePath()
                        : referenceName;
                conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, referenceURI);
            }
        }
    }
}
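The scheme-defaulting step is easy to get wrong, so here is a self-contained sketch of the same branch using plain java.net.URI in place of Hadoop's Path, so it runs without a Hadoop dependency. The helper name is hypothetical and only illustrates the logic above.

import java.io.File;
import java.net.URI;

public final class ReferenceUriDemo {
    // Mirrors the logic above: if the reference path carries no URI scheme,
    // assume a local file and prepend "file://"; otherwise pass it through.
    static String toReferenceUri(final String referenceName) {
        final URI uri = URI.create(referenceName);
        return uri.getScheme() == null
                ? "file://" + new File(referenceName).getAbsolutePath()
                : referenceName;
    }

    public static void main(final String[] args) {
        System.out.println(toReferenceUri("/data/ref.fasta"));          // file:///data/ref.fasta
        System.out.println(toReferenceUri("hdfs://nn/data/ref.fasta")); // unchanged
    }
}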
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
The class SplitReads, method onTraversalStart.
@Override
public void onTraversalStart() {
    IOUtil.assertDirectoryIsWritable(OUTPUT_DIRECTORY);
    if (readArguments.getReadFiles().size() != 1) {
        throw new UserException("This tool only accepts a single SAM/BAM/CRAM as input");
    }
    if (SAMPLE) {
        splitters.add(new SampleNameSplitter());
    }
    if (READ_GROUP) {
        splitters.add(new ReadGroupIdSplitter());
    }
    if (LIBRARY_NAME) {
        splitters.add(new LibraryNameSplitter());
    }
    outs = createWriters(splitters);
}
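Each splitter maps a read to the key its output file is partitioned by. Below is a hypothetical splitter in the same spirit, keyed on platform unit; the single-method interface is an illustrative stand-in for GATK's actual splitter base class, not its real signature.

import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;

// Illustrative stand-in for the splitter abstraction used above.
interface RecordKeyer {
    String keyFor(SAMRecord record);
}

final class PlatformUnitSplitter implements RecordKeyer {
    @Override
    public String keyFor(final SAMRecord record) {
        final SAMReadGroupRecord rg = record.getReadGroup();
        // fall back to a catch-all bucket when the read has no read group
        // or the read group carries no platform unit
        return rg == null || rg.getPlatformUnit() == null
                ? "unknown"
                : rg.getPlatformUnit();
    }
}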
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
The class VariantRecalibrator, method onTraversalSuccess.
//---------------------------------------------------------------------------------------------------------------
//
// on traversal success
//
//---------------------------------------------------------------------------------------------------------------
@Override
public Object onTraversalSuccess() {
    // finish processing any queued variants
    consumeQueuedVariants();
    for (int i = 1; i <= max_attempts; i++) {
        try {
            dataManager.setData(reduceSum);
            // Each data point is now (x - mean) / standard deviation
            dataManager.normalizeData();
            // Generate the positive model using the training data and evaluate each variant
            final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
            final GaussianMixtureModel goodModel = engine.generateModel(positiveTrainingData, VRAC.MAX_GAUSSIANS);
            engine.evaluateData(dataManager.getData(), goodModel, false);
            // Generate the negative model using the worst performing data and evaluate each variant contrastively
            final List<VariantDatum> negativeTrainingData = dataManager.selectWorstVariants();
            final GaussianMixtureModel badModel = engine.generateModel(negativeTrainingData, Math.min(VRAC.MAX_GAUSSIANS_FOR_NEGATIVE_MODEL, VRAC.MAX_GAUSSIANS));
            // Don't need the aggregate data anymore, so free up the memory
            dataManager.dropAggregateData();
            engine.evaluateData(dataManager.getData(), badModel, true);
            if (badModel.failedToConverge || goodModel.failedToConverge) {
                throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider " + (badModel.failedToConverge ? "raising the number of variants used to train the negative model (via --minNumBadVariants 5000, for example)." : "lowering the maximum number of Gaussians allowed for use in the model (via --maxGaussians 4, for example)."));
            }
            if (outputModel) {
                GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
                try (final PrintStream modelReportStream = new PrintStream(modelReport)) {
                    report.print(modelReportStream);
                } catch (FileNotFoundException e) {
                    throw new UserException.CouldNotCreateOutputFile("File: (" + modelReport + ")", e);
                }
            }
            engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
            // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
            final int nCallsAtTruth = TrancheManager.countCallsAtTruth(dataManager.getData(), Double.NEGATIVE_INFINITY);
            final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric(nCallsAtTruth);
            final List<Tranche> tranches = TrancheManager.findTranches(dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE);
            tranchesStream.print(Tranche.tranchesString(tranches));
            logger.info("Writing out recalibration table...");
            dataManager.writeOutRecalibrationTable(recalWriter, getBestAvailableSequenceDictionary());
            if (RSCRIPT_FILE != null) {
                logger.info("Writing out visualization Rscript file...");
                createVisualizationScript(dataManager.getRandomDataForPlotting(1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData()), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
            }
            if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
                // Print out an info message to make it clear why the tranches plot is not generated
                logger.info("Tranches plot will not be generated since we are running in INDEL mode");
            } else {
                // Execute the RScript command to plot the table of truth values
                RScriptExecutor executor = new RScriptExecutor();
                executor.addScript(new Resource(PLOT_TRANCHES_RSCRIPT, VariantRecalibrator.class));
                executor.addArgs(new File(TRANCHES_FILE).getAbsoluteFile(), TARGET_TITV);
                // Print out the command line to make it clear to the user what is being executed and how one might modify it
                logger.info("Executing: " + executor.getApproximateCommandLine());
                executor.exec();
            }
            return true;
        } catch (Exception e) {
            // Model fitting can fail (e.g. with an IllegalArgumentException); retry until the attempt budget is spent
            if (i == max_attempts) {
                throw e;
            } else {
                logger.info(String.format("Exception occurred on attempt %d of %d. Trying again. Message was: '%s'", i, max_attempts, e.getMessage()));
            }
        }
    }
    return false;
}
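The surrounding loop is a plain retry-then-rethrow pattern: attempt the work, log and retry on failure, and propagate only the final failure. Extracted as a generic sketch using only the JDK; all names here are illustrative.

import java.util.concurrent.Callable;

final class RetryDemo {
    // Run task up to maxAttempts times; rethrow only the last failure.
    static <T> T withRetries(final int maxAttempts, final Callable<T> task) throws Exception {
        for (int i = 1; i <= maxAttempts; i++) {
            try {
                return task.call();
            } catch (final Exception e) {
                if (i == maxAttempts) {
                    throw e; // out of attempts: propagate
                }
                System.err.printf("Attempt %d of %d failed: %s%n", i, maxAttempts, e.getMessage());
            }
        }
        throw new IllegalStateException("unreachable"); // loop always returns or throws
    }
}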
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
The class CollectRrbsMetrics, method doWork.
@Override
protected Object doWork() {
    if (!METRICS_FILE_PREFIX.endsWith(".")) {
        METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
    }
    final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
    final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
    final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
    assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
    final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final ProgressLogger progressLogger = new ProgressLogger(logger);
    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
    for (final SAMRecord samRecord : samReader) {
        progressLogger.record(samRecord);
        if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
            final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
            metricsCollector.acceptRecord(samRecord, referenceSequence);
        }
    }
    metricsCollector.finish();
    final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
    metricsCollector.addAllLevelsToFile(rrbsMetrics);
    // Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector.
    // Once we get it out, split it apart into the two separate MetricsFiles and write them to file
    final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
    final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
    for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
        summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
        for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
            detailsFile.addMetric(detailMetric);
        }
    }
    summaryFile.write(SUMMARY_OUT);
    detailsFile.write(DETAILS_OUT);
    if (PRODUCE_PLOT) {
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
        executor.addArgs(DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
        executor.exec();
    }
    CloserUtil.close(samReader);
    return null;
}
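The read loop above is standard htsjdk iteration. A minimal, self-contained version of the same shape follows, using try-with-resources instead of the manual CloserUtil.close call; the counting logic and command-line input path are illustrative only.

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import java.io.File;

public final class CountMappedReads {
    public static void main(final String[] args) throws Exception {
        long mapped = 0;
        // open the SAM/BAM/CRAM named on the command line and iterate its records
        try (final SamReader reader = SamReaderFactory.makeDefault().open(new File(args[0]))) {
            for (final SAMRecord record : reader) {
                // same filter as doWork: skip unmapped reads
                if (!record.getReadUnmappedFlag()) {
                    mapped++;
                }
            }
        }
        System.out.println("mapped reads: " + mapped);
    }
}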