use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class GatherVcfs method doWork.
@Override
protected Object doWork() {
    log.info("Checking inputs.");
    final List<Path> inputPaths = inputs.stream().map(IOUtils::getPath).collect(Collectors.toList());
    if (!ignoreSafetyChecks) {
        for (final Path f : inputPaths) {
            IOUtil.assertFileIsReadable(f);
        }
    }
    IOUtil.assertFileIsWritable(output);
    final SAMSequenceDictionary sequenceDictionary = getHeader(inputPaths.get(0)).getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("In order to index the resulting VCF input VCFs must contain ##contig lines.");
    }
    if (!ignoreSafetyChecks) {
        log.info("Checking file headers and first records to ensure compatibility.");
        assertSameSamplesAndValidOrdering(inputPaths);
    }
    if (!useConventionalGather && areAllBlockCompressed(inputPaths) && areAllBlockCompressed(CollectionUtil.makeList(output.toPath()))) {
        final List<File> inputFiles = inputs.stream().map(File::new).collect(Collectors.toList());
        log.info("Gathering by copying gzip blocks. Will not be able to validate position non-overlap of files.");
        if (CREATE_INDEX)
            log.warn("Index creation not currently supported when gathering block compressed VCFs.");
        gatherWithBlockCopying(inputFiles, output);
    } else {
        log.info("Gathering by conventional means.");
        gatherConventionally(sequenceDictionary, CREATE_INDEX, inputPaths, output, cloudPrefetchBuffer);
    }
    return null;
}
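GatherVcfs illustrates the GATK convention of validating user inputs up front and throwing UserException with an actionable message, rather than letting a lower-level failure surface later. Below is a minimal, self-contained sketch of the same guard in isolation; the class and method names are illustrative and are not part of GatherVcfs.

import htsjdk.samtools.SAMSequenceDictionary;
import org.broadinstitute.hellbender.exceptions.UserException;

final class IndexingPrecondition {
    // Illustrative helper: fail fast with a user-facing message when on-the-fly
    // indexing is requested but the inputs cannot support it.
    static void checkIndexable(final boolean createIndex, final SAMSequenceDictionary dictionary) {
        if (createIndex && dictionary == null) {
            // UserException marks this as a problem with the user's inputs rather than an internal bug.
            throw new UserException("Cannot create an index: the input VCFs do not declare ##contig lines.");
        }
    }
}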
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class GenotypeConcordance method doWork (which drives a PairedVariantSubContextIterator).
// TODO: add optimization if the samples are in the same file
// TODO: add option for auto-detect pairs based on same sample name
// TODO: allow multiple sample-pairs in one pass
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(TRUTH_VCF);
    IOUtil.assertFileIsReadable(CALL_VCF);
    final File summaryMetricsFile = new File(OUTPUT + SUMMARY_METRICS_FILE_EXTENSION);
    final File detailedMetricsFile = new File(OUTPUT + DETAILED_METRICS_FILE_EXTENSION);
    final File contingencyMetricsFile = new File(OUTPUT + CONTINGENCY_METRICS_FILE_EXTENSION);
    IOUtil.assertFileIsWritable(summaryMetricsFile);
    IOUtil.assertFileIsWritable(detailedMetricsFile);
    IOUtil.assertFileIsWritable(contingencyMetricsFile);
    final boolean usingIntervals = this.INTERVALS != null && !this.INTERVALS.isEmpty();
    IntervalList intervals = null;
    SAMSequenceDictionary intervalsSamSequenceDictionary = null;
    if (usingIntervals) {
        logger.info("Loading up region lists.");
        long genomeBaseCount = 0;
        for (final File f : INTERVALS) {
            IOUtil.assertFileIsReadable(f);
            final IntervalList tmpIntervalList = IntervalList.fromFile(f);
            if (genomeBaseCount == 0) {
                // Don't count the reference length more than once.
                intervalsSamSequenceDictionary = tmpIntervalList.getHeader().getSequenceDictionary();
                genomeBaseCount = intervalsSamSequenceDictionary.getReferenceLength();
            }
            if (intervals == null)
                intervals = tmpIntervalList;
            else if (INTERSECT_INTERVALS)
                intervals = IntervalList.intersection(intervals, tmpIntervalList);
            else
                intervals = IntervalList.union(intervals, tmpIntervalList);
        }
        if (intervals != null) {
            intervals = intervals.uniqued();
        }
        logger.info("Finished loading up region lists.");
    }
    final VCFFileReader truthReader = new VCFFileReader(TRUTH_VCF, USE_VCF_INDEX);
    final VCFFileReader callReader = new VCFFileReader(CALL_VCF, USE_VCF_INDEX);
    // Check that the samples actually exist in the files!
    if (!truthReader.getFileHeader().getGenotypeSamples().contains(TRUTH_SAMPLE)) {
        throw new UserException("File " + TRUTH_VCF.getAbsolutePath() + " does not contain genotypes for sample " + TRUTH_SAMPLE);
    }
    if (!callReader.getFileHeader().getGenotypeSamples().contains(CALL_SAMPLE)) {
        throw new UserException("File " + CALL_VCF.getAbsolutePath() + " does not contain genotypes for sample " + CALL_SAMPLE);
    }
    // Verify that both VCFs have the same Sequence Dictionary
    SequenceUtil.assertSequenceDictionariesEqual(truthReader.getFileHeader().getSequenceDictionary(), callReader.getFileHeader().getSequenceDictionary());
    if (usingIntervals) {
        // If using intervals, verify that the sequence dictionaries agree with those of the VCFs
        SequenceUtil.assertSequenceDictionariesEqual(intervalsSamSequenceDictionary, truthReader.getFileHeader().getSequenceDictionary());
    }
    // Build the pair of iterators over the regions of interest
    final Iterator<VariantContext> truthIterator, callIterator;
    if (usingIntervals) {
        truthIterator = new ByIntervalListVariantContextIterator(truthReader, intervals);
        callIterator = new ByIntervalListVariantContextIterator(callReader, intervals);
    } else {
        truthIterator = truthReader.iterator();
        callIterator = callReader.iterator();
    }
    // Now do the iteration and count things up
    final PairedVariantSubContextIterator pairedIterator = new PairedVariantSubContextIterator(truthIterator, TRUTH_SAMPLE, callIterator, CALL_SAMPLE, truthReader.getFileHeader().getSequenceDictionary());
    snpCounter = new GenotypeConcordanceCounts();
    indelCounter = new GenotypeConcordanceCounts();
    // A map to keep track of the count of Truth/Call States which we could not successfully classify
    final Map<String, Integer> unClassifiedStatesMap = new HashMap<>();
    logger.info("Starting iteration over variants.");
    while (pairedIterator.hasNext()) {
        final VcTuple tuple = pairedIterator.next();
        final VariantContext.Type truthVariantContextType = tuple.truthVariantContext != null ? tuple.truthVariantContext.getType() : NO_VARIATION;
        final VariantContext.Type callVariantContextType = tuple.callVariantContext != null ? tuple.callVariantContext.getType() : NO_VARIATION;
        // A flag to keep track of whether we have been able to successfully classify the Truth/Call States.
        // Unclassified include MIXED/MNP/Symbolic...
        boolean stateClassified = false;
        final TruthAndCallStates truthAndCallStates = determineState(tuple.truthVariantContext, TRUTH_SAMPLE, tuple.callVariantContext, CALL_SAMPLE, MIN_GQ, MIN_DP);
        if (truthVariantContextType == SNP) {
            if ((callVariantContextType == SNP) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
                // Note. If truth is SNP and call is MIXED, the event will be logged in the snpCounter, with row = MIXED
                snpCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        } else if (truthVariantContextType == INDEL) {
            // Note. If truth is Indel and call is MIXED, the event will be logged in the indelCounter, with row = MIXED
            if ((callVariantContextType == INDEL) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
                indelCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        } else if (truthVariantContextType == MIXED) {
            // Note. If truth is MIXED and call is SNP, the event will be logged in the snpCounter, with column = MIXED
            if (callVariantContextType == SNP) {
                snpCounter.increment(truthAndCallStates);
                stateClassified = true;
            } else if (callVariantContextType == INDEL) {
                // Note. If truth is MIXED and call is INDEL, the event will be logged in the indelCounter, with column = MIXED
                indelCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        } else if (truthVariantContextType == NO_VARIATION) {
            if (callVariantContextType == SNP) {
                snpCounter.increment(truthAndCallStates);
                stateClassified = true;
            } else if (callVariantContextType == INDEL) {
                indelCounter.increment(truthAndCallStates);
                stateClassified = true;
            }
        }
        if (!stateClassified) {
            final String condition = truthVariantContextType + " " + callVariantContextType;
            Integer count = unClassifiedStatesMap.get(condition);
            if (count == null)
                count = 0;
            unClassifiedStatesMap.put(condition, ++count);
        }
        final VariantContext variantContextForLogging = tuple.truthVariantContext != null ? tuple.truthVariantContext : tuple.callVariantContext;
        progress.record(variantContextForLogging.getContig(), variantContextForLogging.getStart());
    }
    // Calculate and store the summary-level metrics
    final MetricsFile<GenotypeConcordanceSummaryMetrics, ?> genotypeConcordanceSummaryMetricsFile = getMetricsFile();
    GenotypeConcordanceSummaryMetrics summaryMetrics = new GenotypeConcordanceSummaryMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
    summaryMetrics = new GenotypeConcordanceSummaryMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceSummaryMetricsFile.addMetric(summaryMetrics);
    genotypeConcordanceSummaryMetricsFile.write(summaryMetricsFile);
    // Calculate and store the detailed metrics for both SNP and indels
    final MetricsFile<GenotypeConcordanceDetailMetrics, ?> genotypeConcordanceDetailMetrics = getMetricsFile();
    outputDetailMetricsFile(SNP, genotypeConcordanceDetailMetrics, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    outputDetailMetricsFile(INDEL, genotypeConcordanceDetailMetrics, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceDetailMetrics.write(detailedMetricsFile);
    // Calculate and score the contingency metrics
    final MetricsFile<GenotypeConcordanceContingencyMetrics, ?> genotypeConcordanceContingencyMetricsFile = getMetricsFile();
    GenotypeConcordanceContingencyMetrics contingencyMetrics = new GenotypeConcordanceContingencyMetrics(SNP, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
    contingencyMetrics = new GenotypeConcordanceContingencyMetrics(INDEL, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
    genotypeConcordanceContingencyMetricsFile.addMetric(contingencyMetrics);
    genotypeConcordanceContingencyMetricsFile.write(contingencyMetricsFile);
    for (final String condition : unClassifiedStatesMap.keySet()) {
        logger.info("Uncovered truth/call Variant Context Type Counts: " + condition + " " + unClassifiedStatesMap.get(condition));
    }
    return null;
}
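The unclassified-state bookkeeping above uses an explicit get/put on a boxed Integer. The sketch below shows a behaviorally equivalent tally using Map.merge; it is offered only as an illustration of that step and is not the GATK implementation.

import java.util.HashMap;
import java.util.Map;

public class UnclassifiedStateTally {
    public static void main(final String[] args) {
        final Map<String, Integer> unClassifiedStatesMap = new HashMap<>();
        // The key is "<truth type> <call type>"; the value is how often that
        // combination could not be classified as a SNP or indel event.
        unClassifiedStatesMap.merge("MIXED SYMBOLIC", 1, Integer::sum);
        unClassifiedStatesMap.merge("MIXED SYMBOLIC", 1, Integer::sum);
        unClassifiedStatesMap.forEach((condition, count) ->
                System.out.println("Uncovered truth/call Variant Context Type Counts: " + condition + " " + count));
    }
}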
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class SplitVcfs method doWork.
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final VCFFileReader fileReader = new VCFFileReader(INPUT);
    final VCFHeader fileHeader = fileReader.getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = SEQUENCE_DICTIONARY != null ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary() : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
         final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build()) {
        snpWriter.writeHeader(fileHeader);
        indelWriter.writeHeader(fileHeader);
        int incorrectVariantCount = 0;
        final CloseableIterator<VariantContext> iterator = fileReader.iterator();
        while (iterator.hasNext()) {
            final VariantContext context = iterator.next();
            if (context.isIndel())
                indelWriter.add(context);
            else if (context.isSNP())
                snpWriter.add(context);
            else {
                if (STRICT)
                    throw new IllegalStateException("Found a record with type " + context.getType().name());
                else
                    incorrectVariantCount++;
            }
            progress.record(context.getContig(), context.getStart());
        }
        if (incorrectVariantCount > 0) {
            logger.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
        }
        CloserUtil.close(iterator);
        CloserUtil.close(fileReader);
    }
    return null;
}
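SplitVcfs configures a single VariantContextWriterBuilder and reuses it to build both output writers, enabling on-the-fly indexing only when a sequence dictionary is available. The standalone sketch below shows that htsjdk builder pattern in isolation; the class and method names are placeholders, not GATK code.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFHeader;

import java.io.File;

public class WriterBuilderSketch {
    // Illustrative helper: open one VCF writer with the same options SplitVcfs uses.
    public static VariantContextWriter openWriter(final File out,
                                                  final VCFHeader header,
                                                  final SAMSequenceDictionary dict,
                                                  final boolean createIndex) {
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setReferenceDictionary(dict)
                .clearOptions();
        if (createIndex) {
            // On-the-fly indexing requires a sequence dictionary, hence the guard in doWork.
            builder.setOption(Options.INDEX_ON_THE_FLY);
        }
        final VariantContextWriter writer = builder.setOutputFile(out).build();
        writer.writeHeader(header);
        return writer;
    }
}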
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class SparkUtils method convertHeaderlessHadoopBamShardToBam.
/**
* Converts a headerless Hadoop bam shard (e.g., a part0000, part0001, etc. file produced by
* {@link org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSink}) into a readable bam file
* by adding a header and a BGZF terminator.
*
* This method is not intended for use with Hadoop bam shards that already have a header -- these shards are
* already readable using samtools. Currently {@link ReadsSparkSink} saves the "shards" with a header for the
* {@link ReadsWriteFormat#SHARDED} case, and without a header for the {@link ReadsWriteFormat#SINGLE} case.
*
* @param bamShard The headerless Hadoop bam shard to convert
* @param header header for the BAM file to be created
* @param destination path to which to write the new BAM file
*/
public static void convertHeaderlessHadoopBamShardToBam(final File bamShard, final SAMFileHeader header, final File destination) {
    try (FileOutputStream outStream = new FileOutputStream(destination)) {
        writeBAMHeaderToStream(header, outStream);
        FileUtils.copyFile(bamShard, outStream);
        outStream.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
    } catch (IOException e) {
        throw new UserException("Error writing to " + destination.getAbsolutePath(), e);
    }
}
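A hypothetical caller looks like the following. The shard and output paths are placeholders, the header is assumed to come from whatever produced the original reads, and the SparkUtils package name is an assumption about where this method lives.

import htsjdk.samtools.SAMFileHeader;

import java.io.File;

import org.broadinstitute.hellbender.utils.spark.SparkUtils;

public class ShardToBamExample {
    // Hypothetical usage: stitch a headerless part file written by ReadsSparkSink
    // into a standalone, readable BAM by prepending the header and appending the
    // BGZF terminator block.
    public static void stitch(final SAMFileHeader header) {
        final File shard = new File("output/part-r-00000"); // placeholder path
        final File bam = new File("output/reads.bam"); // placeholder path
        SparkUtils.convertHeaderlessHadoopBamShardToBam(shard, header, bam);
    }
}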
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class SparkUtils method pathExists.
/**
* Determine if the <code>targetPath</code> exists.
* @param ctx JavaSparkContext
* @param targetPath the <code>org.apache.hadoop.fs.Path</code> object to check
* @return true if the targetPath exists, otherwise false
*/
public static boolean pathExists(final JavaSparkContext ctx, final Path targetPath) {
    Utils.nonNull(ctx);
    Utils.nonNull(targetPath);
    try {
        final FileSystem fs = targetPath.getFileSystem(ctx.hadoopConfiguration());
        return fs.exists(targetPath);
    } catch (IOException e) {
        throw new UserException("Error validating existence of path " + targetPath + ": " + e.getMessage());
    }
}
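One plausible use of pathExists is to refuse to clobber an existing output path before a Spark write begins, again surfacing the problem as a UserException. The sketch below is illustrative only; the guard method is not part of SparkUtils, and the SparkUtils package name is an assumption.

import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.spark.SparkUtils;

public class OutputPathGuard {
    // Hypothetical pre-flight check before writing Spark output.
    public static void requireAbsent(final JavaSparkContext ctx, final String target) {
        final Path targetPath = new Path(target);
        if (SparkUtils.pathExists(ctx, targetPath)) {
            throw new UserException("Output path already exists: " + target);
        }
    }
}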