use of org.broadinstitute.hellbender.exceptions.UserException in project gatk-protected by broadinstitute.
the class Concordance method onTraversalSuccess.
@Override
public Object onTraversalSuccess() {
try (ConcordanceSummaryRecord.Writer concordanceSummaryWriter = ConcordanceSummaryRecord.getWriter(summary)) {
concordanceSummaryWriter.writeRecord(new ConcordanceSummaryRecord(VariantContext.Type.SNP, snpCounts.get(ConcordanceState.TRUE_POSITIVE).longValue(), snpCounts.get(ConcordanceState.FALSE_POSITIVE).longValue(), snpCounts.get(ConcordanceState.FALSE_NEGATIVE).longValue() + snpCounts.get(ConcordanceState.FILTERED_FALSE_NEGATIVE).longValue()));
concordanceSummaryWriter.writeRecord(new ConcordanceSummaryRecord(VariantContext.Type.INDEL, indelCounts.get(ConcordanceState.TRUE_POSITIVE).longValue(), indelCounts.get(ConcordanceState.FALSE_POSITIVE).longValue(), indelCounts.get(ConcordanceState.FALSE_NEGATIVE).longValue() + indelCounts.get(ConcordanceState.FILTERED_FALSE_NEGATIVE).longValue()));
} catch (IOException e) {
throw new UserException("Encountered an IO exception writing the concordance summary table", e);
}
if (truePositivesAndFalsePositivesVcfWriter != null) {
truePositivesAndFalsePositivesVcfWriter.close();
}
if (truePositivesAndFalseNegativesVcfWriter != null) {
truePositivesAndFalseNegativesVcfWriter.close();
}
if (filteredTrueNegativesAndFalseNegativesVcfWriter != null) {
filteredTrueNegativesAndFalseNegativesVcfWriter.close();
}
return "SUCCESS";
}
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk-protected by broadinstitute.
the class CountFalsePositives method onTraversalSuccess.
@Override
public Object onTraversalSuccess() {
final List<SimpleInterval> intervals = intervalArgumentCollection.getIntervals(getReferenceDictionary());
final long targetTerritory = intervals.stream().mapToInt(i -> i.size()).sum();
try (FalsePositiveTableWriter writer = new FalsePositiveTableWriter(outputFile)) {
FalsePositiveRecord falsePositiveRecord = new FalsePositiveRecord(id, snpFalsePositiveCount, indelFalsePositiveCount, targetTerritory);
writer.writeRecord(falsePositiveRecord);
} catch (IOException e) {
throw new UserException(String.format("Encountered an IO exception while opening %s", outputFile));
}
return "SUCCESS";
}
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class BayesianHetPulldownCalculator method getHetPulldown.
/**
* For a given normal or tumor BAM file, walks through the list of common SNPs,
* {@link BayesianHetPulldownCalculator#snpIntervals}), detects heterozygous sites, and returns
* a {@link Pulldown} containing detailed information on the called heterozygous SNP sites.
*
* The {@code hetCallingStrigency} parameters sets the threshold posterior for calling a Het SNP site:
*
* hetPosteriorThreshold = 1 - 10^{-hetCallingStringency}
* hetThresholdLogOdds = log(hetPosteriorThreshold/(1-hetPosteriorThreshold))
* = log(10^{hetCallingStringency} - 1)
*
* (see CNV-methods.pdf for details)
*
* @param bamFile sorted BAM file for sample
* @param hetCallingStringency strigency for calling a Het site
* @return Pulldown of heterozygous SNP sites in 1-based format
*/
public Pulldown getHetPulldown(final File bamFile, final double hetCallingStringency) {
/* log odds from stringency */
final double hetThresholdLogOdds = FastMath.log(FastMath.pow(10, hetCallingStringency) - 1);
try (final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(refFile).open(bamFile);
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refFile)) {
if (bamReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new UserException.BadInput("BAM file " + bamFile.toString() + " must be coordinate sorted.");
}
final Pulldown hetPulldown = new Pulldown(bamReader.getFileHeader());
final SamLocusIterator locusIterator = getSamLocusIteratorWithDefaultFilters(bamReader);
final int totalNumberOfSNPs = snpIntervals.size();
logger.info("Examining " + totalNumberOfSNPs + " sites in total...");
int locusCount = 0;
for (final SamLocusIterator.LocusInfo locus : locusIterator) {
if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
logger.info("Examined " + locusCount + " covered sites.");
}
locusCount++;
final int totalReadCount = locus.getRecordAndOffsets().size();
if (totalReadCount <= readDepthThreshold) {
continue;
}
final Nucleotide refBase = Nucleotide.valueOf(refWalker.get(locus.getSequenceIndex()).getBases()[locus.getPosition() - 1]);
if (!isProperBase(refBase)) {
logger.warn(String.format("The reference position at %d has an unknown base call (value: %s). Even though" + " this position is indicated to be a possible heterozygous SNP in the provided SNP interval list," + " no inference can be made. Continuing ...", locus.getPosition(), refBase.toString()));
continue;
}
final Map<Nucleotide, List<BaseQuality>> baseQualities = getPileupBaseQualities(locus);
final Nucleotide altBase = inferAltFromPileup(baseQualities, refBase);
/* calculate Het log odds */
final double hetLogLikelihood = getHetLogLikelihood(baseQualities, refBase, altBase);
final double homLogLikelihood = getHomLogLikelihood(baseQualities, refBase, altBase, DEFAULT_PRIOR_REF_HOM);
final double hetLogOdds = (hetLogLikelihood + FastMath.log(DEFAULT_PRIOR_HET)) - (homLogLikelihood + FastMath.log(1 - DEFAULT_PRIOR_HET));
if (hetLogOdds > hetThresholdLogOdds) {
hetPulldown.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), baseQualities.get(refBase).size(), baseQualities.get(altBase).size(), refBase, altBase, totalReadCount, hetLogOdds));
}
}
logger.info(locusCount + " covered sites out of " + totalNumberOfSNPs + " total sites were examined.");
return hetPulldown;
} catch (final IOException | SAMFormatException e) {
throw new UserException(e.getMessage());
}
}
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class BayesianHetPulldownCalculator method getTumorHetPulldownFromNormalPulldown.
/**
* Calculates the Het pulldown from a tumor file, given the tumor BAM file and the pulldown from a matched
* normal BAM file.
*
* Note: this method does not perform any statistical inference. The Het SNP sites are directly carried over
* from the matched normal pulldown. Here, we only collect statistics (ref count, alt count, read depth) and
* save to a pulldown. The verbosity level of the pulldown is INTERMEDIATE (see {@link AllelicCountTableColumn}).
*
* @param tumorBamFile the tumor BAM file
* @param normalHetPulldown the matched normal Het pulldown
* @return tumor Het pulldown
*/
public Pulldown getTumorHetPulldownFromNormalPulldown(final File tumorBamFile, final Pulldown normalHetPulldown) {
try (final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(refFile).open(tumorBamFile)) {
if (bamReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new UserException.BadInput("BAM file " + tumorBamFile.toString() + " must be coordinate sorted.");
}
final Pulldown tumorHetPulldown = new Pulldown(bamReader.getFileHeader());
final SamLocusIterator locusIterator = getSamLocusIteratorWithDefaultFilters(bamReader);
/* get a map of SimpleIntervals in the pulldown to their index */
final Map<SimpleInterval, Integer> normalPulldownIndexMap = normalHetPulldown.getSimpleIntervalToIndexMap();
final int totalNumberOfSNPs = snpIntervals.size();
logger.info("Examining " + totalNumberOfSNPs + " sites in total...");
int locusCount = 0;
for (final SamLocusIterator.LocusInfo locus : locusIterator) {
if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
logger.info("Examined " + locusCount + " covered sites.");
}
locusCount++;
final int totalReadCount = locus.getRecordAndOffsets().size();
if (totalReadCount <= readDepthThreshold) {
continue;
}
/* find the AllelicCount from the normal pulldown */
int indexInNormalPulldown;
try {
indexInNormalPulldown = normalPulldownIndexMap.get(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()));
} catch (NullPointerException e) {
throw new GATKException.ShouldNeverReachHereException("Can not find the required AllelicCount " + "object in the normal pulldown. Stopping.");
}
/* just count the alt and ref nucleotide and add to the tumor pulldown */
final Nucleotide.Counter baseCounts = getPileupBaseCounts(locus);
tumorHetPulldown.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), (int) baseCounts.get(normalHetPulldown.getCounts().get(indexInNormalPulldown).getRefNucleotide()), (int) baseCounts.get(normalHetPulldown.getCounts().get(indexInNormalPulldown).getAltNucleotide()), normalHetPulldown.getCounts().get(indexInNormalPulldown).getRefNucleotide(), normalHetPulldown.getCounts().get(indexInNormalPulldown).getAltNucleotide(), totalReadCount));
}
logger.info(locusCount + " covered sites out of " + totalNumberOfSNPs + " total sites were examined.");
return tumorHetPulldown;
} catch (final IOException | SAMFormatException e) {
throw new UserException(e.getMessage());
}
}
use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
the class HetPulldownCalculator method getHetPulldown.
/**
* For a normal or tumor sample, returns a data structure giving (intervals, reference counts, alternate counts),
* where intervals give positions of likely heterozygous SNP sites.
*
* <p>
* For a normal sample:
* <ul>
* The IntervalList snpIntervals gives common SNP sites in 1-based format.
* </ul>
* <ul>
* The p-value threshold must be specified for a two-sided binomial test,
* which is used to determine SNP sites from snpIntervals that are
* compatible with a heterozygous SNP, given the sample. Only these sites are output.
* </ul>
* </p>
* <p>
* For a tumor sample:
* <ul>
* The IntervalList snpIntervals gives heterozygous SNP sites likely to be present in the normal sample.
* This should be from {@link HetPulldownCalculator#getNormal} in 1-based format.
* Only these sites are output.
* </ul>
* </p>
* @param bamFile sorted BAM file for sample
* @param snpIntervals IntervalList of SNP sites
* @param sampleType flag indicating type of sample (SampleType.NORMAL or SampleType.TUMOR)
* (determines whether to perform binomial test)
* @param pvalThreshold p-value threshold for two-sided binomial test, used for normal sample
* @param minimumRawReads minimum number of total reads that must be present at a het site
* @return Pulldown of heterozygous SNP sites in 1-based format
*/
private Pulldown getHetPulldown(final File bamFile, final IntervalList snpIntervals, final SampleType sampleType, final double pvalThreshold, final int minimumRawReads) {
try (final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(refFile).open(bamFile);
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refFile)) {
if (bamReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new UserException.BadInput("BAM file " + bamFile.toString() + " must be coordinate sorted.");
}
final Pulldown hetPulldown = new Pulldown(bamReader.getFileHeader());
final int totalNumberOfSNPs = snpIntervals.size();
final SamLocusIterator locusIterator = new SamLocusIterator(bamReader, snpIntervals, totalNumberOfSNPs < MAX_INTERVALS_FOR_INDEX);
//set read and locus filters [note: read counts match IGV, but off by a few from pysam.mpileup]
final List<SamRecordFilter> samFilters = Arrays.asList(new NotPrimaryAlignmentFilter(), new DuplicateReadFilter());
locusIterator.setSamFilters(samFilters);
locusIterator.setEmitUncoveredLoci(false);
locusIterator.setIncludeNonPfReads(false);
locusIterator.setMappingQualityScoreCutoff(minMappingQuality);
locusIterator.setQualityScoreCutoff(minBaseQuality);
logger.info("Examining " + totalNumberOfSNPs + " sites in total...");
int locusCount = 0;
for (final SamLocusIterator.LocusInfo locus : locusIterator) {
if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
logger.info("Examined " + locusCount + " covered sites.");
}
locusCount++;
//include N, etc. reads here
final int totalReadCount = locus.getRecordAndOffsets().size();
if (totalReadCount < minimumRawReads) {
continue;
}
final Nucleotide.Counter baseCounts = getPileupBaseCounts(locus);
//only include total ACGT counts in binomial test (exclude N, etc.)
final int totalBaseCount = Arrays.stream(BASES).mapToInt(b -> (int) baseCounts.get(b)).sum();
if (sampleType == SampleType.NORMAL && !isPileupHetCompatible(baseCounts, totalBaseCount, pvalThreshold)) {
continue;
}
final Nucleotide refBase = Nucleotide.valueOf(refWalker.get(locus.getSequenceIndex()).getBases()[locus.getPosition() - 1]);
final int refReadCount = (int) baseCounts.get(refBase);
final int altReadCount = totalBaseCount - refReadCount;
hetPulldown.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), refReadCount, altReadCount));
}
logger.info(locusCount + " covered sites out of " + totalNumberOfSNPs + " total sites were examined.");
return hetPulldown;
} catch (final IOException | SAMFormatException e) {
throw new UserException(e.getMessage());
}
}
Aggregations