use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class TitanFileConverter method convertHetPulldownToTitanHetFile.
/**
* Create a het pulldown file that is compatible with TITAN.
*
* @param hetPulldown Readable file from one of the het pulldown tools
* @param outputFile Not {@code null}
*/
public static void convertHetPulldownToTitanHetFile(final File hetPulldown, final File outputFile) {
IOUtils.canReadFile(hetPulldown);
try {
final AllelicCountCollection acc = new AllelicCountCollection(hetPulldown);
final TitanAllelicCountWriter titanAllelicCountWriter = new TitanAllelicCountWriter(outputFile);
titanAllelicCountWriter.writeAllRecords(acc.getCounts());
titanAllelicCountWriter.close();
} catch (final IOException ioe) {
throw new UserException.BadInput("Bad output file: " + outputFile);
} catch (final IllegalArgumentException iae) {
logger.warn("Cannot convert pulldown with basic verbosity (i.e., missing ref/alt nucleotide information) to TITAN-compatible file. An empty file will be output.");
}
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class PlotACNVResults method validateContigs.
private void validateContigs(final Map<String, Integer> contigLengthMap) {
final Set<String> contigNames = contigLengthMap.keySet();
//validate contig names and lengths in SNP counts file
final AllelicCountCollection snpCounts = new AllelicCountCollection(snpCountsFile);
final Set<String> snpCountsContigNames = snpCounts.getCounts().stream().map(AllelicCount::getContig).collect(Collectors.toSet());
if (!contigNames.containsAll(snpCountsContigNames)) {
logger.warn("Contigs present in the SNP counts file are missing from the sequence dictionary and will not be plotted.");
}
final Map<String, Integer> snpCountsContigMaxPositionMap = snpCounts.getCounts().stream().filter(c -> contigNames.contains(c.getContig())).collect(Collectors.toMap(AllelicCount::getContig, AllelicCount::getEnd, Integer::max));
snpCountsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(snpCountsContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the SNP-counts file exceeds contig length in the sequence dictionary."));
//validate contig names and lengths in tangent file
final ReadCountCollection tangent;
try {
tangent = ReadCountCollectionUtils.parse(tangentFile);
} catch (final IOException e) {
throw new UserException.CouldNotReadInputFile(tangentFile, e);
}
final Set<String> tangentContigNames = tangent.targets().stream().map(Target::getContig).collect(Collectors.toSet());
if (!contigNames.containsAll(tangentContigNames)) {
logger.warn("Contigs present in the tangent-normalized coverage file are missing from the sequence dictionary and will not be plotted.");
}
final Map<String, Integer> tangentContigMaxPositionMap = tangent.targets().stream().filter(t -> contigNames.contains(t.getContig())).collect(Collectors.toMap(Target::getContig, Target::getEnd, Integer::max));
tangentContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(tangentContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the tangent-normalized coverage file exceeds contig length in the sequence dictionary."));
//validate contig names and lengths in segments file
final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
final Set<String> segmentsContigNames = segments.stream().map(ACNVModeledSegment::getContig).collect(Collectors.toSet());
if (!contigNames.containsAll(segmentsContigNames)) {
logger.warn("Contigs present in the segments file are missing from the sequence dictionary and will not be plotted.");
}
final Map<String, Integer> segmentsContigMaxPositionMap = segments.stream().filter(s -> contigNames.contains(s.getContig())).collect(Collectors.toMap(ACNVModeledSegment::getContig, ACNVModeledSegment::getEnd, Integer::max));
segmentsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(segmentsContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the segments file exceeds contig length in the sequence dictionary."));
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class AllelicPanelOfNormalsCreator method create.
/**
* Creates an {@link AllelicPanelOfNormals} given a site-frequency threshold;
* sites appearing in strictly less than this fraction of samples will not be included in the panel of normals.
* @param siteFrequencyThreshold site-frequency threshold
* @return an {@link AllelicPanelOfNormals} containing sites
* above the site-frequency threshold
*/
public AllelicPanelOfNormals create(final double siteFrequencyThreshold) {
logger.info("Creating allelic panel of normals...");
//used to filter on frequency
final Map<SimpleInterval, MutableInt> numberOfSamplesMap = new HashMap<>();
//store only the total counts (smaller memory footprint)
final Map<SimpleInterval, AllelicCount> totalCountsMap = new HashMap<>();
int pulldownFileCounter = 1;
final int totalNumberOfSamples = pulldownFiles.size();
for (final File pulldownFile : pulldownFiles) {
logger.info("Processing pulldown file " + pulldownFileCounter++ + "/" + totalNumberOfSamples + " (" + pulldownFile + ")...");
final AllelicCountCollection pulldownCounts = new AllelicCountCollection(pulldownFile);
for (final AllelicCount count : pulldownCounts.getCounts()) {
//update the sum of ref and alt counts at each site
final SimpleInterval site = count.getInterval();
final AllelicCount currentCountAtSite = totalCountsMap.getOrDefault(site, new AllelicCount(site, 0, 0));
final AllelicCount updatedCountAtSite = new AllelicCount(site, currentCountAtSite.getRefReadCount() + count.getRefReadCount(), currentCountAtSite.getAltReadCount() + count.getAltReadCount());
totalCountsMap.put(site, updatedCountAtSite);
//update the number of samples seen possessing each site
final MutableInt numberOfSamplesAtSite = numberOfSamplesMap.get(site);
if (numberOfSamplesAtSite == null) {
numberOfSamplesMap.put(site, new MutableInt(1));
} else {
numberOfSamplesAtSite.increment();
}
}
}
logger.info("Total number of unique sites present in samples: " + totalCountsMap.size());
//filter out sites that appear at a frequency strictly less than the provided threshold
final AllelicCountCollection totalCounts = new AllelicCountCollection();
numberOfSamplesMap.entrySet().stream().filter(e -> e.getValue().doubleValue() / totalNumberOfSamples >= siteFrequencyThreshold).map(e -> totalCountsMap.get(e.getKey())).forEach(totalCounts::add);
logger.info(String.format("Number of unique sites present in samples above site frequency = %4.3f: %d", siteFrequencyThreshold, totalCounts.getCounts().size()));
return new AllelicPanelOfNormals(totalCounts);
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class AlleleFractionSegmenterUnitTest method generateCounts.
//visible for testing joint segmentation
protected static AllelicCountCollection generateCounts(final List<Double> minorAlleleFractionSequence, final List<SimpleInterval> positions, final RandomGenerator rng, final AlleleFractionGlobalParameters trueParams) {
//translate to ApacheCommons' parametrization of the gamma distribution
final GammaDistribution biasGenerator = getGammaDistribution(trueParams, rng);
final double outlierProbability = trueParams.getOutlierProbability();
final AllelicCountCollection counts = new AllelicCountCollection();
for (int n = 0; n < minorAlleleFractionSequence.size(); n++) {
counts.add(generateAllelicCount(minorAlleleFractionSequence.get(n), positions.get(n), rng, biasGenerator, outlierProbability));
}
return counts;
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.
the class AlleleFractionSegmenterUnitTest method testChromosomesOnDifferentSegments.
@Test
public void testChromosomesOnDifferentSegments() {
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
final double[] trueMinorAlleleFractions = new double[] { 0.12, 0.32, 0.5 };
final double trueMemoryLength = 1e5;
final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
// randomly set positions
final int chainLength = 100;
final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr2", chainLength, rng, trueMemoryLength / 4));
positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr3", chainLength, rng, trueMemoryLength / 4));
//fix everything to the same state 2
final int trueState = 2;
final List<Double> minorAlleleFractionSequence = Collections.nCopies(positions.size(), trueMinorAlleleFractions[trueState]);
final AllelicCountCollection counts = generateCounts(minorAlleleFractionSequence, positions, rng, trueParams);
final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
final List<ModeledSegment> segments = segmenter.getModeledSegments();
//check that each chromosome has at least one segment
final int numDifferentContigsInSegments = (int) segments.stream().map(ModeledSegment::getContig).distinct().count();
Assert.assertEquals(numDifferentContigsInSegments, 3);
}
Aggregations