Search in sources :

Example 11 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.

the class TitanFileConverter method convertHetPulldownToTitanHetFile.

/**
     * Create a het pulldown file that is compatible with TITAN.
     *
     * @param hetPulldown Readable file from one of the het pulldown tools
     * @param outputFile Not {@code null}
     */
public static void convertHetPulldownToTitanHetFile(final File hetPulldown, final File outputFile) {
    IOUtils.canReadFile(hetPulldown);
    try {
        final AllelicCountCollection acc = new AllelicCountCollection(hetPulldown);
        final TitanAllelicCountWriter titanAllelicCountWriter = new TitanAllelicCountWriter(outputFile);
        titanAllelicCountWriter.writeAllRecords(acc.getCounts());
        titanAllelicCountWriter.close();
    } catch (final IOException ioe) {
        throw new UserException.BadInput("Bad output file: " + outputFile);
    } catch (final IllegalArgumentException iae) {
        logger.warn("Cannot convert pulldown with basic verbosity (i.e., missing ref/alt nucleotide information) to TITAN-compatible file.  An empty file will be output.");
    }
}
Also used : AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 12 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.

the class PlotACNVResults method validateContigs.

private void validateContigs(final Map<String, Integer> contigLengthMap) {
    final Set<String> contigNames = contigLengthMap.keySet();
    //validate contig names and lengths in SNP counts file
    final AllelicCountCollection snpCounts = new AllelicCountCollection(snpCountsFile);
    final Set<String> snpCountsContigNames = snpCounts.getCounts().stream().map(AllelicCount::getContig).collect(Collectors.toSet());
    if (!contigNames.containsAll(snpCountsContigNames)) {
        logger.warn("Contigs present in the SNP counts file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> snpCountsContigMaxPositionMap = snpCounts.getCounts().stream().filter(c -> contigNames.contains(c.getContig())).collect(Collectors.toMap(AllelicCount::getContig, AllelicCount::getEnd, Integer::max));
    snpCountsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(snpCountsContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the SNP-counts file exceeds contig length in the sequence dictionary."));
    //validate contig names and lengths in tangent file
    final ReadCountCollection tangent;
    try {
        tangent = ReadCountCollectionUtils.parse(tangentFile);
    } catch (final IOException e) {
        throw new UserException.CouldNotReadInputFile(tangentFile, e);
    }
    final Set<String> tangentContigNames = tangent.targets().stream().map(Target::getContig).collect(Collectors.toSet());
    if (!contigNames.containsAll(tangentContigNames)) {
        logger.warn("Contigs present in the tangent-normalized coverage file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> tangentContigMaxPositionMap = tangent.targets().stream().filter(t -> contigNames.contains(t.getContig())).collect(Collectors.toMap(Target::getContig, Target::getEnd, Integer::max));
    tangentContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(tangentContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the tangent-normalized coverage file exceeds contig length in the sequence dictionary."));
    //validate contig names and lengths in segments file
    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final Set<String> segmentsContigNames = segments.stream().map(ACNVModeledSegment::getContig).collect(Collectors.toSet());
    if (!contigNames.containsAll(segmentsContigNames)) {
        logger.warn("Contigs present in the segments file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> segmentsContigMaxPositionMap = segments.stream().filter(s -> contigNames.contains(s.getContig())).collect(Collectors.toMap(ACNVModeledSegment::getContig, ACNVModeledSegment::getEnd, Integer::max));
    segmentsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(segmentsContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the segments file exceeds contig length in the sequence dictionary."));
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) Resource(org.broadinstitute.hellbender.utils.io.Resource) java.util(java.util) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceUtils(org.broadinstitute.hellbender.utils.reference.ReferenceUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 13 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.

the class AllelicPanelOfNormalsCreator method create.

/**
     * Creates an {@link AllelicPanelOfNormals} given a site-frequency threshold;
     * sites appearing in strictly less than this fraction of samples will not be included in the panel of normals.
     * @param siteFrequencyThreshold    site-frequency threshold
     * @return                          an {@link AllelicPanelOfNormals} containing sites
     *                                  above the site-frequency threshold
     */
public AllelicPanelOfNormals create(final double siteFrequencyThreshold) {
    logger.info("Creating allelic panel of normals...");
    //used to filter on frequency
    final Map<SimpleInterval, MutableInt> numberOfSamplesMap = new HashMap<>();
    //store only the total counts (smaller memory footprint)
    final Map<SimpleInterval, AllelicCount> totalCountsMap = new HashMap<>();
    int pulldownFileCounter = 1;
    final int totalNumberOfSamples = pulldownFiles.size();
    for (final File pulldownFile : pulldownFiles) {
        logger.info("Processing pulldown file " + pulldownFileCounter++ + "/" + totalNumberOfSamples + " (" + pulldownFile + ")...");
        final AllelicCountCollection pulldownCounts = new AllelicCountCollection(pulldownFile);
        for (final AllelicCount count : pulldownCounts.getCounts()) {
            //update the sum of ref and alt counts at each site
            final SimpleInterval site = count.getInterval();
            final AllelicCount currentCountAtSite = totalCountsMap.getOrDefault(site, new AllelicCount(site, 0, 0));
            final AllelicCount updatedCountAtSite = new AllelicCount(site, currentCountAtSite.getRefReadCount() + count.getRefReadCount(), currentCountAtSite.getAltReadCount() + count.getAltReadCount());
            totalCountsMap.put(site, updatedCountAtSite);
            //update the number of samples seen possessing each site
            final MutableInt numberOfSamplesAtSite = numberOfSamplesMap.get(site);
            if (numberOfSamplesAtSite == null) {
                numberOfSamplesMap.put(site, new MutableInt(1));
            } else {
                numberOfSamplesAtSite.increment();
            }
        }
    }
    logger.info("Total number of unique sites present in samples: " + totalCountsMap.size());
    //filter out sites that appear at a frequency strictly less than the provided threshold
    final AllelicCountCollection totalCounts = new AllelicCountCollection();
    numberOfSamplesMap.entrySet().stream().filter(e -> e.getValue().doubleValue() / totalNumberOfSamples >= siteFrequencyThreshold).map(e -> totalCountsMap.get(e.getKey())).forEach(totalCounts::add);
    logger.info(String.format("Number of unique sites present in samples above site frequency = %4.3f: %d", siteFrequencyThreshold, totalCounts.getCounts().size()));
    return new AllelicPanelOfNormals(totalCounts);
}
Also used : MutableInt(org.apache.commons.lang3.mutable.MutableInt) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) HashMap(java.util.HashMap) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) ArrayList(java.util.ArrayList) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Map(java.util.Map) Utils(org.broadinstitute.hellbender.utils.Utils) LogManager(org.apache.logging.log4j.LogManager) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) HashMap(java.util.HashMap) MutableInt(org.apache.commons.lang3.mutable.MutableInt) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Example 14 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.

the class AlleleFractionSegmenterUnitTest method generateCounts.

//visible for testing joint segmentation
protected static AllelicCountCollection generateCounts(final List<Double> minorAlleleFractionSequence, final List<SimpleInterval> positions, final RandomGenerator rng, final AlleleFractionGlobalParameters trueParams) {
    //translate to ApacheCommons' parametrization of the gamma distribution
    final GammaDistribution biasGenerator = getGammaDistribution(trueParams, rng);
    final double outlierProbability = trueParams.getOutlierProbability();
    final AllelicCountCollection counts = new AllelicCountCollection();
    for (int n = 0; n < minorAlleleFractionSequence.size(); n++) {
        counts.add(generateAllelicCount(minorAlleleFractionSequence.get(n), positions.get(n), rng, biasGenerator, outlierProbability));
    }
    return counts;
}
Also used : AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) GammaDistribution(org.apache.commons.math3.distribution.GammaDistribution)

Example 15 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk by broadinstitute.

the class AlleleFractionSegmenterUnitTest method testChromosomesOnDifferentSegments.

@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] trueMinorAlleleFractions = new double[] { 0.12, 0.32, 0.5 };
    final double trueMemoryLength = 1e5;
    final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
    // randomly set positions
    final int chainLength = 100;
    final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
    positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr2", chainLength, rng, trueMemoryLength / 4));
    positions.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr3", chainLength, rng, trueMemoryLength / 4));
    //fix everything to the same state 2
    final int trueState = 2;
    final List<Double> minorAlleleFractionSequence = Collections.nCopies(positions.size(), trueMinorAlleleFractions[trueState]);
    final AllelicCountCollection counts = generateCounts(minorAlleleFractionSequence, positions, rng, trueParams);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    //check that each chromosome has at least one segment
    final int numDifferentContigsInSegments = (int) segments.stream().map(ModeledSegment::getContig).distinct().count();
    Assert.assertEquals(numDifferentContigsInSegments, 3);
}
Also used : AlleleFractionGlobalParameters(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Random(java.util.Random) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Aggregations

AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)30 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)16 AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals)14 Test (org.testng.annotations.Test)14 File (java.io.File)8 Collectors (java.util.stream.Collectors)8 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)8 IOException (java.io.IOException)6 List (java.util.List)6 GammaDistribution (org.apache.commons.math3.distribution.GammaDistribution)6 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)6 UserException (org.broadinstitute.hellbender.exceptions.UserException)6 org.broadinstitute.hellbender.tools.exome (org.broadinstitute.hellbender.tools.exome)6 ModeledSegment (org.broadinstitute.hellbender.tools.exome.ModeledSegment)6 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)6 AlleleFractionGlobalParameters (org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters)5 java.util (java.util)4 Arrays (java.util.Arrays)4 Random (java.util.Random)4 IntStream (java.util.stream.IntStream)4