
Example 6 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.

the class TitanFileConverter method convertHetPulldownToTitanHetFile.

/**
 * Create a het pulldown file that is compatible with TITAN.
 *
 * @param hetPulldown readable file from one of the het pulldown tools
 * @param outputFile  output file for the TITAN-compatible het file; must not be {@code null}
 */
public static void convertHetPulldownToTitanHetFile(final File hetPulldown, final File outputFile) {
    IOUtils.canReadFile(hetPulldown);
    try {
        final AllelicCountCollection acc = new AllelicCountCollection(hetPulldown);
        final TitanAllelicCountWriter titanAllelicCountWriter = new TitanAllelicCountWriter(outputFile);
        titanAllelicCountWriter.writeAllRecords(acc.getCounts());
        titanAllelicCountWriter.close();
    } catch (final IOException ioe) {
        throw new UserException.BadInput("Bad output file: " + outputFile);
    } catch (final IllegalArgumentException iae) {
        logger.warn("Cannot convert pulldown with basic verbosity (i.e., missing ref/alt nucleotide information) to TITAN-compatible file.  An empty file will be output.");
    }
}
Also used : AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)
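
A minimal caller sketch for the converter above. The file paths are hypothetical placeholders, and the import of TitanFileConverter itself is omitted because its package is not shown in this example.

import java.io.File;

public class ConvertHetPulldownExample {
    public static void main(final String[] args) {
        // Hypothetical paths; substitute the real het pulldown output and the desired TITAN file.
        final File hetPulldown = new File("sample.hets.tsv");
        final File titanHetFile = new File("sample.titan.het.tsv");
        // The converter validates the input, reads it as an AllelicCountCollection,
        // and writes the TITAN-compatible het file, as shown in the method above.
        TitanFileConverter.convertHetPulldownToTitanHetFile(hetPulldown, titanHetFile);
    }
}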

Example 7 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.

the class PlotACNVResults method validateContigs.

private void validateContigs(final Map<String, Integer> contigLengthMap) {
    final Set<String> contigNames = contigLengthMap.keySet();
    //validate contig names and lengths in SNP counts file
    final AllelicCountCollection snpCounts = new AllelicCountCollection(snpCountsFile);
    final Set<String> snpCountsContigNames = snpCounts.getCounts().stream().map(AllelicCount::getContig).collect(Collectors.toSet());
    if (!contigNames.containsAll(snpCountsContigNames)) {
        logger.warn("Contigs present in the SNP counts file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> snpCountsContigMaxPositionMap = snpCounts.getCounts().stream().filter(c -> contigNames.contains(c.getContig())).collect(Collectors.toMap(AllelicCount::getContig, AllelicCount::getEnd, Integer::max));
    snpCountsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(snpCountsContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the SNP-counts file exceeds contig length in the sequence dictionary."));
    //validate contig names and lengths in tangent file
    final ReadCountCollection tangent;
    try {
        tangent = ReadCountCollectionUtils.parse(tangentFile);
    } catch (final IOException e) {
        throw new UserException.CouldNotReadInputFile(tangentFile, e);
    }
    final Set<String> tangentContigNames = tangent.targets().stream().map(Target::getContig).collect(Collectors.toSet());
    if (!contigNames.containsAll(tangentContigNames)) {
        logger.warn("Contigs present in the tangent-normalized coverage file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> tangentContigMaxPositionMap = tangent.targets().stream().filter(t -> contigNames.contains(t.getContig())).collect(Collectors.toMap(Target::getContig, Target::getEnd, Integer::max));
    tangentContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(tangentContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the tangent-normalized coverage file exceeds contig length in the sequence dictionary."));
    //validate contig names and lengths in segments file
    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final Set<String> segmentsContigNames = segments.stream().map(ACNVModeledSegment::getContig).collect(Collectors.toSet());
    if (!contigNames.containsAll(segmentsContigNames)) {
        logger.warn("Contigs present in the segments file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> segmentsContigMaxPositionMap = segments.stream().filter(s -> contigNames.contains(s.getContig())).collect(Collectors.toMap(ACNVModeledSegment::getContig, ACNVModeledSegment::getEnd, Integer::max));
    segmentsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(segmentsContigMaxPositionMap.get(c) <= contigLengthMap.get(c), "Position present in the segments file exceeds contig length in the sequence dictionary."));
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) Resource(org.broadinstitute.hellbender.utils.io.Resource) java.util(java.util) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceUtils(org.broadinstitute.hellbender.utils.reference.ReferenceUtils) Utils(org.broadinstitute.hellbender.utils.Utils) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor)
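
The same containment-and-length check, sketched in isolation with plain collections. Everything below is hypothetical and uses no GATK classes; it only mirrors the validation pattern of the method above.

import java.util.Map;
import java.util.Set;

final class ContigValidationSketch {
    // Mirrors the pattern above: warn when a file mentions contigs absent from the
    // sequence dictionary, and fail if any position exceeds the dictionary's contig length.
    static void check(final Map<String, Integer> contigLengthMap,
                      final Set<String> contigsInFile,
                      final Map<String, Integer> maxPositionByContig) {
        if (!contigLengthMap.keySet().containsAll(contigsInFile)) {
            System.err.println("Contigs present in the file are missing from the sequence dictionary.");
        }
        maxPositionByContig.forEach((contig, maxPosition) -> {
            final Integer contigLength = contigLengthMap.get(contig);
            if (contigLength != null && maxPosition > contigLength) {
                throw new IllegalArgumentException("Position in the file exceeds contig length for " + contig);
            }
        });
    }
}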

Example 8 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.

the class PerformAlleleFractionSegmentation method doWork.

@Override
public Object doWork() {
    final String sampleName = FilenameUtils.getBaseName(snpCountsFile.getAbsolutePath());
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    final AllelicCountCollection acc = new AllelicCountCollection(snpCountsFile);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(initialNumStates, acc, allelicPoN);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, true);
    return "SUCCESS";
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment)
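
A standalone sketch that strings together only the calls that appear in doWork() above. The file paths, sample name, and number of initial states are placeholders, and the package locations of SegmentUtils and AlleleFractionSegmenter are assumed rather than shown in this example.

import java.io.File;
import java.util.List;
import org.broadinstitute.hellbender.tools.exome.ModeledSegment;
import org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection;
import org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals;
// Assumed packages (not shown in this example):
// import org.broadinstitute.hellbender.tools.exome.SegmentUtils;
// import org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionSegmenter;

public class AlleleFractionSegmentationSketch {
    public static void main(final String[] args) {
        // Placeholder inputs; in the tool these come from command-line arguments.
        final File snpCountsFile = new File("tumor.hets.tsv");
        final File outputSegmentsFile = new File("tumor.af.seg");
        final int initialNumStates = 10;

        final AllelicCountCollection counts = new AllelicCountCollection(snpCountsFile);
        // No allelic panel of normals here, so fall back to the empty PoN, as doWork() does.
        final AlleleFractionSegmenter segmenter =
                new AlleleFractionSegmenter(initialNumStates, counts, AllelicPanelOfNormals.EMPTY_PON);
        final List<ModeledSegment> segments = segmenter.getModeledSegments();
        SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, "tumor-sample", true);
    }
}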

Example 9 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.

the class AlleleFractionModellerUnitTest method dataBiasCorrection.

@DataProvider(name = "biasCorrection")
public Object[][] dataBiasCorrection() {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final AllelicCountCollection sampleNormal = new AllelicCountCollection(SAMPLE_NORMAL_FILE);
    final AllelicCountCollection sampleWithBadSNPs = new AllelicCountCollection(SAMPLE_WITH_BAD_SNPS_FILE);
    final AllelicCountCollection sampleWithEvent = new AllelicCountCollection(SAMPLE_WITH_EVENT_FILE);
    final AllelicPanelOfNormals allelicPoNNormal = new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_NORMAL_COUNTS_FILE));
    final AllelicPanelOfNormals allelicPoNWithBadSNPs = new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_WITH_BAD_SNPS_COUNTS_FILE));
    final double minorFractionExpectedInMiddleSegmentNormal = 0.5;
    final double minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN = 0.4;
    final double minorFractionExpectedInMiddleSegmentWithEvent = 0.33;
    return new Object[][] {
        { sampleNormal, allelicPoNNormal, minorFractionExpectedInMiddleSegmentNormal },
        { sampleWithBadSNPs, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN },
        { sampleWithEvent, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithEvent },
        { sampleWithBadSNPs, allelicPoNWithBadSNPs, minorFractionExpectedInMiddleSegmentNormal }
    };
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) DataProvider(org.testng.annotations.DataProvider)
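
A hypothetical consumer of this provider, sketched only to show how TestNG wires each Object[] row into test-method parameters; the real unit test's body and assertions are not reproduced here. In practice the test method lives in the same class as the provider (or references it via dataProviderClass).

import org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection;
import org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals;
import org.testng.annotations.Test;

public class BiasCorrectionConsumerSketch {
    // TestNG passes each row of the provider's Object[][] as the parameters of a
    // test method that references the provider by name.
    @Test(dataProvider = "biasCorrection")
    public void testBiasCorrection(final AllelicCountCollection sample,
                                   final AllelicPanelOfNormals allelicPoN,
                                   final double expectedMinorFractionInMiddleSegment) {
        // ... fit the allele-fraction model to (sample, allelicPoN) and assert that the
        //     minor-allele fraction of the middle segment is close to the expected value.
    }
}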

Example 10 with AllelicCountCollection

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.

the class SNPSegmenterUnitTest method testAllelicFractionBasedSegmentation.

/**
 * Tests that segments are correctly determined using allelic counts from SNP sites.
 * Segment-mean and target-number columns from the expected segment file are not checked.
 */
@Test
public void testAllelicFractionBasedSegmentation() {
    final String sampleName = "test";
    final File snpFile = new File(TEST_SUB_DIR, "snps-simplified-for-allelic-fraction-segmentation.tsv");
    final List<AllelicCount> snpCounts = new AllelicCountCollection(snpFile).getCounts();
    final TargetCollection<AllelicCount> snps = new HashedListTargetCollection<>(snpCounts);
    final File resultFile = createTempFile("snp-segmenter-test-result", ".seg");
    SNPSegmenter.writeSegmentFile(snps, sampleName, resultFile);
    final File expectedFile = new File(TEST_SUB_DIR, "snp-segmenter-test-expected.seg");
    Assert.assertTrue(resultFile.exists(), "SNPSegmenterTest output was not written to temp file: " + resultFile);
    final List<SimpleInterval> result = SegmentUtils.readIntervalsFromSegmentFile(resultFile);
    final List<SimpleInterval> expected = SegmentUtils.readIntervalsFromSegmentFile(expectedFile);
    Assert.assertEquals(result, expected);
}
Also used : AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
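
The same segmentation flow outside the test harness, using only the calls exercised above with hypothetical file paths. Imports for TargetCollection, HashedListTargetCollection, SNPSegmenter, and SegmentUtils are omitted; their packages are assumed to match the exome package used in the earlier examples.

import java.io.File;
import java.util.List;
import org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount;
import org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection;
import org.broadinstitute.hellbender.utils.SimpleInterval;

public class SnpSegmentationSketch {
    public static void main(final String[] args) {
        // Hypothetical paths; the test above reads checked-in resources instead.
        final File snpFile = new File("sample.snps.tsv");
        final File segmentFile = new File("sample.snp-segments.seg");

        final List<AllelicCount> snpCounts = new AllelicCountCollection(snpFile).getCounts();
        final TargetCollection<AllelicCount> snps = new HashedListTargetCollection<>(snpCounts);
        SNPSegmenter.writeSegmentFile(snps, "sample", segmentFile);

        // Read the resulting segments back as intervals, as the test does for its comparison.
        final List<SimpleInterval> segments = SegmentUtils.readIntervalsFromSegmentFile(segmentFile);
        segments.forEach(System.out::println);
    }
}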

Aggregations

AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) 30
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 16
AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) 14
Test (org.testng.annotations.Test) 14
File (java.io.File) 8
Collectors (java.util.stream.Collectors) 8
AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) 8
IOException (java.io.IOException) 6
List (java.util.List) 6
GammaDistribution (org.apache.commons.math3.distribution.GammaDistribution) 6
RandomGenerator (org.apache.commons.math3.random.RandomGenerator) 6
UserException (org.broadinstitute.hellbender.exceptions.UserException) 6
org.broadinstitute.hellbender.tools.exome (org.broadinstitute.hellbender.tools.exome) 6
ModeledSegment (org.broadinstitute.hellbender.tools.exome.ModeledSegment) 6
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 6
AlleleFractionGlobalParameters (org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters) 5
java.util (java.util) 4
Arrays (java.util.Arrays) 4
Random (java.util.Random) 4
IntStream (java.util.stream.IntStream) 4