Search in sources :

Example 41 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.

the class SparkGenomeReadCountsIntegrationTest method testSparkGenomeReadCountsBigBins.

/**
 * Runs the tool with a bin-size argument of 16000 and verifies that both the coverage
 * file and its companion targets file are produced, non-empty and mutually consistent.
 */
@Test
public void testSparkGenomeReadCountsBigBins() throws IOException {
    final File coverageFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String[] commandLine = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, coverageFile.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "16000"
    };
    runCommandLine(commandLine);

    // The coverage output must exist and contain data before it is parsed.
    Assert.assertTrue(coverageFile.exists());
    Assert.assertTrue(coverageFile.length() > 0);
    final ReadCountCollection readCounts = ReadCountCollectionUtils.parse(coverageFile);

    // The targets file is expected next to the coverage file, with a ".targets.tsv" suffix.
    final File targetTable = new File(coverageFile.getAbsolutePath() + ".targets.tsv");
    Assert.assertTrue(targetTable.exists());
    Assert.assertTrue(targetTable.length() > 0);

    final List<Target> parsedTargets = TargetTableReader.readTargetFile(targetTable);
    Assert.assertEquals(parsedTargets.size(), 4);
    Assert.assertEquals(parsedTargets.get(1).getEnd(), 16000);
    Assert.assertEquals(parsedTargets.get(2).getName(), "target_3_1_16000");

    // Coverage and targets file must describe the same number of targets.
    Assert.assertEquals(readCounts.targets().size(), parsedTargets.size());
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) File(java.io.File) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 42 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.

the class CopyRatioSegmenterUnitTest method testChromosomesOnDifferentSegments.

/**
 * Generates data from a single hidden state across three contigs and checks that the
 * segmenter reports segments on three distinct contigs.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] trueLog2CopyRatios = new double[] { -2.0, 0.0, 1.7 };
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;

    // Random positions: the same number of positions on each of the three contigs.
    final int chainLength = 100;
    final double positionScale = trueMemoryLength / 4;
    final List<SimpleInterval> intervals = new ArrayList<>();
    for (final String contig : new String[] { "chr1", "chr2", "chr3" }) {
        intervals.addAll(randomPositions(contig, chainLength, rng, positionScale));
    }

    // Every position is generated from the same state (index 2).
    final int trueState = 2;
    final double stateLog2CopyRatio = trueLog2CopyRatios[trueState];
    final double[] observations = new double[intervals.size()];
    for (int i = 0; i < observations.length; i++) {
        observations[i] = generateData(trueStandardDeviation, stateLog2CopyRatio, rng);
    }

    final List<Target> targets = intervals.stream().map(Target::new).collect(Collectors.toList());
    final Array2DRowRealMatrix counts = new Array2DRowRealMatrix(observations);
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"), counts);

    final List<ModeledSegment> segments = new CopyRatioSegmenter(10, rcc).getModeledSegments();

    // Each of the three contigs must be represented by at least one segment.
    final int contigsWithSegments = segments.stream()
            .map(ModeledSegment::getContig)
            .collect(Collectors.toSet())
            .size();
    Assert.assertEquals(contigsWithSegments, 3);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) java.util(java.util) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Target(org.broadinstitute.hellbender.tools.exome.Target) Test(org.testng.annotations.Test) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Target(org.broadinstitute.hellbender.tools.exome.Target) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 43 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.

the class SparkGenomeReadCountsIntegrationTest method testSparkGenomeReadCounts.

/**
 * Runs the tool with a bin-size argument of 10000 and verifies that both the coverage
 * file and its companion targets file are produced, non-empty and mutually consistent.
 */
@Test
public void testSparkGenomeReadCounts() throws IOException {
    final File coverageFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String[] commandLine = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, coverageFile.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "10000"
    };
    runCommandLine(commandLine);

    // The coverage output must exist and contain data before it is parsed.
    Assert.assertTrue(coverageFile.exists());
    Assert.assertTrue(coverageFile.length() > 0);
    final ReadCountCollection readCounts = ReadCountCollectionUtils.parse(coverageFile);

    // The targets file is expected next to the coverage file, with a ".targets.tsv" suffix.
    final File targetTable = new File(coverageFile.getAbsolutePath() + ".targets.tsv");
    Assert.assertTrue(targetTable.exists());
    Assert.assertTrue(targetTable.length() > 0);

    final List<Target> parsedTargets = TargetTableReader.readTargetFile(targetTable);
    Assert.assertEquals(parsedTargets.size(), 8);
    Assert.assertEquals(parsedTargets.get(1).getEnd(), 16000);
    Assert.assertEquals(parsedTargets.get(5).getName(), "target_3_10001_16000");

    // Coverage and targets file must describe the same number of targets.
    Assert.assertEquals(readCounts.targets().size(), parsedTargets.size());
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) File(java.io.File) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 44 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.

the class SparkGenomeReadCountsIntegrationTest method testSparkGenomeReadCountsInterval.

/**
 * Runs the tool restricted with {@code -L 1} and verifies that neither coverage output
 * contains records on contigs "2" or "3", and that every written target is on contig "1".
 */
@Test
public void testSparkGenomeReadCountsInterval() {
    final File coverageFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String[] commandLine = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, coverageFile.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "10000",
            "-L", "1"
    };
    runCommandLine(commandLine);

    // Main coverage output: no record may sit on contig 2 or 3.
    final ReadCountCollection proportional = loadReadCountCollection(coverageFile);
    Assert.assertTrue(proportional.records().stream()
            .allMatch(rec -> !rec.getContig().equals("2") && !rec.getContig().equals("3")));

    // Raw coverage output: same restriction.
    final File rawCoverageFile = new File(coverageFile.getAbsolutePath() + SparkGenomeReadCounts.RAW_COV_OUTPUT_EXTENSION);
    final ReadCountCollection raw = loadReadCountCollection(rawCoverageFile);
    Assert.assertTrue(raw.records().stream()
            .allMatch(rec -> !rec.getContig().equals("2") && !rec.getContig().equals("3")));

    // Targets file: everything must be on contig 1.
    final File targetTable = new File(coverageFile.getAbsolutePath() + ".targets.tsv");
    final List<Target> parsedTargets = TargetTableReader.readTargetFile(targetTable);
    Assert.assertTrue(parsedTargets.stream().allMatch(target -> target.getContig().equals("1")));
}
Also used : List(java.util.List) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Assert(org.testng.Assert) Target(org.broadinstitute.hellbender.tools.exome.Target) TargetTableReader(org.broadinstitute.hellbender.tools.exome.TargetTableReader) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) IOException(java.io.IOException) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) File(java.io.File) Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) File(java.io.File) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 45 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.

the class TitanFileConverter method convertCRToTitanCovFile.

/**
 * Create a target file that is compatible with TITAN.
 *
 * <p>The read counts are parsed from {@code tnFile} and all of their records are written
 * to {@code outputFile} through a {@link TitanCopyRatioEstimateWriter}. The writer is
 * closed even when writing fails part-way.</p>
 *
 * @param tnFile Readable file from {@link org.broadinstitute.hellbender.tools.exome.NormalizeSomaticReadCounts}
 * @param outputFile Not {@code null}
 * @throws UserException.BadInput if an {@link IOException} occurs while parsing {@code tnFile},
 *         or while opening, writing or closing {@code outputFile}; the original exception is
 *         attached as the cause.
 */
public static void convertCRToTitanCovFile(final File tnFile, final File outputFile) {
    IOUtils.canReadFile(tnFile);
    try {
        // Parse the input before opening the writer, so the output is only touched once the input has been read.
        final ReadCountCollection rcc = ReadCountCollectionUtils.parse(tnFile);
        // try-with-resources guarantees the writer is closed if writeAllRecords throws.
        try (final TitanCopyRatioEstimateWriter titanCopyRatioEstimateWriter = new TitanCopyRatioEstimateWriter(outputFile)) {
            titanCopyRatioEstimateWriter.writeAllRecords(rcc.records());
        }
    } catch (final IOException ioe) {
        // NOTE(review): this also covers failures parsing tnFile, although the message names the output file.
        final UserException.BadInput failure = new UserException.BadInput("Bad output file: " + outputFile);
        // Attach the underlying I/O failure so it is not lost from the stack trace.
        failure.initCause(ioe);
        throw failure;
    }
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Aggregations

ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)74 Test (org.testng.annotations.Test)48 Target (org.broadinstitute.hellbender.tools.exome.Target)40 File (java.io.File)30 IOException (java.io.IOException)30 Collectors (java.util.stream.Collectors)30 List (java.util.List)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 IntStream (java.util.stream.IntStream)26 Assert (org.testng.Assert)26 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)24 RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 Median (org.apache.commons.math3.stat.descriptive.rank.Median)22 ArrayList (java.util.ArrayList)20 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)20 Logger (org.apache.logging.log4j.Logger)20 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)20 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)18 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)18 DoubleStream (java.util.stream.DoubleStream)16