Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
Class SparkGenomeReadCountsIntegrationTest, method testSparkGenomeReadCountsBigBins.
/**
 * Runs the tool with the value "16000" for the {@code BINSIZE_SHORT_NAME} argument, then checks
 * that the coverage file and its companion ".targets.tsv" file were written and agree with
 * each other on the number of targets.
 */
@Test
public void testSparkGenomeReadCountsBigBins() throws IOException {
    final File covFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String covPath = covFile.getAbsolutePath();

    final String[] args = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, covPath,
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "16000"
    };
    runCommandLine(args);

    // The coverage output must exist, be non-empty and be parseable.
    Assert.assertTrue(covFile.exists());
    Assert.assertTrue(covFile.length() > 0);
    final ReadCountCollection parsedCoverage = ReadCountCollectionUtils.parse(covFile);

    // The targets file is written next to the coverage output.
    final File targetsTsv = new File(covPath + ".targets.tsv");
    Assert.assertTrue(targetsTsv.exists());
    Assert.assertTrue(targetsTsv.length() > 0);

    final List<Target> binTargets = TargetTableReader.readTargetFile(targetsTsv);
    Assert.assertEquals(binTargets.size(), 4);
    Assert.assertEquals(binTargets.get(1).getEnd(), 16000);
    Assert.assertEquals(binTargets.get(2).getName(), "target_3_1_16000");

    // Coverage and targets file must describe the same number of targets.
    Assert.assertEquals(parsedCoverage.targets().size(), binTargets.size());
}
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
Class CopyRatioSegmenterUnitTest, method testChromosomesOnDifferentSegments.
/**
 * Generates data for three contigs that all share the same underlying log2 copy ratio and
 * checks that the resulting segments still cover three distinct contigs, i.e. that no segment
 * is shared between chromosomes.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] trueLog2CopyRatios = new double[] { -2.0, 0.0, 1.7 };
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;

    // randomly set positions, 100 per contig; contigs are generated in order chr1, chr2, chr3
    final int chainLength = 100;
    final double quarterMemoryLength = trueMemoryLength / 4;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, quarterMemoryLength);
    for (final String contig : new String[] { "chr2", "chr3" }) {
        positions.addAll(randomPositions(contig, chainLength, rng, quarterMemoryLength));
    }

    // every position is drawn from the same state (index 2)
    final int trueState = 2;
    final double fixedCopyRatio = trueLog2CopyRatios[trueState];
    final double[] observations = new double[positions.size()];
    for (int i = 0; i < observations.length; i++) {
        observations[i] = generateData(trueStandardDeviation, fixedCopyRatio, rng);
    }

    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final ReadCountCollection rcc = new ReadCountCollection(
            targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(observations));
    final List<ModeledSegment> segments = new CopyRatioSegmenter(10, rcc).getModeledSegments();

    // check that each chromosome has at least one segment
    final int contigsSeen = segments.stream()
            .map(ModeledSegment::getContig)
            .collect(Collectors.toSet())
            .size();
    Assert.assertEquals(contigsSeen, 3);
}
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
Class SparkGenomeReadCountsIntegrationTest, method testSparkGenomeReadCounts.
/**
 * Runs the tool with the value "10000" for the {@code BINSIZE_SHORT_NAME} argument, then checks
 * that the coverage file and its companion ".targets.tsv" file were written and agree with
 * each other on the number of targets.
 */
@Test
public void testSparkGenomeReadCounts() throws IOException {
    final File covFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String covPath = covFile.getAbsolutePath();

    final String[] args = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, covPath,
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "10000"
    };
    runCommandLine(args);

    // The coverage output must exist, be non-empty and be parseable.
    Assert.assertTrue(covFile.exists());
    Assert.assertTrue(covFile.length() > 0);
    final ReadCountCollection parsedCoverage = ReadCountCollectionUtils.parse(covFile);

    // The targets file is written next to the coverage output.
    final File targetsTsv = new File(covPath + ".targets.tsv");
    Assert.assertTrue(targetsTsv.exists());
    Assert.assertTrue(targetsTsv.length() > 0);

    final List<Target> binTargets = TargetTableReader.readTargetFile(targetsTsv);
    Assert.assertEquals(binTargets.size(), 8);
    Assert.assertEquals(binTargets.get(1).getEnd(), 16000);
    Assert.assertEquals(binTargets.get(5).getName(), "target_3_10001_16000");

    // Coverage and targets file must describe the same number of targets.
    Assert.assertEquals(parsedCoverage.targets().size(), binTargets.size());
}
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
Class SparkGenomeReadCountsIntegrationTest, method testSparkGenomeReadCountsInterval.
/**
 * Runs the tool restricted to contig "1" via {@code -L 1} and checks that neither coverage
 * output contains records on contigs "2" or "3", and that every entry of the targets file
 * lies on contig "1".
 */
@Test
public void testSparkGenomeReadCountsInterval() {
    final File covFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String covPath = covFile.getAbsolutePath();

    final String[] args = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, covPath,
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "10000",
            "-L", "1"
    };
    runCommandLine(args);

    // main coverage output
    final ReadCountCollection proportionalCoverage = loadReadCountCollection(covFile);
    Assert.assertTrue(proportionalCoverage.records().stream()
            .allMatch(rec -> !rec.getContig().equals("2") && !rec.getContig().equals("3")));

    // raw coverage
    final File rawCovFile = new File(covPath + SparkGenomeReadCounts.RAW_COV_OUTPUT_EXTENSION);
    final ReadCountCollection rawCoverage = loadReadCountCollection(rawCovFile);
    Assert.assertTrue(rawCoverage.records().stream()
            .allMatch(rec -> !rec.getContig().equals("2") && !rec.getContig().equals("3")));

    // targets file
    final List<Target> binTargets = TargetTableReader.readTargetFile(new File(covPath + ".targets.tsv"));
    Assert.assertTrue(binTargets.stream().allMatch(target -> target.getContig().equals("1")));
}
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
Class TitanFileConverter, method convertCRToTitanCovFile.
/**
 * Create a coverage file that is compatible with TITAN.
 *
 * <p>Parses {@code tnFile} into a {@link ReadCountCollection} and writes all of its records to
 * {@code outputFile} through a {@code TitanCopyRatioEstimateWriter}. The writer is closed even
 * when writing fails.</p>
 *
 * @param tnFile Readable file from {@link org.broadinstitute.hellbender.tools.exome.NormalizeSomaticReadCounts}
 * @param outputFile Not {@code null}
 * @throws UserException.BadInput if an {@link IOException} occurs while parsing {@code tnFile}
 *         or while writing or closing {@code outputFile}
 */
public static void convertCRToTitanCovFile(final File tnFile, final File outputFile) {
    IOUtils.canReadFile(tnFile);
    try {
        final ReadCountCollection rcc = ReadCountCollectionUtils.parse(tnFile);
        // try-with-resources guarantees the writer is closed when writeAllRecords throws;
        // the original only closed it on the success path.
        try (final TitanCopyRatioEstimateWriter titanCopyRatioEstimateWriter = new TitanCopyRatioEstimateWriter(outputFile)) {
            titanCopyRatioEstimateWriter.writeAllRecords(rcc.records());
        }
    } catch (final IOException ioe) {
        // Keep the underlying I/O detail in the message so the failure can be diagnosed.
        // NOTE(review): if UserException.BadInput offers a (String, Throwable) constructor,
        // prefer passing ioe as the cause instead.
        throw new UserException.BadInput("Bad output file: " + outputFile + " (" + ioe.getMessage() + ")");
    }
}
Aggregations