Search in sources :

Example 26 with ReadCountCollection

Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in the gatk-protected project by the Broad Institute.

From the class CoveragePoNQCUtilsUnitTest, the method testIdentifySamplesWithSuspiciousContigsDelsWithSpark.

@Test
public void testIdentifySamplesWithSuspiciousContigsDelsWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    // Ground truth: samples expected to be blacklisted for suspicious contig deletions.
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_DEL);
    } catch (final IOException ioe) {
        // Abort the test if the fixture cannot be read; this is an environment problem, not a QC failure.
        Assert.fail("Could not load test file: " + TEST_FILE_DEL, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts = CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    // Direct set equality replaces the former size-check + Sets.difference pair.
    // It is logically equivalent (|A| == |B| and A \ B == {} iff A == B) and reports
    // the actual mismatched elements on failure.
    Assert.assertEquals(new HashSet<>(blacklistSamples), gtBlacklistSamples);
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) IOException(java.io.IOException) HashSet(java.util.HashSet) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 27 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.

the class CoveragePoNQCUtilsUnitTest method testIdentifySamplesWithSuspiciousContigsAmps.

@Test
public void testIdentifySamplesWithSuspiciousContigsAmps() {
    // Ground truth: samples expected to be blacklisted for suspicious contig amplifications.
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_AMP);
    } catch (final IOException ioe) {
        // Abort the test if the fixture cannot be read; this is an environment problem, not a QC failure.
        Assert.fail("Could not load test file: " + TEST_FILE_AMP, ioe);
    }
    final List<ReadCountCollection> singleSampleTangentNormalizedReadCounts = CoveragePoNQCUtils.createIndividualReadCountCollections(allCoverageProfiles);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(singleSampleTangentNormalizedReadCounts, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    // Direct set equality replaces the former size-check + Sets.difference pair.
    // It is logically equivalent and reports the mismatched elements on failure.
    Assert.assertEquals(new HashSet<>(blacklistSamples), gtBlacklistSamples);
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) IOException(java.io.IOException) HashSet(java.util.HashSet) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 28 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.

the class CoveragePoNQCUtilsUnitTest method testIdentifySamplesWithSuspiciousContigsNoSuspiciousSamplesWithSpark.

@Test
public void testIdentifySamplesWithSuspiciousContigsNoSuspiciousSamplesWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    // Load the coverage fixture; a parse failure means the test environment is broken.
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_NO_SUSPICIOUS_SAMPLES_FILE);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_NO_SUSPICIOUS_SAMPLES_FILE, ioe);
    }
    // Input is assumed to already be tangent normalized at this point.
    final JavaRDD<ReadCountCollection> perSampleCounts =
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    final List<String> flagged = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            perSampleCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    // This fixture contains no suspicious samples, so nothing should be flagged.
    Assert.assertEquals(flagged.size(), 0);
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) IOException(java.io.IOException) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 29 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.

the class CoveragePoNQCUtilsUnitTest method testIdentifySamplesWithSuspiciousContigsDels.

@Test
public void testIdentifySamplesWithSuspiciousContigsDels() {
    // Ground truth: samples expected to be blacklisted for suspicious contig deletions.
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_DEL);
    } catch (final IOException ioe) {
        // Abort the test if the fixture cannot be read; this is an environment problem, not a QC failure.
        Assert.fail("Could not load test file: " + TEST_FILE_DEL, ioe);
    }
    final List<ReadCountCollection> singleSampleTangentNormalizedReadCounts = CoveragePoNQCUtils.createIndividualReadCountCollections(allCoverageProfiles);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(singleSampleTangentNormalizedReadCounts, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    // Direct set equality replaces the former size-check + Sets.difference pair.
    // It is logically equivalent and reports the mismatched elements on failure.
    Assert.assertEquals(new HashSet<>(blacklistSamples), gtBlacklistSamples);
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) IOException(java.io.IOException) HashSet(java.util.HashSet) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 30 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.

the class CoveragePoNQCUtilsUnitTest method testIdentifySamplesWithSuspiciousContigsAmpsWithSpark.

@Test
public void testIdentifySamplesWithSuspiciousContigsAmpsWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    // Ground truth: samples expected to be blacklisted for suspicious contig amplifications.
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_AMP);
    } catch (final IOException ioe) {
        // Abort the test if the fixture cannot be read; this is an environment problem, not a QC failure.
        Assert.fail("Could not load test file: " + TEST_FILE_AMP, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts = CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    // Direct set equality replaces the former size-check + Sets.difference pair.
    // It is logically equivalent and reports the mismatched elements on failure.
    Assert.assertEquals(new HashSet<>(blacklistSamples), gtBlacklistSamples);
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) IOException(java.io.IOException) HashSet(java.util.HashSet) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)74 Test (org.testng.annotations.Test)48 Target (org.broadinstitute.hellbender.tools.exome.Target)40 File (java.io.File)30 IOException (java.io.IOException)30 Collectors (java.util.stream.Collectors)30 List (java.util.List)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 IntStream (java.util.stream.IntStream)26 Assert (org.testng.Assert)26 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)24 RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 Median (org.apache.commons.math3.stat.descriptive.rank.Median)22 ArrayList (java.util.ArrayList)20 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)20 Logger (org.apache.logging.log4j.Logger)20 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)20 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)18 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)18 DoubleStream (java.util.stream.DoubleStream)16