Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
From class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsDelsWithSpark.
@Test
public void testIdentifySamplesWithSuspiciousContigsDelsWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_DEL);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_FILE_DEL, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    final Set<String> resultSamples = new HashSet<>(blacklistSamples);
    Assert.assertEquals(resultSamples.size(), gtBlacklistSamples.size());
    Assert.assertEquals(Sets.difference(resultSamples, gtBlacklistSamples).size(), 0);
}
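The final two assertions together establish set equality: matching sizes plus an empty one-way difference rule out both extra and missing samples. A minimal equivalent sketch, assuming TestNG's collection-aware assertEquals overload and Guava's Sets.symmetricDifference (neither appears in the original test):

// One-line set equality, assuming TestNG resolves assertEquals(Set, Set):
Assert.assertEquals(resultSamples, gtBlacklistSamples);
// Alternatively: the symmetric difference is empty iff the sets are equal.
Assert.assertTrue(Sets.symmetricDifference(resultSamples, gtBlacklistSamples).isEmpty());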
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
From class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsAmps.
@Test
public void testIdentifySamplesWithSuspiciousContigsAmps() {
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_AMP);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_FILE_AMP, ioe);
    }
    final List<ReadCountCollection> singleSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createIndividualReadCountCollections(allCoverageProfiles);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            singleSampleTangentNormalizedReadCounts, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    final Set<String> resultSamples = new HashSet<>(blacklistSamples);
    Assert.assertEquals(resultSamples.size(), gtBlacklistSamples.size());
    Assert.assertEquals(Sets.difference(resultSamples, gtBlacklistSamples).size(), 0);
}
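Every test in this listing repeats the same try/catch around ReadCountCollectionUtils.parse. A hypothetical helper could factor that out (parseOrFail is not part of the original class, and the TEST_FILE_* constants are assumed here to be File fields):

private static ReadCountCollection parseOrFail(final File testFile) {
    try {
        return ReadCountCollectionUtils.parse(testFile);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + testFile, ioe);
        return null; // unreachable: Assert.fail always throws
    }
}

Each test body then shrinks to a single line: final ReadCountCollection allCoverageProfiles = parseOrFail(TEST_FILE_AMP);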
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
From class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsNoSuspiciousSamplesWithSpark.
@Test
public void testIdentifySamplesWithSuspiciousContigsNoSuspiciousSamplesWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_NO_SUSPICIOUS_SAMPLES_FILE);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_NO_SUSPICIOUS_SAMPLES_FILE, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    Assert.assertEquals(blacklistSamples.size(), 0);
}
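These tests exercise two overloads of identifySamplesWithSuspiciousContigs: a local one taking a List<ReadCountCollection>, and a Spark one taking a JavaRDD<ReadCountCollection> plus a JavaSparkContext. A minimal sketch of calling both against the same input (the Map<String, Double> value type is an assumption inferred from getContigToMedianCRMap's name, not shown in the original):

final Map<String, Double> contigToMedianCR = CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles);
// Local path: per-sample collections in a plain List.
final List<String> localBlacklist = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
        CoveragePoNQCUtils.createIndividualReadCountCollections(allCoverageProfiles), contigToMedianCR);
// Spark path: the same per-sample collections, parallelized as a JavaRDD.
final List<String> sparkBlacklist = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
        CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx), ctx, contigToMedianCR);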
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
From class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsDels.
@Test
public void testIdentifySamplesWithSuspiciousContigsDels() {
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_DEL);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_FILE_DEL, ioe);
    }
    final List<ReadCountCollection> singleSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createIndividualReadCountCollections(allCoverageProfiles);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            singleSampleTangentNormalizedReadCounts, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    final Set<String> resultSamples = new HashSet<>(blacklistSamples);
    Assert.assertEquals(resultSamples.size(), gtBlacklistSamples.size());
    Assert.assertEquals(Sets.difference(resultSamples, gtBlacklistSamples).size(), 0);
}
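Since Guava's Sets is already in scope (it supplies Sets.difference above), the three-line ground-truth setup could collapse to a single varargs call:

final Set<String> gtBlacklistSamples = Sets.newHashSet("sample_1", "sample_2", "sample_3");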
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
From class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsAmpsWithSpark.
@Test
public void testIdentifySamplesWithSuspiciousContigsAmpsWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_AMP);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_FILE_AMP, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    final Set<String> resultSamples = new HashSet<>(blacklistSamples);
    Assert.assertEquals(resultSamples.size(), gtBlacklistSamples.size());
    Assert.assertEquals(Sets.difference(resultSamples, gtBlacklistSamples).size(), 0);
}
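The local and Spark variants are presumably meant to flag the same samples, so a parity test is a natural extension of this suite. A minimal sketch, assuming the fixtures above and the hypothetical parseOrFail helper sketched earlier:

@Test
public void testLocalAndSparkBlacklistsAgree() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final ReadCountCollection profiles = parseOrFail(TEST_FILE_AMP); // hypothetical helper
    final Map<String, Double> contigToMedianCR = CoveragePoNQCUtils.getContigToMedianCRMap(profiles);
    final List<String> local = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            CoveragePoNQCUtils.createIndividualReadCountCollections(profiles), contigToMedianCR);
    final List<String> spark = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(profiles, ctx), ctx, contigToMedianCR);
    // Compare as sets: the two code paths need not preserve ordering.
    Assert.assertEquals(new HashSet<>(spark), new HashSet<>(local));
}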