Use of org.apache.spark.api.java.JavaSparkContext in project gatk by broadinstitute: class HDF5PCACoveragePoNCreationUtilsUnitTest, method testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns.
@Test(dataProvider = "readCountOnlyWithDiverseShapeData")
public void testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns(final ReadCountCollection readCounts) {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final ReductionResult result = HDF5PCACoveragePoNCreationUtils.calculateReducedPanelAndPInverses(
            readCounts, OptionalInt.of(readCounts.columnNames().size() / 2), NULL_LOGGER, ctx);
    final RealMatrix counts = readCounts.counts();
    Assert.assertNotNull(result);
    Assert.assertNotNull(result.getPseudoInverse());
    Assert.assertNotNull(result.getReducedCounts());
    Assert.assertNotNull(result.getReducedPseudoInverse());
    Assert.assertNotNull(result.getAllSingularValues());
    Assert.assertEquals(counts.getColumnDimension(), result.getAllSingularValues().length);
    Assert.assertEquals(result.getReducedCounts().getRowDimension(), counts.getRowDimension());
    // The reduced panel must keep exactly half of the original columns (the eigensamples).
    final int eigensamples = result.getReducedCounts().getColumnDimension();
    Assert.assertEquals(eigensamples, readCounts.columnNames().size() / 2);
    assertPseudoInverse(counts, result.getPseudoInverse());
    assertPseudoInverse(result.getReducedCounts(), result.getReducedPseudoInverse());
}
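
The helper assertPseudoInverse is defined elsewhere in the test class and is not part of this snippet. A minimal sketch of what such a check could verify, assuming Apache Commons Math's RealMatrix and a hypothetical numerical tolerance, is the defining Moore-Penrose identity M * pinv(M) * M = M:

import org.apache.commons.math3.linear.RealMatrix;
import org.testng.Assert;

// Hypothetical sketch of the helper, not the actual GATK implementation.
// Checks that pinv behaves as a Moore-Penrose pseudoinverse of m:
// dimensions are transposed and m * pinv * m reproduces m entry-wise.
private static void assertPseudoInverse(final RealMatrix m, final RealMatrix pinv) {
    final double epsilon = 1e-10; // assumed tolerance
    Assert.assertEquals(pinv.getRowDimension(), m.getColumnDimension());
    Assert.assertEquals(pinv.getColumnDimension(), m.getRowDimension());
    final RealMatrix reconstructed = m.multiply(pinv).multiply(m);
    for (int i = 0; i < m.getRowDimension(); i++) {
        for (int j = 0; j < m.getColumnDimension(); j++) {
            Assert.assertEquals(reconstructed.getEntry(i, j), m.getEntry(i, j), epsilon);
        }
    }
}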
Use of org.apache.spark.api.java.JavaSparkContext in project gatk by broadinstitute: class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsNoSuspiciousSamplesWithSpark.
@Test
public void testIdentifySamplesWithSuspiciousContigsNoSuspiciousSamplesWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_NO_SUSPICIOUS_SAMPLES_FILE);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_NO_SUSPICIOUS_SAMPLES_FILE, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    Assert.assertEquals(blacklistSamples.size(), 0);
}
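
SparkContextFactory.getTestSparkContext() hands back a shared local Spark context for unit tests. As a rough sketch of what such a factory could look like (the class name LocalSparkContexts and the specific configuration below are assumptions; the real GATK factory configures more, such as the serializer, and manages the context's lifecycle):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

// Hypothetical test-context factory: lazily creates one local JavaSparkContext
// and reuses it across tests to avoid the cost of repeated Spark startup.
public final class LocalSparkContexts {
    private static JavaSparkContext testContext;

    private LocalSparkContexts() {}

    public static synchronized JavaSparkContext getTestSparkContext() {
        if (testContext == null) {
            final SparkConf conf = new SparkConf()
                    .setMaster("local[2]")   // two local worker threads
                    .setAppName("unit-test");
            testContext = new JavaSparkContext(conf);
        }
        return testContext;
    }
}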
Use of org.apache.spark.api.java.JavaSparkContext in project gatk-protected by broadinstitute: class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsDelsWithSpark.
@Test
public void testIdentifySamplesWithSuspiciousContigsDelsWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_DEL);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_FILE_DEL, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    final Set<String> resultSamples = new HashSet<>(blacklistSamples);
    Assert.assertEquals(resultSamples.size(), gtBlacklistSamples.size());
    Assert.assertEquals(Sets.difference(resultSamples, gtBlacklistSamples).size(), 0);
}
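
The last two assertions together establish set equality: equal sizes plus an empty one-way Guava Sets.difference. Since both operands are java.util.Sets, the pair could be collapsed into one direct comparison, which also produces a clearer failure message:

// Equivalent set-equality check in a single assertion (TestNG compares Sets directly):
Assert.assertEquals(resultSamples, gtBlacklistSamples);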
The no-suspicious-samples test, testIdentifySamplesWithSuspiciousContigsNoSuspiciousSamplesWithSpark, also appears verbatim in project gatk-protected by broadinstitute; the code is identical to the gatk version shown above.
Use of org.apache.spark.api.java.JavaSparkContext in project gatk-protected by broadinstitute: class CoveragePoNQCUtilsUnitTest, method testIdentifySamplesWithSuspiciousContigsAmpsWithSpark.
@Test
public void testIdentifySamplesWithSuspiciousContigsAmpsWithSpark() {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final Set<String> gtBlacklistSamples = new HashSet<>();
    gtBlacklistSamples.add("sample_1");
    gtBlacklistSamples.add("sample_2");
    gtBlacklistSamples.add("sample_3");
    ReadCountCollection allCoverageProfiles = null;
    try {
        allCoverageProfiles = ReadCountCollectionUtils.parse(TEST_FILE_AMP);
    } catch (final IOException ioe) {
        Assert.fail("Could not load test file: " + TEST_FILE_AMP, ioe);
    }
    final JavaRDD<ReadCountCollection> allSampleTangentNormalizedReadCounts =
            CoveragePoNQCUtils.createParallelIndividualReadCountCollections(allCoverageProfiles, ctx);
    // By the time we are here, input is assumed to have been tangent normalized.
    final List<String> blacklistSamples = CoveragePoNQCUtils.identifySamplesWithSuspiciousContigs(
            allSampleTangentNormalizedReadCounts, ctx, CoveragePoNQCUtils.getContigToMedianCRMap(allCoverageProfiles));
    final Set<String> resultSamples = new HashSet<>(blacklistSamples);
    Assert.assertEquals(resultSamples.size(), gtBlacklistSamples.size());
    Assert.assertEquals(Sets.difference(resultSamples, gtBlacklistSamples).size(), 0);
}
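
In all of these tests, createParallelIndividualReadCountCollections distributes per-sample data so the suspicious-contig check can run in parallel. A conceptual sketch of the idea, assuming a ReadCountCollection.subsetColumns helper for extracting one sample's column (an assumption; the actual GATK utility may be implemented differently):

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

// Conceptual sketch, not the GATK implementation: split a multi-sample
// ReadCountCollection into one single-sample collection per column and
// distribute them as a Spark RDD so each sample can be QC'd in parallel.
static JavaRDD<ReadCountCollection> parallelizePerSample(final ReadCountCollection all,
                                                         final JavaSparkContext ctx) {
    final List<ReadCountCollection> perSample = all.columnNames().stream()
            .map(sample -> all.subsetColumns(Collections.singleton(sample))) // assumed helper
            .collect(Collectors.toList());
    return ctx.parallelize(perSample);
}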