Use of java.util.function.BiFunction in project gatk-protected by broadinstitute.
Class CoverageModelEMWorkspace, method getCopyRatioSegmentsSpark.
/**
 * Computes the copy ratio segments of every sample from the compute blocks (Spark implementation).
 *
 * <p>The emission data is processed in two Spark stages: the first obtains the copy ratio HMM
 * results of each sample from the expectations calculator, the second turns each sample's
 * forward-backward and Viterbi results into a list of segments. The per-sample lists are then
 * collected and ordered by sample index.</p>
 *
 * @return one list of {@link HiddenStateSegmentRecord}s per sample, in ascending sample index order
 */
private List<List<HiddenStateSegmentRecord<STATE, Target>>> getCopyRatioSegmentsSpark() {
    /* local snapshots: the lambdas below capture these locals rather than the instance fields */
    final List<Target> targets = new ArrayList<>(this.processedTargetList);
    final List<SexGenotypeData> sexGenotypes = new ArrayList<>(this.processedSampleSexGenotypeData);
    final List<String> sampleNames = new ArrayList<>(this.processedSampleNameList);
    final INDArray readDepths = Transforms.exp(sampleMeanLogReadDepths, true);
    final CopyRatioExpectationsCalculator<CoverageModelCopyRatioEmissionData, STATE> calculator = this.copyRatioExpectationsCalculator;
    final BiFunction<SexGenotypeData, Target, STATE> stateFactory = this.referenceStateFactory;

    final List<Tuple2<Integer, List<HiddenStateSegmentRecord<STATE, Target>>>> segmentsBySample = fetchCopyRatioEmissionDataSpark()
            .mapPartitionsToPair(partition -> {
                /* stage 1: (sample index, emission data) -> (sample index, HMM results) */
                final List<Tuple2<Integer, CopyRatioHMMResults<CoverageModelCopyRatioEmissionData, STATE>>> hmmResults = new ArrayList<>();
                while (partition.hasNext()) {
                    final Tuple2<Integer, List<CoverageModelCopyRatioEmissionData>> entry = partition.next();
                    final int si = entry._1;
                    final CopyRatioCallingMetadata metadata = CopyRatioCallingMetadata.builder()
                            .sampleName(sampleNames.get(si))
                            .sampleSexGenotypeData(sexGenotypes.get(si))
                            .sampleCoverageDepth(readDepths.getDouble(si))
                            .emissionCalculationStrategy(EmissionCalculationStrategy.HYBRID_POISSON_GAUSSIAN)
                            .build();
                    hmmResults.add(new Tuple2<>(si, calculator.getCopyRatioHMMResults(metadata, targets, entry._2)));
                }
                return hmmResults.iterator();
            }, true)
            .mapPartitionsToPair(partition -> {
                /* stage 2: (sample index, HMM results) -> (sample index, segments) */
                final List<Tuple2<Integer, List<HiddenStateSegmentRecord<STATE, Target>>>> segments = new ArrayList<>();
                while (partition.hasNext()) {
                    final Tuple2<Integer, CopyRatioHMMResults<CoverageModelCopyRatioEmissionData, STATE>> entry = partition.next();
                    final int si = entry._1;
                    final CopyRatioHMMResults<CoverageModelCopyRatioEmissionData, STATE> hmm = entry._2;
                    /* the processor is built for a single sample, hence the singleton lists */
                    final HMMSegmentProcessor<CoverageModelCopyRatioEmissionData, STATE, Target> segmenter = new HMMSegmentProcessor<>(
                            Collections.singletonList(hmm.getMetaData().getSampleName()),
                            Collections.singletonList(hmm.getMetaData().getSampleSexGenotypeData()),
                            stateFactory,
                            Collections.singletonList(new HashedListTargetCollection<>(targets)),
                            Collections.singletonList(hmm.getForwardBackwardResult()),
                            Collections.singletonList(hmm.getViterbiResult()));
                    segments.add(new Tuple2<>(si, segmenter.getSegmentsAsList()));
                }
                return segments.iterator();
            })
            .collect();

    /* order the collected per-sample results by sample index and drop the index */
    return segmentsBySample.stream()
            .sorted(Comparator.comparingInt(entry -> entry._1))
            .map(entry -> entry._2)
            .collect(Collectors.toList());
}
Use of java.util.function.BiFunction in project gatk-protected by broadinstitute.
Class ReadCountRecordUnitTest, method testAppendCountsTo.
/**
 * Checks that {@code appendCountsTo} writes the record's counts into a data line starting at the
 * line's current position and leaves the columns before and after them untouched.
 *
 * <p>The data line has 10 padding columns, then one column per count, then 10 more padding
 * columns. Every column is pre-filled with {@code "-11"}, the line is positioned at the first
 * count column, and the counts are appended.</p>
 *
 * @param testName name of the test case; when it equals {@code "long[]"} the written values are
 *                 compared against the rounded counts instead of the exact counts.
 * @param constructor creates the record under test from a target and its counts.
 * @param size number of counts passed to {@code generateCounts}.
 */
@Test(dataProvider = "testData", dependsOnMethods = "testCreation")
public void testAppendCountsTo(final String testName, final BiFunction<Target, double[], ? extends ReadCountRecord> constructor, final int size) {
    final int padding = 10;
    final String filler = "-11";
    final double[] counts = generateCounts(size);
    final boolean round = "long[]".equals(testName);
    final ReadCountRecord record = constructor.apply(TEST_TARGET, counts);
    final List<String> columnNames = Stream.concat(
            Stream.concat(
                    IntStream.range(0, padding).mapToObj(i -> "pre-padding_" + i),
                    IntStream.range(0, counts.length).mapToObj(i -> "column_" + i)),
            IntStream.range(0, padding).mapToObj(i -> "post-padding_" + i))
            .collect(Collectors.toList());
    final TableColumnCollection columns = new TableColumnCollection(columnNames);
    final DataLine dataLine = new DataLine(columns, RuntimeException::new);
    // Pre-fill every column so that untouched columns can be recognized afterwards.
    for (int i = 0; i < columnNames.size(); i++) {
        dataLine.append(filler);
    }
    dataLine.seek(padding);
    record.appendCountsTo(dataLine);
    // Check the copied values; exact match unless the record is expected to round its counts.
    final double delta = round ? 0.00001 : 0.0;
    for (int i = 0; i < counts.length; i++) {
        final double expected = round ? Math.round(counts[i]) : counts[i];
        Assert.assertEquals(dataLine.getDouble(padding + i), expected, delta);
    }
    // Check that the padding remains intact:
    for (int i = 0; i < padding; i++) {
        Assert.assertEquals(dataLine.get(i), filler);
    }
    for (int i = counts.length + padding; i < columnNames.size(); i++) {
        Assert.assertEquals(dataLine.get(i), filler);
    }
}
Use of java.util.function.BiFunction in project gatk-protected by broadinstitute.
Class ReadCountRecordUnitTest, method testAppendCountsToBeyondEnd.
/**
 * Checks that appending a record's counts to a data line that is already positioned past its
 * last column fails with an {@link IllegalStateException}.
 *
 * <p>The data line has 10 padding columns, then one column per count, then 10 more padding
 * columns; it is positioned at {@code columnNames.size()} before the counts are appended.</p>
 *
 * @param testName name of the test case; not used by this test.
 * @param constructor creates the record under test from a target and its counts.
 * @param size number of counts passed to {@code generateCounts}; presumably always positive
 *             given the "testNonZeroCountsData" provider name (confirm against the provider).
 */
@Test(dataProvider = "testNonZeroCountsData", dependsOnMethods = "testAppendCountsTo", expectedExceptions = IllegalStateException.class)
public void testAppendCountsToBeyondEnd(@SuppressWarnings("unused") final String testName, final BiFunction<Target, double[], ? extends ReadCountRecord> constructor, final int size) {
    final int padding = 10;
    final double[] counts = generateCounts(size);
    final ReadCountRecord record = constructor.apply(TEST_TARGET, counts);
    final List<String> columnNames = Stream.concat(
            Stream.concat(
                    IntStream.range(0, padding).mapToObj(i -> "pre-padding_" + i),
                    IntStream.range(0, counts.length).mapToObj(i -> "column_" + i)),
            IntStream.range(0, padding).mapToObj(i -> "post-padding_" + i))
            .collect(Collectors.toList());
    final TableColumnCollection columns = new TableColumnCollection(columnNames);
    final DataLine dataLine = new DataLine(columns, RuntimeException::new);
    // Position the line past its last column so that there is no room left for the counts.
    dataLine.seek(columnNames.size());
    record.appendCountsTo(dataLine);
}
Aggregations