use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method subsetReadCountsToUsableTargets.
/**
 * Subsets the targets in the input read-counts to the usable ones, based on the percentile threshold
 * indicated by the user.
 *
 * <p>
 * It returns a pair of objects, where the left one is the updated read-counts containing only the usable
 * targets, and the right one is the corresponding target factors.
 * </p>
 *
 * @param readCounts the input read-counts.
 * @param targetFactorPercentileThreshold the minimum median count percentile under which targets are not considered usable.
 * @param logger the logger used to report how many targets were kept or discarded.
 * @return never {@code null}.
 */
@VisibleForTesting
static Pair<ReadCountCollection, double[]> subsetReadCountsToUsableTargets(final ReadCountCollection readCounts,
                                                                           final double targetFactorPercentileThreshold,
                                                                           final Logger logger) {
    final double[] targetFactors = calculateTargetFactors(readCounts);
    final double threshold = new Percentile(targetFactorPercentileThreshold).evaluate(targetFactors);
    final List<Target> targetByIndex = readCounts.targets();
    final Set<Target> result = IntStream.range(0, targetFactors.length)
            .filter(i -> targetFactors[i] >= threshold)
            .mapToObj(targetByIndex::get)
            .collect(Collectors.toCollection(LinkedHashSet::new));
    if (result.size() == targetByIndex.size()) {
        logger.info(String.format("All %d targets are kept", targetByIndex.size()));
        return new ImmutablePair<>(readCounts, targetFactors);
    } else {
        final int discardedCount = targetFactors.length - result.size();
        logger.info(String.format("Discarded %d target(s) out of %d with factors below %.2g (%.2f percentile)",
                discardedCount, targetFactors.length, threshold, targetFactorPercentileThreshold));
        final double[] targetFactorSubset = DoubleStream.of(targetFactors)
                .filter(i -> i >= threshold)
                .toArray();
        return new ImmutablePair<>(readCounts.subsetTargets(result), targetFactorSubset);
    }
}
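
For reference, a minimal, self-contained sketch of the core pattern: derive a percentile cutoff from an array with Commons Math's Percentile, keep the entries at or above it, and return both halves as an ImmutablePair. The class and method names below are illustrative, not GATK API; the plain org.apache.commons.lang3.tuple classes behave the same as Beam's repackaged copies.

import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.stat.descriptive.rank.Percentile;

public class PercentileSubsetExample {

    // Returns (indices kept, values kept): the entries whose value is at or
    // above the given percentile of the input array.
    static Pair<int[], double[]> subsetAbovePercentile(final double[] values, final double percentile) {
        final double threshold = new Percentile(percentile).evaluate(values);
        final int[] keptIndices = IntStream.range(0, values.length)
                .filter(i -> values[i] >= threshold)
                .toArray();
        final double[] keptValues = DoubleStream.of(values)
                .filter(v -> v >= threshold)
                .toArray();
        return new ImmutablePair<>(keptIndices, keptValues);
    }

    public static void main(String[] args) {
        final Pair<int[], double[]> result = subsetAbovePercentile(new double[] {1, 5, 2, 8, 3}, 25.0);
        System.out.println(result.getLeft().length + " entries kept"); // 4 entries kept
    }
}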
use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project RecurrentComplex by Ivorforce.
the class FactorMatch method consider.
@Override
public List<Pair<LineSelection, Float>> consider(WorldCache cache, LineSelection considerable,
                                                 @Nullable IvBlockCollection blockCollection, StructurePlaceContext context) {
    if (blockCollection == null)
        throw new IllegalArgumentException("Missing a block collection!");
    List<Pair<LineSelection, Float>> consideration = new ArrayList<>();
    int[] size = StructureBoundingBoxes.size(context.boundingBox);
    BlockPos lowerCoord = StructureBoundingBoxes.min(context.boundingBox);
    Set<BlockPos.MutableBlockPos> sources = BlockAreas.streamMutablePositions(blockCollection.area())
            .filter(p -> sourceMatcher.evaluate(() -> blockCollection.getBlockState(p)))
            .map(p -> new BlockPos.MutableBlockPos(context.transform.apply(p, size)
                    .add(lowerCoord.getX(), 0, lowerCoord.getZ())))
            .collect(Collectors.toSet());
    for (IntegerRange range : (Iterable<IntegerRange>) considerable.streamSections(null, true)::iterator) {
        Float curConformity = null;
        int lastY = range.getMax();
        int end = range.getMin();
        for (int y = lastY; y >= end; y--) {
            int finalY = y;
            sources.forEach(p -> p.move(EnumFacing.UP, finalY));
            float conformity = weight(cache, sources, requiredConformity);
            sources.forEach(p -> p.move(EnumFacing.DOWN, finalY));
            if (curConformity == null) {
                curConformity = conformity;
                lastY = y;
            } else if (!DoubleMath.fuzzyEquals(conformity, curConformity, 0.01)) {
                consideration.add(Pair.of(LineSelection.fromRange(IntegerRanges.from(lastY, y + 1), true), weight(curConformity)));
                curConformity = conformity;
                lastY = y;
            }
        }
        if (curConformity != null)
            consideration.add(Pair.of(LineSelection.fromRange(IntegerRanges.from(lastY, end), true), weight(curConformity)));
    }
    return consideration;
}
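
The inner loop compresses consecutive y-levels whose conformity is fuzzily equal into (selection, weight) pairs. A minimal sketch of that run-compression idea, assuming plain arrays in place of RecurrentComplex's LineSelection; all names here are hypothetical:

import java.util.ArrayList;
import java.util.List;
import com.google.common.math.DoubleMath;
import org.apache.commons.lang3.tuple.Pair;

public class RunGroupingExample {

    // Compresses a sequence of scores into ([startIndex, endIndex] inclusive, score)
    // runs, treating scores within the tolerance as equal; the same grouping idea
    // FactorMatch.consider applies along the y axis.
    static List<Pair<int[], Float>> groupRuns(final float[] scores, final double tolerance) {
        final List<Pair<int[], Float>> runs = new ArrayList<>();
        int runStart = 0;
        for (int i = 1; i <= scores.length; i++) {
            // Close the current run at the end of input or when the value drifts past the tolerance.
            if (i == scores.length || !DoubleMath.fuzzyEquals(scores[i], scores[runStart], tolerance)) {
                runs.add(Pair.of(new int[] {runStart, i - 1}, scores[runStart]));
                runStart = i;
            }
        }
        return runs;
    }

    public static void main(String[] args) {
        // Two runs: indices 0-2 at ~0.5 and indices 3-4 at 0.9.
        groupRuns(new float[] {0.50f, 0.505f, 0.501f, 0.9f, 0.9f}, 0.01)
                .forEach(r -> System.out.println(r.getLeft()[0] + "-" + r.getLeft()[1] + " -> " + r.getRight()));
    }
}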
use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.
the class FilterByOrientationBias method onTraversalSuccess.
@Override
public Object onTraversalSuccess() {
    logger.info("Tagging whether genotypes are in one of the artifact modes.");
    // Calculate how many artifacts need to be cut
    double fdrThreshold = 0.01;
    final List<VariantContext> finalVariants = OrientationBiasFilterer.annotateVariantContextsWithFilterResults(
            fdrThreshold, relevantTransitions, firstPassVariants, transitionToPreAdapterScoreMap);
    logger.info("Writing variants to VCF...");
    finalVariants.forEach(vcfWriter::add);
    logger.info("Writing a simple summary table...");
    List<String> sampleNames = new ArrayList<>();
    if (finalVariants.size() != 0) {
        sampleNames = finalVariants.get(0).getSampleNamesOrderedByName();
    }
    final List<Pair<String, Transition>> sampleTransitionCombinations = new ArrayList<>();
    for (Transition relevantTransition : relevantTransitions) {
        for (String sampleName : sampleNames) {
            sampleTransitionCombinations.add(Pair.of(sampleName, relevantTransition));
        }
    }
    OrientationBiasUtils.writeOrientationBiasSummaryTable(sampleTransitionCombinations, finalVariants,
            transitionToPreAdapterScoreMap, new File(outputFile.getAbsolutePath() + SUMMARY_FILE_SUFFIX));
    return null;
}
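
The nested loops build a plain cartesian product of samples and transitions as Pair elements. A self-contained sketch of the same construction with streams, using plain Strings in place of Transition (names are illustrative):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;

public class CrossProductExample {

    // Builds every (sample, category) combination, mirroring the nested loops above:
    // outer loop over categories, inner loop over samples.
    static List<Pair<String, String>> crossProduct(final List<String> samples, final List<String> categories) {
        return categories.stream()
                .flatMap(c -> samples.stream().map(s -> Pair.of(s, c)))
                .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        crossProduct(Arrays.asList("sampleA", "sampleB"), Arrays.asList("C>A", "T>G"))
                .forEach(p -> System.out.println(p.getLeft() + " / " + p.getRight()));
    }
}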
use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.
the class RecalUtils method generateReportTables.
public static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final StandardCovariateList covariates) {
    final List<GATKReportTable> result = new LinkedList<>();
    int rowIndex = 0;
    GATKReportTable allCovsReportTable = null;
    for (NestedIntegerArray<RecalDatum> table : recalibrationTables) {
        // initialize the array to hold the column names
        final ArrayList<Pair<String, String>> columnNames = new ArrayList<>();
        // save the required covariate name so we can reference it in the future
        columnNames.add(new MutablePair<>(covariates.getReadGroupCovariate().parseNameForReport(), "%s"));
        if (!recalibrationTables.isReadGroupTable(table)) {
            // save the required covariate name so we can reference it in the future
            columnNames.add(new MutablePair<>(covariates.getQualityScoreCovariate().parseNameForReport(), "%d"));
            if (recalibrationTables.isAdditionalCovariateTable(table)) {
                columnNames.add(covariateValue);
                columnNames.add(covariateName);
            }
        }
        // the order of these column names is important here
        columnNames.add(eventType);
        columnNames.add(empiricalQuality);
        if (recalibrationTables.isReadGroupTable(table)) {
            // only the read group table needs the estimated Q reported
            columnNames.add(estimatedQReported);
        }
        columnNames.add(nObservations);
        columnNames.add(nErrors);
        final String reportTableName = getReportTableName(recalibrationTables, table);
        final GATKReportTable.Sorting sort = GATKReportTable.Sorting.SORT_BY_COLUMN;
        final GATKReportTable reportTable;
        final boolean addToList;
        //XXX this "if" implicitly uses the knowledge about the ordering of tables.
        if (!recalibrationTables.isAdditionalCovariateTable(table)) {
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            // reset the row index since we're starting with a new table
            rowIndex = 0;
            addToList = true;
        } else if (allCovsReportTable == null && recalibrationTables.isAdditionalCovariateTable(table)) {
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            // reset the row index since we're starting with a new table
            rowIndex = 0;
            allCovsReportTable = reportTable;
            addToList = true;
        } else {
            reportTable = allCovsReportTable;
            addToList = false;
        }
        for (final NestedIntegerArray.Leaf<RecalDatum> row : table.getAllLeaves()) {
            final RecalDatum datum = row.value;
            final int[] keys = row.keys;
            int columnIndex = 0;
            int keyIndex = 0;
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(),
                    covariates.getReadGroupCovariate().formatKey(keys[keyIndex++]));
            if (!recalibrationTables.isReadGroupTable(table)) {
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(),
                        covariates.getQualityScoreCovariate().formatKey(keys[keyIndex++]));
                if (recalibrationTables.isAdditionalCovariateTable(table)) {
                    final Covariate covariate = recalibrationTables.getCovariateForTable(table);
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(),
                            covariate.formatKey(keys[keyIndex++]));
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(),
                            covariate.parseNameForReport());
                }
            }
            final EventType event = EventType.eventFrom(keys[keyIndex]);
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), event.toString());
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEmpiricalQuality());
            if (recalibrationTables.isReadGroupTable(table)) {
                // we only add the estimated Q reported in the RG table
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEstimatedQReported());
            }
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getNumObservations());
            reportTable.set(rowIndex, columnNames.get(columnIndex).getLeft(), datum.getNumMismatches());
            rowIndex++;
        }
        if (addToList) {
            //XXX using a set would be slow because the equals method on GATKReportTable is expensive.
            result.add(reportTable);
        }
    }
    return result;
}
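
The columnNames list pairs each column label with a printf-style format string, and consumers recover the label with getLeft(). A minimal sketch of that (name, format) column-spec pattern, with made-up column names and values:

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;

public class ColumnSpecExample {

    public static void main(String[] args) {
        // Each pair carries (column name, printf-style format), as in generateReportTables.
        final List<Pair<String, String>> columns = new ArrayList<>();
        columns.add(new MutablePair<>("ReadGroup", "%s"));
        columns.add(new MutablePair<>("QualityScore", "%d"));
        columns.add(new MutablePair<>("EmpiricalQuality", "%.4f"));

        // Consumers read the name with getLeft() and the format with getRight().
        final Object[] row = {"rg1", 30, 29.8731};
        final StringBuilder line = new StringBuilder();
        for (int i = 0; i < columns.size(); i++) {
            line.append(columns.get(i).getLeft())
                .append('=')
                .append(String.format(columns.get(i).getRight(), row[i]))
                .append(i + 1 < columns.size() ? ", " : "");
        }
        System.out.println(line); // ReadGroup=rg1, QualityScore=30, EmpiricalQuality=29.8731
    }
}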
use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.
the class GCBiasSimulatedData method simulatedData.
// visible for the integration test
public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
    final List<Target> phonyTargets = SimulatedTargets.phonyTargets(numTargets);
    final List<String> phonySamples = SimulatedSamples.phonySamples(numSamples);
    final Random random = new Random(13);
    final double[] gcContentByTarget = IntStream.range(0, numTargets)
            .mapToDouble(n -> 0.5 + 0.2 * random.nextGaussian())
            .map(x -> Math.min(x, 0.95))
            .map(x -> Math.max(x, 0.05))
            .toArray();
    final double[] gcBiasByTarget = Arrays.stream(gcContentByTarget).map(QUADRATIC_GC_BIAS_CURVE::apply).toArray();
    // model mainly GC bias with a small random amount of non-GC bias
    // thus noise after GC correction should be nearly zero
    final RealMatrix counts = new Array2DRowRealMatrix(numTargets, numSamples);
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int target, final int column, final double value) {
            return gcBiasByTarget[target] * (1.0 + 0.01 * random.nextDouble());
        }
    });
    final ReadCountCollection rcc = new ReadCountCollection(phonyTargets, phonySamples, counts);
    return new ImmutablePair<>(rcc, gcContentByTarget);
}
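
A stripped-down sketch of the same shape, assuming nothing beyond Commons Math and Commons Lang: fill a matrix through the RealMatrixChangingVisitor callback and return it together with per-row metadata as an ImmutablePair. The simulate method and its parameters are illustrative, not GATK code.

import java.util.Random;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor;
import org.apache.commons.math3.linear.RealMatrix;

public class SimulatedMatrixExample {

    // Fills a rows-by-cols matrix where each entry is its row's bias times a small
    // noise factor, and returns (matrix, per-row bias) as an ImmutablePair.
    static Pair<RealMatrix, double[]> simulate(final int rows, final int cols, final long seed) {
        final Random random = new Random(seed);
        final double[] biasByRow = random.doubles(rows, 0.5, 1.5).toArray();
        final RealMatrix counts = new Array2DRowRealMatrix(rows, cols);
        counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

            @Override
            public double visit(final int row, final int column, final double value) {
                return biasByRow[row] * (1.0 + 0.01 * random.nextDouble());
            }
        });
        return new ImmutablePair<>(counts, biasByRow);
    }

    public static void main(String[] args) {
        final Pair<RealMatrix, double[]> sim = simulate(4, 3, 13L);
        System.out.println(sim.getLeft().getRowDimension() + " x " + sim.getLeft().getColumnDimension());
    }
}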