Search in sources :

Example 26 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method subsetReadCountsToUsableTargets.

/**
 * Restricts the input read counts to the targets whose target factor is at or above the value found
 * at the user-provided percentile of all target factors.
 *
 * <p>
 *     The result is a pair of objects: the left element is the read-count collection containing only
 *     the usable targets, and the right element holds the target factors of those same targets.
 *     When every target is usable, the input collection and the complete factor array are returned
 *     untouched.
 * </p>
 *
 * @param readCounts the input read-counts.
 * @param targetFactorPercentileThreshold the minimum median count percentile under which targets are not considered useful.
 * @param logger receives an informational message reporting how many targets were kept or discarded.
 * @return never {@code null}.
 */
@VisibleForTesting
static Pair<ReadCountCollection, double[]> subsetReadCountsToUsableTargets(final ReadCountCollection readCounts, final double targetFactorPercentileThreshold, final Logger logger) {
    final double[] targetFactors = calculateTargetFactors(readCounts);
    final double threshold = new Percentile(targetFactorPercentileThreshold).evaluate(targetFactors);
    final List<Target> allTargets = readCounts.targets();

    // Insertion-ordered so that the retained targets keep their original relative order.
    final Set<Target> usableTargets = new LinkedHashSet<>();
    for (int index = 0; index < targetFactors.length; index++) {
        if (targetFactors[index] >= threshold) {
            usableTargets.add(allTargets.get(index));
        }
    }

    final int totalTargets = allTargets.size();
    if (usableTargets.size() != totalTargets) {
        final int discardedCount = targetFactors.length - usableTargets.size();
        logger.info(String.format("Discarded %d target(s) out of %d with factors below %.2g (%.2f percentile)", discardedCount, targetFactors.length, threshold, targetFactorPercentileThreshold));
        // Apply the same cut-off to the factors so they line up with the retained targets.
        final double[] usableFactors = DoubleStream.of(targetFactors)
                .filter(factor -> factor >= threshold)
                .toArray();
        return new ImmutablePair<>(readCounts.subsetTargets(usableTargets), usableFactors);
    }

    logger.info(String.format("All %d targets are kept", totalTargets));
    return new ImmutablePair<>(readCounts, targetFactors);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) SVD(org.broadinstitute.hellbender.utils.svd.SVD) java.util(java.util) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Pair(org.apache.commons.lang3.tuple.Pair) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) UserException(org.broadinstitute.hellbender.exceptions.UserException) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LogManager(org.apache.logging.log4j.LogManager) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 27 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project RecurrentComplex by Ivorforce.

the class FactorMatch method consider.

/**
 * Scores every section of {@code considerable} by walking it from its maximum down to its minimum
 * and grouping consecutive heights whose conformity stays within 0.01 of the conformity that
 * opened the current group.
 *
 * @param cache           world cache handed to the conformity computation.
 * @param considerable    the line selection whose sections are evaluated.
 * @param blockCollection the blocks of the structure; must not be {@code null}.
 * @param context         supplies the bounding box and the transform applied to source positions.
 * @return one entry per group of heights, pairing the group's range with its weight.
 * @throws IllegalArgumentException if {@code blockCollection} is {@code null}.
 */
@Override
public List<Pair<LineSelection, Float>> consider(WorldCache cache, LineSelection considerable, @Nullable IvBlockCollection blockCollection, StructurePlaceContext context) {
    if (blockCollection == null) {
        throw new IllegalArgumentException("Missing a block collection!");
    }

    int[] size = StructureBoundingBoxes.size(context.boundingBox);
    BlockPos lowerCoord = StructureBoundingBoxes.min(context.boundingBox);

    // Positions matched by the source matcher, transformed and offset horizontally by the lower corner (no vertical offset).
    Set<BlockPos.MutableBlockPos> sources = BlockAreas.streamMutablePositions(blockCollection.area())
            .filter(pos -> sourceMatcher.evaluate(() -> blockCollection.getBlockState(pos)))
            .map(pos -> new BlockPos.MutableBlockPos(context.transform.apply(pos, size).add(lowerCoord.getX(), 0, lowerCoord.getZ())))
            .collect(Collectors.toSet());

    List<Pair<LineSelection, Float>> consideration = new ArrayList<>();

    for (IntegerRange section : (Iterable<IntegerRange>) considerable.streamSections(null, true)::iterator) {
        int top = section.getMax();
        int bottom = section.getMin();

        Float runConformity = null;
        int runStart = top;

        for (int y = top; y >= bottom; y--) {
            int shift = y;
            // Temporarily lift the shared mutable positions to height y, measure, then restore them.
            sources.forEach(pos -> pos.move(EnumFacing.UP, shift));
            float conformity = weight(cache, sources, requiredConformity);
            sources.forEach(pos -> pos.move(EnumFacing.DOWN, shift));

            if (runConformity != null && DoubleMath.fuzzyEquals(conformity, runConformity, 0.01)) {
                // Still inside the current group; nothing to record yet.
                continue;
            }

            if (runConformity != null) {
                // Conformity changed: close the group that ended one step above y.
                consideration.add(Pair.of(LineSelection.fromRange(IntegerRanges.from(runStart, y + 1), true), weight(runConformity)));
            }
            runConformity = conformity;
            runStart = y;
        }

        if (runConformity != null) {
            // Close the final group, which extends down to the section's minimum.
            consideration.add(Pair.of(LineSelection.fromRange(IntegerRanges.from(runStart, bottom), true), weight(runConformity)));
        }
    }

    return consideration;
}
Also used : IvBlockCollection(ivorius.ivtoolkit.blocks.IvBlockCollection) BlockExpression(ivorius.reccomplex.utils.expression.BlockExpression) java.util(java.util) BlockAreas(ivorius.ivtoolkit.blocks.BlockAreas) TableDataSource(ivorius.reccomplex.gui.table.datasource.TableDataSource) StructureBoundingBoxes(ivorius.ivtoolkit.world.chunk.gen.StructureBoundingBoxes) Pair(org.apache.commons.lang3.tuple.Pair) ivorius.reccomplex.utils(ivorius.reccomplex.utils) RecurrentComplex(ivorius.reccomplex.RecurrentComplex) PositionedBlockExpression(ivorius.reccomplex.utils.expression.PositionedBlockExpression) JsonUtils(ivorius.reccomplex.json.JsonUtils) Nullable(javax.annotation.Nullable) ExpressionCache(ivorius.reccomplex.utils.algebra.ExpressionCache) DoubleMath(com.google.common.math.DoubleMath) TableDataSourceFactorMatch(ivorius.reccomplex.gui.editstructure.placer.TableDataSourceFactorMatch) WorldCache(ivorius.ivtoolkit.world.WorldCache) EnumFacing(net.minecraft.util.EnumFacing) LineSelection(ivorius.ivtoolkit.util.LineSelection) BlockPos(net.minecraft.util.math.BlockPos) Collectors(java.util.stream.Collectors) TableNavigator(ivorius.reccomplex.gui.table.TableNavigator) Type(java.lang.reflect.Type) TableDelegate(ivorius.reccomplex.gui.table.TableDelegate) com.google.gson(com.google.gson) IntegerRange(ivorius.ivtoolkit.gui.IntegerRange) IntegerRange(ivorius.ivtoolkit.gui.IntegerRange) BlockPos(net.minecraft.util.math.BlockPos) Pair(org.apache.commons.lang3.tuple.Pair)

Example 28 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.

the class FilterByOrientationBias method onTraversalSuccess.

/**
 * Finishes the traversal: annotates the first-pass variants with the filter results, writes them
 * to the VCF writer, and writes a summary table covering every (sample, transition) combination.
 *
 * @return always {@code null}.
 */
@Override
public Object onTraversalSuccess() {
    logger.info("Tagging whether genotypes are in one of the artifact modes.");

    // Calculate how many artifacts need to be cut
    double fdrThreshold = 0.01;
    final List<VariantContext> finalVariants = OrientationBiasFilterer.annotateVariantContextsWithFilterResults(fdrThreshold, relevantTransitions, firstPassVariants, transitionToPreAdapterScoreMap);

    logger.info("Writing variants to VCF...");
    finalVariants.forEach(vcfWriter::add);

    logger.info("Writing a simple summary table...");
    // Sample names are read from the first variant; with no variants there are no samples to report.
    final List<String> sampleNames = finalVariants.isEmpty()
            ? new ArrayList<String>()
            : finalVariants.get(0).getSampleNamesOrderedByName();

    // Transition-major order: all samples for the first transition, then all samples for the next, and so on.
    final List<Pair<String, Transition>> sampleTransitionCombinations = new ArrayList<>();
    for (Transition transition : relevantTransitions) {
        for (String sample : sampleNames) {
            sampleTransitionCombinations.add(Pair.of(sample, transition));
        }
    }

    final File summaryFile = new File(outputFile.getAbsolutePath() + SUMMARY_FILE_SUFFIX);
    OrientationBiasUtils.writeOrientationBiasSummaryTable(sampleTransitionCombinations, finalVariants, transitionToPreAdapterScoreMap, summaryFile);
    return null;
}
Also used : Transition(org.broadinstitute.hellbender.tools.picard.analysis.artifacts.Transition) VariantContext(htsjdk.variant.variantcontext.VariantContext) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)

Example 29 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.

the class RecalUtils method generateReportTables.

/**
 * Converts the given recalibration tables into report tables.
 *
 * <p>
 *     Every table that is not an additional-covariate table gets its own report table. All
 *     additional-covariate tables share a single report table: it is created when the first such
 *     table is encountered, and rows from later ones are appended to it.
 * </p>
 *
 * @param recalibrationTables the tables to report; iterated in their own iteration order.
 * @param covariates supplies the read-group and quality-score covariates used for column names and key formatting.
 * @return the report tables in the order they were created; never {@code null}.
 */
public static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final StandardCovariateList covariates) {
    final List<GATKReportTable> result = new LinkedList<>();
    // Declared outside the loop because the shared additional-covariate table keeps accumulating rows across iterations.
    int rowIndex = 0;
    GATKReportTable allCovsReportTable = null;
    for (NestedIntegerArray<RecalDatum> table : recalibrationTables) {
        // initialize the array to hold the column names
        // NOTE(review): the right element of each pair ("%s", "%d") looks like a column format string - confirm against makeNewTableWithColumns.
        final ArrayList<Pair<String, String>> columnNames = new ArrayList<>();
        // save the required covariate name so we can reference it in the future
        columnNames.add(new MutablePair<>(covariates.getReadGroupCovariate().parseNameForReport(), "%s"));
        if (!recalibrationTables.isReadGroupTable(table)) {
            // save the required covariate name so we can reference it in the future
            columnNames.add(new MutablePair<>(covariates.getQualityScoreCovariate().parseNameForReport(), "%d"));
            if (recalibrationTables.isAdditionalCovariateTable(table)) {
                columnNames.add(covariateValue);
                columnNames.add(covariateName);
            }
        }
        // the order of these column names is important here
        // (the row-filling loop below walks columnNames positionally, in exactly this order)
        columnNames.add(eventType);
        columnNames.add(empiricalQuality);
        if (recalibrationTables.isReadGroupTable(table)) {
            // only the read group table needs the estimated Q reported
            columnNames.add(estimatedQReported);
        }
        columnNames.add(nObservations);
        columnNames.add(nErrors);
        final String reportTableName = getReportTableName(recalibrationTables, table);
        final GATKReportTable.Sorting sort = GATKReportTable.Sorting.SORT_BY_COLUMN;
        final GATKReportTable reportTable;
        final boolean addToList;
        //XXX this "if" implicitly uses the knowledge about the ordering of tables.
        if (!recalibrationTables.isAdditionalCovariateTable(table)) {
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            // reset the row index since we're starting with a new table
            rowIndex = 0;
            addToList = true;
        } else if (allCovsReportTable == null && recalibrationTables.isAdditionalCovariateTable(table)) {
            // First additional-covariate table seen: create the shared report table and remember it.
            reportTable = makeNewTableWithColumns(columnNames, reportTableName, sort);
            // reset the row index since we're starting with a new table
            rowIndex = 0;
            allCovsReportTable = reportTable;
            addToList = true;
        } else {
            // Later additional-covariate table: keep appending to the shared table (rowIndex is not reset)
            // and do not add it to the result a second time.
            reportTable = allCovsReportTable;
            addToList = false;
        }
        for (final NestedIntegerArray.Leaf<RecalDatum> row : table.getAllLeaves()) {
            final RecalDatum datum = row.value;
            final int[] keys = row.keys;
            // columnIndex walks columnNames and keyIndex walks keys; both advance via post-increment,
            // so the statement order below must mirror the order in which columnNames was built.
            int columnIndex = 0;
            int keyIndex = 0;
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariates.getReadGroupCovariate().formatKey(keys[keyIndex++]));
            if (!recalibrationTables.isReadGroupTable(table)) {
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariates.getQualityScoreCovariate().formatKey(keys[keyIndex++]));
                if (recalibrationTables.isAdditionalCovariateTable(table)) {
                    final Covariate covariate = recalibrationTables.getCovariateForTable(table);
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariate.formatKey(keys[keyIndex++]));
                    // The covariate-name column takes its value from the covariate itself, so no key is consumed here.
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), covariate.parseNameForReport());
                }
            }
            // The key following the covariate keys identifies the event type.
            final EventType event = EventType.eventFrom(keys[keyIndex]);
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), event.toString());
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEmpiricalQuality());
            if (recalibrationTables.isReadGroupTable(table)) {
                // we only add the estimated Q reported in the RG table
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getEstimatedQReported());
            }
            reportTable.set(rowIndex, columnNames.get(columnIndex++).getLeft(), datum.getNumObservations());
            // Last column: no increment needed.
            reportTable.set(rowIndex, columnNames.get(columnIndex).getLeft(), datum.getNumMismatches());
            rowIndex++;
        }
        if (addToList) {
            //XXX using a set would be slow because the equals method on GATKReportTable is expensive.
            result.add(reportTable);
        }
    }
    return result;
}
Also used : Covariate(org.broadinstitute.hellbender.utils.recalibration.covariates.Covariate) GATKReportTable(org.broadinstitute.hellbender.utils.report.GATKReportTable) MutablePair(org.apache.commons.lang3.tuple.MutablePair) Pair(org.apache.commons.lang3.tuple.Pair) NestedIntegerArray(org.broadinstitute.hellbender.utils.collections.NestedIntegerArray)

Example 30 with Pair

use of org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair in project gatk by broadinstitute.

the class GCBiasSimulatedData method simulatedData.

/**
 * Builds a deterministic (fixed random seed) simulated data set in which the counts of each target
 * follow a quadratic GC-bias curve with a small amount of additional random noise.
 *
 * <p>Visible for the integration test.</p>
 *
 * @param numTargets number of simulated targets (rows of the count matrix).
 * @param numSamples number of simulated samples (columns of the count matrix).
 * @return a pair whose left element is the simulated read-count collection and whose right element
 *         is the GC content assigned to each target.
 */
public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
    final List<Target> phonyTargets = SimulatedTargets.phonyTargets(numTargets);
    final List<String> phonySamples = SimulatedSamples.phonySamples(numSamples);
    final Random random = new Random(13);

    // GC content per target: Gaussian around 0.5 (standard deviation 0.2), clamped into [0.05, 0.95].
    final double[] gcContentByTarget = IntStream.range(0, numTargets)
            .mapToDouble(n -> 0.5 + 0.2 * random.nextGaussian())
            .map(gc -> Math.max(Math.min(gc, 0.95), 0.05))
            .toArray();
    final double[] gcBiasByTarget = Arrays.stream(gcContentByTarget)
            .map(QUADRATIC_GC_BIAS_CURVE::apply)
            .toArray();

    // model mainly GC bias with a small random amount of non-GC bias
    // thus noise after GC correction should be nearly zero
    final RealMatrix counts = new Array2DRowRealMatrix(numTargets, numSamples);
    final DefaultRealMatrixChangingVisitor biasedCountFiller = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int target, final int column, final double value) {
            // The existing entry is ignored; each cell becomes the target's bias scaled by up to +1% noise.
            return gcBiasByTarget[target] * (1.0 + 0.01 * random.nextDouble());
        }
    };
    counts.walkInOptimizedOrder(biasedCountFiller);

    final ReadCountCollection simulatedCounts = new ReadCountCollection(phonyTargets, phonySamples, counts);
    return new ImmutablePair<>(simulatedCounts, gcContentByTarget);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Arrays(java.util.Arrays) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Random(java.util.Random) Function(java.util.function.Function) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Collections(java.util.Collections) Random(java.util.Random) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Aggregations

Pair (org.apache.commons.lang3.tuple.Pair): 685; ArrayList (java.util.ArrayList): 209; List (java.util.List): 154; Test (org.junit.Test): 150; ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair): 142; HashMap (java.util.HashMap): 123; Collectors (java.util.stream.Collectors): 123; Map (java.util.Map): 112; Message (com.microsoft.azure.sdk.iot.device.Message): 71; IOException (java.io.IOException): 70; MutablePair (org.apache.commons.lang3.tuple.MutablePair): 64; java.util (java.util): 55; IotHubTransportMessage (com.microsoft.azure.sdk.iot.device.transport.IotHubTransportMessage): 52; Set (java.util.Set): 49; StringUtils (org.apache.commons.lang3.StringUtils): 48; File (java.io.File): 46; Optional (java.util.Optional): 45; Arrays (java.util.Arrays): 44; HashSet (java.util.HashSet): 40; Test (org.junit.jupiter.api.Test): 39