Search in sources :

Example 76 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtilsUnitTest method simpleEigensampleData.

/**
 * Supplies a single read-count collection in which every target row carries the
 * same values: 0, 1, 2, ..., {@code NUM_SAMPLES - 1}.
 *
 * <p>Targets are named {@code target_<i>} and placed on contig "1" at
 * {@code [100 * i + 1, 100 * i + 5]}; columns are named {@code sample_<i>}.</p>
 *
 * @return one parameter set whose only element is the {@link ReadCountCollection}.
 */
@DataProvider(name = "singleEigensample")
public Object[][] simpleEigensampleData() {
    final List<Object[]> result = new ArrayList<>();
    final int NUM_TARGETS = 10;
    final int NUM_SAMPLES = 5;
    // mapToObj avoids boxing every index just to map it to another object.
    final List<Target> targets = IntStream.range(0, NUM_TARGETS)
            .mapToObj(i -> new Target("target_" + i, new SimpleInterval("1", 100 * i + 1, 100 * i + 5)))
            .collect(Collectors.toList());
    final List<String> columnNames = IntStream.range(0, NUM_SAMPLES)
            .mapToObj(i -> "sample_" + i)
            .collect(Collectors.toList());
    // Zero-initialised NUM_TARGETS x NUM_SAMPLES matrix; every row is overwritten below.
    final RealMatrix counts = new Array2DRowRealMatrix(NUM_TARGETS, NUM_SAMPLES);
    // All row data is the same (0,1,2,3,4...)
    final double[] rowData = IntStream.range(0, NUM_SAMPLES).asDoubleStream().toArray();
    for (int i = 0; i < NUM_TARGETS; i++) {
        counts.setRow(i, rowData);
    }
    // The collection is built exactly once; the previous extra, discarded instance was dead code.
    result.add(new Object[] { new ReadCountCollection(targets, columnNames, counts) });
    return result.toArray(new Object[result.size()][]);
}
Also used : IntStream(java.util.stream.IntStream) SVD(org.broadinstitute.hellbender.utils.svd.SVD) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Level(org.apache.logging.log4j.Level) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) Test(org.testng.annotations.Test) Random(java.util.Random) OptionalInt(java.util.OptionalInt) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) ArrayList(java.util.ArrayList) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Pair(org.apache.commons.lang3.tuple.Pair) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Stream(java.util.stream.Stream) Target(org.broadinstitute.hellbender.tools.exome.Target) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) RealMatrix(org.apache.commons.math3.linear.RealMatrix) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) PoNTestUtils(org.broadinstitute.hellbender.tools.pon.PoNTestUtils) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ArrayList(java.util.ArrayList) Target(org.broadinstitute.hellbender.tools.exome.Target) 
Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) DataProvider(org.testng.annotations.DataProvider)

Example 77 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtilsUnitTest method readCountOnlyWithDiverseShapeData.

/**
 * Supplies read-count collections of several shapes, filled with pseudo-random
 * values drawn from a generator with a fixed seed.
 *
 * <p>The k-th collection has {@code targetCounts[k]} rows (targets) and
 * {@code columnCounts[k]} columns (samples). Names are 1-based:
 * {@code target_1, target_2, ...} and {@code sample_1, sample_2, ...}.</p>
 *
 * @return one parameter set per shape, each holding a single {@link ReadCountCollection}.
 */
@DataProvider(name = "readCountOnlyWithDiverseShapeData")
public Object[][] readCountOnlyWithDiverseShapeData() {
    final int[] columnCounts = new int[] { 10, 100, 100, 200 };
    final int[] targetCounts = new int[] { 100, 100, 200, 200 };
    final Random rdn = new Random(31);
    final List<Object[]> result = new ArrayList<>(4);
    for (int shape = 0; shape < columnCounts.length; shape++) {
        final int sampleCount = columnCounts[shape];
        final int targetCount = targetCounts[shape];

        final List<String> sampleNames = new ArrayList<>(sampleCount);
        for (int s = 1; s <= sampleCount; s++) {
            sampleNames.add("sample_" + s);
        }

        final List<Target> targetList = new ArrayList<>(targetCount);
        for (int t = 1; t <= targetCount; t++) {
            targetList.add(new Target("target_" + t));
        }

        // Fill row by row so the sequence of random draws is row-major.
        final double[][] values = new double[targetCount][sampleCount];
        for (final double[] row : values) {
            for (int col = 0; col < row.length; col++) {
                row[col] = rdn.nextDouble();
            }
        }

        // 'false' => the matrix wraps the array instead of copying it.
        final Array2DRowRealMatrix matrix = new Array2DRowRealMatrix(values, false);
        result.add(new Object[] { new ReadCountCollection(targetList, sampleNames, matrix) });
    }
    return result.toArray(new Object[result.size()][]);
}
Also used : IntStream(java.util.stream.IntStream) SVD(org.broadinstitute.hellbender.utils.svd.SVD) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Level(org.apache.logging.log4j.Level) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) Test(org.testng.annotations.Test) Random(java.util.Random) OptionalInt(java.util.OptionalInt) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) ArrayList(java.util.ArrayList) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Pair(org.apache.commons.lang3.tuple.Pair) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Stream(java.util.stream.Stream) Target(org.broadinstitute.hellbender.tools.exome.Target) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) RealMatrix(org.apache.commons.math3.linear.RealMatrix) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) PoNTestUtils(org.broadinstitute.hellbender.tools.pon.PoNTestUtils) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ArrayList(java.util.ArrayList) Target(org.broadinstitute.hellbender.tools.exome.Target) 
Random(java.util.Random) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DataProvider(org.testng.annotations.DataProvider)

Example 78 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class PCATangentNormalizationUtilsUnitTest method normalizeReadCountByTargetFactorsData.

/**
 * Supplies one 3-target x 3-column read-count collection together with an
 * array of three target factors.
 *
 * <p>Targets are named "A", "B", "C"; columns are named "1", "2", "3".</p>
 *
 * @return a single parameter set: {@code { ReadCountCollection, double[] targetFactors }}.
 */
@DataProvider(name = "normalizeReadCountByTargetFactorsData")
public Object[][] normalizeReadCountByTargetFactorsData() {
    final List<Object[]> result = new ArrayList<>(1);

    // Plain lists instead of double-brace initialization: no anonymous ArrayList
    // subclasses, no captured reference to this test instance, no "serial" warning.
    final List<Target> targets = new ArrayList<>(3);
    targets.add(new Target("A"));
    targets.add(new Target("B"));
    targets.add(new Target("C"));

    final List<String> columnNames = new ArrayList<>(3);
    columnNames.add("1");
    columnNames.add("2");
    columnNames.add("3");

    final double[][] counts = new double[][] {
            new double[] { 1.1, 2.2, 3.3 },
            new double[] { 0.1, 0.2, 0.3 },
            new double[] { 11.1, 22.2, 33.3 } };
    final double[] targetFactors = new double[] { 100.0, 200.0, 300.0 };

    // 'false' => the matrix wraps the counts array instead of copying it.
    result.add(new Object[] {
            new ReadCountCollection(targets, columnNames, new Array2DRowRealMatrix(counts, false)),
            targetFactors });
    return result.toArray(new Object[result.size()][]);
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ArrayList(java.util.ArrayList) DataProvider(org.testng.annotations.DataProvider)

Example 79 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class CaseToPoNTargetMapper method fromPoNToCaseCounts.

/**
 * Reorders the rows of a PoN-ordered count matrix so that they follow the
 * order of this mapper's output (case) targets.
 *
 * <p>Row {@code i} of the returned matrix is a copy of the input row whose PoN
 * target name equals the name of the {@code i}-th output target.</p>
 *
 * @param ponCounts count matrix with rows organized using the PoN target order.
 * @return never {@code null}; a new matrix with the same dimensions as
 *         {@code ponCounts} and rows arranged in the case read count target order.
 */
public RealMatrix fromPoNToCaseCounts(final RealMatrix ponCounts) {
    // PoN target name -> row index in ponCounts.
    final Map<String, Integer> ponRowByName = IntStream.range(0, ponTargetNames.size())
            .boxed()
            .collect(Collectors.toMap(ponTargetNames::get, Function.identity()));

    final int rowCount = ponCounts.getRowDimension();
    final int columnCount = ponCounts.getColumnDimension();
    final RealMatrix reordered = new Array2DRowRealMatrix(rowCount, columnCount);

    for (int caseRow = 0; caseRow < outputTargets.size(); caseRow++) {
        final String targetName = outputTargets.get(caseRow).getName();
        final double[] ponRowValues = ponCounts.getRow(ponRowByName.get(targetName));
        reordered.setRow(caseRow, ponRowValues);
    }
    return reordered;
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix)

Example 80 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class HDF5PCACoveragePoN method write.

/**
 * Write all of the coverage PoN fields to HDF5.
 *
 * <p>Opens {@code outFile} with the requested mode and stores, in order: the version
 * number, the three target lists, target factors and variances, the normalized and
 * log-normalized count matrices, the matrices taken from {@code reduction}, the
 * three target-name lists and the two sample-name lists. Progress is logged at
 * info level before each step. The file is closed when the method exits.</p>
 *
 * @param outFile              destination file; must not be {@code null}.
 * @param openMode             mode passed to the {@link HDF5File} constructor.
 * @param rawTargets           targets stored as the "raw" targets; must not be {@code null}.
 * @param normalizedCounts     supplies the targets, sample names and normalized counts; must not be {@code null}.
 * @param logNormalizedCounts  supplies the panel targets, panel sample names and log-normalized counts; must not be {@code null}.
 * @param targetFactors        target factors to store; must not be {@code null}.
 * @param targetVariances      target variances to store; must not be {@code null}.
 * @param reduction            supplies the pseudoinverse, reduced counts and reduced pseudoinverse; must not be {@code null}.
 */
public static void write(final File outFile, final HDF5File.OpenMode openMode, final List<Target> rawTargets, final ReadCountCollection normalizedCounts, final ReadCountCollection logNormalizedCounts, final double[] targetFactors, final double[] targetVariances, final ReductionResult reduction) {
    Utils.nonNull(outFile);
    Utils.nonNull(normalizedCounts);
    Utils.nonNull(logNormalizedCounts);
    Utils.nonNull(rawTargets);
    Utils.nonNull(targetFactors);
    Utils.nonNull(targetVariances);
    Utils.nonNull(reduction);

    try (final HDF5File hdf5 = new HDF5File(outFile, openMode)) {
        logger.info("Creating " + outFile.getAbsolutePath() + "...");
        final HDF5PCACoveragePoN writer = new HDF5PCACoveragePoN(hdf5);

        logger.info("Setting version number (" + CURRENT_PON_VERSION + ")...");
        writer.setVersion(CURRENT_PON_VERSION);

        // --- target lists ---
        final List<Target> allTargets = normalizedCounts.targets();
        final List<Target> reducedTargets = logNormalizedCounts.targets();
        logger.info("Setting targets ...");
        writer.setTargets(allTargets);
        logger.info("Setting raw targets ...");
        writer.setRawTargets(rawTargets);
        logger.info("Setting reduced panel targets ...");
        writer.setPanelTargets(reducedTargets);

        // --- per-target vectors ---
        logger.info("Setting target factors (" + targetFactors.length + ") ...");
        writer.setTargetFactors(targetFactors);
        logger.info("Setting target variances...");
        writer.setTargetVariances(targetVariances);

        // --- count matrices ---
        final int normalizedRows = normalizedCounts.counts().getRowDimension();
        final int normalizedColumns = normalizedCounts.counts().getColumnDimension();
        logger.info("Setting normalized counts (" + normalizedRows + " x " + normalizedColumns + ") (T)...");
        writer.setNormalizedCounts(normalizedCounts.counts());

        final int logNormalizedRows = logNormalizedCounts.counts().getRowDimension();
        final int logNormalizedColumns = logNormalizedCounts.counts().getColumnDimension();
        logger.info("Setting log-normalized counts (" + logNormalizedRows + " x " + logNormalizedColumns + ") (T) ...");
        writer.setLogNormalizedCounts(logNormalizedCounts.counts());

        // --- matrices supplied by the reduction result ---
        final int pinvRows = reduction.getPseudoInverse().getRowDimension();
        final int pinvColumns = reduction.getPseudoInverse().getColumnDimension();
        logger.info("Setting log-normalized pseudoinverse (" + pinvRows + " x " + pinvColumns + ") ...");
        writer.setLogNormalizedPInverseCounts(reduction.getPseudoInverse());

        final int reducedRows = reduction.getReducedCounts().getRowDimension();
        final int reducedColumns = reduction.getReducedCounts().getColumnDimension();
        logger.info("Setting reduced panel counts (" + reducedRows + " x " + reducedColumns + ") (T) ...");
        writer.setReducedPanelCounts(reduction.getReducedCounts());

        final int reducedPinvRows = reduction.getReducedPseudoInverse().getRowDimension();
        final int reducedPinvColumns = reduction.getReducedPseudoInverse().getColumnDimension();
        logger.info("Setting reduced panel pseudoinverse (" + reducedPinvRows + " x " + reducedPinvColumns + ") ...");
        writer.setReducedPanelPInverseCounts(reduction.getReducedPseudoInverse());

        // --- target names (all three lists are built before any is stored) ---
        final List<String> allTargetNames = normalizedCounts.targets().stream()
                .map(Target::getName)
                .collect(Collectors.toList());
        final List<String> rawNames = rawTargets.stream()
                .map(Target::getName)
                .collect(Collectors.toList());
        final List<String> reducedTargetNames = logNormalizedCounts.targets().stream()
                .map(Target::getName)
                .collect(Collectors.toList());
        logger.info("Setting target names ...");
        writer.setTargetNames(allTargetNames);
        logger.info("Setting raw target names ...");
        writer.setRawTargetNames(rawNames);
        logger.info("Setting reduced target names ...");
        writer.setPanelTargetNames(reducedTargetNames);

        // --- sample names ---
        final List<String> allSampleNames = normalizedCounts.columnNames();
        final List<String> reducedSampleNames = logNormalizedCounts.columnNames();
        logger.info("Setting sample names ...");
        writer.setSampleNames(allSampleNames);
        logger.info("Setting reduced sample names ...");
        writer.setPanelSampleNames(reducedSampleNames);
    }
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) HDF5File(org.broadinstitute.hdf5.HDF5File)

Aggregations

Target (org.broadinstitute.hellbender.tools.exome.Target)110 Test (org.testng.annotations.Test)56 File (java.io.File)52 Collectors (java.util.stream.Collectors)42 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)42 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)38 IOException (java.io.IOException)32 java.util (java.util)32 IntStream (java.util.stream.IntStream)32 Assert (org.testng.Assert)32 Pair (org.apache.commons.lang3.tuple.Pair)26 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)26 UserException (org.broadinstitute.hellbender.exceptions.UserException)26 Genotype (htsjdk.variant.variantcontext.Genotype)22 List (java.util.List)22 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)22 CopyNumberTriState (org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)22 DataProvider (org.testng.annotations.DataProvider)22 VariantContext (htsjdk.variant.variantcontext.VariantContext)20 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)20