Search in sources :

Example 1 with Distribution

use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.

the class MeasurementSimulator method simulate.

/**
 * Simulates (optionally) neither, either, or both of two three-dimensionaly
 * data sets, rawData and measuredData. For the form of the first set, see
 * <code>getRawData</code>; for the form of the second set, see
 * <code>getMeasuredData</code>.  The idea of the simulation is that cells
 * are grown in different dishes and periodically sampled. When they are
 * sampled, their DNA is ground up and then pipetted in aggregate onto a
 * number of microarray chips. Each indivual cell is simulated separately;
 * the simulated cells are grouped into dishes. Expression levels of cells
 * in each dish are aggregated and then random noise is added to this
 * aggregate expression level representing various sources of noise: sample
 * to sample variation, chip to chip variation, and pixellation measurement
 * variation. For more information, see the Genetic Simulator spec, which
 * this method implements.
 *
 * @see #getRawData
 * @see #getMeasuredData
 */
public void simulate(GeneHistory history) {
    setHistory(history);
    history.reset();
    // Get the update function.
    UpdateFunction updateFunction = history.getUpdateFunction();
    int numFactors = updateFunction.getNumFactors();
    // Set up the dish model.
    DishModel dishModel = new DishModel(getNumDishes(), getDishDishVariability());
    // Set up the history.
    history.setInitSync(isInitSync());
    history.setDishModel(dishModel);
    // We will use two data cubes to store data for display
    // purposes: a measurement cube and a raw data cube. We first
    // construct the measurement cube, with two extra columns for
    // (a) dish number and (b) chip number.
    int numChips = getNumSamplesPerDish();
    if (isMeasuredDataSaved()) {
        int numSteps = timeSteps.length;
        int numSamples = getNumDishes() * numChips;
        this.measuredData = new double[numFactors][numSteps][numSamples];
    }
    // We next construct the raw data cube, with two extra columns
    // for (a) individual number and (b) dish number. Raw data is
    // saved out only if the 'rawDataSave' parameter is true. If
    // it's not true, these objects are not constructed. (They can
    // be extremely large.) References to the cube and the initial
    // columns need to be saved in this scope for future use (but
    // may be null).
    this.rawData = null;
    if (isRawDataSaved()) {
        int numSteps = timeSteps.length;
        int numCells = getNumDishes() * getNumCellsPerDish();
        this.rawData = new double[numFactors][numSteps][numCells];
    }
    // There are three sources of error--sample to sample, chip to
    // chip, and pixel digitalization.  A certain number of
    // samples are drawn from each dish, and these are deposited
    // on the same number of chips. There is an error associated
    // with each chip and with each sample. (Pixel digitalization
    // error is drawn later.)
    double sampleSd = getSampleSampleVariability();
    double chipSd = getChipChipVariability();
    double pixelSd = getPixelDigitalization();
    Distribution sampleErrorDist = new Normal(0.0, sampleSd);
    Distribution chipErrorDist = new Normal(0.0, chipSd);
    Distribution pixelErrorDist = new Normal(0.0, pixelSd);
    double[][] chipErrors = new double[getNumDishes()][numChips];
    double[][] sampleErrors = new double[getNumDishes()][numChips];
    for (int d = 0; d < getNumDishes(); d++) {
        for (int ch = 0; ch < numChips; ch++) {
            chipErrors[d][ch] = chipErrorDist.nextRandom();
            sampleErrors[d][ch] = sampleErrorDist.nextRandom();
        }
    }
    // Make two double[] arrays, one to store data generated for a
    // particular cell (in a particular dish) and one to store
    // aggregations of these data.
    double[][] cellData = new double[timeSteps.length][numFactors];
    double[][] aggregation = new double[cellData.length][cellData[0].length];
    // 'aggregation'. First, iterate over dishes.
    for (int d = 0; d < getNumDishes(); d++) {
        dishModel.setDishNumber(d);
        // Reset the aggregation.
        for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
            Arrays.fill(aggregation[sIndex], 0);
        }
        // Next, iterate over the cells in a dish.
        for (int c = 0; c < getNumCellsPerDish(); c++) {
            // 12/01/01
            if ((c + 1) % 50 == 0) {
                this.dishNumber = d;
                this.cellNumber = c;
            // System.out.println("Dish # " + (d + 1) + ", Cell # " +
            // (c + 1));
            }
            // Reset the cell data.
            for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
                Arrays.fill(cellData[sIndex], 0);
            }
            // Initialize the history array.
            history.initialize();
            // Generate data for one cell, storing only those time
            // steps that are indicated in the timeSteps
            // array. Note that the timeSteps array is 1-indexed,
            // while the 's' variable is 0-indexed. (Might want to
            // fix this in a future version.)
            int s = -1;
            int sIndex = 0;
            // for the steps in timeSteps[].
            while (++s < getStepsGenerated()) {
                // being synchronized.
                if (s > 0) {
                    history.update();
                }
                if (s == timeSteps[sIndex] - 1) {
                    double[][] historyArray = history.getHistoryArray();
                    // the factors (genes).
                    for (int f = 0; f < numFactors; f++) {
                        // Copy data from the getModel time step in
                        // the history to the cellData[][] array.
                        cellData[sIndex][f] = historyArray[0][f];
                        // Antilog it if necessary.
                        if (isAntilogCalculated()) {
                            cellData[sIndex][f] = Math.exp(cellData[sIndex][f]);
                        }
                        // be null.)
                        if (isRawDataSaved()) {
                            int row = d * getNumCellsPerDish() + c;
                            /*this.rawData[f][sIndex][row] =
                                historyArray[0][f];*/
                            this.rawData[f][sIndex][row] = cellData[sIndex][f];
                        }
                    }
                    if (++sIndex >= timeSteps.length) {
                        break;
                    }
                }
            // END if (s == timeSteps[sIndex])
            }
            // Aggregate data.
            for (int i = 0; i < cellData.length; i++) {
                for (int j = 0; j < cellData[0].length; j++) {
                    aggregation[i][j] += cellData[i][j];
                }
            }
        }
        if (isMeasuredDataSaved()) {
            // generated.)
            for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
                for (int f = 0; f < numFactors; f++) {
                    for (int ch = 0; ch < numChips; ch++) {
                        double average = aggregation[sIndex][f] / getNumCellsPerDish();
                        double pixelError = pixelErrorDist.nextRandom();
                        double measurement = average + sampleErrors[d][ch] + chipErrors[d][ch] + pixelError;
                        this.measuredData[f][sIndex][d * numChips + ch] = measurement;
                    }
                // END for (int ch = 0; ch < numChips; ch++)
                }
            // END for (int f = 0; f < history.getNumFacto...
            }
        // END for (int sIndex = 0; sIndex < timeSteps...
        }
    // END if (measuredDataSaved)
    }
    return;
}
Also used : DishModel(edu.cmu.tetrad.gene.tetrad.gene.history.DishModel) UpdateFunction(edu.cmu.tetrad.gene.tetrad.gene.history.UpdateFunction) Distribution(edu.cmu.tetrad.util.dist.Distribution) Normal(edu.cmu.tetrad.util.dist.Normal)

Example 2 with Distribution

use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.

the class TestLingamPattern method test1.

@Test
public void test1() {
    RandomUtil.getInstance().setSeed(4938492L);
    int sampleSize = 1000;
    List<Node> nodes = new ArrayList<>();
    for (int i = 0; i < 6; i++) {
        nodes.add(new ContinuousVariable("X" + (i + 1)));
    }
    Graph graph = new Dag(GraphUtils.randomGraph(nodes, 0, 6, 4, 4, 4, false));
    List<Distribution> variableDistributions = new ArrayList<>();
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Uniform(-1, 1));
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Normal(0, 1));
    SemPm semPm = new SemPm(graph);
    SemIm semIm = new SemIm(semPm);
    DataSet dataSet = simulateDataNonNormal(semIm, sampleSize, variableDistributions);
    Score score = new SemBicScore(new CovarianceMatrixOnTheFly(dataSet));
    Graph estPattern = new Fges(score).search();
    LingamPattern lingam = new LingamPattern(estPattern, dataSet);
    lingam.search();
    double[] pvals = lingam.getPValues();
    double[] expectedPVals = { 0.18, 0.29, 0.88, 0.00, 0.01, 0.58 };
    for (int i = 0; i < pvals.length; i++) {
        assertEquals(expectedPVals[i], pvals[i], 0.01);
    }
}
Also used : ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) DataSet(edu.cmu.tetrad.data.DataSet) Uniform(edu.cmu.tetrad.util.dist.Uniform) Normal(edu.cmu.tetrad.util.dist.Normal) ContinuousVariable(edu.cmu.tetrad.data.ContinuousVariable) Distribution(edu.cmu.tetrad.util.dist.Distribution) SemPm(edu.cmu.tetrad.sem.SemPm) CovarianceMatrixOnTheFly(edu.cmu.tetrad.data.CovarianceMatrixOnTheFly) SemIm(edu.cmu.tetrad.sem.SemIm) Test(org.junit.Test)

Example 3 with Distribution

use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.

the class Ling method makeDataSet.

private void makeDataSet(GraphWithParameters graphWP) {
    // define the "Gaussian-squared" distribution
    Distribution gp2 = new GaussianPower(2);
    // the coefficients of the error terms  (here, all 1s)
    TetradVector errorCoefficients = getErrorCoeffsIdentity(graphWP.getGraph().getNumNodes());
    // generate data from the SEM
    TetradMatrix inVectors = simulateCyclic(graphWP, errorCoefficients, numSamples, gp2);
    // reformat it
    dataSet = ColtDataSet.makeContinuousData(graphWP.getGraph().getNodes(), new TetradMatrix(inVectors.transpose().toArray()));
}
Also used : TetradVector(edu.cmu.tetrad.util.TetradVector) Distribution(edu.cmu.tetrad.util.dist.Distribution) GaussianPower(edu.cmu.tetrad.util.dist.GaussianPower) TetradMatrix(edu.cmu.tetrad.util.TetradMatrix)

Example 4 with Distribution

use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.

the class LargeScaleSimulation method setupModel.

private void setupModel(int size) {
    if (alreadySetUp) {
        return;
    }
    Map<Node, Integer> nodesHash = new HashedMap<>();
    for (int i = 0; i < variableNodes.size(); i++) {
        nodesHash.put(variableNodes.get(i), i);
    }
    this.parents = new int[size][];
    this.coefs = new double[size][];
    this.errorVars = new double[size];
    this.means = new double[size];
    for (int i = 0; i < size; i++) {
        this.parents[i] = new int[0];
        this.coefs[i] = new double[0];
    }
    Distribution edgeCoefDist = new Split(coefLow, coefHigh);
    Distribution errorCovarDist = new Uniform(varLow, varHigh);
    Distribution meanDist = new Uniform(meanLow, meanHigh);
    for (Edge edge : graph.getEdges()) {
        Node tail = Edges.getDirectedEdgeTail(edge);
        Node head = Edges.getDirectedEdgeHead(edge);
        int _tail = nodesHash.get(tail);
        int _head = nodesHash.get(head);
        int[] parents = this.parents[_head];
        int[] newParents = new int[parents.length + 1];
        System.arraycopy(parents, 0, newParents, 0, parents.length);
        newParents[newParents.length - 1] = _tail;
        double[] coefs = this.coefs[_head];
        double[] newCoefs = new double[coefs.length + 1];
        System.arraycopy(coefs, 0, newCoefs, 0, coefs.length);
        double coef = edgeCoefDist.nextRandom();
        if (includePositiveCoefs && !includeNegativeCoefs) {
            coef = Math.abs(coef);
        } else if (!includePositiveCoefs && includeNegativeCoefs) {
            coef = -Math.abs(coef);
        } else if (!includePositiveCoefs && !includeNegativeCoefs) {
            coef = 0;
        }
        newCoefs[newCoefs.length - 1] = coef;
        this.parents[_head] = newParents;
        this.coefs[_head] = newCoefs;
    }
    if (graph instanceof TimeLagGraph) {
        TimeLagGraph lagGraph = (TimeLagGraph) graph;
        // TimeSeriesUtils.getKnowledge(lagGraph);
        IKnowledge knowledge = getKnowledge(lagGraph);
        List<Node> lag0 = lagGraph.getLag0Nodes();
        for (Node y : lag0) {
            List<Node> _parents = lagGraph.getParents(y);
            for (Node x : _parents) {
                List<List<Node>> similar = returnSimilarPairs(x, y, knowledge);
                int _x = variableNodes.indexOf(x);
                int _y = variableNodes.indexOf(y);
                double first = Double.NaN;
                for (int i = 0; i < parents[_y].length; i++) {
                    if (_x == parents[_y][i]) {
                        first = coefs[_y][i];
                    }
                }
                for (int j = 0; j < similar.get(0).size(); j++) {
                    int _xx = variableNodes.indexOf(similar.get(0).get(j));
                    int _yy = variableNodes.indexOf(similar.get(1).get(j));
                    for (int i = 0; i < parents[_yy].length; i++) {
                        if (_xx == parents[_yy][i]) {
                            coefs[_yy][i] = first;
                        }
                    }
                }
            }
        }
    }
    for (int i = 0; i < size; i++) {
        this.errorVars[i] = errorCovarDist.nextRandom();
        this.means[i] = meanDist.nextRandom();
    }
    alreadySetUp = true;
}
Also used : Uniform(edu.cmu.tetrad.util.dist.Uniform) Distribution(edu.cmu.tetrad.util.dist.Distribution) HashedMap(org.apache.commons.collections4.map.HashedMap) Split(edu.cmu.tetrad.util.dist.Split)

Example 5 with Distribution

use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.

the class TestLingamPattern method simulateDataNonNormal.

/**
 * This simulates data by picking random values for the exogenous terms and percolating this information down
 * through the SEM, assuming it is acyclic. Fast for large simulations but hangs for cyclic models.
 *
 * @param sampleSize    > 0.
 * @return the simulated data set.
 */
private DataSet simulateDataNonNormal(SemIm semIm, int sampleSize, List<Distribution> distributions) {
    List<Node> variables = new LinkedList<>();
    List<Node> variableNodes = semIm.getSemPm().getVariableNodes();
    for (Node node : variableNodes) {
        ContinuousVariable var = new ContinuousVariable(node.getName());
        variables.add(var);
    }
    DataSet dataSet = new ColtDataSet(sampleSize, variables);
    // Create some index arrays to hopefully speed up the simulation.
    SemGraph graph = semIm.getSemPm().getGraph();
    List<Node> tierOrdering = graph.getCausalOrdering();
    int[] tierIndices = new int[variableNodes.size()];
    for (int i = 0; i < tierIndices.length; i++) {
        tierIndices[i] = variableNodes.indexOf(tierOrdering.get(i));
    }
    int[][] _parents = new int[variables.size()][];
    for (int i = 0; i < variableNodes.size(); i++) {
        Node node = variableNodes.get(i);
        List<Node> parents = graph.getParents(node);
        for (Iterator<Node> j = parents.iterator(); j.hasNext(); ) {
            Node _node = j.next();
            if (_node.getNodeType() == NodeType.ERROR) {
                j.remove();
            }
        }
        _parents[i] = new int[parents.size()];
        for (int j = 0; j < parents.size(); j++) {
            Node _parent = parents.get(j);
            _parents[i][j] = variableNodes.indexOf(_parent);
        }
    }
    // Do the simulation.
    for (int row = 0; row < sampleSize; row++) {
        for (int i = 0; i < tierOrdering.size(); i++) {
            int col = tierIndices[i];
            Distribution distribution = distributions.get(col);
            // System.out.println(distribution);
            double value = distribution.nextRandom();
            for (int j = 0; j < _parents[col].length; j++) {
                int parent = _parents[col][j];
                value += dataSet.getDouble(row, parent) * semIm.getEdgeCoef().get(parent, col);
            }
            value += semIm.getMeans()[col];
            dataSet.setDouble(row, col, value);
        }
    }
    return dataSet;
}
Also used : ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) DataSet(edu.cmu.tetrad.data.DataSet) ContinuousVariable(edu.cmu.tetrad.data.ContinuousVariable) ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) Distribution(edu.cmu.tetrad.util.dist.Distribution)

Aggregations

Distribution (edu.cmu.tetrad.util.dist.Distribution)5 ColtDataSet (edu.cmu.tetrad.data.ColtDataSet)2 ContinuousVariable (edu.cmu.tetrad.data.ContinuousVariable)2 DataSet (edu.cmu.tetrad.data.DataSet)2 Normal (edu.cmu.tetrad.util.dist.Normal)2 Uniform (edu.cmu.tetrad.util.dist.Uniform)2 CovarianceMatrixOnTheFly (edu.cmu.tetrad.data.CovarianceMatrixOnTheFly)1 DishModel (edu.cmu.tetrad.gene.tetrad.gene.history.DishModel)1 UpdateFunction (edu.cmu.tetrad.gene.tetrad.gene.history.UpdateFunction)1 SemIm (edu.cmu.tetrad.sem.SemIm)1 SemPm (edu.cmu.tetrad.sem.SemPm)1 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)1 TetradVector (edu.cmu.tetrad.util.TetradVector)1 GaussianPower (edu.cmu.tetrad.util.dist.GaussianPower)1 Split (edu.cmu.tetrad.util.dist.Split)1 HashedMap (org.apache.commons.collections4.map.HashedMap)1 Test (org.junit.Test)1