Search in sources :

Example 1 with Normal

use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.

the class MeasurementSimulator method simulate.

/**
 * Simulates (optionally) neither, either, or both of two three-dimensionaly
 * data sets, rawData and measuredData. For the form of the first set, see
 * <code>getRawData</code>; for the form of the second set, see
 * <code>getMeasuredData</code>.  The idea of the simulation is that cells
 * are grown in different dishes and periodically sampled. When they are
 * sampled, their DNA is ground up and then pipetted in aggregate onto a
 * number of microarray chips. Each indivual cell is simulated separately;
 * the simulated cells are grouped into dishes. Expression levels of cells
 * in each dish are aggregated and then random noise is added to this
 * aggregate expression level representing various sources of noise: sample
 * to sample variation, chip to chip variation, and pixellation measurement
 * variation. For more information, see the Genetic Simulator spec, which
 * this method implements.
 *
 * @see #getRawData
 * @see #getMeasuredData
 */
public void simulate(GeneHistory history) {
    setHistory(history);
    history.reset();
    // Get the update function.
    UpdateFunction updateFunction = history.getUpdateFunction();
    int numFactors = updateFunction.getNumFactors();
    // Set up the dish model.
    DishModel dishModel = new DishModel(getNumDishes(), getDishDishVariability());
    // Set up the history.
    history.setInitSync(isInitSync());
    history.setDishModel(dishModel);
    // We will use two data cubes to store data for display
    // purposes: a measurement cube and a raw data cube. We first
    // construct the measurement cube, with two extra columns for
    // (a) dish number and (b) chip number.
    int numChips = getNumSamplesPerDish();
    if (isMeasuredDataSaved()) {
        int numSteps = timeSteps.length;
        int numSamples = getNumDishes() * numChips;
        this.measuredData = new double[numFactors][numSteps][numSamples];
    }
    // We next construct the raw data cube, with two extra columns
    // for (a) individual number and (b) dish number. Raw data is
    // saved out only if the 'rawDataSave' parameter is true. If
    // it's not true, these objects are not constructed. (They can
    // be extremely large.) References to the cube and the initial
    // columns need to be saved in this scope for future use (but
    // may be null).
    this.rawData = null;
    if (isRawDataSaved()) {
        int numSteps = timeSteps.length;
        int numCells = getNumDishes() * getNumCellsPerDish();
        this.rawData = new double[numFactors][numSteps][numCells];
    }
    // There are three sources of error--sample to sample, chip to
    // chip, and pixel digitalization.  A certain number of
    // samples are drawn from each dish, and these are deposited
    // on the same number of chips. There is an error associated
    // with each chip and with each sample. (Pixel digitalization
    // error is drawn later.)
    double sampleSd = getSampleSampleVariability();
    double chipSd = getChipChipVariability();
    double pixelSd = getPixelDigitalization();
    Distribution sampleErrorDist = new Normal(0.0, sampleSd);
    Distribution chipErrorDist = new Normal(0.0, chipSd);
    Distribution pixelErrorDist = new Normal(0.0, pixelSd);
    double[][] chipErrors = new double[getNumDishes()][numChips];
    double[][] sampleErrors = new double[getNumDishes()][numChips];
    for (int d = 0; d < getNumDishes(); d++) {
        for (int ch = 0; ch < numChips; ch++) {
            chipErrors[d][ch] = chipErrorDist.nextRandom();
            sampleErrors[d][ch] = sampleErrorDist.nextRandom();
        }
    }
    // Make two double[] arrays, one to store data generated for a
    // particular cell (in a particular dish) and one to store
    // aggregations of these data.
    double[][] cellData = new double[timeSteps.length][numFactors];
    double[][] aggregation = new double[cellData.length][cellData[0].length];
    // 'aggregation'. First, iterate over dishes.
    for (int d = 0; d < getNumDishes(); d++) {
        dishModel.setDishNumber(d);
        // Reset the aggregation.
        for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
            Arrays.fill(aggregation[sIndex], 0);
        }
        // Next, iterate over the cells in a dish.
        for (int c = 0; c < getNumCellsPerDish(); c++) {
            // 12/01/01
            if ((c + 1) % 50 == 0) {
                this.dishNumber = d;
                this.cellNumber = c;
            // System.out.println("Dish # " + (d + 1) + ", Cell # " +
            // (c + 1));
            }
            // Reset the cell data.
            for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
                Arrays.fill(cellData[sIndex], 0);
            }
            // Initialize the history array.
            history.initialize();
            // Generate data for one cell, storing only those time
            // steps that are indicated in the timeSteps
            // array. Note that the timeSteps array is 1-indexed,
            // while the 's' variable is 0-indexed. (Might want to
            // fix this in a future version.)
            int s = -1;
            int sIndex = 0;
            // for the steps in timeSteps[].
            while (++s < getStepsGenerated()) {
                // being synchronized.
                if (s > 0) {
                    history.update();
                }
                if (s == timeSteps[sIndex] - 1) {
                    double[][] historyArray = history.getHistoryArray();
                    // the factors (genes).
                    for (int f = 0; f < numFactors; f++) {
                        // Copy data from the getModel time step in
                        // the history to the cellData[][] array.
                        cellData[sIndex][f] = historyArray[0][f];
                        // Antilog it if necessary.
                        if (isAntilogCalculated()) {
                            cellData[sIndex][f] = Math.exp(cellData[sIndex][f]);
                        }
                        // be null.)
                        if (isRawDataSaved()) {
                            int row = d * getNumCellsPerDish() + c;
                            /*this.rawData[f][sIndex][row] =
                                historyArray[0][f];*/
                            this.rawData[f][sIndex][row] = cellData[sIndex][f];
                        }
                    }
                    if (++sIndex >= timeSteps.length) {
                        break;
                    }
                }
            // END if (s == timeSteps[sIndex])
            }
            // Aggregate data.
            for (int i = 0; i < cellData.length; i++) {
                for (int j = 0; j < cellData[0].length; j++) {
                    aggregation[i][j] += cellData[i][j];
                }
            }
        }
        if (isMeasuredDataSaved()) {
            // generated.)
            for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
                for (int f = 0; f < numFactors; f++) {
                    for (int ch = 0; ch < numChips; ch++) {
                        double average = aggregation[sIndex][f] / getNumCellsPerDish();
                        double pixelError = pixelErrorDist.nextRandom();
                        double measurement = average + sampleErrors[d][ch] + chipErrors[d][ch] + pixelError;
                        this.measuredData[f][sIndex][d * numChips + ch] = measurement;
                    }
                // END for (int ch = 0; ch < numChips; ch++)
                }
            // END for (int f = 0; f < history.getNumFacto...
            }
        // END for (int sIndex = 0; sIndex < timeSteps...
        }
    // END if (measuredDataSaved)
    }
    return;
}
Also used : DishModel(edu.cmu.tetrad.gene.tetrad.gene.history.DishModel) UpdateFunction(edu.cmu.tetrad.gene.tetrad.gene.history.UpdateFunction) Distribution(edu.cmu.tetrad.util.dist.Distribution) Normal(edu.cmu.tetrad.util.dist.Normal)

Example 2 with Normal

use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.

the class TestLingamPattern method test1.

@Test
public void test1() {
    RandomUtil.getInstance().setSeed(4938492L);
    int sampleSize = 1000;
    List<Node> nodes = new ArrayList<>();
    for (int i = 0; i < 6; i++) {
        nodes.add(new ContinuousVariable("X" + (i + 1)));
    }
    Graph graph = new Dag(GraphUtils.randomGraph(nodes, 0, 6, 4, 4, 4, false));
    List<Distribution> variableDistributions = new ArrayList<>();
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Uniform(-1, 1));
    variableDistributions.add(new Normal(0, 1));
    variableDistributions.add(new Normal(0, 1));
    SemPm semPm = new SemPm(graph);
    SemIm semIm = new SemIm(semPm);
    DataSet dataSet = simulateDataNonNormal(semIm, sampleSize, variableDistributions);
    Score score = new SemBicScore(new CovarianceMatrixOnTheFly(dataSet));
    Graph estPattern = new Fges(score).search();
    LingamPattern lingam = new LingamPattern(estPattern, dataSet);
    lingam.search();
    double[] pvals = lingam.getPValues();
    double[] expectedPVals = { 0.18, 0.29, 0.88, 0.00, 0.01, 0.58 };
    for (int i = 0; i < pvals.length; i++) {
        assertEquals(expectedPVals[i], pvals[i], 0.01);
    }
}
Also used : ColtDataSet(edu.cmu.tetrad.data.ColtDataSet) DataSet(edu.cmu.tetrad.data.DataSet) Uniform(edu.cmu.tetrad.util.dist.Uniform) Normal(edu.cmu.tetrad.util.dist.Normal) ContinuousVariable(edu.cmu.tetrad.data.ContinuousVariable) Distribution(edu.cmu.tetrad.util.dist.Distribution) SemPm(edu.cmu.tetrad.sem.SemPm) CovarianceMatrixOnTheFly(edu.cmu.tetrad.data.CovarianceMatrixOnTheFly) SemIm(edu.cmu.tetrad.sem.SemIm) Test(org.junit.Test)

Example 3 with Normal

use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.

the class QQPlot method testPlot.

// ============================ Private Methods =======================//
/**
 * Used to test this class.
 *
 * Generates a continuous test variable and q-q plots it.
 */
private void testPlot() {
    ContinuousVariable c = new ContinuousVariable("test");
    if (dataSet.getVariable("test") == null)
        dataSet.addVariable(c);
    ContinuousVariable c2 = new ContinuousVariable("test2");
    if (dataSet.getVariable("test2") == null)
        dataSet.addVariable(c2);
    this.selectedVariable = c;
    int columnIndex = dataSet.getColumn(c);
    Normal g = new Normal(1, 1);
    Exponential e = new Exponential(1);
    double mean = 0.0;
    double sd = 0.0;
    this.minData = 10000000000000.0;
    this.maxData = 0.0;
    this.minComparison = 1000000000000.0;
    this.maxComparison = 0.0;
    for (int i = 0; i < dataSet.getNumRows(); i++) {
        double value = g.nextRandom();
        double value2 = e.nextRandom();
        dataSet.setDouble(i, columnIndex, value);
        dataSet.setDouble(i, columnIndex + 1, value2);
        mean += value;
        if (value < this.minData)
            this.minData = value;
        if (value > this.maxData)
            this.maxData = value;
    // System.out.println(value);
    // System.out.println(mean);
    }
    // System.out.println(this.dataSet.getNumRows());
    NormalityTests.kolmogorovSmirnov(dataSet, c2);
    // sort the dataset
    for (int i = 0; i < dataSet.getNumRows(); i++) {
        for (int k = i; k < dataSet.getNumRows(); k++) {
            if (dataSet.getDouble(i, columnIndex) > dataSet.getDouble(k, columnIndex)) {
                double temp = dataSet.getDouble(i, columnIndex);
                dataSet.setDouble(i, columnIndex, dataSet.getDouble(k, columnIndex));
                dataSet.setDouble(k, columnIndex, temp);
            }
        }
    }
    if (mean == 0.0)
        mean = 1.0;
    else
        mean /= dataSet.getNumRows();
    for (int i = 0; i < dataSet.getNumRows(); i++) {
        sd += (dataSet.getDouble(i, columnIndex) - mean) * (dataSet.getDouble(i, columnIndex) - mean);
    // System.out.println(dataSet.getDouble(i, columnIndex));
    // System.out.println(sd);
    }
    if (sd == 0.0) {
        sd = 1.0;
    } else {
        sd /= dataSet.getNumRows() - 1.0;
        sd = Math.sqrt(sd);
    }
    // System.out.println("Mean: " + mean + " SD: " + sd + " Min: " + this.minData + " Max: " + this.maxData);
    this.comparison = new cern.jet.random.Normal(mean, sd, new MersenneTwister());
    calculateComparisonSet(this.comparison, this.dataSet);
    if (this.minData < this.minComparison)
        this.min = this.minData;
    else
        this.min = this.minComparison;
    if (this.maxData > this.maxComparison)
        this.max = this.maxData;
    else
        this.max = this.maxComparison;
// end test code
}
Also used : ContinuousVariable(edu.cmu.tetrad.data.ContinuousVariable) Exponential(edu.cmu.tetrad.util.dist.Exponential) Normal(edu.cmu.tetrad.util.dist.Normal) MersenneTwister(cern.jet.random.engine.MersenneTwister)

Example 4 with Normal

use of edu.cmu.tetrad.util.dist.Normal in project tetrad by cmu-phil.

the class SemPm method initializeParams.

private void initializeParams() {
    // Note that a distinction between parameterizable and non-parameterizable nodes is being added
    // to accomodate time lag graphs. jdramsey 4/14/10.
    List<Parameter> parameters = new ArrayList<>();
    Set<Edge> edges = graph.getEdges();
    Collections.sort(new ArrayList<>(edges), new Comparator<Edge>() {

        public int compare(Edge o1, Edge o2) {
            int compareFirst = o1.getNode1().getName().compareTo(o2.getNode1().toString());
            int compareSecond = o1.getNode2().getName().compareTo(o2.getNode2().toString());
            if (compareFirst != 0) {
                return compareFirst;
            }
            if (compareSecond != 0) {
                return compareSecond;
            }
            return 0;
        }
    });
    // aren't error edges *and are into parameterizable node* (the last bit jdramsey 4/14/10).
    for (Edge edge : edges) {
        if (edge.getNode1() == edge.getNode2()) {
            continue;
        // throw new IllegalStateException("There should not be any" +
        // "edges from a node to itself in a SemGraph: " + edge);
        }
        if (!SemGraph.isErrorEdge(edge) && edge.getEndpoint1() == Endpoint.TAIL && edge.getEndpoint2() == Endpoint.ARROW) {
            if (!graph.isParameterizable(edge.getNode2())) {
                continue;
            }
            Parameter param = new Parameter(newBName(), ParamType.COEF, edge.getNode1(), edge.getNode2());
            param.setDistribution(new Split(0.5, 1.5));
            // param.setDistribution(new SplitDistributionSpecial(0.5, 1.5));
            // param.setDistribution(new UniformDistribution(-0.2, 0.2));
            // param.setDistribution(coefDistribution);
            parameters.add(param);
        }
    }
    // Add error variance freeParameters for exogenous nodes of all *parameterizable* nodes.
    for (Node node : getVariableNodes()) {
        if (!graph.isParameterizable(node)) {
            continue;
        }
        Parameter param = new Parameter(newTName(), ParamType.VAR, node, node);
        param.setDistribution(new Uniform(1.0, 3.0));
        parameters.add(param);
    }
    // connect exogenous nodes.)
    for (Edge edge : edges) {
        if (Edges.isBidirectedEdge(edge)) {
            Node node1 = edge.getNode1();
            Node node2 = edge.getNode2();
            // no...
            if (!graph.isParameterizable(node1) || !graph.isParameterizable(node2)) {
                continue;
            }
            node1 = getGraph().getVarNode(node1);
            node2 = getGraph().getVarNode(node2);
            Parameter param = new Parameter(newTName(), ParamType.COVAR, node1, node2);
            param.setDistribution(new SingleValue(0.2));
            parameters.add(param);
        }
    }
    // Add mean freeParameters for all parameterizable nodes.
    for (Node node : getVariableNodes()) {
        if (!graph.isParameterizable(node)) {
            continue;
        }
        Parameter mean = new Parameter(newMName(), ParamType.MEAN, node, node);
        mean.setDistribution(new Normal(0.0, 1.0));
        parameters.add(mean);
    }
    this.parameters = Collections.unmodifiableList(parameters);
}
Also used : SingleValue(edu.cmu.tetrad.util.dist.SingleValue) Uniform(edu.cmu.tetrad.util.dist.Uniform) Split(edu.cmu.tetrad.util.dist.Split) Normal(edu.cmu.tetrad.util.dist.Normal)

Aggregations

Normal (edu.cmu.tetrad.util.dist.Normal)4 ContinuousVariable (edu.cmu.tetrad.data.ContinuousVariable)2 Distribution (edu.cmu.tetrad.util.dist.Distribution)2 Uniform (edu.cmu.tetrad.util.dist.Uniform)2 MersenneTwister (cern.jet.random.engine.MersenneTwister)1 ColtDataSet (edu.cmu.tetrad.data.ColtDataSet)1 CovarianceMatrixOnTheFly (edu.cmu.tetrad.data.CovarianceMatrixOnTheFly)1 DataSet (edu.cmu.tetrad.data.DataSet)1 DishModel (edu.cmu.tetrad.gene.tetrad.gene.history.DishModel)1 UpdateFunction (edu.cmu.tetrad.gene.tetrad.gene.history.UpdateFunction)1 SemIm (edu.cmu.tetrad.sem.SemIm)1 SemPm (edu.cmu.tetrad.sem.SemPm)1 Exponential (edu.cmu.tetrad.util.dist.Exponential)1 SingleValue (edu.cmu.tetrad.util.dist.SingleValue)1 Split (edu.cmu.tetrad.util.dist.Split)1 Test (org.junit.Test)1