use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.
the class MeasurementSimulator method simulate.
/**
* Simulates (optionally) neither, either, or both of two three-dimensionaly
* data sets, rawData and measuredData. For the form of the first set, see
* <code>getRawData</code>; for the form of the second set, see
* <code>getMeasuredData</code>. The idea of the simulation is that cells
* are grown in different dishes and periodically sampled. When they are
* sampled, their DNA is ground up and then pipetted in aggregate onto a
* number of microarray chips. Each indivual cell is simulated separately;
* the simulated cells are grouped into dishes. Expression levels of cells
* in each dish are aggregated and then random noise is added to this
* aggregate expression level representing various sources of noise: sample
* to sample variation, chip to chip variation, and pixellation measurement
* variation. For more information, see the Genetic Simulator spec, which
* this method implements.
*
* @see #getRawData
* @see #getMeasuredData
*/
public void simulate(GeneHistory history) {
setHistory(history);
history.reset();
// Get the update function.
UpdateFunction updateFunction = history.getUpdateFunction();
int numFactors = updateFunction.getNumFactors();
// Set up the dish model.
DishModel dishModel = new DishModel(getNumDishes(), getDishDishVariability());
// Set up the history.
history.setInitSync(isInitSync());
history.setDishModel(dishModel);
// We will use two data cubes to store data for display
// purposes: a measurement cube and a raw data cube. We first
// construct the measurement cube, with two extra columns for
// (a) dish number and (b) chip number.
int numChips = getNumSamplesPerDish();
if (isMeasuredDataSaved()) {
int numSteps = timeSteps.length;
int numSamples = getNumDishes() * numChips;
this.measuredData = new double[numFactors][numSteps][numSamples];
}
// We next construct the raw data cube, with two extra columns
// for (a) individual number and (b) dish number. Raw data is
// saved out only if the 'rawDataSave' parameter is true. If
// it's not true, these objects are not constructed. (They can
// be extremely large.) References to the cube and the initial
// columns need to be saved in this scope for future use (but
// may be null).
this.rawData = null;
if (isRawDataSaved()) {
int numSteps = timeSteps.length;
int numCells = getNumDishes() * getNumCellsPerDish();
this.rawData = new double[numFactors][numSteps][numCells];
}
// There are three sources of error--sample to sample, chip to
// chip, and pixel digitalization. A certain number of
// samples are drawn from each dish, and these are deposited
// on the same number of chips. There is an error associated
// with each chip and with each sample. (Pixel digitalization
// error is drawn later.)
double sampleSd = getSampleSampleVariability();
double chipSd = getChipChipVariability();
double pixelSd = getPixelDigitalization();
Distribution sampleErrorDist = new Normal(0.0, sampleSd);
Distribution chipErrorDist = new Normal(0.0, chipSd);
Distribution pixelErrorDist = new Normal(0.0, pixelSd);
double[][] chipErrors = new double[getNumDishes()][numChips];
double[][] sampleErrors = new double[getNumDishes()][numChips];
for (int d = 0; d < getNumDishes(); d++) {
for (int ch = 0; ch < numChips; ch++) {
chipErrors[d][ch] = chipErrorDist.nextRandom();
sampleErrors[d][ch] = sampleErrorDist.nextRandom();
}
}
// Make two double[] arrays, one to store data generated for a
// particular cell (in a particular dish) and one to store
// aggregations of these data.
double[][] cellData = new double[timeSteps.length][numFactors];
double[][] aggregation = new double[cellData.length][cellData[0].length];
// 'aggregation'. First, iterate over dishes.
for (int d = 0; d < getNumDishes(); d++) {
dishModel.setDishNumber(d);
// Reset the aggregation.
for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
Arrays.fill(aggregation[sIndex], 0);
}
// Next, iterate over the cells in a dish.
for (int c = 0; c < getNumCellsPerDish(); c++) {
// 12/01/01
if ((c + 1) % 50 == 0) {
this.dishNumber = d;
this.cellNumber = c;
// System.out.println("Dish # " + (d + 1) + ", Cell # " +
// (c + 1));
}
// Reset the cell data.
for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
Arrays.fill(cellData[sIndex], 0);
}
// Initialize the history array.
history.initialize();
// Generate data for one cell, storing only those time
// steps that are indicated in the timeSteps
// array. Note that the timeSteps array is 1-indexed,
// while the 's' variable is 0-indexed. (Might want to
// fix this in a future version.)
int s = -1;
int sIndex = 0;
// for the steps in timeSteps[].
while (++s < getStepsGenerated()) {
// being synchronized.
if (s > 0) {
history.update();
}
if (s == timeSteps[sIndex] - 1) {
double[][] historyArray = history.getHistoryArray();
// the factors (genes).
for (int f = 0; f < numFactors; f++) {
// Copy data from the getModel time step in
// the history to the cellData[][] array.
cellData[sIndex][f] = historyArray[0][f];
// Antilog it if necessary.
if (isAntilogCalculated()) {
cellData[sIndex][f] = Math.exp(cellData[sIndex][f]);
}
// be null.)
if (isRawDataSaved()) {
int row = d * getNumCellsPerDish() + c;
/*this.rawData[f][sIndex][row] =
historyArray[0][f];*/
this.rawData[f][sIndex][row] = cellData[sIndex][f];
}
}
if (++sIndex >= timeSteps.length) {
break;
}
}
// END if (s == timeSteps[sIndex])
}
// Aggregate data.
for (int i = 0; i < cellData.length; i++) {
for (int j = 0; j < cellData[0].length; j++) {
aggregation[i][j] += cellData[i][j];
}
}
}
if (isMeasuredDataSaved()) {
// generated.)
for (int sIndex = 0; sIndex < timeSteps.length; sIndex++) {
for (int f = 0; f < numFactors; f++) {
for (int ch = 0; ch < numChips; ch++) {
double average = aggregation[sIndex][f] / getNumCellsPerDish();
double pixelError = pixelErrorDist.nextRandom();
double measurement = average + sampleErrors[d][ch] + chipErrors[d][ch] + pixelError;
this.measuredData[f][sIndex][d * numChips + ch] = measurement;
}
// END for (int ch = 0; ch < numChips; ch++)
}
// END for (int f = 0; f < history.getNumFacto...
}
// END for (int sIndex = 0; sIndex < timeSteps...
}
// END if (measuredDataSaved)
}
return;
}
use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.
the class TestLingamPattern method test1.
@Test
public void test1() {
RandomUtil.getInstance().setSeed(4938492L);
int sampleSize = 1000;
List<Node> nodes = new ArrayList<>();
for (int i = 0; i < 6; i++) {
nodes.add(new ContinuousVariable("X" + (i + 1)));
}
Graph graph = new Dag(GraphUtils.randomGraph(nodes, 0, 6, 4, 4, 4, false));
List<Distribution> variableDistributions = new ArrayList<>();
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Uniform(-1, 1));
variableDistributions.add(new Normal(0, 1));
variableDistributions.add(new Normal(0, 1));
SemPm semPm = new SemPm(graph);
SemIm semIm = new SemIm(semPm);
DataSet dataSet = simulateDataNonNormal(semIm, sampleSize, variableDistributions);
Score score = new SemBicScore(new CovarianceMatrixOnTheFly(dataSet));
Graph estPattern = new Fges(score).search();
LingamPattern lingam = new LingamPattern(estPattern, dataSet);
lingam.search();
double[] pvals = lingam.getPValues();
double[] expectedPVals = { 0.18, 0.29, 0.88, 0.00, 0.01, 0.58 };
for (int i = 0; i < pvals.length; i++) {
assertEquals(expectedPVals[i], pvals[i], 0.01);
}
}
use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.
the class Ling method makeDataSet.
private void makeDataSet(GraphWithParameters graphWP) {
// define the "Gaussian-squared" distribution
Distribution gp2 = new GaussianPower(2);
// the coefficients of the error terms (here, all 1s)
TetradVector errorCoefficients = getErrorCoeffsIdentity(graphWP.getGraph().getNumNodes());
// generate data from the SEM
TetradMatrix inVectors = simulateCyclic(graphWP, errorCoefficients, numSamples, gp2);
// reformat it
dataSet = ColtDataSet.makeContinuousData(graphWP.getGraph().getNodes(), new TetradMatrix(inVectors.transpose().toArray()));
}
use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.
the class LargeScaleSimulation method setupModel.
private void setupModel(int size) {
if (alreadySetUp) {
return;
}
Map<Node, Integer> nodesHash = new HashedMap<>();
for (int i = 0; i < variableNodes.size(); i++) {
nodesHash.put(variableNodes.get(i), i);
}
this.parents = new int[size][];
this.coefs = new double[size][];
this.errorVars = new double[size];
this.means = new double[size];
for (int i = 0; i < size; i++) {
this.parents[i] = new int[0];
this.coefs[i] = new double[0];
}
Distribution edgeCoefDist = new Split(coefLow, coefHigh);
Distribution errorCovarDist = new Uniform(varLow, varHigh);
Distribution meanDist = new Uniform(meanLow, meanHigh);
for (Edge edge : graph.getEdges()) {
Node tail = Edges.getDirectedEdgeTail(edge);
Node head = Edges.getDirectedEdgeHead(edge);
int _tail = nodesHash.get(tail);
int _head = nodesHash.get(head);
int[] parents = this.parents[_head];
int[] newParents = new int[parents.length + 1];
System.arraycopy(parents, 0, newParents, 0, parents.length);
newParents[newParents.length - 1] = _tail;
double[] coefs = this.coefs[_head];
double[] newCoefs = new double[coefs.length + 1];
System.arraycopy(coefs, 0, newCoefs, 0, coefs.length);
double coef = edgeCoefDist.nextRandom();
if (includePositiveCoefs && !includeNegativeCoefs) {
coef = Math.abs(coef);
} else if (!includePositiveCoefs && includeNegativeCoefs) {
coef = -Math.abs(coef);
} else if (!includePositiveCoefs && !includeNegativeCoefs) {
coef = 0;
}
newCoefs[newCoefs.length - 1] = coef;
this.parents[_head] = newParents;
this.coefs[_head] = newCoefs;
}
if (graph instanceof TimeLagGraph) {
TimeLagGraph lagGraph = (TimeLagGraph) graph;
// TimeSeriesUtils.getKnowledge(lagGraph);
IKnowledge knowledge = getKnowledge(lagGraph);
List<Node> lag0 = lagGraph.getLag0Nodes();
for (Node y : lag0) {
List<Node> _parents = lagGraph.getParents(y);
for (Node x : _parents) {
List<List<Node>> similar = returnSimilarPairs(x, y, knowledge);
int _x = variableNodes.indexOf(x);
int _y = variableNodes.indexOf(y);
double first = Double.NaN;
for (int i = 0; i < parents[_y].length; i++) {
if (_x == parents[_y][i]) {
first = coefs[_y][i];
}
}
for (int j = 0; j < similar.get(0).size(); j++) {
int _xx = variableNodes.indexOf(similar.get(0).get(j));
int _yy = variableNodes.indexOf(similar.get(1).get(j));
for (int i = 0; i < parents[_yy].length; i++) {
if (_xx == parents[_yy][i]) {
coefs[_yy][i] = first;
}
}
}
}
}
}
for (int i = 0; i < size; i++) {
this.errorVars[i] = errorCovarDist.nextRandom();
this.means[i] = meanDist.nextRandom();
}
alreadySetUp = true;
}
use of edu.cmu.tetrad.util.dist.Distribution in project tetrad by cmu-phil.
the class TestLingamPattern method simulateDataNonNormal.
/**
* This simulates data by picking random values for the exogenous terms and percolating this information down
* through the SEM, assuming it is acyclic. Fast for large simulations but hangs for cyclic models.
*
* @param sampleSize > 0.
* @return the simulated data set.
*/
private DataSet simulateDataNonNormal(SemIm semIm, int sampleSize, List<Distribution> distributions) {
List<Node> variables = new LinkedList<>();
List<Node> variableNodes = semIm.getSemPm().getVariableNodes();
for (Node node : variableNodes) {
ContinuousVariable var = new ContinuousVariable(node.getName());
variables.add(var);
}
DataSet dataSet = new ColtDataSet(sampleSize, variables);
// Create some index arrays to hopefully speed up the simulation.
SemGraph graph = semIm.getSemPm().getGraph();
List<Node> tierOrdering = graph.getCausalOrdering();
int[] tierIndices = new int[variableNodes.size()];
for (int i = 0; i < tierIndices.length; i++) {
tierIndices[i] = variableNodes.indexOf(tierOrdering.get(i));
}
int[][] _parents = new int[variables.size()][];
for (int i = 0; i < variableNodes.size(); i++) {
Node node = variableNodes.get(i);
List<Node> parents = graph.getParents(node);
for (Iterator<Node> j = parents.iterator(); j.hasNext(); ) {
Node _node = j.next();
if (_node.getNodeType() == NodeType.ERROR) {
j.remove();
}
}
_parents[i] = new int[parents.size()];
for (int j = 0; j < parents.size(); j++) {
Node _parent = parents.get(j);
_parents[i][j] = variableNodes.indexOf(_parent);
}
}
// Do the simulation.
for (int row = 0; row < sampleSize; row++) {
for (int i = 0; i < tierOrdering.size(); i++) {
int col = tierIndices[i];
Distribution distribution = distributions.get(col);
// System.out.println(distribution);
double value = distribution.nextRandom();
for (int j = 0; j < _parents[col].length; j++) {
int parent = _parents[col][j];
value += dataSet.getDouble(row, parent) * semIm.getEdgeCoef().get(parent, col);
}
value += semIm.getMeans()[col];
dataSet.setDouble(row, col, value);
}
}
return dataSet;
}
Aggregations