use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
the class SemBicScoreDeterministic method getMaximalLinearlyDependentSet.
/**
 * Searches for a set of parent indices to report as linearly dependent.
 *
 * <p>For every array returned by the {@code DepthChoiceGenerator} (until it returns
 * {@code null}) an index array {@code sel} is assembled. {@code sel} is returned as soon as
 * either (a) {@code cov(i,i) - (covxx^-1 * covxy) . covxy}, computed over all of
 * {@code parents}, is at or below {@code getDeterminismThreshold()}, or (b) inverting the
 * {@code sel} x {@code sel} submatrix of {@code cov} throws. Both cases print two diagnostic
 * lines to {@code out} first.
 *
 * <p>NOTE(review): only {@code choice.length} is ever used; the indices stored in
 * {@code choice} are never read, so {@code sel} does not depend on which combination the
 * generator produced. Confirm whether {@code parents[choice[m]]} was intended.
 *
 * @param i       index of the target variable in {@code getCovariances()}.
 * @param parents indices of the candidate parent variables.
 * @param cov     covariance matrix from which the {@code sel} submatrix is taken.
 * @return the assembled {@code sel} array on the first hit, or an empty array if none hits.
 */
private int[] getMaximalLinearlyDependentSet(int i, int[] parents, ICovarianceMatrix cov) {
// Values of s2 at or below this threshold trigger the first return path below.
double small = getDeterminismThreshold();
List<Node> _parents = new ArrayList<>();
for (int p : parents) _parents.add(variables.get(p));
DepthChoiceGenerator gen = new DepthChoiceGenerator(_parents.size(), _parents.size());
int[] choice;
while ((choice = gen.next()) != null) {
// NOTE(review): sel0 is allocated but never filled from choice, so every entry is 0.
int[] sel0 = new int[choice.length];
List<Integer> all = new ArrayList<>();
for (int w = 0; w < parents.length; w++) all.add(parents[w]);
// sel0[w] is an int, so this resolves to List.remove(int index), not remove(Object):
// it drops the element at index 0 each time, leaving the last
// (parents.length - choice.length) entries of parents.
for (int w = 0; w < sel0.length; w++) all.remove(sel0[w]);
int[] sel = new int[all.size()];
for (int w = 0; w < all.size(); w++) sel[w] = all.get(w);
List<Node> _sel = new ArrayList<>();
// Overwrites the first choice.length slots of sel with parents[0..choice.length-1].
// NOTE(review): sel only has parents.length - choice.length slots, so this index would
// run past the end if the generator yields a choice longer than half of parents - confirm.
for (int m = 0; m < choice.length; m++) {
sel[m] = parents[m];
_sel.add(variables.get(sel[m]));
}
TetradMatrix m = cov.getSelection(sel, sel);
// s2 is built only from i and parents (not from choice or sel), so it has the same value
// on every pass through the loop.
double s2 = getCovariances().getValue(i, i);
TetradMatrix covxx = getSelection(getCovariances(), parents, parents);
TetradVector covxy = getSelection(getCovariances(), parents, new int[] { i }).getColumn(0);
s2 -= covxx.inverse().times(covxy).dotProduct(covxy);
if (s2 <= small) {
out.println("### Linear dependence among variables: " + _sel);
// NOTE(review): _sel is empty whenever choice is empty, in which case get(0) throws.
out.println("### Removing " + _sel.get(0));
return sel;
}
// A failed inversion of the selected submatrix is treated as evidence of dependence.
try {
m.inverse();
} catch (Exception e2) {
// forbidden.add(sel[0]);
out.println("### Linear dependence among variables: " + _sel);
out.println("### Removing " + _sel.get(0));
return sel;
}
}
// No choice triggered either test.
return new int[0];
}
use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
the class SemBicScoreImages2 method localScore.
/**
 * Calculates a BIC-style score for variable {@code i} given {@code parents}, accumulating
 * {@code -sampleSize * log(residual variance)} over every covariance matrix held by this score.
 *
 * <p>Returns {@code Double.NaN} when any parent is in {@code forbidden}, when a residual
 * variance is not positive, or when the computation for some covariance matrix throws (for
 * example because the parent covariance submatrix cannot be inverted). In the last case
 * {@code printMinimalLinearlyDependentSet} is called repeatedly on the not-yet-forbidden
 * parents until it reports {@code false}.
 *
 * @param i       index of the variable being scored.
 * @param parents indices of its parents.
 * @return the penalized score, or {@code Double.NaN} if it cannot be computed.
 */
public double localScore(int i, int... parents) {
    for (int parent : parents) {
        if (forbidden.contains(parent)) {
            return Double.NaN;
        }
    }

    double lik = 0.0;

    for (int k = 0; k < covariances.size(); k++) {
        double residualVariance = getCovariances(k).getValue(i, i);
        TetradMatrix covxx = getSelection1(getCovariances(k), parents);

        try {
            TetradMatrix covxxInv = covxx.inverse();
            TetradVector covxy = getSelection2(getCovariances(k), parents, i);
            TetradVector b = covxxInv.times(covxy);
            residualVariance -= covxy.dotProduct(b);

            if (residualVariance <= 0) {
                if (isVerbose()) {
                    out.println("Nonpositive residual varianceY: resVar / varianceY = " + (residualVariance / getCovariances(k).getValue(i, i)));
                }
                return Double.NaN;
            }

            lik += -sampleSize * Math.log(residualVariance);
        } catch (Exception e) {
            // The score cannot be computed for this parent set. Keep reporting dependent
            // subsets among the parents that are not forbidden until none is reported.
            // Work on a local copy rather than reassigning the varargs parameter.
            int[] remaining = parents;
            boolean removedOne = true;

            while (removedOne) {
                List<Integer> kept = new ArrayList<>();
                for (int parent : remaining) {
                    kept.add(parent);
                }
                kept.removeAll(forbidden);

                remaining = new int[kept.size()];
                for (int y = 0; y < kept.size(); y++) {
                    remaining[y] = kept.get(y);
                }

                removedOne = printMinimalLinearlyDependentSet(remaining, getCovariances(k));
            }

            return Double.NaN;
        }
    }

    int p = parents.length;
    double c = getPenaltyDiscount();
    return 2 * lik - c * (p + 1) * Math.log(covariances.size() * sampleSize);
}
use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
the class RegressionCovariance method regress.
/**
 * Regresses the given target on the given regressors using the stored correlation matrix,
 * standard deviations and (optionally) means.
 *
 * <p>In the returned result, index 0 of the coefficient array is the constant; it is
 * {@code NaN} unless means have been specified. Index 0 of the se, t and p arrays is always
 * {@code NaN}; indices 1..n hold the values for the regressors in the order given. As a side
 * effect, {@code this.graph} is replaced by the graph built from the new p values.
 *
 * @param target     The variable being regressed.
 * @param regressors The list of regressors.
 * @return the regression plane.
 * @throws NullPointerException if a regressor is not among the variables of the correlation
 *                              matrix.
 */
public RegressionResult regress(Node target, List<Node> regressors) {
    TetradMatrix allCorrelations = correlations.getMatrix();
    List<Node> variables = correlations.getVariables();

    int yIndex = variables.indexOf(target);

    int[] xIndices = new int[regressors.size()];
    for (int i = 0; i < regressors.size(); i++) {
        xIndices[i] = variables.indexOf(regressors.get(i));
        if (xIndices[i] == -1) {
            throw new NullPointerException("Can't find variable " + regressors.get(i) + " in this list: " + variables);
        }
    }

    TetradMatrix rX = allCorrelations.getSelection(xIndices, xIndices);
    TetradMatrix rY = allCorrelations.getSelection(xIndices, new int[] { yIndex });

    // Inverted once; used for the standardized coefficients here and for the per-regressor
    // standard errors further down.
    TetradMatrix rxInv = rX.inverse();
    TetradMatrix bStar = rxInv.times(rY);

    // Rescale the standardized coefficients back to the original units.
    double sdY = sd.get(yIndex);
    TetradVector b = new TetradVector(bStar.rows() + 1);
    for (int j = 1; j < b.size(); j++) {
        double sdX = sd.get(xIndices[j - 1]);
        b.set(j, bStar.get(j - 1, 0) * (sdY / sdX));
    }

    // The constant can only be recovered when means are available.
    b.set(0, Double.NaN);
    if (means != null) {
        double b0 = means.get(yIndex);
        for (int i = 0; i < xIndices.length; i++) {
            b0 -= b.get(i + 1) * means.get(xIndices[i]);
        }
        b.set(0, b0);
    }

    // Target first, then the regressors; the regressor indices were already resolved above.
    int[] allIndices = new int[1 + xIndices.length];
    allIndices[0] = yIndex;
    System.arraycopy(xIndices, 0, allIndices, 1, xIndices.length);

    TetradMatrix r = allCorrelations.getSelection(allIndices, allIndices);
    TetradMatrix rInv = r.inverse();

    int n = correlations.getSampleSize();
    int k = regressors.size() + 1;

    double vY = rInv.get(0, 0);
    double r2 = 1.0 - (1.0 / vY);
    // Book says n - 1.
    double tss = n * sdY * sdY;
    double rss = tss * (1.0 - r2);
    double seY = Math.sqrt(rss / (double) (n - k));

    TetradVector sqErr = new TetradVector(allIndices.length);
    TetradVector t = new TetradVector(allIndices.length);
    TetradVector p = new TetradVector(allIndices.length);

    // No se, t or p value is computed for the constant.
    sqErr.set(0, Double.NaN);
    t.set(0, Double.NaN);
    p.set(0, Double.NaN);

    for (int i = 0; i < regressors.size(); i++) {
        double _r2 = 1.0 - (1.0 / rxInv.get(i, i));
        double _tss = n * sd.get(xIndices[i]) * sd.get(xIndices[i]);
        double _se = seY / Math.sqrt(_tss * (1.0 - _r2));
        double _t = b.get(i + 1) / _se;
        double _p = 2 * (1.0 - ProbUtils.tCdf(Math.abs(_t), n - k));

        sqErr.set(i + 1, _se);
        t.set(i + 1, _t);
        p.set(i + 1, _p);
    }

    // Graph
    this.graph = createGraph(target, allIndices, regressors, p);

    String[] vNames = createVarNamesArray(regressors);
    double[] bArray = b.toArray();
    double[] tArray = t.toArray();
    double[] pArray = p.toArray();
    double[] seArray = sqErr.toArray();

    return new RegressionResult(false, vNames, n, bArray, tArray, pArray, seArray, r2, rss, alpha, null, null);
}
use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
the class RegressionDatasetGeneralized method regress.
/**
 * Regresses the target on the given regressors by ordinary least squares on the raw data,
 * with a constant column prepended to the design matrix.
 *
 * <p>The coefficient array of the result has the constant at index 0 followed by one
 * coefficient per regressor. This implementation does not compute standard errors, t
 * statistics or p values: those three arrays are returned empty. The result also carries the
 * predicted values and the residual vector, which is computed as predicted minus observed.
 *
 * @param target     The target variable.
 * @param regressors The regressor variables.
 * @return The regression result holding the coefficients, R-squared, residual sum of
 * squares, predictions and residuals.
 */
public RegressionResult regress(Node target, List<Node> regressors) {
    int n = data.rows();
    int targetIndex = variables.indexOf(target);

    int[] regressorIndices = new int[regressors.size()];
    for (int i = 0; i < regressors.size(); i++) {
        regressorIndices[i] = variables.indexOf(regressors.get(i));
    }

    // Select every row of the regressor columns.
    int[] rows = new int[data.rows()];
    for (int i = 0; i < rows.length; i++) {
        rows[i] = i;
    }
    TetradMatrix xSub = data.getSelection(rows, regressorIndices);

    // Design matrix: a leading column of ones for the constant, then the regressor columns.
    TetradMatrix X = new TetradMatrix(xSub.rows(), xSub.columns() + 1);
    for (int i = 0; i < X.rows(); i++) {
        X.set(i, 0, 1);
        for (int j = 1; j < X.columns(); j++) {
            X.set(i, j, xSub.get(i, j - 1));
        }
    }

    TetradVector y = data.getColumn(targetIndex);

    // b = (X'X)^-1 X' y
    TetradMatrix Xt = X.transpose();
    TetradMatrix XtX = Xt.times(X);
    TetradMatrix G = XtX.inverse();
    TetradMatrix GXt = G.times(Xt);
    TetradVector b = GXt.times(y);

    TetradVector yPred = X.times(b);
    // Residuals are kept as predicted minus observed.
    TetradVector xRes = yPred.minus(y);

    double rss = rss(X, y, b);
    double tss = tss(y);
    double r2 = 1.0 - (rss / tss);

    String[] vNames = new String[regressors.size()];
    for (int i = 0; i < regressors.size(); i++) {
        vNames[i] = regressors.get(i).getName();
    }

    return new RegressionResult(false, vNames, n, b.toArray(), new double[0], new double[0], new double[0], r2, rss, alpha, yPred, xRes);
}
use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
the class RegressionUtils method residuals.
/**
 * Builds a data set of residuals: each variable of {@code dataSet} is regressed on the
 * variables that are its parents in {@code graph}, and the residual vector of that
 * regression becomes the corresponding column of the result.
 *
 * <p>Variables are matched between the data set and the graph by name. A line listing the
 * regressors chosen for each variable is printed to standard output.
 *
 * @param dataSet the data whose variables are regressed.
 * @param graph   the graph supplying the parents of each variable.
 * @return a continuous data set with the same variables as {@code dataSet}, holding the
 * residuals.
 * @throws IllegalArgumentException if a data variable has no node of the same name in the
 *                                  graph, or a parent in the graph has no variable of the
 *                                  same name in the data.
 */
public static DataSet residuals(DataSet dataSet, Graph graph) {
    Regression regression = new RegressionDataset(dataSet);
    TetradMatrix residuals = new TetradMatrix(dataSet.getNumRows(), dataSet.getNumColumns());

    for (int i = 0; i < dataSet.getNumColumns(); i++) {
        Node target = dataSet.getVariable(i);
        Node _target = graph.getNode(target.getName());

        if (_target == null) {
            throw new IllegalArgumentException("Data variable not in graph: " + target);
        }

        Set<Node> _regressors = new HashSet<>(graph.getParents(_target));
        System.out.println("For " + target + " regressors are " + _regressors);

        // Translate the graph's parent nodes into the data set's own variables by name.
        List<Node> regressors = new LinkedList<>();
        for (Node node : _regressors) {
            Node regressor = dataSet.getVariable(node.getName());

            // Fail here with a clear message instead of passing a null regressor on to the
            // regression.
            if (regressor == null) {
                throw new IllegalArgumentException("Graph variable not in data: " + node);
            }

            regressors.add(regressor);
        }

        RegressionResult result = regression.regress(target, regressors);
        TetradVector residualsColumn = result.getResiduals();
        residuals.assignColumn(i, residualsColumn);
    }

    return ColtDataSet.makeContinuousData(dataSet.getVariables(), residuals);
}
Aggregations