Use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
The class RegressionDataset, method regress.
/**
* Regresses the target on the given regressors.
*
* @param target The target variable.
* @param regressors The regressor variables.
* @return The regression plane, specifying for each regressor its
* coefficient, se, t, and p values, and specifying the same for the
* constant.
*/
public RegressionResult regress(Node target, List<Node> regressors) {
int n = getRows().length;
int k = regressors.size() + 1;
int _target = variables.indexOf(target);
int[] _regressors = new int[regressors.size()];
for (int i = 0; i < regressors.size(); i++) {
_regressors[i] = variables.indexOf(regressors.get(i));
if (_regressors[i] == -1) {
// Apparent leftover debugging hook: prints only a blank line when the regressor is not found among the data set's variables.
System.out.println();
}
}
if (_target == -1) {
// Apparent leftover debugging hook: prints only a blank line when the target is not found among the data set's variables.
System.out.println();
}
TetradMatrix y = data.getSelection(getRows(), new int[] { _target }).copy();
TetradMatrix xSub = data.getSelection(getRows(), _regressors);
TetradMatrix x;
if (regressors.size() > 0) {
x = new TetradMatrix(xSub.rows(), xSub.columns() + 1);
for (int i = 0; i < x.rows(); i++) {
for (int j = 0; j < x.columns(); j++) {
if (j == 0) {
x.set(i, j, 1);
} else {
x.set(i, j, xSub.get(i, j - 1));
}
}
}
} else {
x = new TetradMatrix(xSub.rows(), xSub.columns());
for (int i = 0; i < x.rows(); i++) {
for (int j = 0; j < x.columns(); j++) {
x.set(i, j, xSub.get(i, j));
}
}
}
TetradMatrix xT = x.transpose();
TetradMatrix xTx = xT.times(x);
TetradMatrix xTxInv = xTx.inverse();
TetradMatrix xTy = xT.times(y);
TetradMatrix b = xTxInv.times(xTy);
TetradMatrix yHat = x.times(b);
if (yHat.columns() == 0)
yHat = y.like();
// y.copy().assign(yHat, PlusMult.plusMult(-1));
TetradMatrix res = y.minus(yHat);
TetradVector _yHat = yHat.getColumn(0);
TetradVector _res = res.getColumn(0);
// Recompute the fit from a copy of b and store the residuals in this.res2.
TetradMatrix b2 = b.copy();
TetradMatrix yHat2 = x.times(b2);
if (yHat2.columns() == 0)
yHat2 = y.like();
// y.copy().assign(yHat, PlusMult.plusMult(-1));
TetradMatrix res2 = y.minus(yHat2);
this.res2 = res2.getColumn(0);
double rss = rss(x, y, b);
double se = Math.sqrt(rss / (n - k));
double tss = tss(y);
double r2 = 1.0 - (rss / tss);
TetradVector sqErr = new TetradVector(x.columns());
TetradVector t = new TetradVector(x.columns());
TetradVector p = new TetradVector(x.columns());
for (int i = 0; i < x.columns(); i++) {
double _s = se * se * xTxInv.get(i, i);
double _se = Math.sqrt(_s);
double _t = b.get(i, 0) / _se;
double _p = 2 * (1.0 - ProbUtils.tCdf(Math.abs(_t), n - k));
sqErr.set(i, _se);
t.set(i, _t);
p.set(i, _p);
}
this.graph = createOutputGraph(target.getName(), x, regressors, p);
String[] vNames = new String[regressors.size()];
for (int i = 0; i < regressors.size(); i++) {
vNames[i] = regressors.get(i).getName();
}
double[] bArray = b.columns() == 0 ? new double[0] : b.getColumn(0).toArray();
double[] tArray = t.toArray();
double[] pArray = p.toArray();
double[] seArray = sqErr.toArray();
return new RegressionResult(regressors.size() == 0, vNames, n, bArray, tArray, pArray, seArray, r2, rss, alpha, _yHat, _res);
}
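In the notation of the code above, regress() implements ordinary least squares. With X the design matrix (a leading column of 1s followed by the regressor columns) and y the target column, the quantities it computes are, in sketch form:

\[
\hat b = (X^{\top}X)^{-1}X^{\top}y, \qquad \hat y = X\hat b, \qquad s^2 = \frac{\mathrm{RSS}}{n-k}, \qquad R^2 = 1 - \frac{\mathrm{RSS}}{\mathrm{TSS}},
\]
\[
\mathrm{se}(\hat b_i) = s\,\sqrt{\bigl[(X^{\top}X)^{-1}\bigr]_{ii}}, \qquad t_i = \frac{\hat b_i}{\mathrm{se}(\hat b_i)}, \qquad p_i = 2\,\bigl(1 - F_{t,\,n-k}(\lvert t_i\rvert)\bigr),
\]

where n is the number of rows, k the number of regressors plus one for the constant, RSS and TSS the residual and total sums of squares (rss(x, y, b) and tss(y) in the code), and F_{t, n-k} the CDF of the t distribution with n - k degrees of freedom (ProbUtils.tCdf).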
Use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
The class RegressionDataset, method regress (static array variant).
public static RegressionResult regress(double[] target, double[][] regressors) {
int n = target.length;
int k = regressors.length + 1;
String[] regressorNames = new String[regressors.length];
for (int i = 0; i < regressors.length; i++) {
regressorNames[i] = "X" + (i + 1);
}
TetradMatrix y = new TetradMatrix(new double[][] { target }).transpose();
TetradMatrix x = new TetradMatrix(regressors).transpose();
TetradMatrix xT = x.transpose();
TetradMatrix xTx = xT.times(x);
TetradMatrix xTxInv = xTx.inverse();
TetradMatrix xTy = xT.times(y);
TetradMatrix b = xTxInv.times(xTy);
TetradMatrix yHat = x.times(b);
if (yHat.columns() == 0)
yHat = y.like();
// y.copy().assign(yHat, PlusMult.plusMult(-1));
TetradMatrix res = y.minus(yHat);
TetradVector _yHat = yHat.getColumn(0);
TetradVector _res = res.getColumn(0);
// Recompute the fit from a copy of b; res2 is computed here but not used by this static variant.
TetradMatrix b2 = b.copy();
TetradMatrix yHat2 = x.times(b2);
if (yHat2.columns() == 0)
yHat2 = y.like();
// y.copy().assign(yHat, PlusMult.plusMult(-1));
TetradMatrix _res2 = y.minus(yHat2);
TetradVector res2 = _res2.getColumn(0);
double rss = rss(x, y, b);
double se = Math.sqrt(rss / (n - k));
double tss = tss(y);
double r2 = 1.0 - (rss / tss);
TetradVector sqErr = new TetradVector(x.columns());
TetradVector t = new TetradVector(x.columns());
TetradVector p = new TetradVector(x.columns());
for (int i = 0; i < x.columns(); i++) {
double _s = se * se * xTxInv.get(i, i);
double _se = Math.sqrt(_s);
double _t = b.get(i, 0) / _se;
double _p = 2 * (1.0 - ProbUtils.tCdf(Math.abs(_t), n - k));
sqErr.set(i, _se);
t.set(i, _t);
p.set(i, _p);
}
double[] bArray = b.columns() == 0 ? new double[0] : b.getColumn(0).toArray();
double[] tArray = t.toArray();
double[] pArray = p.toArray();
double[] seArray = sqErr.toArray();
return new RegressionResult(true, regressorNames, n, bArray, tArray, pArray, seArray, r2, rss, 0.05, _yHat, _res);
}
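For reference, a minimal usage sketch of the static variant above (the package edu.cmu.tetrad.regression and the data values are assumptions for illustration). Note that, as the code shows, this variant does not prepend a column of 1s to x, so the fit has no intercept term, although the degrees of freedom are still n - k with k = regressors.length + 1.

import edu.cmu.tetrad.regression.RegressionDataset;
import edu.cmu.tetrad.regression.RegressionResult;

public class StaticRegressExample {
    public static void main(String[] args) {
        // Illustrative data: one target and two regressors, five rows each.
        double[] target = {1.0, 2.1, 2.9, 4.2, 5.1};
        double[][] regressors = {
                {1.0, 2.0, 3.0, 4.0, 5.0},  // reported as X1
                {0.5, 0.4, 0.6, 0.5, 0.7}   // reported as X2
        };
        RegressionResult result = RegressionDataset.regress(target, regressors);
        // Print the result; the regressors are named X1, X2, ... by the method itself.
        System.out.println(result);
    }
}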
Use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
The class TestStandardizedSem, method isStandardized.
private boolean isStandardized(StandardizedSemIm sem) {
DataSet dataSet = sem.simulateData(5000, false);
TetradMatrix _dataSet = dataSet.getDoubleData();
TetradMatrix cov = DataUtils.cov(_dataSet);
TetradVector means = DataUtils.mean(_dataSet);
for (int i = 0; i < cov.rows(); i++) {
if (!(Math.abs(cov.get(i, i) - 1) < .1)) {
return false;
}
if (!(Math.abs(means.get(i)) < .1)) {
return false;
}
}
return true;
}
Use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
The class FastIca, method icaDeflation.
// ==============================PRIVATE METHODS==========================//
private TetradMatrix icaDeflation(TetradMatrix X, int numComponents, double tolerance, int function, double alpha, int maxIterations, boolean verbose, TetradMatrix wInit) {
if (verbose && function == LOGCOSH) {
TetradLogger.getInstance().log("info", "Deflation FastIca using logcosh approx. to neg-entropy function");
}
if (verbose && function == EXP) {
TetradLogger.getInstance().log("info", "Deflation FastIca using exponential approx. to neg-entropy function");
}
int p = X.columns();
TetradMatrix W = new TetradMatrix(numComponents, numComponents);
for (int i = 0; i < numComponents; i++) {
if (verbose) {
TetradLogger.getInstance().log("fastIcaDetails", "Component " + (i + 1));
}
TetradVector w = wInit.getRow(i);
if (i > 0) {
TetradVector t = w.like();
for (int u = 0; u < i; u++) {
double k = 0.0;
for (int j = 0; j < numComponents; j++) {
k += w.get(j) * W.get(u, j);
}
for (int j = 0; j < numComponents; j++) {
t.set(j, t.get(j) + k * W.get(u, j));
}
}
for (int j = 0; j < numComponents; j++) {
w.set(j, w.get(j) - t.get(j));
}
}
double rms = rms(w);
for (int j = 0; j < numComponents; j++) {
w.set(j, w.get(j) / rms);
}
int it = 0;
double _tolerance = Double.POSITIVE_INFINITY;
if (function == LOGCOSH) {
while (_tolerance > tolerance && ++it <= maxIterations) {
TetradVector wx = X.transpose().times(w);
TetradVector gwx0 = new TetradVector(p);
for (int j = 0; j < p; j++) {
gwx0.set(j, Math.tanh(alpha * wx.get(j)));
}
TetradMatrix gwx = new TetradMatrix(numComponents, p);
for (int _i = 0; _i < numComponents; _i++) {
for (int j = 0; j < p; j++) {
gwx.set(_i, j, gwx0.get(j));
}
}
TetradMatrix xgwx = new TetradMatrix(numComponents, p);
for (int _i = 0; _i < numComponents; _i++) {
for (int j = 0; j < p; j++) {
xgwx.set(_i, j, X.get(_i, j) * gwx0.get(j));
}
}
TetradVector v1 = new TetradVector(numComponents);
for (int k = 0; k < numComponents; k++) {
v1.set(k, mean(xgwx.getRow(k)));
}
TetradVector g_wx = new TetradVector(p);
for (int k = 0; k < p; k++) {
double tmp1 = Math.tanh(alpha * wx.get(k));
g_wx.set(k, alpha * (1.0 - tmp1 * tmp1));
}
TetradVector v2 = w.copy();
double meanGwx = mean(g_wx);
v2 = v2.scalarMult(meanGwx);
TetradVector w1 = v1.copy();
// w1.assign(v2, PlusMult.plusMult(-1));
w1 = w1.minus(v2);
if (i > 0) {
TetradVector t = w1.like();
for (int u = 0; u < i; u++) {
double k = 0.0;
for (int j = 0; j < numComponents; j++) {
k += w1.get(j) * W.get(u, j);
}
for (int j = 0; j < numComponents; j++) {
t.set(j, t.get(j) + k * W.get(u, j));
}
}
for (int j = 0; j < numComponents; j++) {
w1.set(j, w1.get(j) - t.get(j));
}
}
double _rms = rms(w1);
for (int k = 0; k < numComponents; k++) {
w1.set(k, w1.get(k) / _rms);
}
_tolerance = 0.0;
for (int k = 0; k < numComponents; k++) {
_tolerance += w1.get(k) * w.get(k);
}
_tolerance = Math.abs(Math.abs(_tolerance) - 1.0);
if (verbose) {
TetradLogger.getInstance().log("fastIcaDetails", "Iteration " + it + " tol = " + _tolerance);
}
w = w1;
}
} else if (function == EXP) {
while (_tolerance > tolerance && ++it <= maxIterations) {
TetradVector wx = X.transpose().times(w);
TetradVector gwx0 = new TetradVector(p);
for (int j = 0; j < p; j++) {
gwx0.set(j, wx.get(j) * Math.exp(-(wx.get(j) * wx.get(j)) / 2));
}
TetradMatrix gwx = new TetradMatrix(numComponents, p);
for (int _i = 0; _i < numComponents; _i++) {
for (int j = 0; j < p; j++) {
gwx.set(_i, j, gwx0.get(j));
}
}
TetradMatrix xgwx = new TetradMatrix(numComponents, p);
for (int _i = 0; _i < numComponents; _i++) {
for (int j = 0; j < p; j++) {
xgwx.set(_i, j, X.get(_i, j) * gwx0.get(j));
}
}
TetradVector v1 = new TetradVector(numComponents);
for (int k = 0; k < numComponents; k++) {
v1.set(k, mean(xgwx.getRow(k)));
}
TetradVector g_wx = new TetradVector(p);
for (int j = 0; j < p; j++) {
g_wx.set(j, (1.0 - wx.get(j) * wx.get(j)) * Math.exp(-(wx.get(j) * wx.get(j)) / 2));
}
TetradVector v2 = w.copy();
double meanGwx = mean(g_wx);
TetradVector w1 = v2.scalarMult(meanGwx).minus(v2);
if (i > 0) {
TetradVector t = w1.like();
for (int u = 0; u < i; u++) {
double k = 0.0;
for (int j = 0; j < numComponents; j++) {
k += w1.get(j) * W.get(u, j);
}
for (int j = 0; j < numComponents; j++) {
t.set(j, t.get(j) + k * W.get(u, j));
}
}
for (int j = 0; j < numComponents; j++) {
w1.set(j, w1.get(j) - t.get(j));
}
}
double _rms = rms(w1);
for (int k = 0; k < numComponents; k++) {
w1.set(k, w1.get(k) / _rms);
}
_tolerance = 0.0;
for (int k = 0; k < numComponents; k++) {
_tolerance += w1.get(k) * w.get(k);
}
_tolerance = Math.abs(Math.abs(_tolerance) - 1.0);
if (verbose) {
TetradLogger.getInstance().log("fastIcaDetails", "Iteration " + it + " tol = " + _tolerance);
}
w = w1;
}
}
W.assignRow(i, w);
}
return W;
}
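The same block appears three times in icaDeflation (once before the iteration loop and once inside each of the two update loops): it decorrelates the candidate weight vector against the rows of W extracted so far by subtracting its projections onto them, then rescales it to unit length via rms(...). A self-contained sketch of that deflation step on plain arrays (the helper name is mine, and treating rms as the Euclidean norm is an assumption):

// Sketch of the deflation step: remove from w its projections onto the first i
// rows of W (assumed orthonormal), then normalize w.
public class DeflationStepSketch {
    static void decorrelateAndNormalize(double[] w, double[][] W, int i) {
        int n = w.length;
        double[] t = new double[n];
        for (int u = 0; u < i; u++) {
            double k = 0.0;
            for (int j = 0; j < n; j++) k += w[j] * W[u][j];   // inner product <w, W_u>
            for (int j = 0; j < n; j++) t[j] += k * W[u][j];   // accumulate the projection
        }
        for (int j = 0; j < n; j++) w[j] -= t[j];              // w <- w - sum_u <w, W_u> W_u
        double ssq = 0.0;
        for (int j = 0; j < n; j++) ssq += w[j] * w[j];
        double norm = Math.sqrt(ssq);
        for (int j = 0; j < n; j++) w[j] /= norm;              // unit length (assumed rms(w))
    }
}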
Use of edu.cmu.tetrad.util.TetradVector in project tetrad by cmu-phil.
The class ConditionalGaussianOtherLikelihood, method likelihoodMixed.
// For cases like P(C | X). This is a ratio of joints, but if the numerator is conditional Gaussian,
// the denominator is a mixture of Gaussians.
private Ret likelihoodMixed(List<ContinuousVariable> X, List<DiscreteVariable> A, DiscreteVariable B) {
final int k = X.size();
final double g = Math.pow(2.0 * Math.PI, -0.5 * k) * Math.exp(-0.5 * k);
int[] continuousCols = new int[k];
for (int j = 0; j < k; j++) continuousCols[j] = nodesHash.get(X.get(j));
double lnL = 0.0;
int N = dataSet.getNumRows();
List<List<List<Integer>>> cells = adTree.getCellLeaves(A, B);
TetradMatrix defaultCov = null;
for (List<List<Integer>> mycells : cells) {
List<TetradMatrix> x = new ArrayList<>();
List<TetradMatrix> sigmas = new ArrayList<>();
List<TetradMatrix> inv = new ArrayList<>();
List<TetradVector> mu = new ArrayList<>();
for (List<Integer> cell : mycells) {
TetradMatrix subsample = getSubsample(continuousCols, cell);
try {
// Determinant will be zero if data are linearly dependent.
if (mycells.size() <= continuousCols.length)
throw new IllegalArgumentException();
TetradMatrix cov = cov(subsample);
TetradMatrix covinv = cov.inverse();
if (defaultCov == null) {
defaultCov = cov;
}
x.add(subsample);
sigmas.add(cov);
inv.add(covinv);
mu.add(means(subsample));
} catch (Exception e) {
// No contribution.
}
}
double[] factors = new double[x.size()];
for (int u = 0; u < x.size(); u++) {
factors[u] = g * Math.pow(sigmas.get(u).det(), -0.5);
}
double[] a = new double[x.size()];
for (int u = 0; u < x.size(); u++) {
for (int i = 0; i < x.get(u).rows(); i++) {
for (int v = 0; v < x.size(); v++) {
final TetradVector xm = x.get(u).getRow(i).minus(mu.get(v));
a[v] = prob(factors[v], inv.get(v), xm);
}
double num = a[u] * p(x, u, N);
double denom = 0.0;
for (int v = 0; v < x.size(); v++) {
denom += a[v] * (p(x, v, N));
}
lnL += log(num) - log(denom);
}
}
}
int p = (int) getPenaltyDiscount();
// Only count dof for continuous cells that contributed to the likelihood calculation.
int dof = f(A) * B.getNumCategories() + f(A) * p * h(X);
return new Ret(lnL, dof);
}
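In sketch form, and assuming that prob(factors[v], inv.get(v), xm) evaluates the Gaussian term a_v for cell v and that p(x, v, N) returns that cell's sample proportion pi_v, the inner loop above adds, for each row x_i whose own cell is u,

\[
\ln L \mathrel{+}= \ln \frac{a_u(x_i)\,\pi_u}{\sum_v a_v(x_i)\,\pi_v},
\qquad
a_v(x_i) = \mathrm{prob}\bigl(g\,\lvert\Sigma_v\rvert^{-1/2},\ \Sigma_v^{-1},\ x_i - \mu_v\bigr),
\qquad
g = (2\pi)^{-k/2} e^{-k/2},
\]

so the numerator is the term for the row's own cell (the conditional Gaussian of the comment at the top of the method), while the denominator mixes over all cells, giving the mixture of Gaussians.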