Use of hex.gram.Gram.GramTask in project h2o-2 by h2oai.
Class GramMatrixTest, method testProstate.
/**
 * Checks the Gram matrix produced by {@code GramTask} on the prostate data set against
 * {@code exp_result}, for two task configurations.
 *
 * <p>Each entry returned by {@code getXX()} is multiplied by {@code _nobs} before being
 * compared with the expected value (tolerance 1e-5). The second run differs from the first
 * only in the third {@code GramTask} constructor argument (presumably the intercept flag —
 * confirm against GramTask) and is compared against {@code exp_result} with its last
 * row and column left out.
 */
@Test
public void testProstate() {
  File f2 = find_test_file("smalldata/glm_test/prostate_cat_replaced.csv");
  Key ikey2 = NFSFileVec.make(f2);
  Key okey2 = Key.make("glm_model2");
  Frame fr2 = null;
  try {
    fr2 = ParseDataset2.parse(okey2, new Key[] { ikey2 });
    DataInfo dinfo = new DataInfo(fr2, 0, true, false, DataInfo.TransformType.NONE);

    // First configuration: compare the full matrix.
    GramTask gt = new GramTask(null, dinfo, true, false);
    gt.doAll(dinfo._adaptedFrame);
    double[][] res = gt._gram.getXX();
    System.out.println(Utils.pprint(res));
    for (int i = 0; i < exp_result.length; ++i) {
      for (int j = 0; j < exp_result.length; ++j) {
        assertEquals(exp_result[i][j], gt._nobs * res[i][j], 1e-5);
      }
    }

    // Second configuration: compare everything except the last row/column of exp_result.
    gt = new GramTask(null, dinfo, false, false);
    gt.doAll(dinfo._adaptedFrame);
    // Re-read the matrix from the second task; without this the loop below would only
    // re-verify the result of the first task.
    res = gt._gram.getXX();
    for (int i = 0; i < exp_result.length - 1; ++i) {
      for (int j = 0; j < exp_result.length - 1; ++j) {
        assertEquals(exp_result[i][j], gt._nobs * res[i][j], 1e-5);
      }
    }
  } finally {
    // fr2 is still null if parsing threw; guard so the original failure is not masked by an NPE.
    if (fr2 != null) {
      fr2.delete();
    }
  }
}
Use of hex.gram.Gram.GramTask in project h2o-2 by h2oai.
Class PCA, method execImpl.
/**
 * Executes the PCA job on the frame returned by {@code selectFrame(source)}.
 *
 * <p>Steps, in order: drop unusable columns, run a {@code GramTask} over the adapted frame,
 * build the model via {@code buildModel}, lock and unlock it under this job's key, remove
 * the job, and finally copy the job's state into the parameters of the model stored at
 * {@code dest()}.
 *
 * @throws IllegalArgumentException if fewer than two columns remain after filtering
 */
@Override
protected void execImpl() {
  final Frame fr = selectFrame(source);
  final Vec[] vecs = fr.vecs();

  // Collect the indices of columns that are constant (min == max) or whose NA count
  // exceeds 20% of the column length.
  // if(vecs[i].min() == vecs[i].max() || vecs[i].naCnt() > vecs[i].length()*0.2 || vecs[i].domain() != null)
  final ArrayList<Integer> dropped = new ArrayList<Integer>();
  for (int idx = 0; idx < vecs.length; idx++) {
    final Vec column = vecs[idx];
    final boolean unusable =
        column.min() == column.max() || column.naCnt() > column.length() * 0.2;
    if (unusable) {
      dropped.add(idx);
    }
  }

  // Remove the collected columns from the frame in one call.
  if (dropped.size() > 0) {
    final int[] cols = new int[dropped.size()];
    int next = 0;
    for (Integer colIdx : dropped) {
      cols[next++] = colIdx;
    }
    fr.remove(cols);
  }

  if (fr.numCols() < 2) {
    throw new IllegalArgumentException("Need more than one column to run PCA");
  }

  // Standardize the predictors only when requested.
  final DataInfo.TransformType transform;
  if (standardize) {
    transform = DataInfo.TransformType.STANDARDIZE;
  } else {
    transform = DataInfo.TransformType.NONE;
  }

  final DataInfo dinfo = new DataInfo(fr, 0, false, false, transform);
  final GramTask tsk = new GramTask(self(), dinfo, false, false).doAll(dinfo._adaptedFrame);

  final PCAModel myModel = buildModel(dinfo, tsk);
  myModel.delete_and_lock(self());
  myModel.unlock(self());

  // Close/remove job
  remove();

  // Propagate the job state read after removal into the stored model's parameters.
  final JobState state = UKV.<Job>get(self()).state;
  new TAtomic<PCAModel>() {
    @Override
    public PCAModel atomic(PCAModel m) {
      if (m == null) {
        return m;
      }
      m.get_params().state = state;
      return m;
    }
  }.invoke(dest());
}
Use of hex.gram.Gram.GramTask in project h2o-2 by h2oai.
Class MatrixTest, method testMultiplication.
/**
 * Simple small and dense case: compares t(X) %*% X, computed with {@code DMatrix}, against
 * the Gram matrix computed by {@code GramTask} on the same frame.
 *
 * <p>The input file is loaded, its "RACE" column is removed, and the frame is rebalanced
 * (third {@code RebalanceDataSet} argument is 64) before both products are computed. The
 * Gram matrix is multiplied by {@code _nobs} prior to the element-wise comparison
 * (tolerance 1e-4). All frames and vecs created here are removed before checking for
 * leaked keys.
 */
@Test
public void testMultiplication() {
  final Key parsedKey = Key.make("prostate_parsed");
  final Futures fs = new Futures();

  // Load the data and drop the "RACE" column.
  final Frame raw = getFrameForFile(parsedKey, "smalldata/glm_test/prostate_cat_replaced.csv");
  raw.remove("RACE").remove(fs);

  // Rebalance into a new frame and discard the original one.
  final Key rebalancedKey = Key.make("rebalanced");
  H2O.submitTask(new RebalanceDataSet(raw, rebalancedKey, 64)).join();
  raw.delete();
  final Frame fr = DKV.get(rebalancedKey).get();

  // Product computed through DMatrix: transpose(fr) multiplied by fr.
  final Frame transposed = DMatrix.transpose(fr);
  transposed.reloadVecs();
  final Frame product = DMatrix.mmul(transposed, fr);

  // Gram matrix from GramTask, multiplied by the observation count.
  final DataInfo dinfo = new DataInfo(fr, 0, false, false, DataInfo.TransformType.NONE);
  final GramTask gt = new GramTask(null, dinfo, false, false).doAll(dinfo._adaptedFrame);
  gt._gram.mul(gt._nobs);
  final double[][] gram = gt._gram.getDenseXX();

  // Entry (row, col) of the Gram matrix is checked against row `row` of vec `col` in the product.
  for (int row = 0; row < gram.length; ++row) {
    final double[] gramRow = gram[row];
    for (int col = 0; col < gramRow.length; ++col) {
      assertEquals("position " + row + ", " + col, gramRow[col], product.vec(col).at(row), 1e-4);
    }
  }

  // Clean up everything created by this test, then verify nothing leaked.
  fr.delete();
  for (Vec v : transposed.vecs()) {
    v.remove(fs);
  }
  for (Vec v : product.vecs()) {
    v.remove(fs);
  }
  fs.blockForPending();
  checkLeakedKeys();
}
Aggregations