use of hex.pca.PCA in project h2o-2 by h2oai.
the class PCATest method testBasic.
/**
 * Smoke test: builds a PCA model on a small inline data set (header {@code x1,x2,x3}
 * plus eight rows), with tolerance 0.0 and standardization enabled, then checks that
 * the job recorded in the model parameters finished and hands the model to
 * {@code testHTML}.
 *
 * The parsed frame and the model are deleted in the {@code finally} block whether or
 * not the assertions pass.
 */
@Test
public void testBasic() throws InterruptedException, ExecutionException {
  boolean standardize = true;
  PCAModel model = null;
  Frame fr = null;
  try {
    // Write the raw CSV bytes under a key, then parse them into a frame.
    Key kraw = Key.make("basicdata.raw");
    FVecTest.makeByteVec(kraw, "x1,x2,x3\n0,1.0,-120.4\n1,0.5,89.3\n2,0.3333333,291.0\n3,0.25,-2.5\n4,0.20,-2.5\n5,0.1666667,-123.4\n6,0.1428571,-0.1\n7,0.1250000,18.3");
    fr = ParseDataset2.parse(Key.make("basicdata.hex"), new Key[] { kraw });

    // Run PCA and fetch the resulting model back from the DKV.
    Key kpca = Key.make("basicdata.pca");
    new PCA("PCA on basic small dataset", kpca, fr, 0.0, standardize).invoke();
    model = DKV.get(kpca).get();

    // HEX-1817: the job state stored with the model must be DONE.
    // assertEquals (rather than assertTrue on ==) reports the actual state on failure.
    Job.JobState jstate = model.get_params().state;
    Assert.assertEquals(Job.JobState.DONE, jstate);
    testHTML(model);
  } finally {
    if (fr != null) {
      fr.delete();
    }
    if (model != null) {
      model.delete();
    }
  }
}
use of hex.pca.PCA in project h2o-2 by h2oai.
the class PCATest method testArrests.
/**
 * Runs PCA on the USArrests data set (standardized, tolerance 0.25) and compares the
 * model's standard deviations and eigenvectors with reference results obtained from R.
 * The model is also passed to {@code testHTML}. Frame and model are deleted afterwards.
 */
@Test
public void testArrests() throws InterruptedException, ExecutionException {
  final double tolerance = 0.25;
  final boolean useStandardized = true;
  PCAModel pca = null;
  Frame arrests = null;

  // Reference results from R.
  final double[] expectedSdev = { 1.5748783, 0.9948694, 0.5971291, 0.4164494 };
  final double[][] expectedEigvec = {
    { -0.5358995, 0.4181809, -0.3412327, 0.64922780 },
    { -0.5831836, 0.1879856, -0.2681484, -0.74340748 },
    { -0.2781909, -0.8728062, -0.3780158, 0.13387773 },
    { -0.5434321, -0.1673186, 0.8177779, 0.08902432 }
  };

  try {
    arrests = getFrameForFile(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv", null);

    // Fit the PCA model using every column of the frame.
    final Key modelKey = Key.make("arrests.pca");
    new PCA("PCA test on USArrests", modelKey, arrests, tolerance, useStandardized).invoke();
    pca = DKV.get(modelKey).get();
    testHTML(pca);

    // Standard deviations first, then eigenvectors, against the R results.
    checkSdev(expectedSdev, pca.sdev());
    checkEigvec(expectedEigvec, pca.eigVec());

    // Scoring the original data set with the PCA model is not exercised by this test.
  } finally {
    if (arrests != null) {
      arrests.delete();
    }
    if (pca != null) {
      pca.delete();
    }
  }
}
use of hex.pca.PCA in project h2o-3 by h2oai.
the class GLRMTest method testArrestsVarianceMetrics.
/**
 * PUBDEV-3501: variance metrics for GLRM. Compares the variance metrics calculated by
 * PCA and by GLRM to make sure they agree.
 *
 * For each of the DEMEAN and STANDARDIZE transforms the test trains a PCA model
 * (k = 4, Power method) and a GLRM model (k = 4, quadratic loss, SVD initialization,
 * no regularization, SVD recovery enabled) on USArrests. It first checks the PCA
 * standard deviations and eigenvectors against the stored reference values, then
 * compares the importance tables of the two models within {@code TOLERANCE}.
 */
@Test
public void testArrestsVarianceMetrics() throws InterruptedException, ExecutionException {
  // Results with de-meaned training frame
  double[] stddev = new double[] { 83.732400, 14.212402, 6.489426, 2.482790 };
  double[][] eigvec = ard(ard(0.04170432, -0.04482166, 0.07989066, -0.99492173), ard(0.99522128, -0.05876003, -0.06756974, 0.03893830), ard(0.04633575, 0.97685748, -0.20054629, -0.05816914), ard(0.07515550, 0.20071807, 0.97408059, 0.07232502));
  // Results with standardized training frame
  double[] stddev_std = new double[] { 1.5748783, 0.9948694, 0.5971291, 0.4164494 };
  double[][] eigvec_std = ard(ard(-0.5358995, 0.4181809, -0.3412327, 0.64922780), ard(-0.5831836, 0.1879856, -0.2681484, -0.74340748), ard(-0.2781909, -0.8728062, -0.3780158, 0.13387773), ard(-0.5434321, -0.1673186, 0.8177779, 0.08902432));
  Frame train = null;
  try {
    train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
    for (DataInfo.TransformType std : new DataInfo.TransformType[] { DataInfo.TransformType.DEMEAN, DataInfo.TransformType.STANDARDIZE }) {
      // Declared per iteration: if a later pass fails before its models are assigned,
      // the cleanup below must not touch models that an earlier pass already deleted.
      PCAModel model = null;
      GLRMModel gmodel = null;
      try {
        // build PCA
        PCAModel.PCAParameters parms = new PCAModel.PCAParameters();
        parms._train = train._key;
        parms._k = 4;
        parms._transform = std;
        parms._max_iterations = 1000;
        parms._pca_method = PCAModel.PCAParameters.Method.Power;
        model = new PCA(parms).trainModel().get();

        // build GLRM
        GLRMParameters gparms = new GLRMParameters();
        gparms._train = train._key;
        gparms._k = 4;
        gparms._transform = std;
        gparms._loss = GlrmLoss.Quadratic;
        gparms._init = GlrmInitialization.SVD;
        gparms._max_iterations = 2000;
        gparms._gamma_x = 0;
        gparms._gamma_y = 0;
        gparms._recover_svd = true;
        gmodel = new GLRM(gparms).trainModel().get();
        assert gmodel != null;

        // Importance tables of both models; compared once PCA itself is validated.
        IcedWrapper[][] pcaInfo = model._output._importance.getCellValues();
        IcedWrapper[][] glrmInfo = gmodel._output._importance.getCellValues();

        if (std == DataInfo.TransformType.DEMEAN) {
          // check to make sure PCA generated correct results first
          TestUtil.checkStddev(stddev, model._output._std_deviation, TOLERANCE);
          TestUtil.checkEigvec(eigvec, model._output._eigenvectors, TOLERANCE);
        } else if (std == DataInfo.TransformType.STANDARDIZE) {
          TestUtil.checkStddev(stddev_std, model._output._std_deviation, TOLERANCE);
          TestUtil.checkEigvec(eigvec_std, model._output._eigenvectors, TOLERANCE);
        }

        // compare PCA and GLRM variance metrics here after we know PCA has worked correctly
        TestUtil.checkIcedArrays(pcaInfo, glrmInfo, TOLERANCE);
      } finally {
        if (model != null) {
          model.delete();
        }
        if (gmodel != null) {
          gmodel.delete();
        }
      }
    }
  } finally {
    if (train != null) {
      train.delete();
    }
  }
}
use of hex.pca.PCA in project h2o-2 by h2oai.
the class PCATest method testLinDep.
/**
 * Runs PCA (tolerance 0.0, standardized) on a two-column data set in which
 * {@code x2} is exactly twice {@code x1}, and checks each standard deviation the
 * model reports against the expected values within {@code threshold}. The model is
 * also passed to {@code testHTML}. Frame and model are deleted afterwards.
 */
@Test
public void testLinDep() throws InterruptedException, ExecutionException {
  final Key frameKey = Key.make("depdata.hex");
  PCAModel pca = null;
  Frame parsed = null;
  final double[] expectedSdev = { 1.414214, 0 };
  try {
    // Store the raw CSV bytes, then parse them into the frame under frameKey.
    final Key rawKey = Key.make("depdata.raw");
    FVecTest.makeByteVec(rawKey, "x1,x2\n0,0\n1,2\n2,4\n3,6\n4,8\n5,10");
    parsed = ParseDataset2.parse(frameKey, new Key[] { rawKey });

    final Key modelKey = Key.make("depdata.pca");
    new PCA("PCA on data with dependent cols", modelKey, parsed, 0.0, true).invoke();
    pca = DKV.get(modelKey).get();
    testHTML(pca);

    // Only as many entries as the model actually reports are compared.
    int idx = 0;
    while (idx < pca.sdev().length) {
      Assert.assertEquals(expectedSdev[idx], pca.sdev()[idx], threshold);
      idx++;
    }
  } finally {
    if (parsed != null) {
      parsed.delete();
    }
    if (pca != null) {
      pca.delete();
    }
  }
}