Example usage of hex.glrm.GLRMModel.GLRMParameters from the h2o-3 project by h2oai:
class GLRMTest, method testArrestsSVD.
@Test
public void testArrestsSVD() throws InterruptedException, ExecutionException {
  // Fits GLRM (k = 4) on the standardized USArrests data with a user-supplied
  // initial Y (the first k rows of the standardized training frame), recovers
  // the SVD, and checks that the final objective equals the numeric
  // reconstruction error reported by the model metrics.
  Frame yinit = ArrayUtils.frame(ard(
      ard(1.24256408, 0.7828393, -0.5209066, -0.003416473),
      ard(0.50786248, 1.1068225, -1.2117642, 2.484202941),
      ard(0.07163341, 1.4788032, 0.9989801, 1.042878388),
      ard(0.23234938, 0.2308680, -1.0735927, -0.184916602)));
  GLRMModel model = null;
  Frame train = null;
  try {
    train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 4;
    parms._transform = DataInfo.TransformType.STANDARDIZE;
    parms._init = GlrmInitialization.User;
    parms._user_y = yinit._key;
    parms._max_iterations = 1000;
    parms._min_step_size = 1e-8;
    parms._recover_svd = true;

    GLRM job = new GLRM(parms);
    model = job.trainModel().get();
    Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);

    // Score the training frame to publish the model metrics (the scored
    // frame itself is not needed), then verify objective == numeric error.
    model.score(train).delete();
    ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
    Log.info("Numeric Sum of Squared Error = " + mm._numerr + "\tCategorical Misclassification Error = " + mm._caterr);
    Assert.assertEquals(model._output._objective, mm._numerr, TOLERANCE);
  } finally {
    // yinit is created before the try block, so it always exists here.
    yinit.delete();
    if (train != null) train.delete();
    if (model != null) model.delete();
  }
}
Example usage of hex.glrm.GLRMModel.GLRMParameters from the h2o-3 project by h2oai:
class GLRMTest, method testSubset.
@Ignore
@Test
public void testSubset() throws InterruptedException, ExecutionException {
  // Analogous to pyunit_subset_glrm.py: trains GLRM on the ACS census data,
  // then subsets the low-rank representation with a Rapids expression keyed by
  // the (categorical) ZCTA column that was split off before training.
  GLRMModel model = null;
  Frame train;
  // try-with-resources guarantees the stream is closed; a failed upload now
  // aborts the test instead of continuing into a guaranteed parse failure.
  try (InputStream is = new FileInputStream(FileUtils.getFile("bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip"))) {
    UploadFileVec.ReadPutStats stats = new UploadFileVec.ReadPutStats();
    UploadFileVec.readPut("train", is, stats);
  } catch (Exception e) {
    throw new RuntimeException("Failed to upload ACS training data", e);
  }
  ParseDataset.parse(Key.make("train_parsed"), Key.make("train"));
  train = DKV.getGet("train_parsed");
  try {
    // Concatenate (original passed nChunks as a separate vararg by mistake).
    Log.info("num chunks: " + train.anyVec().nChunks());
    // Split the first column off as its own categorical frame; GLRM is
    // trained on the remaining columns only.
    Vec[] acs_zcta_vec = { train.vec(0).toCategoricalVec() };
    Frame acs_zcta_fr = new Frame(Key.<Frame>make("acs_zcta_fr"), new String[] { "name" }, acs_zcta_vec);
    DKV.put(acs_zcta_fr);
    train.remove(0).remove();
    DKV.put(train);

    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._gamma_x = 0.25;
    parms._gamma_y = 0.5;
    parms._regularization_x = GlrmRegularizer.Quadratic;
    parms._regularization_y = GlrmRegularizer.L1;
    parms._k = 10;
    parms._transform = DataInfo.TransformType.STANDARDIZE;
    parms._max_iterations = 1;
    parms._loss = GlrmLoss.Quadratic;

    try {
      Scope.enter();
      GLRM job = new GLRM(parms);
      model = job.trainModel().get();
      // Select the X rows whose ZCTA matches any of the listed zip codes.
      String s = "(tmp= py_4 (rows (cols_py " + model._output._representation_key + " [0 1]) (tmp= py_3 (| (| (| (| (| (== (tmp= py_2 " + acs_zcta_fr._key + ") \"10065\") (== py_2 \"11219\")) (== py_2 \"66753\")) (== py_2 \"84104\")) (== py_2 \"94086\")) (== py_2 \"95014\")))))";
      Rapids.exec(s);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      acs_zcta_fr.delete();
      Scope.exit();
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (train != null) train.delete();
    if (model != null) model.delete();
  }
}
Example usage of hex.glrm.GLRMModel.GLRMParameters from the h2o-3 project by h2oai:
class GLRMTest, method testArrestsPlusPlus.
@Test
public void testArrestsPlusPlus() throws InterruptedException, ExecutionException {
  // Fits a fully non-negative GLRM (Huber loss, k = 4) on the standardized
  // USArrests data, seeding the factors with k-means++ initialization and
  // recovering the SVD; only logs the final objective.
  GLRMModel model = null;
  Frame train = null;
  try {
    train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 4;
    parms._loss = GlrmLoss.Huber;
    parms._regularization_x = GlrmRegularizer.NonNegative;
    parms._regularization_y = GlrmRegularizer.NonNegative;
    parms._gamma_x = parms._gamma_y = 1;
    parms._transform = DataInfo.TransformType.STANDARDIZE;
    parms._init = GlrmInitialization.PlusPlus;
    parms._max_iterations = 100;
    parms._min_step_size = 1e-8;
    parms._recover_svd = true;

    model = new GLRM(parms).trainModel().get();
    Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
  } finally {
    if (train != null) train.delete();
    if (model != null) model.delete();
  }
}
Example usage of hex.glrm.GLRMModel.GLRMParameters from the h2o-3 project by h2oai:
class GLRMTest, method testRegularizers.
@Test
public void testRegularizers() throws InterruptedException, ExecutionException {
  // Runs GLRM on USArrests under regularizer combinations corresponding to
  // well-known factorizations (NMF, orthogonal NMF, k-means, soft k-means).
  // Y is initialized from the first 4 rows of USArrests.
  Frame init = ArrayUtils.frame(ard(
      ard(13.2, 236, 58, 21.2),
      ard(10.0, 263, 48, 44.5),
      ard(8.1, 294, 80, 31.0),
      ard(8.8, 190, 50, 19.5)));
  Frame train = null;
  long seed = 1234;
  try {
    Scope.enter();
    train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

    // Parameters shared by all four runs; each section below only changes
    // the gammas and regularizers.
    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._k = 4;
    parms._init = GlrmInitialization.User;
    parms._user_y = init._key;
    parms._transform = DataInfo.TransformType.NONE;
    parms._recover_svd = false;
    parms._max_iterations = 1000;
    parms._seed = seed;

    Log.info("\nNon-negative matrix factorization");
    parms._gamma_x = parms._gamma_y = 1;
    parms._regularization_x = GlrmRegularizer.NonNegative;
    parms._regularization_y = GlrmRegularizer.NonNegative;
    trainScoreAndLog(parms, train);

    Log.info("\nOrthogonal non-negative matrix factorization");
    parms._gamma_x = parms._gamma_y = 1;
    parms._regularization_x = GlrmRegularizer.OneSparse;
    parms._regularization_y = GlrmRegularizer.NonNegative;
    trainScoreAndLog(parms, train);

    Log.info("\nQuadratic clustering (k-means)");
    parms._gamma_x = 1;
    parms._gamma_y = 0;
    parms._regularization_x = GlrmRegularizer.UnitOneSparse;
    parms._regularization_y = GlrmRegularizer.None;
    trainScoreAndLog(parms, train);

    Log.info("\nQuadratic mixture (soft k-means)");
    parms._gamma_x = 1;
    parms._gamma_y = 0;
    // NOTE(review): these are identical to the k-means section above; a soft
    // k-means formulation usually relaxes X onto the simplex (e.g. a Simplex
    // regularizer) -- confirm whether UnitOneSparse here is intentional.
    parms._regularization_x = GlrmRegularizer.UnitOneSparse;
    parms._regularization_y = GlrmRegularizer.None;
    trainScoreAndLog(parms, train);
  } finally {
    init.delete();
    if (train != null) train.delete();
    Scope.exit();
  }
}

// Trains a GLRM with the given parameters, logs the objective, archetypes and
// scoring metrics for the training frame, and always deletes the model.
private void trainScoreAndLog(GLRMParameters parms, Frame train) {
  GLRMModel model = null;
  try {
    model = new GLRM(parms).trainModel().get();
    Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
    Log.info("Archetypes:\n" + model._output._archetypes.toString());
    // Score to publish metrics; the scored frame itself is not needed.
    model.score(train).delete();
    ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
    Log.info("Numeric Sum of Squared Error = " + mm._numerr + "\tCategorical Misclassification Error = " + mm._caterr);
  } finally {
    if (model != null) model.delete();
  }
}
Example usage of hex.glrm.GLRMModel.GLRMParameters from the h2o-3 project by h2oai:
class GLRMTest, method testArrestsVarianceMetrics.
// PUBDEV-3501: Variance metrics for GLRM. Builds PCA and GLRM on the same
// data and verifies that the variance (importance) metrics they report agree.
@Test
public void testArrestsVarianceMetrics() throws InterruptedException, ExecutionException {
  // Expected PCA results with a de-meaned training frame.
  double[] stddev = new double[] { 83.732400, 14.212402, 6.489426, 2.482790 };
  double[][] eigvec = ard(
      ard(0.04170432, -0.04482166, 0.07989066, -0.99492173),
      ard(0.99522128, -0.05876003, -0.06756974, 0.03893830),
      ard(0.04633575, 0.97685748, -0.20054629, -0.05816914),
      ard(0.07515550, 0.20071807, 0.97408059, 0.07232502));
  // Expected PCA results with a standardized training frame.
  double[] stddev_std = new double[] { 1.5748783, 0.9948694, 0.5971291, 0.4164494 };
  double[][] eigvec_std = ard(
      ard(-0.5358995, 0.4181809, -0.3412327, 0.64922780),
      ard(-0.5831836, 0.1879856, -0.2681484, -0.74340748),
      ard(-0.2781909, -0.8728062, -0.3780158, 0.13387773),
      ard(-0.5434321, -0.1673186, 0.8177779, 0.08902432));
  Frame train = null;
  try {
    train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
    for (DataInfo.TransformType std : new DataInfo.TransformType[] { DataInfo.TransformType.DEMEAN, DataInfo.TransformType.STANDARDIZE }) {
      // Models are scoped per iteration so a failure in a later iteration
      // cannot re-delete a model already deleted by an earlier one.
      PCAModel model = null;
      GLRMModel gmodel = null;
      try {
        // Build PCA as the reference.
        PCAModel.PCAParameters parms = new PCAModel.PCAParameters();
        parms._train = train._key;
        parms._k = 4;
        parms._transform = std;
        parms._max_iterations = 1000;
        parms._pca_method = PCAModel.PCAParameters.Method.Power;
        model = new PCA(parms).trainModel().get();

        // Build GLRM with quadratic loss and no regularization, recovering
        // the SVD so its importance table is comparable to PCA's.
        GLRMParameters gparms = new GLRMParameters();
        gparms._train = train._key;
        gparms._k = 4;
        gparms._transform = std;
        gparms._loss = GlrmLoss.Quadratic;
        gparms._init = GlrmInitialization.SVD;
        gparms._max_iterations = 2000;
        gparms._gamma_x = 0;
        gparms._gamma_y = 0;
        gparms._recover_svd = true;
        gmodel = new GLRM(gparms).trainModel().get();
        assert gmodel != null;

        // First make sure PCA itself produced the known-correct results.
        if (std == DataInfo.TransformType.DEMEAN) {
          TestUtil.checkStddev(stddev, model._output._std_deviation, TOLERANCE);
          TestUtil.checkEigvec(eigvec, model._output._eigenvectors, TOLERANCE);
        } else if (std == DataInfo.TransformType.STANDARDIZE) {
          TestUtil.checkStddev(stddev_std, model._output._std_deviation, TOLERANCE);
          TestUtil.checkEigvec(eigvec_std, model._output._eigenvectors, TOLERANCE);
        }
        // Only compare PCA and GLRM variance metrics once PCA is known good.
        IcedWrapper[][] pcaInfo = model._output._importance.getCellValues();
        IcedWrapper[][] glrmInfo = gmodel._output._importance.getCellValues();
        TestUtil.checkIcedArrays(pcaInfo, glrmInfo, TOLERANCE);
      } finally {
        if (model != null) model.delete();
        if (gmodel != null) gmodel.delete();
      }
    }
  } finally {
    if (train != null) train.delete();
  }
}
Aggregations