use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.
the class GLRMCategoricalTest method testCategoricalIris.
// Trains a rank-4 GLRM with absolute loss on the iris data set (which is parsed from a
// CSV with a header), scores the training frame and logs the resulting error metrics.
@Test
public void testCategoricalIris() throws InterruptedException, ExecutionException {
  Frame irisFrame = null;
  GLRMModel glrm = null;
  try {
    irisFrame = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");

    // Model configuration: SVD initialization, no column transformation.
    GLRMParameters params = new GLRMParameters();
    params._train = irisFrame._key;
    params._k = 4;
    params._max_iterations = 1000;
    params._loss = GlrmLoss.Absolute;
    params._transform = DataInfo.TransformType.NONE;
    params._init = GlrmInitialization.SVD;
    params._recover_svd = true;

    glrm = new GLRM(params).trainModel().get();
    String trainingSummary = "Iteration " + glrm._output._iterations + ": Objective value = " + glrm._output._objective;
    Log.info(trainingSummary);

    // Scoring stores the metrics in DKV; the scored frame itself is not needed afterwards.
    Frame scored = glrm.score(irisFrame);
    scored.delete();

    ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(glrm, irisFrame);
    String errorSummary = "Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr;
    Log.info(errorSummary);
  } finally {
    // Release the training frame first, then the model, whether or not training succeeded.
    if (irisFrame != null) {
      irisFrame.delete();
    }
    if (glrm != null) {
      glrm.delete();
    }
  }
}
use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.
the class GLRMTest method testArrests.
// Trains a rank-3 GLRM with quadratic regularization on the standardized USArrests data,
// starting from a user-supplied Y matrix, then scores the training frame and logs the errors.
@Test
public void testArrests() throws InterruptedException, ExecutionException {
  // Initialize using first k rows of standardized training frame
  Frame userY = ArrayUtils.frame(ard(ard(1.24256408, 0.7828393, -0.5209066, -0.003416473), ard(0.50786248, 1.1068225, -1.2117642, 2.484202941), ard(0.07163341, 1.4788032, 0.9989801, 1.042878388)));
  long seed = 1234;
  Frame arrests = null;
  GLRM builder = null;
  GLRMModel glrm = null;
  try {
    arrests = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

    GLRMParameters params = new GLRMParameters();
    params._train = arrests._key;
    params._k = 3;
    params._seed = seed;
    params._transform = DataInfo.TransformType.STANDARDIZE;

    // Same quadratic penalty and weight on both the X and Y factors.
    params._gamma_y = 0.5;
    params._gamma_x = 0.5;
    params._regularization_x = GlrmRegularizer.Quadratic;
    params._regularization_y = GlrmRegularizer.Quadratic;

    // Start the optimization from the user-provided Y frame.
    params._init = GlrmInitialization.User;
    params._user_y = userY._key;
    params._recover_svd = false;

    builder = new GLRM(params);
    glrm = builder.trainModel().get();
    String trainingSummary = "Iteration " + glrm._output._iterations + ": Objective value = " + glrm._output._objective;
    Log.info(trainingSummary);

    // Scoring stores the metrics in DKV; the scored frame itself is discarded right away.
    Frame scored = glrm.score(arrests);
    scored.delete();

    ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(glrm, arrests);
    String errorSummary = "Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr;
    Log.info(errorSummary);
  } finally {
    // The initial Y frame always exists at this point; the other two may not.
    userY.delete();
    if (arrests != null) {
      arrests.delete();
    }
    if (glrm != null) {
      glrm.delete();
    }
  }
}
use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.
the class GLRMTest method testBenignSVD.
// Trains a rank-10 GLRM with quadratic regularization and SVD initialization on the
// standardized benign data set, then scores the training frame and logs the errors.
@Test
public void testBenignSVD() throws InterruptedException, ExecutionException {
  Frame benign = null;
  GLRM builder = null;
  GLRMModel glrm = null;
  try {
    benign = parse_test_file(Key.make("benign.hex"), "smalldata/logreg/benign.csv");

    GLRMParameters params = new GLRMParameters();
    params._train = benign._key;
    params._k = 10;
    params._transform = DataInfo.TransformType.STANDARDIZE;
    params._init = GlrmInitialization.SVD;
    params._recover_svd = false;

    // Same quadratic penalty and weight on both the X and Y factors.
    params._gamma_y = 0.25;
    params._gamma_x = 0.25;
    params._regularization_x = GlrmRegularizer.Quadratic;
    params._regularization_y = GlrmRegularizer.Quadratic;

    // Iteration cap and minimum step size for the optimizer.
    params._max_iterations = 2000;
    params._min_step_size = 1e-5;

    builder = new GLRM(params);
    glrm = builder.trainModel().get();
    String trainingSummary = "Iteration " + glrm._output._iterations + ": Objective value = " + glrm._output._objective;
    Log.info(trainingSummary);

    // Scoring stores the metrics in DKV; the scored frame itself is discarded right away.
    Frame scored = glrm.score(benign);
    scored.delete();

    ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(glrm, benign);
    String errorSummary = "Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr;
    Log.info(errorSummary);
  } finally {
    // Release the training frame first, then the model, whether or not training succeeded.
    if (benign != null) {
      benign.delete();
    }
    if (glrm != null) {
      glrm.delete();
    }
  }
}
use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.
the class GLRMTest method testArrestsMissing.
/**
 * Trains a full-rank GLRM on the standardized USArrests data for increasing fractions of
 * artificially inserted missing values. For every fraction it compares the recovered
 * singular values and eigenvectors with the expected ones, checks that the training
 * objective equals the numeric error obtained by scoring the training frame, and finally
 * logs a table of the average errors per missing fraction.
 *
 * @throws InterruptedException if waiting for the missing-value inserter or the model is interrupted
 * @throws ExecutionException   if the missing-value inserter or the model training fails
 */
@Test
public void testArrestsMissing() throws InterruptedException, ExecutionException {
  // Expected eigenvectors and their corresponding singular values with standardized data
  double[] sval = new double[] { 11.024148, 6.964086, 4.179904, 2.915146 };
  double[][] eigvec = ard(ard(-0.5358995, 0.4181809, -0.3412327, 0.64922780), ard(-0.5831836, 0.1879856, -0.2681484, -0.74340748), ard(-0.2781909, -0.8728062, -0.3780158, 0.13387773), ard(-0.5434321, -0.1673186, 0.8177779, 0.08902432));
  long seed = 1234;
  // Sorted maps so the final report lists the missing fractions in ascending order
  Map<Double, Double> sd_map = new TreeMap<>();
  Map<Double, Double> ev_map = new TreeMap<>();
  StringBuilder sb = new StringBuilder();
  for (double missing_fraction : new double[] { 0, 0.1, 0.25, 0.5, 0.75, 0.9 }) {
    // Declared per iteration so that cleanup after a failure never touches
    // the (already deleted) frame or model of a previous iteration.
    Frame train = null;
    GLRMModel model = null;
    // Entered outside the try so that the Scope.exit() in finally is always balanced.
    Scope.enter();
    try {
      train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
      // Add missing values to the training data
      if (missing_fraction > 0) {
        Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
        // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
        DKV.put(frtmp._key, frtmp);
        try {
          FrameUtils.MissingInserter j = new FrameUtils.MissingInserter(frtmp._key, seed, missing_fraction);
          // MissingInserter is non-blocking, must block here explicitly
          j.execImpl().get();
        } finally {
          // Delete the frame header (not the data), even if the inserter failed
          DKV.remove(frtmp._key);
        }
      }
      GLRMParameters parms = new GLRMParameters();
      parms._train = train._key;
      parms._k = train.numCols();
      parms._loss = GlrmLoss.Quadratic;
      parms._regularization_x = GlrmRegularizer.None;
      parms._regularization_y = GlrmRegularizer.None;
      parms._transform = DataInfo.TransformType.STANDARDIZE;
      parms._init = GlrmInitialization.PlusPlus;
      parms._max_iterations = 1000;
      parms._seed = seed;
      parms._recover_svd = true;
      GLRM job = new GLRM(parms);
      model = job.trainModel().get();
      Log.info(100 * missing_fraction + "% missing values: Objective = " + model._output._objective);
      // Errors are averaged over the k components
      double sd_err = errStddev(sval, model._output._singular_vals) / parms._k;
      double ev_err = errEigvec(eigvec, model._output._eigenvectors_raw) / parms._k;
      Log.info("Avg SSE in Std Dev = " + sd_err + "\tAvg SSE in Eigenvectors = " + ev_err);
      sd_map.put(missing_fraction, sd_err);
      ev_map.put(missing_fraction, ev_err);
      model.score(train).delete();
      ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
      Log.info("Numeric Sum of Squared Error = " + mm._numerr + "\tCategorical Misclassification Error = " + mm._caterr);
      Assert.assertEquals(model._output._objective, mm._numerr, TOLERANCE);
    } finally {
      // Exit the scope first (same order as the original success path), then
      // release the frame and model; this now also runs when the body throws.
      Scope.exit();
      if (train != null) {
        train.delete();
      }
      if (model != null) {
        model.delete();
      }
    }
  }
  // Report: one "fraction --> error" line per tested missing fraction
  sb.append("\nMissing Fraction --> Avg SSE in Std Dev\n");
  for (String s : Arrays.toString(sd_map.entrySet().toArray()).split(",")) {
    sb.append(s.replace("=", " --> ")).append("\n");
  }
  sb.append("\n");
  sb.append("Missing Fraction --> Avg SSE in Eigenvectors\n");
  for (String s : Arrays.toString(ev_map.entrySet().toArray()).split(",")) {
    sb.append(s.replace("=", " --> ")).append("\n");
  }
  Log.info(sb.toString());
}
Aggregations