use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testAbalone.
// Trains a gaussian-family GLM on the Abalone test file with lambda search enabled,
// then passes the trained model and the training frame to testScoring.
public void testAbalone() {
GLMModel model = null;
try {
Frame fr = parse_test_file("smalldata/glm_test/Abalone.gz");
GLMParameters params = new GLMParameters(Family.gaussian);
params._train = fr._key;
// The response column is picked by position: the name at index 8 of the parsed frame.
params._response_column = fr._names[8];
// Single alpha value of 1.0 (presumably a pure L1 penalty -- TODO confirm against GLMParameters docs).
params._alpha = new double[] { 1.0 };
params._lambda_search = true;
GLM glm = new GLM(params);
// The result of trainModel().get() is kept so the finally block can test it for null.
model = glm.trainModel().get();
testScoring(model, fr);
} finally {
// NOTE(review): the statement guarded by this null check is not visible in this excerpt.
if (model != null)
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method test_COD_Airlines_LambdaSearch_CovUpdates.
// Trains a binomial GLM on the airlines data with the COORDINATE_DESCENT solver and
// lambda search over 5 lambdas, then reads the coefficients of the last submodel.
// The objective-value assertion is commented out, so the visible code asserts nothing.
public void test_COD_Airlines_LambdaSearch_CovUpdates() {
GLMModel model1 = null;
// Distance + Origin + Dest + UniqueCarrier
Frame fr = parse_test_file(Key.make("Airlines"), "smalldata/airlines/");
String[] ignoredCols = new String[] { "IsDepDelayed_REC" };
try {
GLMParameters params = new GLMParameters(Family.binomial);
params._response_column = "IsDepDelayed";
params._ignored_columns = ignoredCols;
// The same frame is used for both training and validation.
params._train = fr._key;
params._valid = fr._key;
// new double [] {0.25};
// No explicit lambda is supplied; lambda search is switched on below.
params._lambda = null;
params._alpha = new double[] { 1 };
params._standardize = false;
params._solver = Solver.COORDINATE_DESCENT;
params._lambda_search = true;
params._nlambdas = 5;
GLM glm = new GLM(params);
model1 = glm.trainModel().get();
// Take the last entry of the submodel array.
GLMModel.Submodel sm = model1._output._submodels[model1._output._submodels.length - 1];
double[] beta = sm.beta;
System.out.println("lambda " + sm.lambda_value);
// NOTE(review): l1pen and l2pen are only referenced by the commented-out objective check below.
double l1pen = ArrayUtils.l1norm(beta, true);
double l2pen = ArrayUtils.l2norm2(beta, true);
// double objective = job.likelihood()/model1._nobs + // gives likelihood of the last lambda
// params._l2pen[params._l2pen.length-1]*params._alpha[0]*l1pen + params._l2pen[params._l2pen.length-1]*(1-params._alpha[0])*l2pen/2 ;
// assertEquals(0.65689, objective,1e-4);
} finally {
// NOTE(review): the statement guarded by this null check is not visible in this excerpt.
if (model1 != null)
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testArcene.
* Test strong rules on arcene datasets (10k predictors, 100 rows).
* Should be able to obtain good model (~100 predictors, ~1 explained deviance) with up to 250 active predictors.
* Scaled down (higher lambda min, fewer lambdas) to run at reasonable speed (whole test takes 20s on my laptop).
* Test runs GLM with the gaussian family on the arcene dataset and verifies it gets all lambdas while limiting the maximum number of active predictors to a reasonably small number.
* Compares the objective value to expected one.
// Configures gaussian GLM runs on the arcene data under a fixed model key and checks
// the number of submodels and the rank of the resulting model against the configured limits.
// NOTE(review): this excerpt appears to have lost lines -- `glm` is declared twice in the
// visible text, no trainModel() call is shown, and the for loop is never closed. Confirm
// against the full GLMTest source before relying on the statement order below.
public void testArcene() throws InterruptedException, ExecutionException {
Key parsed = Key.make("arcene_parsed");
Key<GLMModel> modelKey = Key.make("arcene_model");
GLMModel model = null;
Frame fr = parse_test_file(parsed, "smalldata/glm_test/arcene.csv");
try {
// test LBFGS with l1 pen
// NOTE(review): the solver list below contains only IRLSM and COORDINATE_DESCENT, not L_BFGS.
GLMParameters params = new GLMParameters(Family.gaussian);
// params._response = 0;
params._lambda = null;
// The response is the first column of the parsed frame.
params._response_column = fr._names[0];
params._train = parsed;
params._lambda_search = true;
params._nlambdas = 35;
params._lambda_min_ratio = 0.18;
params._max_iterations = 100000;
params._max_active_predictors = 10000;
params._alpha = new double[] { 1 };
for (Solver s : new Solver[] { Solver.IRLSM, Solver.COORDINATE_DESCENT }) {
//Solver.COORDINATE_DESCENT,}) { // LBFGS lambda-search is too slow now
params._solver = s;
GLM glm = new GLM(params, modelKey);
// The model is looked up in the DKV by the key passed to the builder.
model = DKV.get(modelKey).get();
// assert on that we got all submodels (if strong rules work, we should be able to get the results with this many active predictors)
assertEquals(params._nlambdas, model._output._submodels.length);
// assert on the quality of the result, technically should compare objective value, but this should be good enough for now
// Second configuration: coordinate descent, at most 100 active predictors, 100 lambdas.
params._solver = Solver.COORDINATE_DESCENT;
params._max_active_predictors = 100;
params._lambda_min_ratio = 1e-2;
params._nlambdas = 100;
GLM glm = new GLM(params, modelKey);
model = DKV.get(modelKey).get();
// The model rank must not exceed the configured cap on active predictors.
assertTrue(model._output.rank() <= params._max_active_predictors);
// System.out.println("============================================================================================================");
// assert on that we got all submodels (if strong rules work, we should be able to get the results with this many active predictors)
// Third configuration: cap raised to 250 and lambda search switched off.
params._max_active_predictors = 250;
params._lambda = null;
params._lambda_search = false;
glm = new GLM(params, modelKey);
model = DKV.get(modelKey).get();
assertTrue(model._output.rank() <= params._max_active_predictors);
// System.out.println("============================================================================================================");
// assert on that we got all submodels (if strong rules work, we should be able to get the results with this many active predictors)
} finally {
// NOTE(review): the statement guarded by this null check is not visible in this excerpt.
if (model != null)
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMBasicTestBinomial method testNoIntercept.
// Fits a binomial GLM without an intercept on the prostate training frame for each solver
// in the list below and compares coefficients, deviances, degrees of freedom and AIC with
// the reference values from the R glm() call quoted in the comments. It then re-scores the
// training and test frames and checks the recomputed metrics against the ones stored on the model.
public void testNoIntercept() {
GLMModel model = null;
// Call: glm(formula = CAPSULE ~ . - 1 - RACE - DCAPS, family = binomial,
// data = train)
// Coefficients:
// -0.00743 -6.46499 -5.60120 -5.18213 -5.70027 0.02753 -0.01235 0.86122
// Degrees of Freedom: 290 Total (i.e. Null); 282 Residual
// Null Deviance: 402
// Residual Deviance: 302.9 AIC: 318.9
// Coefficient names and expected values are parallel arrays (same index = same coefficient).
String[] cfs1 = new String[] { "AGE", "DPROS.a", "DPROS.b", "DPROS.c", "DPROS.d", "PSA", "VOL", "GLEASON" };
double[] vals = new double[] { -0.00743, -6.46499, -5.60120, -5.18213, -5.70027, 0.02753, -0.01235, 0.86122 };
GLMParameters params = new GLMParameters(Family.binomial);
params._response_column = "CAPSULE";
params._ignored_columns = new String[] { "ID", "RACE", "DCAPS" };
params._train = _prostateTrain._key;
params._valid = _prostateTest._key;
// lambda = 0 and alpha = 0 (presumably no regularization, to match the plain R fit -- TODO confirm).
params._lambda = new double[] { 0 };
params._alpha = new double[] { 0 };
params._standardize = false;
params._intercept = false;
params._objective_epsilon = 0;
params._gradient_epsilon = 1e-6;
params._missing_values_handling = MissingValuesHandling.Skip;
// not expected to reach max iterations here
params._max_iterations = 100;
// The same params object is reused across solvers; only _solver changes per iteration.
for (Solver s : new Solver[] { Solver.AUTO, Solver.IRLSM, Solver.L_BFGS, Solver.COORDINATE_DESCENT }) {
Frame scoreTrain = null, scoreTest = null;
try {
params._solver = s;
System.out.println("SOLVER = " + s);
model = new GLM(params).trainModel().get();
HashMap<String, Double> coefs = model.coefficients();
System.out.println("coefs = " + coefs.toString());
System.out.println("metrics = " + model._output._training_metrics);
// Coordinate-descent solvers get a looser coefficient tolerance (1e-1 instead of 1e-4).
// COORDINATE_DESCENT_NAIVE is not in the solver list above, so that half of the test never matches here.
boolean CD = (s == Solver.COORDINATE_DESCENT || s == Solver.COORDINATE_DESCENT_NAIVE);
for (int i = 0; i < cfs1.length; ++i) assertEquals(vals[i], coefs.get(cfs1[i]), CD ? 1e-1 : 1e-4);
assertEquals(402, GLMTest.nullDeviance(model), 1e-1);
assertEquals(302.9, GLMTest.residualDeviance(model), 1e-1);
assertEquals(290, GLMTest.nullDOF(model), 0);
assertEquals(282, GLMTest.resDOF(model), 0);
assertEquals(318.9, GLMTest.aic(model), 1e-1);
System.out.println("VAL METRICS: " + model._output._validation_metrics);
// compare validation res dev matches R
// sum(binomial()$dev.resids(y=test$CAPSULE,mu=p,wt=1))
// [1]80.92923
// One-sided check: the validation residual deviance may be at most 80.92923 + 1e-2; lower values pass.
assertTrue(80.92923 >= GLMTest.residualDevianceTest(model) - 1e-2);
// compare validation null dev against R
// sum(binomial()$dev.resids(y=test$CAPSULE,mu=.5,wt=1))
// [1] 124.7665
assertEquals(124.7665, GLMTest.nullDevianceTest(model), 1e-4);
// test scoring
// Metrics fetched after scoring the training frame must match the stored training metrics.
scoreTrain = model.score(_prostateTrain);
hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(model, _prostateTrain);
hex.AUC2 adata = mm._auc;
assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
assertEquals(model._output._training_metrics._MSE, mm._MSE, 1e-8);
assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);
// Same comparison for the test frame against the stored validation metrics.
scoreTest = model.score(_prostateTest);
mm = hex.ModelMetricsBinomial.getFromDKV(model, _prostateTest);
adata = mm._auc;
assertEquals(model._output._validation_metrics.auc_obj()._auc, adata._auc, 1e-8);
assertEquals(model._output._validation_metrics._MSE, mm._MSE, 1e-8);
assertEquals(((ModelMetricsBinomialGLM) model._output._validation_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);
} finally {
// NOTE(review): the statements guarded by these null checks are not visible in this excerpt.
if (model != null)
if (scoreTrain != null)
if (scoreTest != null)
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMBasicTestBinomial method testNoInterceptWithOffsetAndWeights.
public void testNoInterceptWithOffsetAndWeights() {
GLMModel model = null;
double[] offset_train = new double[] { -0.39771185, +1.20479170, -0.16374109, -0.97885903, -1.42996530, +0.83474893, +0.83474893, -0.74488827, +0.83474893, +0.86851236, +1.41589611, +1.41589611, -1.42996530, -0.39771185, -2.01111248, -0.39771185, -0.16374109, +0.62364452, -0.39771185, +0.60262749, -0.06143251, -1.42996530, -0.06143251, -0.06143251, +0.14967191, -0.06143251, -0.39771185, +0.14967191, +1.20479170, -0.39771185, -0.16374109, -0.06143251, -0.06143251, -1.42996530, -0.39771185, -0.39771185, -0.64257969, +1.65774729, -0.97885903, -0.39771185, -0.39771185, -0.39771185, -1.42996530, +1.41589611, -0.06143251, -0.06143251, -0.39771185, -0.06143251, -0.06143251, -0.39771185, -0.06143251, +0.14967191, -0.39771185, -1.42996530, -0.39771185, -0.64257969, -0.39771185, -0.06143251, -0.06143251, -0.06143251, -1.42996530, -2.01111248, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -1.42996530, -0.06143251, +1.41589611, +0.14967191, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -0.06143251, -0.39771185, -0.06143251, -1.42996530, -0.06143251, -0.39771185, -1.42996530, -0.06143251, -0.06143251, -0.06143251, -1.42996530, -0.39771185, -1.42996530, -0.43147527, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -0.43147527, -0.39771185, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -1.42996530, -0.39771185, +0.14967191, +1.41589611, -1.42996530, +1.41589611, -1.42996530, +1.41589611, -0.06143251, +0.14967191, -0.39771185, -0.97885903, -1.42996530, -0.39771185, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -0.97885903, -0.06143251, -0.06143251, +0.86851236, -0.39771185, -0.39771185, -0.06143251, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -1.42996530, -1.42996530, -0.39771185, +1.20479170, -1.42996530, -0.39771185, -0.06143251, -1.42996530, -0.97885903, +0.14967191, +0.14967191, -1.42996530, -1.42996530, -0.39771185, -0.06143251, -0.43147527, 
-0.06143251, -0.39771185, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -0.39771185, -0.06143251, -0.39771185, -0.39771185, +0.14967191, -0.06143251, +1.41589611, -0.06143251, -0.39771185, -0.39771185, -0.06143251, -1.42996530, -0.06143251, -1.42996530, -0.39771185, -0.64257969, -0.06143251, +1.20479170, -0.43147527, -0.97885903, -0.39771185, -0.39771185, -0.39771185, +0.14967191, -2.01111248, -1.42996530, -0.06143251, +0.83474893, -1.42996530, -1.42996530, -2.01111248, -1.42996530, -0.06143251, +0.86851236, +0.05524374, -0.39771185, -0.39771185, -0.39771185, +1.41589611, -1.42996530, -0.39771185, -1.42996530, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -1.42996530, -0.39771185, -1.42996530, -1.42996530, -0.39771185, -0.39771185, -0.06143251, -1.42996530, -0.97885903, -1.42996530, -0.39771185, -0.06143251, -0.39771185, -0.06143251, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -0.39771185, +0.14967191, -0.06143251, -1.42996530, -1.42996530, +0.14967191, -0.39771185, -0.39771185, -1.42996530, -0.06143251, -0.06143251, -1.42996530, -0.06143251, -1.42996530, +0.14967191, +1.20479170, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -0.06143251, -1.42996530, -1.42996530, -1.42996530, -0.39771185, -0.39771185, -0.39771185, +0.86851236, -0.06143251, -0.97885903, -0.06143251, -0.64257969, +0.14967191, +0.86851236, -0.39771185, -0.39771185, -0.39771185, -0.64257969, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -0.06143251, +0.14967191, -0.06143251, +0.86851236, -0.97885903, -1.42996530, -1.42996530, -1.42996530, -1.42996530, +0.86851236, +0.14967191, -1.42996530, -0.97885903, -1.42996530, -1.42996530, -0.06143251, +0.14967191, -1.42996530, -0.64257969, -2.01111248, -0.97885903, -0.39771185 };
double[] offset_test = new double[] { +1.65774729, -0.97700971, -0.97700971, -0.97700971, +0.05524374, +0.05524374, +0.05524374, +0.05524374, +0.39152308, +0.39152308, +0.39152308, +0.05524374, +0.05524374, +0.05524374, +0.39152308, -0.97700971, +0.05524374, +1.32146795, +0.39152308, +1.65774729, -0.97700971, +1.65774729, +0.39152308, +0.39152308, +1.65774729, +0.60262749, +0.05524374, +0.05524374, +0.05524374, +0.60262749, +0.05524374, -0.97700971, -0.97885903, +0.05524374, -2.01111248, -0.97700971, +0.05524374, +0.39152308, +0.05524374, +0.60262749, +0.60262749, +0.39152308, +0.60262749, -0.97700971, +0.39152308, +1.65774729, +0.39152308, +0.39152308, +0.05524374, +1.86885170, +0.05524374, -0.97700971, +0.60262749, -0.97700971, +0.60262749, -0.97700971, +0.39152308, -0.97700971, -0.43147527, +1.32146795, +0.05524374, +0.05524374, +0.39152308, +0.39152308, +0.05524374, +0.39152308, -0.97700971, +0.05524374, +0.39152308, +0.05524374, +0.60262749, +1.86885170, +0.05524374, +0.05524374, +1.86885170, +0.60262749, -0.64257969, -0.97700971, +0.60262749, +0.39152308, -0.97700971, -0.97700971, +0.05524374, -0.97700971, -0.97700971, +0.05524374, +0.05524374, +0.60262749, +0.05524374, +0.05524374 };
// random observation weights, integers in 0 - 9 range
double[] weights_train = new double[] { 0, 6, 5, 4, 4, 8, 2, 4, 9, 5, 2, 0, 0, 4, 0, 0, 6, 3, 6, 5, 5, 5, 6, 0, 9, 9, 8, 6, 6, 5, 6, 1, 0, 6, 8, 6, 9, 2, 8, 0, 3, 0, 2, 3, 0, 2, 5, 0, 0, 3, 7, 4, 8, 4, 1, 9, 3, 7, 1, 3, 8, 6, 9, 5, 5, 1, 9, 5, 2, 1, 0, 6, 4, 0, 5, 3, 1, 2, 4, 0, 7, 9, 6, 8, 0, 2, 3, 7, 5, 8, 3, 4, 7, 8, 1, 2, 5, 7, 3, 7, 1, 1, 5, 7, 4, 9, 2, 6, 3, 5, 4, 9, 8, 1, 8, 5, 3, 0, 4, 5, 1, 2, 2, 7, 8, 3, 4, 9, 0, 1, 3, 9, 8, 7, 0, 8, 2, 7, 1, 9, 0, 7, 7, 5, 2, 9, 7, 6, 4, 3, 4, 6, 9, 1, 5, 0, 7, 9, 4, 1, 6, 8, 8, 5, 4, 2, 5, 9, 8, 1, 9, 2, 9, 2, 3, 0, 6, 7, 3, 2, 3, 0, 9, 5, 1, 8, 0, 2, 8, 6, 9, 5, 1, 2, 3, 1, 3, 5, 0, 7, 4, 0, 5, 5, 7, 9, 3, 0, 0, 0, 1, 5, 3, 2, 8, 9, 9, 1, 6, 2, 2, 0, 5, 5, 6, 2, 8, 8, 9, 8, 5, 0, 1, 5, 3, 0, 2, 5, 4, 0, 6, 5, 4, 5, 9, 7, 5, 6, 2, 2, 6, 2, 5, 1, 5, 9, 0, 3, 0, 2, 7, 0, 4, 7, 7, 9, 3, 7, 9, 7, 9, 6, 2, 6, 2, 2, 9, 0, 9, 8, 1, 2, 6, 3, 4, 1, 2, 2, 3, 0 };
Vec offsetVecTrain = _prostateTrain.anyVec().makeZero();
try (Vec.Writer vw = {
for (int i = 0; i < offset_train.length; ++i) vw.set(i, offset_train[i]);
Vec weightsVecTrain = _prostateTrain.anyVec().makeZero();
try (Vec.Writer vw = {
for (int i = 0; i < weights_train.length; ++i) vw.set(i, weights_train[i]);
Vec offsetVecTest = _prostateTest.anyVec().makeZero();
try (Vec.Writer vw = {
for (int i = 0; i < offset_test.length; ++i) vw.set(i, offset_test[i]);
Frame fTrain = new Frame(Key.<Frame>make("prostate_with_offset_train"), new String[] { "offset", "weights" }, new Vec[] { offsetVecTrain, weightsVecTrain });
fTrain.add(_prostateTrain.names(), _prostateTrain.vecs());
Frame fTest = new Frame(Key.<Frame>make("prostate_with_offset_test"), new String[] { "offset" }, new Vec[] { offsetVecTest });
fTest.add(_prostateTest.names(), _prostateTest.vecs());
// Call: glm(formula = CAPSULE ~ . - ID - RACE - DCAPS - DPROS - 1, family = binomial,
// data = train, weights = w, offset = offset_train)
// Coefficients:
// -0.070637 0.034939 -0.006326 0.645700
// Degrees of Freedom: 252 Total (i.e. Null); 248 Residual
// Null Deviance: 1494
// Residual Deviance: 1235 AIC: 1243
String[] cfs1 = new String[] { "Intercept", "AGE", "PSA", "VOL", "GLEASON" };
double[] vals = new double[] { 0, -0.070637, 0.034939, -0.006326, 0.645700 };
GLMParameters params = new GLMParameters(Family.binomial);
params._response_column = "CAPSULE";
params._ignored_columns = new String[] { "ID", "RACE", "DPROS", "DCAPS" };
params._train = fTrain._key;
params._offset_column = "offset";
params._weights_column = "weights";
params._lambda = new double[] { 0 };
params._alpha = new double[] { 0 };
params._standardize = false;
params._objective_epsilon = 0;
params._gradient_epsilon = 1e-6;
// not expected to reach max iterations here
params._max_iterations = 100;
params._intercept = false;
params._beta_epsilon = 1e-6;
try {
for (Solver s : new Solver[] { Solver.IRLSM, Solver.L_BFGS, Solver.COORDINATE_DESCENT }) {
Frame scoreTrain = null, scoreTest = null;
try {
params._solver = s;
params._valid = fTest._key;
System.out.println("SOLVER = " + s);
try {
model = new GLM(params, Key.<GLMModel>make("prostate_model")).trainModel().get();
} catch (Exception iae) {
assertTrue(iae.getMessage().contains("Test/Validation dataset is missing weights column"));
params._valid = null;
model = new GLM(params, Key.<GLMModel>make("prostate_model")).trainModel().get();
HashMap<String, Double> coefs = model.coefficients();
System.out.println("coefs = " + coefs);
boolean CD = s == Solver.COORDINATE_DESCENT;
for (int i = 0; i < cfs1.length; ++i) assertEquals(vals[i], coefs.get(cfs1[i]), CD ? 1e-2 : 1e-4);
assertEquals(1494, GLMTest.nullDeviance(model), 1);
assertEquals(1235, GLMTest.residualDeviance(model), 1);
assertEquals(252, GLMTest.nullDOF(model), 0);
assertEquals(248, GLMTest.resDOF(model), 0);
assertEquals(1243, GLMTest.aic(model), 1);
// test scoring
try {
scoreTrain = model.score(_prostateTrain);
assertTrue("shoul've thrown IAE", false);
} catch (IllegalArgumentException iae) {
assertTrue(iae.getMessage().contains("Test/Validation dataset is missing"));
hex.ModelMetricsBinomialGLM mmTrain = (ModelMetricsBinomialGLM) hex.ModelMetricsBinomial.getFromDKV(model, fTrain);
hex.AUC2 adata = mmTrain._auc;
assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, mmTrain._resDev, 1e-8);
scoreTrain = model.score(fTrain);
mmTrain = (ModelMetricsBinomialGLM) hex.ModelMetricsBinomial.getFromDKV(model, fTrain);
adata = mmTrain._auc;
assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, mmTrain._resDev, 1e-8);
// scoreTest = model.score(fTest);
// ModelMetricsBinomialGLM mmTest = (ModelMetricsBinomialGLM)hex.ModelMetricsBinomial.getFromDKV(model, fTest);
// adata = mmTest._auc;
// assertEquals(model._output._validation_metrics.auc()._auc, adata._auc, 1e-8);
// assertEquals(model._output._validation_metrics._MSE, mmTest._MSE, 1e-8);
// assertEquals(((ModelMetricsBinomialGLM) model._output._validation_metrics)._resDev, mmTest._resDev, 1e-8);
// // test the actual predictions
// Vec preds = scoreTest.vec("p1");
// for(int i = 0; i < pred_test.length; ++i)
// assertEquals(pred_test[i],,1e-6);
} finally {
if (model != null)
if (scoreTrain != null)
if (scoreTest != null)
} finally {