Use of hex.ModelMetricsRegressionGLM in project h2o-3 by h2oai.
From the class GLMBasicTestRegression, method testPoissonWithOffset.
/**
 * Trains a Poisson GLM on the {@code _canCarTrain} frame with {@code Claims} as the response and
 * {@code logInsured} as the offset column, once for every solver except
 * {@code COORDINATE_DESCENT_NAIVE}, and compares the fit with the reference R output quoted in
 * the method body.
 *
 * <p>For each solver the test checks the coefficients, null/residual deviance, degrees of freedom
 * and AIC. It then verifies that scoring a frame lacking the offset column raises an
 * {@link IllegalArgumentException}, and that the metrics stored for scoring the training frame
 * agree with the model's training metrics.
 */
@Test
public void testPoissonWithOffset() {
    // Reference output from R:
    // Call: glm(formula = formula, family = poisson, data = D)
    //
    // Coefficients:
    // (Intercept) Merit1 Merit2 Merit3 Class2 Class3 Class4 Class5
    // -2.0357 -0.1378 -0.2207 -0.4930 0.2998 0.4691 0.5259 0.2156
    //
    // Degrees of Freedom: 19 Total (i.e. Null); 12 Residual
    // Null Deviance: 33850
    // Residual Deviance: 579.5 AIC: 805.9
    String[] cfs1 = new String[] { "Intercept", "Merit.1", "Merit.2", "Merit.3", "Class.2", "Class.3", "Class.4", "Class.5" };
    double[] vals = new double[] { -2.0357, -0.1378, -0.2207, -0.4930, 0.2998, 0.4691, 0.5259, 0.2156 };
    GLMParameters parms = new GLMParameters(Family.poisson);
    parms._train = _canCarTrain._key;
    parms._ignored_columns = new String[] { "Insured", "Premium", "Cost" };
    // "response_column":"Claims","offset_column":"logInsured"
    parms._response_column = "Claims";
    parms._offset_column = "logInsured";
    // No standardization and no regularization (lambda = 0), so the fit is compared directly
    // against the R coefficients above.
    parms._standardize = false;
    parms._lambda = new double[] { 0 };
    parms._alpha = new double[] { 0 };
    parms._objective_epsilon = 0;
    parms._beta_epsilon = 1e-6;
    parms._gradient_epsilon = 1e-10;
    parms._max_iterations = 1000;
    for (Solver s : GLMParameters.Solver.values()) {
        // skip for now, does not handle zero columns (introduced by extra missing bucket with no missing in the dataset)
        if (s == Solver.COORDINATE_DESCENT_NAIVE) {
            continue;
        }
        // Declared per iteration so the cleanup below only ever touches objects created by this
        // solver run, never ones a previous iteration has already deleted.
        GLMModel model = null;
        Frame scoreTrain = null;
        try {
            parms._solver = s;
            model = new GLM(parms).trainModel().get();
            HashMap<String, Double> coefs = model.coefficients();
            System.out.println("coefs = " + coefs);
            for (int i = 0; i < cfs1.length; ++i) {
                Double actual = coefs.get(cfs1[i]);
                // Fail with a readable message instead of an unboxing NullPointerException.
                assertTrue("missing coefficient " + cfs1[i] + " for solver " + s, actual != null);
                assertEquals(vals[i], actual, 1e-4);
            }
            assertEquals(33850, GLMTest.nullDeviance(model), 5);
            assertEquals(579.5, GLMTest.residualDeviance(model), 1e-4 * 579.5);
            assertEquals(19, GLMTest.nullDOF(model), 0);
            assertEquals(12, GLMTest.resDOF(model), 0);
            assertEquals(805.9, GLMTest.aic(model), 1e-4 * 805.9);
            // test scoring: a frame without the offset column must be rejected
            try {
                Frame fr = new Frame(_canCarTrain.names(), _canCarTrain.vecs());
                fr.remove(parms._offset_column);
                // Assigned to scoreTrain so the finally block cleans it up if scoring
                // unexpectedly succeeds.
                scoreTrain = model.score(fr);
                assertTrue("should've thrown IAE", false);
            } catch (IllegalArgumentException iae) {
                assertTrue(iae.getMessage().contains("Test/Validation dataset is missing offset column"));
            }
            // Scoring the full training frame must reproduce the training metrics.
            scoreTrain = model.score(_canCarTrain);
            hex.ModelMetricsRegressionGLM mmTrain = (ModelMetricsRegressionGLM) hex.ModelMetricsRegression.getFromDKV(model, _canCarTrain);
            assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
            assertEquals(GLMTest.residualDeviance(model), mmTrain._resDev, 1e-8);
            assertEquals(GLMTest.nullDeviance(model), mmTrain._nullDev, 1e-8);
        } finally {
            if (model != null) {
                model.delete();
            }
            if (scoreTrain != null) {
                scoreTrain.delete();
            }
        }
    }
}
Use of hex.ModelMetricsRegressionGLM in project h2o-3 by h2oai.
From the class GLMBasicTestRegression, method testTweedie.
/**
 * Trains Tweedie GLMs on the {@code _earinf} frame with {@code Infections} as the response, for
 * each variance power in {@code varPow} (link power set to {@code 1 - p}) and every solver except
 * {@code COORDINATE_DESCENT_NAIVE}, and compares each fit with the reference R output quoted in
 * the method body.
 *
 * <p>For each combination the test checks the coefficients, null/residual deviance and degrees of
 * freedom, then scores the training frame, runs {@code testJavaScoring} on the predictions, and
 * verifies that the metrics stored for that scoring agree with the model's training metrics.
 */
@Test
public void testTweedie() {
    // -------------------------------------- R examples output ----------------------------------------------------------------
    // Call: glm(formula = Infections ~ ., family = tweedie(0), data = D)
    //
    // Coefficients:
    // (Intercept) SwimmerOccas LocationNonBeach Age20-24 Age25-29 SexMale
    // 0.8910 0.8221 0.7266 -0.5033 -0.2679 -0.1056
    //
    // Degrees of Freedom: 286 Total (i.e. Null); 281 Residual
    // Null Deviance: 1564
    // Residual Deviance: 1469 AIC: NA
    // Call: glm(formula = Infections ~ ., family = tweedie(1), data = D)
    //
    // Coefficients:
    // (Intercept) SwimmerOccas LocationNonBeach Age20-24 Age25-29 SexMale
    // -0.12261 0.61149 0.53454 -0.37442 -0.18973 -0.08985
    //
    // Degrees of Freedom: 286 Total (i.e. Null); 281 Residual
    // Null Deviance: 824.5
    // Residual Deviance: 755.4 AIC: NA
    // Call: glm(formula = Infections ~ ., family = tweedie(1.25), data = D)
    //
    // Coefficients:
    // (Intercept) SwimmerOccas LocationNonBeach Age20-24 Age25-29 SexMale
    // 1.02964 -0.14079 -0.12200 0.08502 0.04269 0.02105
    //
    // Degrees of Freedom: 286 Total (i.e. Null); 281 Residual
    // Null Deviance: 834.2
    // Residual Deviance: 770.8 AIC: NA
    // Call: glm(formula = Infections ~ ., family = tweedie(1.5), data = D)
    //
    // Coefficients:
    // (Intercept) SwimmerOccas LocationNonBeach Age20-24 Age25-29 SexMale
    // 1.05665 -0.25891 -0.22185 0.15325 0.07624 0.03908
    //
    // Degrees of Freedom: 286 Total (i.e. Null); 281 Residual
    // Null Deviance: 967
    // Residual Deviance: 908.9 AIC: NA
    // Call: glm(formula = Infections ~ ., family = tweedie(1.75), data = D)
    //
    // Coefficients:
    // (Intercept) SwimmerOccas LocationNonBeach Age20-24 Age25-29 SexMale
    // 1.08076 -0.35690 -0.30154 0.20556 0.10122 0.05375
    //
    // Degrees of Freedom: 286 Total (i.e. Null); 281 Residual
    // Null Deviance: 1518
    // Residual Deviance: 1465 AIC: NA
    // Call: glm(formula = Infections ~ ., family = tweedie(2), data = D)
    //
    // Coefficients:
    // (Intercept) SwimmerOccas LocationNonBeach Age20-24 Age25-29 SexMale
    // 1.10230 -0.43751 -0.36337 0.24318 0.11830 0.06467
    //
    // Degrees of Freedom: 286 Total (i.e. Null); 281 Residual
    // Null Deviance: 964.4
    // Residual Deviance: 915.7 AIC: NA
    // ---------------------------------------------------------------------------------------------------------------------------
    String[] cfs1 = new String[] { "Intercept", "Swimmer.Occas", "Location.NonBeach", "Age.20-24", "Age.25-29", "Sex.Male" };
    // Row x of vals / nullDev / resDev holds the R reference values for variance power varPow[x].
    double[][] vals = new double[][] { { 0.89100, 0.82210, 0.72660, -0.50330, -0.26790, -0.10560 }, { -0.12261, 0.61149, 0.53454, -0.37442, -0.18973, -0.08985 }, { 1.02964, -0.14079, -0.12200, 0.08502, 0.04269, 0.02105 }, { 1.05665, -0.25891, -0.22185, 0.15325, 0.07624, 0.03908 }, { 1.08076, -0.35690, -0.30154, 0.20556, 0.10122, 0.05375 }, { 1.10230, -0.43751, -0.36337, 0.24318, 0.11830, 0.06467 } };
    int dof = 286, res_dof = 281;
    double[] nullDev = new double[] { 1564, 824.5, 834.2, 967.0, 1518, 964.4 };
    double[] resDev = new double[] { 1469, 755.4, 770.8, 908.9, 1465, 915.7 };
    double[] varPow = new double[] { 0, 1.0, 1.25, 1.5, 1.75, 2.0 };
    GLMParameters parms = new GLMParameters(Family.tweedie);
    parms._train = _earinf._key;
    parms._ignored_columns = new String[] {};
    // "response_column":"Infections" (this test sets no offset column)
    parms._response_column = "Infections";
    // No standardization and no regularization (lambda = 0), so the fit is compared directly
    // against the R coefficients above.
    parms._standardize = false;
    parms._lambda = new double[] { 0 };
    parms._alpha = new double[] { 0 };
    parms._gradient_epsilon = 1e-10;
    parms._max_iterations = 1000;
    parms._objective_epsilon = 0;
    parms._beta_epsilon = 1e-6;
    for (int x = 0; x < varPow.length; ++x) {
        double p = varPow[x];
        parms._tweedie_variance_power = p;
        parms._tweedie_link_power = 1 - p;
        for (Solver s : GLMParameters.Solver.values()) {
            // ignore for now, has trouble with zero columns
            if (s == Solver.COORDINATE_DESCENT_NAIVE) {
                continue;
            }
            // Declared per iteration so the cleanup below only ever touches objects created by
            // this run, never ones a previous iteration has already deleted.
            GLMModel model = null;
            Frame scoreTrain = null;
            try {
                parms._solver = s;
                model = new GLM(parms).trainModel().get();
                HashMap<String, Double> coefs = model.coefficients();
                System.out.println("coefs = " + coefs);
                for (int i = 0; i < cfs1.length; ++i) {
                    Double actual = coefs.get(cfs1[i]);
                    // Fail with a readable message instead of an unboxing NullPointerException.
                    assertTrue("missing coefficient " + cfs1[i] + " for solver " + s + ", variance power " + p, actual != null);
                    assertEquals(vals[x][i], actual, 1e-4);
                }
                assertEquals(nullDev[x], (GLMTest.nullDeviance(model)), 5e-4 * nullDev[x]);
                assertEquals(resDev[x], (GLMTest.residualDeviance(model)), 5e-4 * resDev[x]);
                assertEquals(dof, GLMTest.nullDOF(model), 0);
                assertEquals(res_dof, GLMTest.resDOF(model), 0);
                // test scoring
                scoreTrain = model.score(_earinf);
                assertTrue(model.testJavaScoring(_earinf, scoreTrain, 1e-8));
                // Scoring the training frame must reproduce the training metrics.
                hex.ModelMetricsRegressionGLM mmTrain = (ModelMetricsRegressionGLM) hex.ModelMetricsRegression.getFromDKV(model, _earinf);
                assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
                assertEquals(GLMTest.residualDeviance(model), mmTrain._resDev, 1e-8);
                assertEquals(GLMTest.nullDeviance(model), mmTrain._nullDev, 1e-8);
            } finally {
                if (model != null) {
                    model.delete();
                }
                if (scoreTrain != null) {
                    scoreTrain.delete();
                }
            }
        }
    }
}
Aggregations