use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testSynthetic.
/**
 * Fits a binomial GLM (lambda = 0, no standardization, at most 20 iterations) on
 * {@code smalldata/glm_test/glm_test2.csv}, ignoring the {@code ID} column, and checks its AUC.
 *
 * <p>Two checks are made: the value returned by {@code auc(model)} must be 1 (within 1e-4),
 * and the AUC stored in the binomial metrics looked up for (model, frame) after scoring the
 * training frame must agree with it (within 1e-2).
 *
 * @throws Exception if parsing, training or scoring throws
 */
@Test
public void testSynthetic() throws Exception {
  GLMModel model = null;
  Frame fr = parse_test_file("smalldata/glm_test/glm_test2.csv");
  Frame score = null;
  try {
    Scope.enter();
    GLMParameters params = new GLMParameters(Family.binomial);
    params._response_column = "response";
    params._ignored_columns = new String[] { "ID" };
    params._train = fr._key;
    params._lambda = new double[] { 0 };
    params._standardize = false;
    params._max_iterations = 20;
    GLM glm = new GLM(params);
    model = glm.trainModel().get();
    double[] beta = model.beta();
    System.out.println("beta = " + Arrays.toString(beta));
    // assertEquals takes (expected, actual, delta); keeping that order makes a failure
    // message report the constant 1 as the expectation and the model's AUC as the actual value.
    assertEquals(1.0, auc(model), 1e-4);
    // Scoring the training frame is what makes the metrics below available for lookup.
    score = model.score(fr);
    hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(model, fr);
    hex.AUC2 adata = mm._auc;
    assertEquals(auc(model), adata._auc, 1e-2);
  } finally {
    fr.remove();
    if (model != null) {
      model.delete();
    }
    if (score != null) {
      score.delete();
    }
    Scope.exit();
  }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testCars.
//------------ TEST on selected files from smalldata and compare to R results ------------------------------------
/**
 * Fits unregularized (lambda = 0) GLMs with the poisson, gamma and gaussian families on
 * {@code smalldata/junit/cars.csv}, predicting {@code "power (hp)"} from the remaining columns
 * (the {@code name} column is ignored, rows with missing values are skipped).
 *
 * <p>For each family the fitted coefficients are compared, within 1e-4, against hard-coded
 * reference values (per the original author's note, taken from the standard R glm
 * implementation). The poisson and gamma models are additionally passed through
 * {@code testScoring}.
 *
 * @throws ExecutionException   declared for the model-building calls
 * @throws InterruptedException declared for the model-building calls
 */
@Test
public void testCars() throws InterruptedException, ExecutionException {
  Scope.enter();
  Key parsed = Key.make("cars_parsed");
  Frame fr = null;
  GLMModel model = null;
  try {
    fr = parse_test_file(parsed, "smalldata/junit/cars.csv");

    // --- poisson ---
    GLMParameters params = new GLMParameters(Family.poisson, Family.poisson.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
    params._response_column = "power (hp)";
    params._ignored_columns = new String[] { "name" };
    params._train = parsed;
    params._lambda = new double[] { 0 };
    params._alpha = new double[] { 0 };
    params._missing_values_handling = MissingValuesHandling.Skip;
    model = new GLM(params).trainModel().get();
    HashMap<String, Double> coefs = model.coefficients();
    // Coefficient names shared by all three comparisons; each vlsN array is aligned with it.
    String[] cfs1 = new String[] { "Intercept", "economy (mpg)", "cylinders", "displacement (cc)", "weight (lb)", "0-60 mph (s)", "year" };
    double[] vls1 = new double[] { 4.9504805, -0.0095859, -0.0063046, 0.0004392, 0.0001762, -0.0469810, 0.0002891 };
    for (int i = 0; i < cfs1.length; ++i) {
      assertEquals(vls1[i], coefs.get(cfs1[i]), 1e-4);
    }
    testScoring(model, fr);
    model.delete();
    model = null; // already deleted; keeps finally from deleting it again if the next fit throws

    // --- gamma ---
    double[] vls2 = new double[] { 8.992e-03, 1.818e-04, -1.125e-04, 1.505e-06, -1.284e-06, 4.510e-04, -7.254e-05 };
    params = new GLMParameters(Family.gamma, Family.gamma.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
    params._response_column = "power (hp)";
    params._ignored_columns = new String[] { "name" };
    params._train = parsed;
    params._lambda = new double[] { 0 };
    params._beta_epsilon = 1e-5;
    params._missing_values_handling = MissingValuesHandling.Skip;
    model = new GLM(params).trainModel().get();
    coefs = model.coefficients();
    for (int i = 0; i < cfs1.length; ++i) {
      assertEquals(vls2[i], coefs.get(cfs1[i]), 1e-4);
    }
    testScoring(model, fr);
    model.delete();
    model = null; // see above

    // --- gaussian ---
    double[] vls3 = new double[] { 166.95862, -0.00531, -2.46690, 0.12635, 0.02159, -4.66995, -0.85724 };
    params = new GLMParameters(Family.gaussian);
    params._response_column = "power (hp)";
    params._ignored_columns = new String[] { "name" };
    params._train = parsed;
    params._lambda = new double[] { 0 };
    params._missing_values_handling = MissingValuesHandling.Skip;
    model = new GLM(params).trainModel().get();
    coefs = model.coefficients();
    for (int i = 0; i < cfs1.length; ++i) {
      assertEquals(vls3[i], coefs.get(cfs1[i]), 1e-4);
    }
    // NOTE(review): unlike the poisson and gamma fits, the gaussian model is not passed
    // through testScoring; the original only left a "test scoring" placeholder here.
    // Confirm whether that omission is intentional.
  } finally {
    if (fr != null) {
      fr.delete();
    }
    if (model != null) {
      model.delete();
    }
    Scope.exit();
  }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testCitibikeReproPUBDEV1953.
/**
 * Trains a poisson GLM on the small citibike training set with the small citibike test set
 * as validation frame (response column {@code bikes}), then runs {@code testScoring} on the
 * validation frame. Going by the method name, this reproduces issue PUBDEV-1953.
 *
 * @throws Exception if parsing, training or scoring throws
 */
@Test
public void testCitibikeReproPUBDEV1953() throws Exception {
  GLMModel fitted = null;
  Frame trainFrame = parse_test_file("smalldata/glm_test/citibike_small_train.csv");
  Frame validFrame = parse_test_file("smalldata/glm_test/citibike_small_test.csv");
  try {
    Scope.enter();

    GLMParameters glmParams = new GLMParameters(Family.poisson);
    glmParams._family = Family.poisson;
    glmParams._response_column = "bikes";
    glmParams._train = trainFrame._key;
    glmParams._valid = validFrame._key;

    fitted = new GLM(glmParams).trainModel().get();
    testScoring(fitted, validFrame);
  } finally {
    // Frames first, then the model (if training got that far), then close the scope.
    trainFrame.remove();
    validFrame.remove();
    if (fitted != null) {
      fitted.delete();
    }
    Scope.exit();
  }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method test_COD_Airlines_SingleLambda_CovUpdates.
/**
 * Trains a binomial GLM with the coordinate-descent solver on the airlines training data
 * (alpha = 1, lambda = 0.01, no standardization, lambda search enabled), using the same frame
 * for training and validation and ignoring {@code IsDepDelayed_REC}.
 *
 * <p>The test currently makes no assertion on the fitted model; the objective-value check is
 * disabled (see the commented-out lines below), so it only verifies that training completes.
 */
@Test
public void test_COD_Airlines_SingleLambda_CovUpdates() {
  GLMModel model1 = null;
  // Distance + Origin + Dest + UniqueCarrier
  Frame fr = parse_test_file(Key.make("Airlines"), "smalldata/airlines/AirlinesTrain.csv.zip");
  String[] ignoredCols = new String[] { "IsDepDelayed_REC" };
  try {
    Scope.enter();
    GLMParameters params = new GLMParameters(Family.binomial);
    params._response_column = "IsDepDelayed";
    params._ignored_columns = ignoredCols;
    params._train = fr._key;
    params._valid = fr._key;
    params._lambda = new double[] { 0.01 };
    params._alpha = new double[] { 1 };
    params._standardize = false;
    params._solver = Solver.COORDINATE_DESCENT;
    // NOTE(review): the method name says "SingleLambda" but lambda search is switched on; confirm intent.
    params._lambda_search = true;
    GLM glm = new GLM(params);
    model1 = glm.trainModel().get();
    // The penalties below only feed the disabled objective check; they are still computed so
    // that this edit does not change what the test exercises.
    double[] beta = model1.beta();
    double l1pen = ArrayUtils.l1norm(beta, true);
    double l2pen = ArrayUtils.l2norm2(beta, true);
    // double objective = job.likelihood()/model1._nobs +
    // params._l2pen[params._l2pen.length-1]*params._alpha[0]*l1pen + params._l2pen[params._l2pen.length-1]*(1-params._alpha[0])*l2pen/2 ;
    // System.out.println( " objective value " + objective);
    // assertEquals(0.670921, objective,1e-2);
  } finally {
    fr.delete();
    if (model1 != null) {
      model1.delete();
    }
    // Balance the Scope.enter() above; the original left the scope open.
    Scope.exit();
  }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testAirlines.
/**
 * Tests categorical auto-expansion on the airlines data, which has several categorical columns:
 * models are trained once on the explicitly expanded data ({@code AirlinesTrainMM}) and once on
 * the raw data expanded by H2O ({@code AirlinesTrain}), and the results are compared.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>gaussian fit on the raw frame (model1): training residual deviance must match the
 *       metrics obtained by scoring the same frame;</li>
 *   <li>gaussian fit on the expanded frame (model2): coefficients must match model1 within 1e-4
 *       after mapping the expanded column names;</li>
 *   <li>the Gram matrix and X'y computed by {@code GLMIterationTask} on the expanded frame are
 *       compared against the values stored in {@code gram_std.csv};</li>
 *   <li>binomial/logit fits on both frames (model3, model4): null and residual deviances must
 *       agree;</li>
 *   <li>gaussian deviances are compared against hard-coded reference values (per the original
 *       comment, from glmnet).</li>
 * </ol>
 */
@Test
public void testAirlines() {
  GLMModel model1 = null, model2 = null, model3 = null, model4 = null;
  Frame frMM = parse_test_file(Key.make("AirlinesMM"), "smalldata/airlines/AirlinesTrainMM.csv.zip");
  Frame frG = parse_test_file(Key.make("gram"), "smalldata/airlines/gram_std.csv", true);
  // xy is detached from frG here, so frG.delete() no longer covers it; it is removed in finally.
  Vec xy = frG.remove("xy");
  frMM.remove("C1").remove();
  Vec v;
  // Re-add a copy of the response column and drop the original vec
  // (presumably to make the response the last column - confirm against Frame.add).
  frMM.add("IsDepDelayed", (v = frMM.remove("IsDepDelayed")).makeCopy(null));
  v.remove();
  DKV.put(frMM._key, frMM);
  Frame fr = parse_test_file(Key.make("Airlines"), "smalldata/airlines/AirlinesTrain.csv.zip"), res = null;
  fr.add("IsDepDelayed", (v = fr.remove("IsDepDelayed")).makeCopy(null));
  v.remove();
  DKV.put(fr._key, fr);
  // Distance + Origin + Dest + UniqueCarrier
  String[] ignoredCols = new String[] { "fYear", "fMonth", "fDayofMonth", "fDayOfWeek", "DepTime", "ArrTime", "IsDepDelayed_REC" };
  try {
    Scope.enter();
    GLMParameters params = new GLMParameters(Family.gaussian);
    params._response_column = "IsDepDelayed";
    params._ignored_columns = ignoredCols;
    params._train = fr._key;
    params._lambda = new double[] { 0 };
    params._alpha = new double[] { 0 };
    params._standardize = false;
    params._use_all_factor_levels = false;
    model1 = new GLM(params).trainModel().get();
    testScoring(model1, fr);
    Frame score1 = model1.score(fr);
    ModelMetricsRegressionGLM mm = (ModelMetricsRegressionGLM) ModelMetrics.getFromDKV(model1, fr);
    Assert.assertEquals(((ModelMetricsRegressionGLM) model1._output._training_metrics)._resDev, mm._resDev, 1e-4);
    Assert.assertEquals(((ModelMetricsRegressionGLM) model1._output._training_metrics)._resDev, mm._MSE * score1.numRows(), 1e-4);
    score1.delete();
    mm.remove();
    res = model1.score(fr);
    // Train the same gaussian model on the explicitly expanded frame.
    params._train = frMM._key;
    params._ignored_columns = new String[] { "X" };
    model2 = new GLM(params).trainModel().get();
    HashMap<String, Double> coefs1 = model1.coefficients();
    testScoring(model2, frMM);
    HashMap<String, Double> coefs2 = model2.coefficients();
    boolean failed = false;
    // Compare the two coefficient sets. Expanded columns are named "<column><level>" while
    // model1 uses "<column>.<level>", so the prefix is rewritten before the lookup. All
    // mismatches are printed before failing, instead of stopping at the first one.
    for (String s : coefs2.keySet()) {
      String s1 = s;
      if (s.startsWith("Origin"))
        s1 = "Origin." + s.substring(6);
      if (s.startsWith("Dest"))
        s1 = "Dest." + s.substring(4);
      if (s.startsWith("UniqueCarrier"))
        s1 = "UniqueCarrier." + s.substring(13);
      if (Math.abs(coefs1.get(s1) - coefs2.get(s)) > 1e-4) {
        System.out.println("coeff " + s1 + " differs, " + coefs1.get(s1) + " != " + coefs2.get(s));
        failed = true;
      }
    }
    assertFalse(failed);
    params._standardize = true;
    params._train = frMM._key;
    params._use_all_factor_levels = true;
    // test the gram
    DataInfo dinfo = new DataInfo(frMM, null, 1, true, DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
    GLMIterationTask glmt = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), null).doAll(dinfo._adaptedFrame);
    // Only the lower triangle (j <= i) is checked; column j of frG holds column j of the expected Gram.
    for (int i = 0; i < glmt._xy.length; ++i) {
      for (int j = 0; j <= i; ++j) {
        assertEquals(frG.vec(j).at(i), glmt._gram.get(i, j), 1e-5);
      }
      assertEquals(xy.at(i), glmt._xy[i], 1e-5);
    }
    // Switch to binomial/logit on a cloned parameter object and fit on both frames.
    params = (GLMParameters) params.clone();
    params._standardize = false;
    params._family = Family.binomial;
    params._link = Link.logit;
    model3 = new GLM(params).trainModel().get();
    testScoring(model3, frMM);
    params._train = fr._key;
    params._ignored_columns = ignoredCols;
    model4 = new GLM(params).trainModel().get();
    testScoring(model4, fr);
    assertEquals(nullDeviance(model3), nullDeviance(model4), 1e-4);
    assertEquals(residualDeviance(model4), residualDeviance(model3), nullDeviance(model3) * 1e-3);
    assertEquals(nullDeviance(model1), nullDeviance(model2), 1e-4);
    assertEquals(residualDeviance(model1), residualDeviance(model2), 1e-4);
    // assertEquals(val1._aic, val2._aic,1e-2);
    // compare result against glmnet
    assertEquals(5336.918, residualDeviance(model1), 1);
    assertEquals(6051.613, nullDeviance(model2), 1);
    // lbfgs
    // params._solver = Solver.L_BFGS;
    // params._train = fr._key;
    // params._lambda = new double[]{.3};
    // model3 = new GLM(params,glmkey("lbfgs_cat")).trainModel().get();
    // params._train = frMM._key;
    // model4 = new GLM(params,glmkey("lbfgs_mm")).trainModel().get();
    // HashMap<String, Double> coefs3 = model3.coefficients();
    // HashMap<String, Double> coefs4 = model4.coefficients();
    // // compare against each other
    // for(String s:coefs4.keySet()) {
    // String s1 = s;
    // if(s.startsWith("Origin"))
    // s1 = "Origin." + s.substring(6);
    // if(s.startsWith("Dest"))
    // s1 = "Dest." + s.substring(4);
    // if(s.startsWith("UniqueCarrier"))
    // s1 = "UniqueCarrier." + s.substring(13);
    // assertEquals("coeff " + s1 + " differs, " + coefs3.get(s1) + " != " + coefs4.get(s), coefs3.get(s1), coefs4.get(s),1e-4);
    // }
  } finally {
    // Removed here rather than mid-test so the detached vec is cleaned up even when an
    // earlier training call or assertion throws.
    if (xy != null)
      xy.remove();
    fr.delete();
    frMM.delete();
    frG.delete();
    if (res != null)
      res.delete();
    if (model1 != null)
      model1.delete();
    if (model2 != null)
      model2.delete();
    if (model3 != null)
      model3.delete();
    if (model4 != null)
      model4.delete();
    Scope.exit();
  }
}
Aggregations