use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMBasicTestRegression method testSparse.
/**
 * Trains a gaussian GLM (IRLSM solver, alpha 0, lambda 1e-2, standardized predictors) on the
 * shared {@code _airlinesMM} frame with a constant weights column attached, and checks every
 * normalized coefficient against stored reference values with a relative tolerance of 1e-8.
 */
@Test
public void testSparse() {
    // Reference values for the normalized coefficients, indexed in the model's coefficient order.
    double[] expectedNormBeta = new double[] { 0.0233151691783671, -0.00543776852277619, -0.0137359312181047, 0.00770037200907652, 0.0328856331139761, -0.0242845468071283, -0.0101698117745265, 0.00868844870137727, 0.000349121513384513, -0.0106962199761512, -0.00705001448025939, 0.00821574637914086, 0.00601015905212279, 0.0021278467162546, -0.0233079168835112, 0.00535473896013676, -0.00897667301004576, 0.00788272228017582, 0.00237442711371947, -0.013136425134371, 0.00134003869245749, 0.0240118046676911, 0.000607214787933269, -0.0112908513868027, 0.000443119443631777, 0.00749330452744921, -0.00558704122833295, 0.000533036850835694, 0.0130008059852934, -4.40634889376063e-05, -0.00580285872202347, 0.0117029111583238, -0.00685480666428133, 0.00809526311326634, -0.0088567165389072, -0.0363126456378731, -0.00267237519808936, -0.01669554043682, 0.00556943053195684, 0.0178196407614288, -0.000903204442155076, -0.0085363297586185, -0.00421147221966977, -0.00828702756129772, 0.017027928644479, 0.00710126315700672, 0.019819043342772, -0.0165232485929677, 0.00439570108491533, 0.0188325734374437, 0.00799712968759025, -0.0100388875424171, -0.0062415137856855, -0.00258013659839137, -6.58516379178382e-05, 0.0135032332096949, -0.00776869619293087, -0.00544035128543343, -0.0110626226606883, -0.00768490011210769, -0.00684181016695251, -0.0144627862333649, -0.0262830557415184, -0.0102290180164706, 0.00368252955770187, 0.015824495748353, 0.00383484095683782, 0.0151193905626625, -0.00615077094420626, 0.0142842231522414, 0.00150448184871646, 0.0521491615912011, 0.0128661232226479, 0.00225580439739044, -0.0117476427864401, -0.0059792656068627, 0.000787012740598272, 0.00255419488737936, 0.00406033118385186, 0.0102551045653601, 0.00423949002681056, -0.0116986428989079, 0.00232448128787425, -0.00296198808290357, -0.00793738689381332, -0.000771158906679964, 0.00435708760153937, -0.0138922325725763, 0.00264561130131037, -0.0156128295187466, -0.0102023187068811, 0.0074744189329328, 0.0102377749189598, 
-0.0304739969497646, 0.00692556661464647, 0.00151065993974025, 0.0133704258946895, -0.0167391228441308, 0.0111804482435337, -0.0062469732087272, -0.00930165243463748, -0.00418698783410104, 0.00190918091726462, 0.00632982717347925, -0.00277608255480933, -0.00175463261672652, -0.00267223587651978, -0.00329264073314718, 0.000960091877616874, -0.00946014799557438, -0.0112302467393988, -0.00870512647578646, -0.00238582834931644, -0.0100845163232815, -0.00675861103174491, -0.000689229731411459, 0.0127651588318169, -0.0062753105816655, -0.00240575758827749, 0.00439570108491531, 0.00934971690544427, -0.0184380964678117, -0.00474253892124699, 0.00522916014066936, -0.0105148336464531, 0.0088372219244051, 0.0100429095740915, -0.0107657032259033, -0.00512476269437683, -0.00558487620671732, -0.000637298812579742, -0.00118460090105795, -0.00369801350318738, -0.00556276860695209, 0.00789011470305446, -0.00248367841256358, 0.00677762904717052, -0.00640135771848287, 0.00797532960057465, -0.00117508910987595, 0.000986931150778778, -0.0148237721063735, 0.0053001635341953, -0.0139698571439444, -0.0172255105183439, -0.0177416268392445, -0.0107062660197562, -0.00735448768491512, -0.00418482390542493, 0.00933957546887131, -0.00761657876743367, 0.0107862806984669, 6.99667442150322e-05, -0.00151054027221715, 0.00941377216029456, 0.0112882845381545, 0.0014423575345095, 0.00845773223444363, -0.00675939077916714, -0.00329806028742896, 0.000276998824889068, 0.00206337643122044, -0.00173085772672239, 0.00169616445468346, 0.00281297187309321, -0.0152343998246272, 0.0126261762792184, -0.000224959505615703, -0.00476466349783071, -0.0102541605421868, -0.000561674281900828, 0.00367777757696579, -0.000960272764476094, 0.00255704179717728, -0.000696266184051808, 0.0470920125432207, 0.0115016691642458, -0.00287666464467251, -0.00132912286075637, 0.00201932482935891, 0.00119899092739739, 0.00380417340899902, -0.00394363983208331, -0.00294543812868618, -1.77894150438862e-05, -0.00455002740798846, 
0.000613307426862812, 0.00348274063618593, 0.00161877234851832, 0.0231608701706833, -0.00390062462708628, 0.00244047437999614, -0.00143984617445982, -0.00221831741496412, -0.00744853810342609, -0.00575689075773469, -0.00567890661011033, 0.00384589889309526, -0.00173241442296732, -0.00526995531653655, -0.00310819786514896, 0.00740596461822877, -0.0790037392468225, 0.0239744234187787, 0.0514310481067108, 0.034335426530007, 0.0254604884688754, 0.0531375235023675, -0.0228335779154641, 0.546865402727144 };
    GLMModel fitted = null;
    GLMParameters glmParams = new GLMParameters(Family.gaussian);

    // Append an all-ones weights column to the shared frame and republish it under its key.
    _airlinesMM.add("weights", _airlinesMM.anyVec().makeCon(1.0));
    DKV.put(_airlinesMM._key, _airlinesMM);

    // Data selection.
    glmParams._train = _airlinesMM._key;
    glmParams._response_column = "IsDepDelayed";
    glmParams._weights_column = "weights";
    glmParams._ignored_columns = new String[] { "C1" };
    glmParams._missing_values_handling = DeepLearningModel.DeepLearningParameters.MissingValuesHandling.Skip;
    // glmParams._remove_collinear_columns = true;

    // Penalty and solver configuration.
    glmParams._solver = Solver.IRLSM;
    glmParams._alpha = new double[] { 0 };
    glmParams._lambda = new double[] { 1e-2 };
    glmParams._standardize = true;

    // Convergence settings.
    glmParams._objective_epsilon = 0;
    glmParams._gradient_epsilon = 1e-10;
    glmParams._max_iterations = 1000;

    try {
        fitted = new GLM(glmParams).trainModel().get();
        final int coefCount = fitted._output._coefficient_names.length;
        for (int idx = 0; idx < coefCount; idx++) {
            double expected = expectedNormBeta[idx];
            double actual = fitted._output.getNormBeta()[idx];
            // Tolerance scales with the magnitude of the reference value.
            assertEquals(expected, actual, Math.abs(expected) * 1e-8);
        }
    } finally {
        if (fitted != null) {
            fitted.delete();
        }
    }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMBasicTestRegression method testPValuesTweedie.
/**
 * Fits an unregularized tweedie GLM (variance power 1.5, link power 1 - variance power) with
 * p-value computation enabled on the {@code _earinf} frame. Checks the reported standard
 * errors, z-values and p-values against the R reference output quoted below, then scores the
 * training frame and compares the "StdErr" prediction column with {@code tweedie_se_fit}.
 */
@Test
public void testPValuesTweedie() {
    // Call:
    // glm(formula = Infections ~ ., family = tweedie(var.power = 1.5),
    // data = D)
    //
    // Deviance Residuals:
    // Min 1Q Median 3Q Max
    // -2.6355 -2.0931 -1.8183 0.5046 4.9458
    //
    // Coefficients:
    // Estimate Std. Error t value Pr(>|t|)
    // (Intercept) 1.05665 0.11120 9.502 < 2e-16 ***
    // SwimmerOccas -0.25891 0.08455 -3.062 0.00241 **
    // LocationNonBeach -0.22185 0.08393 -2.643 0.00867 **
    // Age20-24 0.15325 0.10041 1.526 0.12808
    // Age25-29 0.07624 0.10099 0.755 0.45096
    // SexMale 0.03908 0.08619 0.453 0.65058
    // ---
    // Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    //
    // (Dispersion parameter for Tweedie family taken to be 2.896306)
    //
    // Null deviance: 967.05 on 286 degrees of freedom
    // Residual deviance: 908.86 on 281 degrees of freedom
    // AIC: NA
    //
    // Number of Fisher Scoring iterations: 7
    double[] expectedStdErr = new double[] { 0.11120211, 0.08454967, 0.08393315, 0.10041150, 0.10099231, 0.08618960 };
    double[] expectedZ = new double[] { 9.5021062, -3.0622693, -2.6431794, 1.5262357, 0.7548661, 0.4534433 };
    double[] expectedP = new double[] { 9.508400e-19, 2.409514e-03, 8.674149e-03, 1.280759e-01, 4.509615e-01, 6.505795e-01 };

    GLMParameters glmParams = new GLMParameters(Family.tweedie);
    glmParams._tweedie_variance_power = 1.5;
    glmParams._tweedie_link_power = 1 - glmParams._tweedie_variance_power;
    glmParams._train = _earinf._key;
    glmParams._response_column = "Infections";
    glmParams._missing_values_handling = DeepLearningModel.DeepLearningParameters.MissingValuesHandling.Skip;
    glmParams._standardize = false;
    glmParams._alpha = new double[] { 0 };
    glmParams._lambda = new double[] { 0 };
    glmParams._compute_p_values = true;
    glmParams._objective_epsilon = 0;

    GLMModel fitted = null;
    Frame scored = null;
    try {
        fitted = new GLM(glmParams).trainModel().get();

        // The expected arrays are listed in this coefficient order; the model may report the
        // coefficients in a different order, so look each one up by name.
        String[] referenceOrder = new String[] { "Intercept", "Swimmer.Occas", "Location.NonBeach", "Age.20-24", "Age.25-29", "Sex.Male" };
        String[] modelOrder = fitted._output.coefficientNames();
        HashMap<String, Integer> referenceIndex = new HashMap<>();
        for (int pos = 0; pos < referenceOrder.length; pos++) {
            referenceIndex.put(referenceOrder[pos], pos);
        }

        double[] actualStdErr = fitted._output.stdErr();
        double[] actualZ = fitted._output.zValues();
        double[] actualP = fitted._output.pValues();
        for (int pos = 0; pos < expectedStdErr.length; pos++) {
            int ref = referenceIndex.get(modelOrder[pos]);
            assertEquals(expectedStdErr[ref], actualStdErr[pos], expectedStdErr[ref] * 1e-3);
            assertEquals(expectedZ[ref], actualZ[pos], Math.abs(expectedZ[ref]) * 1e-3);
            assertEquals(expectedP[ref], actualP[pos], Math.max(1e-8, expectedP[ref]) * 5e-3);
        }

        // Score the training frame and compare the per-row "StdErr" column with the reference.
        scored = fitted.score(glmParams._train.get());
        Vec.Reader stdErrReader = scored.vec("StdErr").new Reader();
        for (int row = 0; row < 10; row++) {
            System.out.println(tweedie_se_fit[row] + " ?=? " + stdErrReader.at(row));
        }
        for (int row = 0; row < tweedie_se_fit.length; row++) {
            assertEquals(tweedie_se_fit[row], stdErrReader.at(row), 1e-4);
        }
    } finally {
        if (fitted != null) {
            fitted.delete();
        }
        if (scored != null) {
            scored.delete();
        }
    }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testProstate.
/**
 * Binomial GLM test on the prostate dataset whose RACE column has been replaced by a
 * categorical (exercises categorical expansion: coefficients "RACE.R2" / "RACE.R3").
 *
 * <p>The test proceeds in stages:
 * <ol>
 *   <li>unregularized fit (lambda 0, no standardization) and a check of the iteration count;</li>
 *   <li>refit capped at 4 iterations; coefficients, deviances, residual degrees of freedom and
 *       AIC are compared with reference values, and metrics obtained by re-scoring the training
 *       frame are compared with the training metrics (twice);</li>
 *   <li>refit with a prior of 1e-5: all coefficients must match except the intercept, which is
 *       expected to be shifted by the log-odds correction computed below;</li>
 *   <li>lambda search, followed by a fit at the selected lambda on a frame with the ID column
 *       removed; MSE and residual deviance of both models must agree.</li>
 * </ol>
 *
 * @throws ExecutionException declared by the test signature
 * @throws InterruptedException declared by the test signature
 */
@Test
public void testProstate() throws InterruptedException, ExecutionException {
    GLMModel model = null, model2 = null, model3 = null, model4 = null;
    Frame fr = parse_test_file("smalldata/glm_test/prostate_cat_replaced.csv");
    try {
        Scope.enter();
        // R results
        // Coefficients:
        // (Intercept) ID AGE RACER2 RACER3 DPROS DCAPS PSA VOL GLEASON
        // -8.894088 0.001588 -0.009589 0.231777 -0.459937 0.556231 0.556395 0.027854 -0.011355 1.010179
        // NOTE(review): the printout above includes ID, while the model below ignores ID and the
        // expected values in `vals` differ from it -- presumably they come from an R fit without
        // ID; confirm.
        String[] cfs1 = new String[] { "Intercept", "AGE", "RACE.R2", "RACE.R3", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON" };
        double[] vals = new double[] { -8.14867, -0.01368, 0.32337, -0.38028, 0.55964, 0.49548, 0.02794, -0.01104, 0.97704 };
        GLMParameters params = new GLMParameters(Family.binomial);
        params._response_column = "CAPSULE";
        params._ignored_columns = new String[] { "ID" };
        params._train = fr._key;
        params._lambda = new double[] { 0 };
        params._standardize = false;
        // params._missing_values_handling = MissingValuesHandling.Skip;

        // Stage 1: unconstrained fit, expected to take 5 iterations.
        GLM glm = new GLM(params);
        model = glm.trainModel().get();
        assertTrue("expected 5 iterations, got " + model._output.bestSubmodel().iteration, model._output.bestSubmodel().iteration == 5);
        model.delete();

        // Stage 2: cap the iteration count and verify the fit against the reference values.
        params._max_iterations = 4;
        glm = new GLM(params);
        model = glm.trainModel().get();
        assertTrue("expected 4 iterations, got " + model._output.bestSubmodel().iteration, model._output.bestSubmodel().iteration == 4);
        System.out.println(model._output._model_summary);
        HashMap<String, Double> coefs = model.coefficients();
        System.out.println(coefs);
        for (int i = 0; i < cfs1.length; ++i) assertEquals(vals[i], coefs.get(cfs1[i]), 1e-4);
        assertEquals(512.3, nullDeviance(model), 1e-1);
        assertEquals(378.3, residualDeviance(model), 1e-1);
        assertEquals(371, resDOF(model), 0);
        assertEquals(396.3, aic(model), 1e-1);
        testScoring(model, fr);
        // Metrics produced by scoring the training frame must match the training metrics.
        model.score(fr).delete();
        hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(model, fr);
        hex.AUC2 adata = mm._auc;
        assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
        assertEquals(0.7588625640559653, adata.pr_auc(), 1e-8);
        assertEquals(model._output._training_metrics._MSE, mm._MSE, 1e-8);
        assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);
        // Score a second time and repeat the checks on the freshly fetched metrics.
        model.score(fr).delete();
        mm = hex.ModelMetricsBinomial.getFromDKV(model, fr);
        // Re-read the AUC from the re-fetched metrics; otherwise the assertion below would only
        // re-check the object obtained from the first scoring pass.
        adata = mm._auc;
        assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
        assertEquals(model._output._training_metrics._MSE, mm._MSE, 1e-8);
        assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);

        // Stage 3: same data and model with a prior; should get the same model except for the intercept.
        double prior = 1e-5;
        params._prior = prior;
        glm = new GLM(params);
        model2 = glm.trainModel().get();
        for (int i = 0; i < model2.beta().length - 1; ++i) assertEquals(model.beta()[i], model2.beta()[i], 1e-8);
        assertEquals(model.beta()[model.beta().length - 1] - Math.log(model._ymu[0] * (1 - prior) / (prior * (1 - model._ymu[0]))), model2.beta()[model.beta().length - 1], 1e-10);

        // Stage 4: run with lambda search (prior and obj_reg reset to -1), check the final submodel.
        params._lambda_search = true;
        params._lambda = null;
        params._alpha = new double[] { 0 };
        params._prior = -1;
        params._obj_reg = -1;
        params._max_iterations = 500;
        params._objective_epsilon = 1e-6;
        glm = new GLM(params);
        model3 = glm.trainModel().get();
        // Remember the lambda picked by the search so model4 can be fitted at exactly that value.
        double lambda = model3._output._submodels[model3._output._best_lambda_idx].lambda_value;
        params._lambda_search = false;
        params._lambda = new double[] { lambda };
        ModelMetrics mm3 = ModelMetrics.getFromDKV(model3, fr);
        assertEquals("mse don't match, " + model3._output._training_metrics._MSE + " != " + mm3._MSE, model3._output._training_metrics._MSE, mm3._MSE, 1e-8);
        assertEquals("res-devs don't match, " + ((ModelMetricsBinomialGLM) model3._output._training_metrics)._resDev + " != " + ((ModelMetricsBinomialGLM) mm3)._resDev, ((ModelMetricsBinomialGLM) model3._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm3)._resDev, 1e-4);
        // Move the response to the last column, drop ID from the frame and republish it.
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        fr.remove("ID").remove();
        DKV.put(fr._key, fr);
        // NOTE(review): dinfo is never read afterwards; kept because it is unclear from here
        // whether constructing it is relied upon as a sanity check on the modified frame.
        DataInfo dinfo = new DataInfo(fr, null, 1, true, TransformType.NONE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
        model3.score(fr).delete();
        mm3 = ModelMetrics.getFromDKV(model3, fr);
        assertEquals("mse don't match, " + model3._output._training_metrics._MSE + " != " + mm3._MSE, model3._output._training_metrics._MSE, mm3._MSE, 1e-8);
        assertEquals("res-devs don't match, " + ((ModelMetricsBinomialGLM) model3._output._training_metrics)._resDev + " != " + ((ModelMetricsBinomialGLM) mm3)._resDev, ((ModelMetricsBinomialGLM) model3._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm3)._resDev, 1e-4);
        // Fit directly at the lambda selected by the search; metrics should match model3.
        glm = new GLM(params);
        model4 = glm.trainModel().get();
        assertEquals("mse don't match, " + model3._output._training_metrics._MSE + " != " + model4._output._training_metrics._MSE, model3._output._training_metrics._MSE, model4._output._training_metrics._MSE, 1e-6);
        assertEquals("res-devs don't match, " + ((ModelMetricsBinomialGLM) model3._output._training_metrics)._resDev + " != " + ((ModelMetricsBinomialGLM) model4._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) model3._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) model4._output._training_metrics)._resDev, 1e-4);
        model4.score(fr).delete();
        ModelMetrics mm4 = ModelMetrics.getFromDKV(model4, fr);
        assertEquals("mse don't match, " + mm3._MSE + " != " + mm4._MSE, mm3._MSE, mm4._MSE, 1e-6);
        assertEquals("res-devs don't match, " + ((ModelMetricsBinomialGLM) mm3)._resDev + " != " + ((ModelMetricsBinomialGLM) mm4)._resDev, ((ModelMetricsBinomialGLM) mm3)._resDev, ((ModelMetricsBinomialGLM) mm4)._resDev, 1e-4);
        // GLMValidation val2 = new GLMValidationTsk(params,model._ymu,rank(model.beta())).doAll(new Vec[]{fr.vec("CAPSULE"),score.vec("1")})._val;
        // assertEquals(val.residualDeviance(),val2.residualDeviance(),1e-6);
        // assertEquals(val.nullDeviance(),val2.nullDeviance(),1e-6);
    } finally {
        fr.delete();
        if (model != null)
            model.delete();
        if (model2 != null)
            model2.delete();
        if (model3 != null)
            model3.delete();
        if (model4 != null)
            model4.delete();
        Scope.exit();
    }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testAllNAs.
//// //simple tweedie test
// @Test public void testTweedieRegression() throws InterruptedException, ExecutionException{
// Key raw = Key.make("gaussian_test_data_raw");
// Key parsed = Key.make("gaussian_test_data_parsed");
// Key<GLMModel> modelKey = Key.make("gaussian_test");
// Frame fr = null;
// GLMModel model = null;
// try {
// // make data so that the expected coefficients is icept = col[0] = 1.0
// FVecTest.makeByteVec(raw, "x,y\n0,0\n1,0.1\n2,0.2\n3,0.3\n4,0.4\n5,0.5\n6,0.6\n7,0.7\n8,0.8\n9,0.9\n0,0\n1,0\n2,0\n3,0\n4,0\n5,0\n6,0\n7,0\n8,0\n9,0");
// fr = ParseDataset.parse(parsed, new Key[]{raw});
// double [] powers = new double [] {1.5,1.1,1.9};
// double [] intercepts = new double []{3.643,1.318,9.154};
// double [] xs = new double []{-0.260,-0.0284,-0.853};
// for(int i = 0; i < powers.length; ++i){
// DataInfo dinfo = new DataInfo(fr, 1, false, DataInfo.TransformType.NONE);
// GLMParameters glm = new GLMParameters(Family.tweedie);
//
// new GLM2("GLM test of gaussian(linear) regression.",Key.make(),modelKey,dinfo,glm,new double[]{0},0).fork().get();
// model = DKV.get(modelKey).get();
// testHTML(model);
// HashMap<String, Double> coefs = model.coefficients();
// assertEquals(intercepts[i],coefs.get("Intercept"),1e-3);
// assertEquals(xs[i],coefs.get("x"),1e-3);
// }
// }finally{
// if( fr != null ) fr.delete();
// if(model != null)model.delete();
// }
// }
/**
 * Verifies that training a poisson GLM with missing-value handling set to Skip fails with an
 * {@link IllegalArgumentException} mentioning "No rows left in the dataset" when every row of
 * the training frame contains at least one NA (as is the case for the inline CSV below).
 */
@Test
public void testAllNAs() {
    // NOTE(review): the key names say "gamma" although the family under test is poisson; the
    // names are kept unchanged because they are runtime key values.
    Key raw = Key.make("gamma_test_data_raw");
    Key parsed = Key.make("gamma_test_data_parsed");
    FVecTest.makeByteVec(raw, "x,y,z\n1,0,NA\n2,NA,1\nNA,3,2\n4,3,NA\n5,NA,1\nNA,6,4\n7,NA,9\n8,NA,18\nNA,9,23\n10,31,NA\nNA,11,20\n12,NA,25\nNA,13,37\n14,45,NA\n");
    Frame fr = ParseDataset.parse(parsed, raw);
    // Captured so that an unexpectedly successful fit can be cleaned up in the finally block.
    GLMModel model = null;
    try {
        GLMParameters params = new GLMParameters(Family.poisson);
        // params._response = 1;
        params._response_column = fr._names[1];
        params._train = parsed;
        params._lambda = new double[] { 0 };
        params._missing_values_handling = MissingValuesHandling.Skip;
        GLM glm = new GLM(params);
        model = glm.trainModel().get();
        assertFalse("should've thrown IAE", true);
    } catch (IllegalArgumentException e) {
        String message = e.getMessage();
        // Null-safe check that reports the actual message when it is not the expected one.
        assertTrue("unexpected IAE message: " + message, message != null && message.contains("No rows left in the dataset"));
    } finally {
        fr.delete();
        if (model != null)
            model.delete();
    }
}
use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
the class GLMTest method testCoordinateDescent_airlines_CovUpdates.
/**
 * Trains a standardized binomial GLM with the COORDINATE_DESCENT solver on the airlines
 * training data (response "IsDepDelayed", column "IsDepDelayed_REC" ignored), asserts that the
 * builder reports itself as stopped once training returns, and prints the training metrics.
 */
@Test
public void testCoordinateDescent_airlines_CovUpdates() {
    Key parsed = Key.make("airlines_parsed");
    Key<GLMModel> modelKey = Key.make("airlines_model");
    Frame airlines = parse_test_file(parsed, "smalldata/airlines/AirlinesTrain.csv.zip");
    GLMModel trained = null;
    try {
        GLMParameters cdParams = new GLMParameters();
        // Data selection.
        cdParams._train = airlines._key;
        cdParams._response_column = "IsDepDelayed";
        cdParams._ignored_columns = new String[] { "IsDepDelayed_REC" };
        // Model configuration.
        cdParams._family = Family.binomial;
        cdParams._solver = Solver.COORDINATE_DESCENT;
        cdParams._standardize = true;

        GLM builder = new GLM(cdParams, modelKey);
        trained = builder.trainModel().get();
        assertTrue(builder.isStopped());
        System.out.println(trained._output._training_metrics);
    } finally {
        airlines.delete();
        if (trained != null) {
            trained.delete();
        }
    }
}
Aggregations