Use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
From the class GLMTest, method testGradientTask.
// Make sure all three implementations of ginfo (gradient) computation in GLM produce the same results
@Test
public void testGradientTask() {
Key parsed = Key.make("cars_parsed");
Frame fr = null;
DataInfo dinfo = null;
try {
fr = parse_test_file(parsed, "smalldata/junit/mixcat_train.csv");
GLMParameters params = new GLMParameters(Family.binomial, Family.binomial.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
// params._response = fr.find(params._response_column);
params._train = parsed;
params._lambda = new double[] { 0 };
params._use_all_factor_levels = true;
fr.add("Useless", fr.remove("Useless")); // move the "Useless" column to the end of the frame
dinfo = new DataInfo(fr, null, 1, params._use_all_factor_levels || params._lambda_search, params._standardize ? DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
DKV.put(dinfo._key, dinfo);
double[] beta = MemoryManager.malloc8d(dinfo.fullN() + 1);
Random rnd = new Random(987654321);
for (int i = 0; i < beta.length; ++i) beta[i] = 1 - 2 * rnd.nextDouble();
GLMGradientTask grtSpc = new GLMBinomialGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
GLMGradientTask grtGen = new GLMGenericGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
for (int i = 0; i < beta.length; ++i) assertEquals("gradients differ", grtSpc._gradient[i], grtGen._gradient[i], 1e-4);
params = new GLMParameters(Family.gaussian, Family.gaussian.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
params._use_all_factor_levels = false;
dinfo.remove();
dinfo = new DataInfo(fr, null, 1, params._use_all_factor_levels || params._lambda_search, params._standardize ? DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
DKV.put(dinfo._key, dinfo);
beta = MemoryManager.malloc8d(dinfo.fullN() + 1);
rnd = new Random(1987654321);
for (int i = 0; i < beta.length; ++i) beta[i] = 1 - 2 * rnd.nextDouble();
grtSpc = new GLMGaussianGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
grtGen = new GLMGenericGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
for (int i = 0; i < beta.length; ++i) assertEquals("gradients differ: " + Arrays.toString(grtSpc._gradient) + " != " + Arrays.toString(grtGen._gradient), grtSpc._gradient[i], grtGen._gradient[i], 1e-4);
dinfo.remove();
} finally {
if (fr != null)
fr.delete();
if (dinfo != null)
dinfo.remove();
}
}
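The comparison above asserts that a family-specific gradient task and the generic gradient task agree entry by entry. Below is a minimal standalone sketch of the same idea, written without any H2O classes (all names are hypothetical): it checks an analytic logistic-regression gradient against a central-difference approximation of the negative log-likelihood, using the same intercept-last coefficient layout suggested by dinfo.fullN() + 1 above.
import java.util.Random;

// Hypothetical standalone class, not part of h2o-3.
public class GradientCheckSketch {

  // Negative log-likelihood of logistic regression; the intercept is stored last in beta.
  static double nll(double[][] x, int[] y, double[] beta) {
    double s = 0;
    for (int i = 0; i < x.length; ++i) {
      double eta = beta[beta.length - 1];
      for (int j = 0; j < x[i].length; ++j) eta += beta[j] * x[i][j];
      double p = 1.0 / (1.0 + Math.exp(-eta));
      s -= (y[i] == 1) ? Math.log(p) : Math.log(1 - p);
    }
    return s;
  }

  // Analytic gradient of the negative log-likelihood: sum_i (p_i - y_i) * x_i.
  static double[] gradient(double[][] x, int[] y, double[] beta) {
    double[] g = new double[beta.length];
    for (int i = 0; i < x.length; ++i) {
      double eta = beta[beta.length - 1];
      for (int j = 0; j < x[i].length; ++j) eta += beta[j] * x[i][j];
      double r = 1.0 / (1.0 + Math.exp(-eta)) - y[i];
      for (int j = 0; j < x[i].length; ++j) g[j] += r * x[i][j];
      g[beta.length - 1] += r;
    }
    return g;
  }

  public static void main(String[] args) {
    Random rnd = new Random(987654321);
    double[][] x = new double[100][3];
    int[] y = new int[100];
    for (int i = 0; i < x.length; ++i) {
      for (int j = 0; j < 3; ++j) x[i][j] = rnd.nextGaussian();
      y[i] = rnd.nextBoolean() ? 1 : 0;
    }
    double[] beta = { 0.3, -0.2, 0.1, 0.05 };
    double[] g = gradient(x, y, beta);
    double h = 1e-5;
    for (int j = 0; j < beta.length; ++j) {
      double saved = beta[j];
      beta[j] = saved + h; double up = nll(x, y, beta);
      beta[j] = saved - h; double down = nll(x, y, beta);
      beta[j] = saved;
      double fd = (up - down) / (2 * h); // central-difference approximation
      if (Math.abs(fd - g[j]) > 1e-4)
        throw new IllegalStateException("gradients differ at " + j + ": " + fd + " vs " + g[j]);
    }
    System.out.println("analytic and finite-difference gradients agree");
  }
}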
Use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
From the class GLMTest, method testGaussianRegression.
//------------------- simple tests on synthetic data------------------------------------
@Test
public void testGaussianRegression() throws InterruptedException, ExecutionException {
Key raw = Key.make("gaussian_test_data_raw");
Key parsed = Key.make("gaussian_test_data_parsed");
GLMModel model = null;
Frame fr = null, res = null;
try {
// the data follow y = 0.1 * x exactly, so the expected coefficients are intercept = 0 and x = 0.1
FVecTest.makeByteVec(raw, "x,y\n0,0\n1,0.1\n2,0.2\n3,0.3\n4,0.4\n5,0.5\n6,0.6\n7,0.7\n8,0.8\n9,0.9");
fr = ParseDataset.parse(parsed, raw);
GLMParameters params = new GLMParameters(Family.gaussian);
params._train = fr._key;
// params._response = 1;
params._response_column = fr._names[1];
params._lambda = new double[] { 0 };
// params._standardize= false;
model = new GLM(params).trainModel().get();
HashMap<String, Double> coefs = model.coefficients();
assertEquals(0.0, coefs.get("Intercept"), 1e-4);
assertEquals(0.1, coefs.get("x"), 1e-4);
testScoring(model, fr);
} finally {
if (fr != null)
fr.remove();
if (res != null)
res.remove();
if (model != null)
model.remove();
}
}
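For reference, the asserted coefficients follow directly from ordinary least squares on the synthetic data: y is exactly 0.1 * x, so the fitted line has intercept 0 and slope 0.1. The plain-Java check below (hypothetical class, no H2O involved) makes this concrete.
// Hypothetical standalone check, not part of h2o-3: closed-form simple linear regression
// on the same ten points used above (y = 0.1 * x for x = 0..9).
public class GaussianRegressionSketch {
  public static void main(String[] args) {
    int n = 10;
    double sx = 0, sy = 0, sxx = 0, sxy = 0;
    for (int x = 0; x < n; ++x) {
      double y = 0.1 * x;
      sx += x; sy += y; sxx += (double) x * x; sxy += x * y;
    }
    double slope = (n * sxy - sx * sy) / (n * sxx - sx * sx);
    double intercept = (sy - slope * sx) / n;
    System.out.printf("intercept = %.4f, slope = %.4f%n", intercept, slope); // 0.0000, 0.1000
  }
}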
Use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
From the class GLMTest, method testSparseGramComputation.
// // test categorical autoexpansions, run on airlines which has several categorical columns,
// // once on explicitly expanded data, once on h2o autoexpanded and compare the results
// @Test public void testSparseCategoricals() {
// GLMModel model1 = null, model2 = null, model3 = null, model4 = null;
//
// Frame frMM = parse_test_file("smalldata/glm_tets/train-2.csv");
//
//// Vec xy = frG.remove("xy");
// frMM.remove("").remove();
// frMM.add("IsDepDelayed", frMM.remove("IsDepDelayed"));
// DKV.put(frMM._key,frMM);
// Frame fr = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip"), res = null;
// // Distance + Origin + Dest + UniqueCarrier
// String [] ignoredCols = new String[]{"fYear", "fMonth", "fDayofMonth", "fDayOfWeek", "DepTime","ArrTime","IsDepDelayed_REC"};
// try{
// Scope.enter();
// GLMParameters params = new GLMParameters(Family.gaussian);
// params._response_column = "IsDepDelayed";
// params._ignored_columns = ignoredCols;
// params._train = fr._key;
// params._l2pen = new double[]{1e-5};
// params._standardize = false;
// model1 = new GLM(params,glmkey("airlines_cat_nostd")).trainModel().get();
// Frame score1 = model1.score(fr);
// ModelMetricsRegressionGLM mm = (ModelMetricsRegressionGLM) ModelMetrics.getFromDKV(model1, fr);
// Assert.assertEquals(model1.validation().residual_deviance, mm._resDev, 1e-4);
// System.out.println("NDOF = " + model1.validation().nullDOF() + ", numRows = " + score1.numRows());
// Assert.assertEquals(model1.validation().residual_deviance, mm._MSE * score1.numRows(), 1e-4);
// mm.remove();
// res = model1.score(fr);
// // Build a POJO, validate same results
// Assert.assertTrue(model1.testJavaScoring(fr, res, 1e-15));
//
// params._train = frMM._key;
// params._ignored_columns = new String[]{"X"};
// model2 = new GLM(params,glmkey("airlines_mm")).trainModel().get();
// params._standardize = true;
// params._train = frMM._key;
// params._use_all_factor_levels = true;
// // test the gram
// DataInfo dinfo = new DataInfo(Key.make(),frMM, null, 1, true, DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE, true);
// GLMIterationTask glmt = new GLMIterationTask(null,dinfo,1e-5,params,false,null,0,null, null).doAll(dinfo._adaptedFrame);
// for(int i = 0; i < glmt._xy.length; ++i) {
// for(int j = 0; j <= i; ++j ) {
// assertEquals(frG.vec(j).at(i), glmt._gram.get(i, j), 1e-5);
// }
// assertEquals(xy.at(i), glmt._xy[i], 1e-5);
// }
// frG.delete();
// xy.remove();
// params._standardize = true;
// params._family = Family.binomial;
// params._link = Link.logit;
// model3 = new GLM(params,glmkey("airlines_mm")).trainModel().get();
// params._train = fr._key;
// params._ignored_columns = ignoredCols;
// model4 = new GLM(params,glmkey("airlines_mm")).trainModel().get();
// assertEquals(model3.validation().null_deviance,model4.validation().nullDeviance(),1e-4);
// assertEquals(model4.validation().residual_deviance, model3.validation().residualDeviance(), model3.validation().null_deviance * 1e-3);
// HashMap<String, Double> coefs1 = model1.coefficients();
// HashMap<String, Double> coefs2 = model2.coefficients();
// GLMValidation val1 = model1.validation();
// GLMValidation val2 = model2.validation();
// // compare against each other
// for(String s:coefs2.keySet()) {
// String s1 = s;
// if(s.startsWith("Origin"))
// s1 = "Origin." + s.substring(6);
// if(s.startsWith("Dest"))
// s1 = "Dest." + s.substring(4);
// if(s.startsWith("UniqueCarrier"))
// s1 = "UniqueCarrier." + s.substring(13);
// assertEquals("coeff " + s1 + " differs, " + coefs1.get(s1) + " != " + coefs2.get(s), coefs1.get(s1), coefs2.get(s),1e-4);
// DKV.put(frMM._key,frMM); // update the frame in the KV after removing the vec!
// }
// assertEquals(val1.nullDeviance(), val2.nullDeviance(),1e-4);
// assertEquals(val1.residualDeviance(), val2.residualDeviance(),1e-4);
// assertEquals(val1._aic, val2._aic,1e-2);
// // compare result against glmnet
// assertEquals(5336.918,val1.residualDeviance(),1);
// assertEquals(6051.613,val1.nullDeviance(),1);
//
//
// // lbfgs
//// params._solver = Solver.L_BFGS;
//// params._train = fr._key;
//// params._lambda = new double[]{.3};
//// model3 = new GLM(params,glmkey("lbfgs_cat")).trainModel().get();
//// params._train = frMM._key;
//// model4 = new GLM(params,glmkey("lbfgs_mm")).trainModel().get();
//// HashMap<String, Double> coefs3 = model3.coefficients();
//// HashMap<String, Double> coefs4 = model4.coefficients();
//// // compare against each other
//// for(String s:coefs4.keySet()) {
//// String s1 = s;
//// if(s.startsWith("Origin"))
//// s1 = "Origin." + s.substring(6);
//// if(s.startsWith("Dest"))
//// s1 = "Dest." + s.substring(4);
//// if(s.startsWith("UniqueCarrier"))
//// s1 = "UniqueCarrier." + s.substring(13);
//// assertEquals("coeff " + s1 + " differs, " + coefs3.get(s1) + " != " + coefs4.get(s), coefs3.get(s1), coefs4.get(s),1e-4);
//// }
//
// } finally {
// fr.delete();
// frMM.delete();
// if(res != null)res.delete();
// if(model1 != null)model1.delete();
// if(model2 != null)model2.delete();
// if(model3 != null)model3.delete();
// if(model4 != null)model4.delete();
//// if(score != null)score.delete();
// Scope.exit();
// }
// }
/**
* Test that we get the correct Gram matrix on a dataset which contains categoricals and both sparse and dense numeric columns
*/
@Test
public void testSparseGramComputation() {
Random rnd = new Random(123456789L);
double[] d0 = MemoryManager.malloc8d(1000);
double[] d1 = MemoryManager.malloc8d(1000);
double[] d2 = MemoryManager.malloc8d(1000);
double[] d3 = MemoryManager.malloc8d(1000);
double[] d4 = MemoryManager.malloc8d(1000);
double[] d5 = MemoryManager.malloc8d(1000);
double[] d6 = MemoryManager.malloc8d(1000);
double[] d7 = MemoryManager.malloc8d(1000);
double[] d8 = MemoryManager.malloc8d(1000);
double[] d9 = MemoryManager.malloc8d(1000);
long[] c1 = MemoryManager.malloc8(1000);
long[] c2 = MemoryManager.malloc8(1000);
String[] dom = new String[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };
for (int i = 0; i < d1.length; ++i) {
c1[i] = rnd.nextInt(dom.length);
c2[i] = rnd.nextInt(dom.length);
d0[i] = rnd.nextDouble();
d1[i] = rnd.nextDouble();
}
for (int i = 0; i < 30; ++i) {
d2[rnd.nextInt(d2.length)] = rnd.nextDouble();
d3[rnd.nextInt(d2.length)] = rnd.nextDouble();
d4[rnd.nextInt(d2.length)] = rnd.nextDouble();
d5[rnd.nextInt(d2.length)] = rnd.nextDouble();
d6[rnd.nextInt(d2.length)] = rnd.nextDouble();
d7[rnd.nextInt(d2.length)] = rnd.nextDouble();
d8[rnd.nextInt(d2.length)] = rnd.nextDouble();
d9[rnd.nextInt(d2.length)] = 1;
}
Vec.VectorGroup vg_1 = Vec.VectorGroup.VG_LEN1;
Vec v01 = Vec.makeVec(c1, dom, vg_1.addVec());
Vec v02 = Vec.makeVec(c2, dom, vg_1.addVec());
Vec v03 = Vec.makeVec(d0, vg_1.addVec());
Vec v04 = Vec.makeVec(d1, vg_1.addVec());
Vec v05 = Vec.makeVec(d2, vg_1.addVec());
Vec v06 = Vec.makeVec(d3, vg_1.addVec());
Vec v07 = Vec.makeVec(d4, vg_1.addVec());
Vec v08 = Vec.makeVec(d5, vg_1.addVec());
Vec v09 = Vec.makeVec(d6, vg_1.addVec());
Vec v10 = Vec.makeVec(d7, vg_1.addVec());
Vec v11 = Vec.makeVec(d8, vg_1.addVec());
Vec v12 = Vec.makeVec(d9, vg_1.addVec());
Frame f = new Frame(Key.<Frame>make("TestData"), null, new Vec[] { v01, v02, v03, v04, v05, v05, v06, v07, v08, v09, v10, v11, v12 });
DKV.put(f);
DataInfo dinfo = new DataInfo(f, null, 1, true, DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
GLMParameters params = new GLMParameters(Family.gaussian);
// public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw,double [] beta, double lambda) {
final GLMIterationTask glmtSparse = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), null).setSparse(true).doAll(dinfo._adaptedFrame);
final GLMIterationTask glmtDense = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), null).setSparse(false).doAll(dinfo._adaptedFrame);
for (int i = 0; i < glmtDense._xy.length; ++i) {
for (int j = 0; j <= i; ++j) {
assertEquals(glmtDense._gram.get(i, j), glmtSparse._gram.get(i, j), 1e-8);
}
assertEquals(glmtDense._xy[i], glmtSparse._xy[i], 1e-8);
}
final double[] beta = MemoryManager.malloc8d(dinfo.fullN() + 1);
// now do the same but weighted; use the LSM solution as beta to generate meaningful weights
H2O.submitTask(new H2OCountedCompleter() {
@Override
public void compute2() {
new GLM.GramSolver(glmtDense._gram, glmtDense._xy, true, 1e-5, 0, null, null, null, null).solve(null, beta);
tryComplete();
}
}).join();
final GLMIterationTask glmtSparse2 = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), beta).setSparse(true).doAll(dinfo._adaptedFrame);
final GLMIterationTask glmtDense2 = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), beta).setSparse(false).doAll(dinfo._adaptedFrame);
for (int i = 0; i < glmtDense2._xy.length; ++i) {
for (int j = 0; j <= i; ++j) {
assertEquals(glmtDense2._gram.get(i, j), glmtSparse2._gram.get(i, j), 1e-8);
}
assertEquals(glmtDense2._xy[i], glmtSparse2._xy[i], 1e-8);
}
dinfo.remove();
f.delete();
}
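What the dense and sparse tasks above must agree on is the lower triangle of the Gram matrix X'X and the vector X'y accumulated over the adapted frame. A small standalone sketch of that accumulation follows (hypothetical names, not H2O's Gram implementation).
import java.util.Arrays;

// Hypothetical standalone sketch, not part of h2o-3.
public class GramSketch {
  public static void main(String[] args) {
    double[][] x = { { 1, 0, 2 }, { 0, 3, 0 }, { 4, 0, 0 }, { 0, 0, 5 } };
    double[] y = { 1, 2, 3, 4 };
    int p = x[0].length;
    double[][] gram = new double[p][p]; // lower triangle of X'X
    double[] xy = new double[p];        // X'y
    for (int r = 0; r < x.length; ++r) {
      for (int i = 0; i < p; ++i) {
        for (int j = 0; j <= i; ++j)
          gram[i][j] += x[r][i] * x[r][j];
        xy[i] += x[r][i] * y[r];
      }
    }
    // A sparse traversal would skip the zero entries of each row but must produce
    // exactly the same numbers, which is what the assertions above verify.
    System.out.println(Arrays.deepToString(gram));
    System.out.println(Arrays.toString(xy));
  }
}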
Use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
From the class GLMTest, method testCoordinateDescent_airlines.
@Test
public void testCoordinateDescent_airlines() {
GLMModel model = null;
Key parsed = Key.make("airlines_parsed");
Key<GLMModel> modelKey = Key.make("airlines_model");
Frame fr = parse_test_file(parsed, "smalldata/airlines/AirlinesTrain.csv.zip");
try {
// H2O differs on intercept and race, same residual deviance though
GLMParameters params = new GLMParameters();
params._standardize = true;
params._family = Family.binomial;
params._solver = Solver.COORDINATE_DESCENT_NAIVE;
params._response_column = "IsDepDelayed";
params._ignored_columns = new String[] { "IsDepDelayed_REC" };
params._train = fr._key;
GLM glm = new GLM(params, modelKey);
model = glm.trainModel().get();
assertTrue(glm.isStopped());
System.out.println(model._output._training_metrics);
} finally {
fr.delete();
if (model != null)
model.delete();
}
}
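The COORDINATE_DESCENT_NAIVE solver exercised above updates one coefficient at a time against the partial residual. Here is a standalone sketch of that update rule for an L1-penalized least-squares problem (hypothetical names, not the H2O implementation); the solver above iterates the same kind of per-coefficient soft-threshold update to convergence.
import java.util.Arrays;

// Hypothetical standalone sketch of naive coordinate descent with soft-thresholding.
public class CoordinateDescentSketch {

  // Soft-threshold operator: closed-form minimizer of the one-dimensional lasso subproblem.
  static double softThreshold(double z, double lambda) {
    if (z > lambda) return z - lambda;
    if (z < -lambda) return z + lambda;
    return 0;
  }

  public static void main(String[] args) {
    double[][] x = { { 1, 2 }, { 2, 1 }, { 3, 4 } };
    double[] y = { 1, 2, 3 };
    double[] beta = new double[2];
    double lambda = 0.1;
    int n = x.length;
    for (int sweep = 0; sweep < 200; ++sweep) { // cycle over coordinates until approximately converged
      for (int j = 0; j < beta.length; ++j) {
        double rho = 0, colNorm = 0;
        for (int i = 0; i < n; ++i) {
          double partialResidual = y[i];
          for (int k = 0; k < beta.length; ++k)
            if (k != j) partialResidual -= x[i][k] * beta[k];
          rho += x[i][j] * partialResidual; // correlation of column j with the partial residual
          colNorm += x[i][j] * x[i][j];
        }
        // Update for the objective (1/(2n)) * ||y - X beta||^2 + lambda * ||beta||_1
        beta[j] = softThreshold(rho / n, lambda) / (colNorm / n);
      }
    }
    System.out.println(Arrays.toString(beta));
  }
}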
Use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.
From the class GLMTest, method test_COD_Airlines_LambdaSearch.
@Test
public void test_COD_Airlines_LambdaSearch() {
GLMModel model1 = null;
// Distance + Origin + Dest + UniqueCarrier
Frame fr = parse_test_file(Key.make("Airlines"), "smalldata/airlines/AirlinesTrain.csv.zip");
String[] ignoredCols = new String[] { "IsDepDelayed_REC" };
try {
Scope.enter();
GLMParameters params = new GLMParameters(Family.binomial);
params._response_column = "IsDepDelayed";
params._ignored_columns = ignoredCols;
params._train = fr._key;
params._valid = fr._key;
// a fixed value such as new double[] { 0.25 } could be used here instead of the lambda search below
params._lambda = null;
params._alpha = new double[] { 1 };
params._standardize = false;
//IRLSM
params._solver = Solver.COORDINATE_DESCENT_NAIVE;
params._lambda_search = true;
params._nlambdas = 5;
GLM glm = new GLM(params);
model1 = glm.trainModel().get();
GLMModel.Submodel sm = model1._output._submodels[model1._output._submodels.length - 1];
double[] beta = sm.beta;
System.out.println("lambda " + sm.lambda_value);
double l1pen = ArrayUtils.l1norm(beta, true);
double l2pen = ArrayUtils.l2norm2(beta, true);
// double objective = job.likelihood()/model1._nobs + // gives likelihood of the last lambda
// params._l2pen[params._l2pen.length-1]*params._alpha[0]*l1pen + params._l2pen[params._l2pen.length-1]*(1-params._alpha[0])*l2pen/2 ;
// assertEquals(0.65689, objective,1e-4);
} finally {
fr.delete();
if (model1 != null)
model1.delete();
}
}
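The commented-out check hints at the objective that lambda search evaluates for each of the _nlambdas values on the regularization path. Below is a sketch of that elastic-net objective as the comments suggest it, with the l1/l2 norms taken over the non-intercept coefficients and the intercept assumed to be the last entry of beta (hypothetical helper, not an H2O API).
// Hypothetical helper, not part of h2o-3: the penalized objective suggested by the
// commented-out lines above.
public class ElasticNetObjectiveSketch {
  static double objective(double likelihood, long nobs, double[] beta, double lambda, double alpha) {
    double l1 = 0, l2 = 0;
    for (int i = 0; i < beta.length - 1; ++i) { // skip the intercept (assumed last)
      l1 += Math.abs(beta[i]);
      l2 += beta[i] * beta[i];
    }
    return likelihood / nobs + lambda * (alpha * l1 + (1 - alpha) * l2 / 2);
  }

  public static void main(String[] args) {
    double[] beta = { 0.5, -0.25, 1.0 }; // last entry plays the role of the intercept
    System.out.println(objective(650.0, 1000, beta, 0.01, 1.0)); // 0.6575
  }
}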