use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testNARight.
/**
 * Regression test for PUBDEV-2822.
 *
 * <p>Parses a tiny two-column data set in which the first and last rows have a missing
 * predictor ({@code C1}) and a response ({@code C2}) of 10, trains a single-tree GBM with
 * {@code learn_rate = 1} and {@code min_rows = 1}, and checks that
 * <ul>
 *   <li>generated Java scoring agrees with in-cluster scoring, and</li>
 *   <li>every training row is predicted exactly, including the two rows with the
 *       missing predictor.</li>
 * </ul>
 *
 * <p>All created objects are removed in a {@code finally} block so that a failing assertion
 * does not leave keys behind.
 */
@Test
public void testNARight() {
  // Rows 0 and 5 have an empty C1 field; both carry response 10.
  String xy = ",10\n1,0\n2,0\n3,0\n4,10\n,10";
  Key tr = Key.make("train");
  Frame df = null;
  Frame preds = null;
  GBMModel gbm = null;
  try {
    df = ParseDataset.parse(tr, makeByteVec(Key.make("xy"), xy));

    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._train = tr;
    parms._response_column = "C2";
    parms._min_rows = 1;
    parms._learn_rate = 1;
    parms._ntrees = 1;

    GBM job = new GBM(parms);
    gbm = job.trainModel().get();
    preds = gbm.score(df);
    Log.info(df);
    Log.info(preds);

    Assert.assertTrue(gbm.testJavaScoring(df, preds, 1e-15));

    // Expected predictions, row by row; a delta of 0 keeps the comparison exact.
    double[] expected = { 10, 0, 0, 0, 10, 10 };
    for (int row = 0; row < expected.length; row++) {
      Assert.assertEquals("prediction for row " + row, expected[row], preds.vec(0).at(row), 0);
    }
  } finally {
    // Same removal order as before, but now also executed when the test fails.
    if (preds != null) {
      preds.remove();
    }
    if (gbm != null) {
      gbm.remove();
    }
    if (df != null) {
      df.remove();
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method unseenMissing.
/**
 * Checks that generated Java (POJO) scoring agrees with in-cluster scoring when the scored
 * data contains missing values that were never present during training.
 *
 * <p>A multinomial GBM is trained on an all-categorical frame without missing values and is
 * then scored on the training frame and on a second frame generated with
 * {@code missing_fraction = 0.8}.
 */
@Test
public void unseenMissing() {
  GBMModel gbm = null;
  GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
  Frame train = null, test = null, train_preds = null, test_preds = null;
  // Key the model was trained under; set only once the model's key has been swapped.
  Key originalKey = null;
  Scope.enter();
  try {
    train = unseenMissingFrame(0.0, 1235);
    // different test set
    test = unseenMissingFrame(0.8, 4321);

    parms._train = train._key;
    // Train on the outcome
    parms._response_column = "response";
    parms._distribution = DistributionFamily.multinomial;
    parms._max_depth = 20;
    parms._min_rows = 1;
    parms._ntrees = 5;
    parms._seed = 1;

    GBM job = new GBM(parms);
    gbm = job.trainModel().get();
    train_preds = gbm.score(train);
    test_preds = gbm.score(test);

    // Build a POJO, validate same results
    Assert.assertTrue(gbm.testJavaScoring(train, train_preds, 1e-15));

    // Re-key the model before the second Java-scoring check.
    // NOTE(review): presumably this gives the second generated POJO a distinct name - confirm.
    originalKey = gbm._key;
    gbm._key = Key.make(gbm._key + "ha");
    Assert.assertTrue(gbm.testJavaScoring(test, test_preds, 1e-15));
  } finally {
    // The model is still registered under its original key; drop that entry even when the
    // second scoring check fails, since gbm.delete() below works on the swapped key.
    if (originalKey != null) {
      DKV.remove(originalKey);
    }
    if (gbm != null) {
      gbm.delete();
    }
    if (train != null) {
      train.remove();
    }
    if (test != null) {
      test.remove();
    }
    if (train_preds != null) {
      train_preds.remove();
    }
    if (test_preds != null) {
      test_preds.remove();
    }
    Scope.exit();
  }
}

/**
 * Creates a 100-row, 10-column, all-categorical frame with a two-level response column.
 * Only the missing fraction and the data seed vary between callers; the column-type seed
 * is fixed at 1234.
 *
 * @param missingFraction value assigned to {@code CreateFrame.missing_fraction}
 * @param seed value assigned to {@code CreateFrame.seed}
 * @return the generated frame; the caller is responsible for removing it
 */
private static Frame unseenMissingFrame(double missingFraction, int seed) {
  CreateFrame cf = new CreateFrame();
  cf.rows = 100;
  cf.cols = 10;
  cf.integer_range = 1000;
  cf.categorical_fraction = 1.0;
  cf.integer_fraction = 0.0;
  cf.binary_fraction = 0.0;
  cf.time_fraction = 0.0;
  cf.string_fraction = 0.0;
  cf.binary_ones_fraction = 0.0;
  cf.missing_fraction = missingFraction;
  cf.factors = 3;
  cf.response_factors = 2;
  cf.positive_response = false;
  cf.has_response = true;
  cf.seed = seed;
  cf.seed_for_column_types = 1234;
  return cf.execImpl().get();
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testGaussian.
/**
 * Trains a GBM with the gaussian distribution on the Boston housing data (last column as
 * response, fixed seed) and pins the training MSE and mean residual deviance, which are
 * expected to be the same value here.
 */
@Test
public void testGaussian() {
  GBMModel model = null;
  Frame housing = null;
  try {
    housing = parse_test_file("./smalldata/gbm_test/BostonHousing.csv");

    GBMModel.GBMParameters params = new GBMModel.GBMParameters();
    params._train = housing._key;
    params._response_column = housing.lastVecName();
    params._seed = 0xdecaf;
    params._distribution = gaussian;

    model = new GBM(params).trainModel().get();

    ModelMetricsRegression trainingMetrics = (ModelMetricsRegression) model._output._training_metrics;
    final double expected = 2.9423857564;
    Assert.assertEquals(expected, trainingMetrics._MSE, 1e-5);
    Assert.assertEquals(expected, trainingMetrics._mean_residual_deviance, 1e-5);
  } finally {
    if (housing != null) {
      housing.delete();
    }
    if (model != null) {
      // Cross-validation models first, then the model itself.
      model.deleteCrossValidationModels();
      model.delete();
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testStochasticGBMHoldout.
/**
 * Trains one regression GBM per combination of row sample rate, column sample rate and
 * per-tree column sample rate on a 50/50 split of the ecology data set, records each model's
 * validation MSE, and logs the combinations ordered from lowest to highest MSE.
 *
 * <p>Results are kept in a {@link TreeMap} keyed by MSE, so two combinations that produce
 * exactly the same MSE overwrite each other. The test only logs the ranking; the assertions
 * on the worst combination are disabled (see the end of the method).
 */
@Test
public void testStochasticGBMHoldout() {
  Frame tfr = null;
  Key[] ksplits = new Key[0];
  try {
    tfr = parse_test_file("./smalldata/gbm_test/ecology_model.csv");
    SplitFrame sf = new SplitFrame(tfr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("test.hex") });
    // Invoke the job
    sf.exec().get();
    ksplits = sf._destination_frames;

    float[] sample_rates = new float[] { 0.2f, 0.4f, 0.8f, 1.0f };
    float[] col_sample_rates = new float[] { 0.4f, 0.8f, 1.0f };
    float[] col_sample_rates_per_tree = new float[] { 0.4f, 0.6f, 1.0f };

    // validation MSE -> (row sample rate, column sample rate, column sample rate per tree)
    Map<Double, Triple<Float>> hm = new TreeMap<>();
    for (float sample_rate : sample_rates) {
      for (float col_sample_rate : col_sample_rates) {
        for (float col_sample_rate_per_tree : col_sample_rates_per_tree) {
          // Scoped per iteration: if this build fails, the cleanup below must not call
          // delete() again on the model of the previous iteration.
          GBMModel gbm = null;
          Scope.enter();
          try {
            GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
            parms._train = ksplits[0];
            parms._valid = ksplits[1];
            // regression
            parms._response_column = "Angaus";
            parms._seed = 42;
            parms._min_rows = 2;
            parms._max_depth = 12;
            parms._ntrees = 6;
            parms._col_sample_rate = col_sample_rate;
            parms._col_sample_rate_per_tree = col_sample_rate_per_tree;
            parms._sample_rate = sample_rate;

            // Build the model for this combination of sampling rates
            gbm = new GBM(parms).trainModel().get();

            // too slow, but passes (now)
            // // Build a POJO, validate same results
            // Frame pred = gbm.score(tfr);
            // Assert.assertTrue(gbm.testJavaScoring(tfr,pred,1e-15));
            // pred.remove();

            ModelMetricsRegression mm = (ModelMetricsRegression) gbm._output._validation_metrics;
            hm.put(mm.mse(), new Triple<>(sample_rate, col_sample_rate, col_sample_rate_per_tree));
          } finally {
            if (gbm != null) {
              gbm.delete();
            }
            Scope.exit();
          }
        }
      }
    }

    // Only consumed by the disabled assertions below; kept so they can be re-enabled as-is.
    Triple<Float> last = null;
    // iterate over results (min to max MSE) - best to worst
    for (Map.Entry<Double, Triple<Float>> n : hm.entrySet()) {
      Log.info("MSE: " + n.getKey() + ", row sample: " + n.getValue().v1 + ", col sample: " + n.getValue().v2 + ", col sample per tree: " + n.getValue().v3);
      last = n.getValue();
    }
    // worst validation MSE should belong to the most overfit case (1.0, 1.0, 1.0)
    // Assert.assertTrue(last.v1==sample_rates[sample_rates.length-1]);
    // Assert.assertTrue(last.v2==col_sample_rates[col_sample_rates.length-1]);
    // Assert.assertTrue(last.v3==col_sample_rates_per_tree[col_sample_rates_per_tree.length-1]);
  } finally {
    if (tfr != null) {
      tfr.remove();
    }
    for (Key k : ksplits) {
      if (k != null) {
        k.remove();
      }
    }
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testHuber.
/**
 * Trains a GBM with the huber distribution ({@code huber_alpha = 0.9}) on the Boston housing
 * data (last column as response, fixed seed) and pins the training MSE and mean residual
 * deviance.
 */
@Test
public void testHuber() {
  GBMModel model = null;
  Frame housing = null;
  try {
    housing = parse_test_file("./smalldata/gbm_test/BostonHousing.csv");

    GBMModel.GBMParameters params = new GBMModel.GBMParameters();
    params._train = housing._key;
    params._response_column = housing.lastVecName();
    params._seed = 0xdecaf;
    params._distribution = huber;
    // that's the default
    params._huber_alpha = 0.9;

    model = new GBM(params).trainModel().get();

    ModelMetricsRegression trainingMetrics = (ModelMetricsRegression) model._output._training_metrics;
    Assert.assertEquals(4.447062185, trainingMetrics._MSE, 1e-5);
    // Note the looser tolerance on the deviance check.
    Assert.assertEquals(1.962926332, trainingMetrics._mean_residual_deviance, 1e-4);
  } finally {
    if (housing != null) {
      housing.delete();
    }
    if (model != null) {
      // Cross-validation models first, then the model itself.
      model.deleteCrossValidationModels();
      model.delete();
    }
  }
}
Aggregations