use of water.fvec.Frame in project h2o-3 by h2oai.
the class AggregatorTest method testChunks.
/**
 * Trains an Aggregator on covtype.20k once as a baseline, then for each of
 * 1, 2, 5, 10, 50 and 100 builds a rebalanced copy of the frame, retrains with
 * identical parameters, and asserts that the number of exemplars matches the
 * baseline and satisfies {@code checkNumExemplars}.
 *
 * All frames and models are released in {@code finally} blocks so a failing
 * assertion does not leave keys behind.
 */
@Test
public void testChunks() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");
    try {
        AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
        parms._train = frame._key;
        parms._target_num_exemplars = 137;
        parms._rel_tol_num_exemplars = 0.05;
        long start = System.currentTimeMillis();
        // 0.418
        AggregatorModel agg = new Aggregator(parms).trainModel().get();
        System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
        // Captured before the model is removed; every later run is compared against it.
        final int baselineExemplars;
        try {
            agg.checkConsistency();
            checkNumExemplars(agg);
            baselineExemplars = agg._exemplars.length;
        } finally {
            Frame output = agg._output._output_frame.get();
            if (output != null) {
                output.remove();
            }
            agg.remove();
        }
        for (int i : new int[] { 1, 2, 5, 10, 50, 100 }) {
            Key key = Key.make();
            RebalanceDataSet rb = new RebalanceDataSet(frame, key, i);
            H2O.submitTask(rb);
            rb.join();
            Frame rebalanced = DKV.get(key).get();
            AggregatorModel agg2 = null;
            try {
                parms = new AggregatorModel.AggregatorParameters();
                // NOTE(review): training still uses the original frame, so 'rebalanced' is
                // built and deleted without ever being aggregated. Presumably this was meant
                // to be rebalanced._key -- confirm that the exact-equality assertion below
                // still holds across chunk layouts before switching.
                parms._train = frame._key;
                parms._target_num_exemplars = 137;
                parms._rel_tol_num_exemplars = 0.05;
                start = System.currentTimeMillis();
                // 0.373 0.504 0.357 0.454 0.368 0.355
                agg2 = new Aggregator(parms).trainModel().get();
                System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
                agg2.checkConsistency();
                Log.info("Number of exemplars for " + i + " chunks: " + agg2._exemplars.length);
                Assert.assertEquals("Exemplar count differs from baseline for " + i + " chunks",
                        baselineExemplars, agg2._exemplars.length);
                // Validate the model trained in this iteration, not the already-removed baseline.
                checkNumExemplars(agg2);
            } finally {
                rebalanced.delete();
                if (agg2 != null) {
                    Frame output2 = agg2._output._output_frame.get();
                    if (output2 != null) {
                        output2.remove();
                    }
                    agg2.remove();
                }
            }
        }
    } finally {
        frame.delete();
    }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class AggregatorTest method testAggregator.
/**
 * Generates a 100000-row, 2-column frame with {@code CreateFrame} (seed 1234),
 * trains an Aggregator on it, prints {@code output.toTwoDimTable(0, 10)} for the
 * model's output frame, and validates the model with {@code checkConsistency}
 * and {@code checkNumExemplars}.
 *
 * @param max value assigned to {@code _target_num_exemplars}
 */
public void testAggregator(int max) {
    CreateFrame cf = new CreateFrame();
    cf.rows = 100000;
    cf.cols = 2;
    cf.categorical_fraction = 0.1;
    cf.integer_fraction = 0.3;
    cf.real_range = 100;
    cf.integer_range = 100;
    cf.seed = 1234;
    Frame frame = cf.execImpl().get();
    AggregatorModel agg = null;
    try {
        AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
        parms._train = frame._key;
        parms._target_num_exemplars = max;
        long start = System.currentTimeMillis();
        agg = new Aggregator(parms).trainModel().get();
        System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
        agg.checkConsistency();
        Frame output = agg._output._output_frame.get();
        System.out.println(output.toTwoDimTable(0, 10));
        checkNumExemplars(agg);
    } finally {
        // Cleanup runs even when a check above fails, so no keys are left behind.
        frame.delete();
        if (agg != null) {
            Frame output = agg._output._output_frame.get();
            if (output != null) {
                output.remove();
            }
            agg.remove();
        }
    }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class AggregatorTest method testCovtype.
/**
 * Trains an Aggregator on covtype.20k with a target of 500 exemplars and a
 * relative tolerance of 0.05, logs the output frame, and validates the model
 * with {@code checkConsistency} and {@code checkNumExemplars}.
 */
@Test
public void testCovtype() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");
    AggregatorModel agg = null;
    try {
        AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
        parms._train = frame._key;
        parms._target_num_exemplars = 500;
        parms._rel_tol_num_exemplars = 0.05;
        long start = System.currentTimeMillis();
        // 0.179
        agg = new Aggregator(parms).trainModel().get();
        System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
        agg.checkConsistency();
        Frame output = agg._output._output_frame.get();
        Log.info("Exemplars: " + output.toString());
        checkNumExemplars(agg);
    } finally {
        // Cleanup runs even when a check above fails, so no keys are left behind.
        frame.delete();
        if (agg != null) {
            Frame output = agg._output._output_frame.get();
            if (output != null) {
                output.remove();
            }
            agg.remove();
        }
    }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testNACategorical.
// PUBDEV-2822
/**
 * Parses a six-row, two-column CSV whose first column is empty in the first and
 * last rows, trains a single-tree GBM (learn rate 1, min_rows 1) on response
 * column "C2", and checks that scoring the training frame reproduces the
 * response values (0 everywhere except -10 in the fifth row) to within 1e-6
 * and that {@code testJavaScoring} agrees with the in-cluster predictions.
 */
@Test
public void testNACategorical() {
    String xy = ",0\nA,0\nB,0\nA,0\nD,-10\n,0";
    Key tr = Key.make("train");
    Frame df = ParseDataset.parse(tr, makeByteVec(Key.make("xy"), xy));
    GBMModel gbm = null;
    Frame preds = null;
    try {
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tr;
        parms._response_column = "C2";
        parms._min_rows = 1;
        parms._learn_rate = 1;
        parms._ntrees = 1;
        gbm = new GBM(parms).trainModel().get();
        preds = gbm.score(df);
        Log.info(df);
        Log.info(preds);
        Assert.assertTrue(gbm.testJavaScoring(df, preds, 1e-15));
        // Expected prediction per row, in the same order as the rows of 'xy'.
        double[] expected = { 0, 0, 0, 0, -10, 0 };
        for (int row = 0; row < expected.length; row++) {
            Assert.assertEquals("prediction for row " + row, expected[row], preds.vec(0).at(row), 1e-6);
        }
    } finally {
        // Release everything even when an assertion fails.
        if (preds != null) {
            preds.remove();
        }
        if (gbm != null) {
            gbm.remove();
        }
        df.remove();
    }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class GBMTest method testGBMTrainTest.
// Test-on-Train. Slow test, needed to build a good model.
// Trains a 5-tree GBM on ecology_model.csv (with the "Site" column dropped and
// "Angaus" converted to categorical), then reads the binomial metrics computed
// on ecology_eval.csv and asserts on the AUC range and the default confusion matrix.
@Test
public void testGBMTrainTest() {
    GBMModel gbm = null;
    Frame train = null;
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    try {
        Scope.enter();
        parms._valid = parse_test_file("smalldata/gbm_test/ecology_eval.csv")._key;
        train = parse_test_file("smalldata/gbm_test/ecology_model.csv");
        // Remove unique ID
        train.remove("Site").remove();
        // Convert response to categorical
        int ci = train.find("Angaus");
        Scope.track(train.replace(ci, train.vecs()[ci].toCategoricalVec()));
        // Update frame after hacking it
        DKV.put(train);
        parms._train = train._key;
        // Train on the outcome
        parms._response_column = "Angaus";
        parms._ntrees = 5;
        parms._max_depth = 5;
        parms._min_rows = 10;
        parms._nbins = 100;
        parms._learn_rate = .2f;
        parms._distribution = DistributionFamily.multinomial;
        gbm = new GBM(parms).trainModel().get();
        hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(gbm, parms.valid());
        double auc = mm._auc._auc;
        // Sanely good model
        Assert.assertTrue(0.83 <= auc && auc < 0.87);
        double[][] cm = mm._auc.defaultCM();
        Assert.assertArrayEquals(ard(ard(349, 44), ard(43, 64)), cm);
    } finally {
        try {
            // Either key may still be null if parsing failed early; guarding avoids an
            // NPE here that would mask the original failure.
            if (parms._train != null) {
                parms._train.remove();
            } else if (train != null) {
                // Frame was parsed but the failure happened before its key was assigned.
                train.remove();
            }
            if (parms._valid != null) {
                parms._valid.remove();
            }
            if (gbm != null) {
                gbm.delete();
            }
        } finally {
            // Always leave the scope, even if cleanup above throws.
            Scope.exit();
        }
    }
}
Aggregations