use of water.fvec.Vec in project h2o-3 by h2oai.
the class GLRMTest method testSubset.
/**
 * Uploads and parses the ACS census archive, trains a single-iteration GLRM on
 * it, and then row-subsets the model's representation frame through a Rapids
 * expression keyed on a handful of ZCTA codes.
 * Analogous to pyunit_subset_glrm.py.
 *
 * @throws InterruptedException declared for the blocking {@code trainModel().get()} call
 * @throws ExecutionException   declared for the blocking {@code trainModel().get()} call
 */
@Ignore
@Test
public void testSubset() throws InterruptedException, ExecutionException {
  GLRM job = null;
  GLRMModel model = null;
  Frame train;
  InputStream is = null;
  try {
    is = new FileInputStream(FileUtils.getFile("bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip"));
    UploadFileVec.ReadPutStats stats = new UploadFileVec.ReadPutStats();
    UploadFileVec.readPut("train", is, stats);
  } catch (Exception e) {
    // Without the uploaded data nothing below can work; fail here with the
    // real cause instead of continuing into the parse step.
    throw new RuntimeException("Failed to upload bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip", e);
  } finally {
    // Always release the file handle, whether or not the upload succeeded.
    if (is != null) {
      try {
        is.close();
      } catch (Exception closeFailure) {
        closeFailure.printStackTrace();
      }
    }
  }
  ParseDataset.parse(Key.make("train_parsed"), Key.make("train"));
  train = DKV.getGet("train_parsed");
  try {
    Log.info("num chunks: ", train.anyVec().nChunks());
    // Keep a categorical copy of column 0 in its own frame; the Rapids
    // expression below filters on it.
    Vec[] acs_zcta_vec = { train.vec(0).toCategoricalVec() };
    Frame acs_zcta_fr = new Frame(Key.<Frame>make("acs_zcta_fr"), new String[] { "name" }, acs_zcta_vec);
    DKV.put(acs_zcta_fr);
    // Drop column 0 from the training frame and publish the updated frame.
    train.remove(0).remove();
    DKV.put(train);
    GLRMParameters parms = new GLRMParameters();
    parms._train = train._key;
    parms._gamma_x = 0.25;
    parms._gamma_y = 0.5;
    parms._regularization_x = GlrmRegularizer.Quadratic;
    parms._regularization_y = GlrmRegularizer.L1;
    parms._k = 10;
    parms._transform = DataInfo.TransformType.STANDARDIZE;
    parms._max_iterations = 1;
    parms._loss = GlrmLoss.Quadratic;
    try {
      Scope.enter();
      job = new GLRM(parms);
      model = job.trainModel().get();
      // Select columns [0 1] of the representation frame for the rows whose
      // ZCTA code equals one of the six listed values.
      String s = "(tmp= py_4 (rows (cols_py " + model._output._representation_key + " [0 1]) (tmp= py_3 (| (| (| (| (| (== (tmp= py_2 " + acs_zcta_fr._key + ") \"10065\") (== py_2 \"11219\")) (== py_2 \"66753\")) (== py_2 \"84104\")) (== py_2 \"94086\")) (== py_2 \"95014\")))))";
      Rapids.exec(s);
    } catch (Throwable t) {
      t.printStackTrace();
      throw new RuntimeException(t);
    } finally {
      acs_zcta_fr.delete();
      Scope.exit();
    }
  } catch (Throwable t) {
    t.printStackTrace();
    throw new RuntimeException(t);
  } finally {
    if (train != null)
      train.delete();
    if (model != null)
      model.delete();
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class PartialDependenceTest method prostateBinary.
/**
 * Trains a GBM on the prostate data with CAPSULE as the response, then runs
 * PartialDependence with 10 bins against the same frame and logs every
 * resulting table. The test makes no assertions; it only has to complete.
 */
@Test
public void prostateBinary() {
  Frame prostate = null;
  GBMModel gbm = null;
  PartialDependence pdp = null;
  try {
    // Frame: replace each listed column with its categorical counterpart.
    prostate = parse_test_file("smalldata/prostate/prostate.csv");
    final String[] toCategorical = { "RACE", "GLEASON", "DPROS", "DCAPS", "CAPSULE" };
    for (int c = 0; c < toCategorical.length; c++) {
      final String colName = toCategorical[c];
      final Vec original = prostate.remove(colName);
      final Vec categorical = original.toCategoricalVec();
      prostate.add(colName, categorical);
      original.remove();
    }
    DKV.put(prostate);

    // Model: GBM on everything except ID.
    final GBMModel.GBMParameters gbmParms = new GBMModel.GBMParameters();
    gbmParms._train = prostate._key;
    gbmParms._ignored_columns = new String[] { "ID" };
    gbmParms._response_column = "CAPSULE";
    final GBM builder = new GBM(gbmParms);
    gbm = builder.trainModel().get();

    // PartialDependence over the trained model and the training frame.
    pdp = new PartialDependence(Key.<PartialDependence>make());
    pdp._nbins = 10;
    pdp._model_id = (Key) gbm._key;
    pdp._frame_id = prostate._key;
    pdp.execImpl().get();
    for (TwoDimTable table : pdp._partial_dependence_data) {
      Log.info(table);
    }
  } finally {
    if (prostate != null) {
      prostate.remove();
    }
    if (gbm != null) {
      gbm.remove();
    }
    if (pdp != null) {
      pdp.remove();
    }
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class PartialDependenceTest method prostateRegression.
/**
 * Regression variant of the partial-dependence smoke test: a GBM is trained on
 * the prostate data with AGE as the response, PartialDependence is executed
 * with 10 bins, and each produced table is logged. No assertions are made.
 */
@Test
public void prostateRegression() {
  Frame data = null;
  GBMModel trained = null;
  PartialDependence pd = null;
  try {
    data = parse_test_file("smalldata/prostate/prostate.csv");

    // Convert the listed columns to categoricals, one at a time.
    final String[] categoricalColumns = { "RACE", "GLEASON", "DPROS", "DCAPS", "CAPSULE" };
    for (String column : categoricalColumns) {
      final Vec before = data.remove(column);
      final Vec after = before.toCategoricalVec();
      data.add(column, after);
      before.remove();
    }
    DKV.put(data);

    // Train the GBM, ignoring the ID column.
    final GBMModel.GBMParameters settings = new GBMModel.GBMParameters();
    settings._train = data._key;
    settings._ignored_columns = new String[] { "ID" };
    settings._response_column = "AGE";
    trained = new GBM(settings).trainModel().get();

    // Run partial dependence against the same frame.
    pd = new PartialDependence(Key.<PartialDependence>make());
    pd._nbins = 10;
    pd._model_id = (Key) trained._key;
    pd._frame_id = data._key;
    pd.execImpl().get();

    for (TwoDimTable result : pd._partial_dependence_data) {
      Log.info(result);
    }
  } finally {
    if (data != null) {
      data.remove();
    }
    if (trained != null) {
      trained.remove();
    }
    if (pd != null) {
      pd.remove();
    }
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class SVDTest method testCatOnlyPUBDEV3988.
/**
 * Make sure POJO works if the model is only built from categorical variables
 * (no numeric columns).
 *
 * Every non-categorical column is stripped from the parsed frame before a
 * randomized SVD is trained on it; the generated Java scoring code is then
 * checked against the in-cluster scoring result within TOLERANCE.
 *
 * @throws InterruptedException declared for the blocking {@code trainModel().get()} call
 * @throws ExecutionException   declared for the blocking {@code trainModel().get()} call
 */
@Test
public void testCatOnlyPUBDEV3988() throws InterruptedException, ExecutionException {
  SVDModel model = null;
  Frame train = null, score = null;
  try {
    train = parse_test_file(Key.make("prostate_cat.hex"), "smalldata/prostate/prostate_cat.csv");
    // Walk backwards so removals do not shift the indices still to be visited.
    // The bound is inclusive of column 0: the previous "i > 0" never inspected
    // the first column, so a non-categorical column there would have survived.
    for (int i = train.numCols() - 1; i >= 0; i--) {
      Vec v = train.vec(i);
      if (v.get_type() != Vec.T_CAT) {
        train.remove(i);
        Vec.remove(v._key);
      }
    }
    DKV.put(train);
    SVDParameters parms = new SVDParameters();
    parms._train = train._key;
    parms._nv = 2;
    parms._only_v = false;
    parms._keep_u = true;
    parms._svd_method = SVDParameters.Method.Randomized;
    parms._impute_missing = true;
    parms._max_iterations = 20;
    parms._save_v_frame = false;
    model = new SVD(parms).trainModel().get();
    score = model.score(train);
    // Build a POJO, check results with original SVD
    Assert.assertTrue(model.testJavaScoring(train, score, TOLERANCE));
  } finally {
    if (train != null)
      train.delete();
    if (score != null)
      score.delete();
    if (model != null)
      model.delete();
  }
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class DRFGridTest method testCollisionOfDRFParamsChecksum.
/**
 * Checks that two DRF parameter sets sharing the same training frame, response
 * column, seed and sample rate, but differing in mtries, max depth and tree
 * count, do not produce the same checksum.
 */
@Test
public void testCollisionOfDRFParamsChecksum() {
  final String response = "economy (mpg)";
  final long seed = -4522296119273841674L;
  final float sampleRate = 0.6499997f;

  Frame cars = null;
  try {
    cars = parse_test_file("smalldata/junit/cars.csv");
    cars.remove("name").remove();
    // response to last column
    final Vec responseVec = cars.remove(response);
    cars.add(response, responseVec);
    DKV.put(cars);

    final DRFModel.DRFParameters first = new DRFModel.DRFParameters();
    first._train = cars._key;
    first._response_column = response;
    first._seed = seed;
    first._mtries = 3;
    first._max_depth = 15;
    first._ntrees = 9;
    first._sample_rate = sampleRate;

    final DRFModel.DRFParameters second = new DRFModel.DRFParameters();
    second._train = cars._key;
    second._response_column = response;
    second._seed = seed;
    second._mtries = 1;
    second._max_depth = 1;
    second._ntrees = 13;
    second._sample_rate = sampleRate;

    final long firstChecksum = first.checksum();
    final long secondChecksum = second.checksum();
    Assert.assertNotEquals("Checksums shoudl be different", firstChecksum, secondChecksum);
  } finally {
    if (cars != null) {
      cars.remove();
    }
  }
}
Aggregations