use of water.fvec.Frame in project h2o-3 by h2oai.
the class DRFTest method testReproducibility.
//
/**
 * Checks that DRF training is reproducible: the same parameters and seed are used to build
 * {@code N} single-tree models on a frame rebalanced to 256 chunks, and the last scored
 * training MSE of every run must match the first run's value.
 */
@Test
public void testReproducibility() {
  Frame tfr = null;
  final int N = 5;
  double[] mses = new double[N];
  Scope.enter();
  try {
    // Load data, hack frames
    tfr = parse_test_file("smalldata/covtype/covtype.20k.data");
    // rebalance to 256 chunks
    Key dest = Key.make("df.rebalanced.hex");
    RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
    H2O.submitTask(rb);
    rb.join();
    tfr.delete();
    tfr = DKV.get(dest).get();
    for (int i = 0; i < N; ++i) {
      DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
      parms._train = tfr._key;
      parms._response_column = "C55";
      parms._nbins = 1000;
      parms._ntrees = 1;
      parms._max_depth = 8;
      parms._mtries = -1;
      parms._min_rows = 10;
      parms._seed = 1234;
      DRFModel drf = null;
      try {
        // Every iteration builds a model with identical settings; all MSEs should be equal
        drf = new DRF(parms).trainModel().get();
        assertEquals(parms._ntrees, drf._output._ntrees);
        mses[i] = drf._output._scored_train[drf._output._scored_train.length - 1]._mse;
      } finally {
        // Delete the model even when the assertion above fails
        if (drf != null) {
          drf.delete();
        }
      }
    }
  } finally {
    if (tfr != null) {
      tfr.remove();
    }
    // Exit the scope on every path so a failure does not leave it unbalanced
    Scope.exit();
  }
  for (int i = 0; i < mses.length; ++i) {
    Log.info("trial: " + i + " -> MSE: " + mses[i]);
  }
  for (double mse : mses) {
    assertEquals(mses[0], mse, 1e-15);
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class DRFTest method testRowWeights.
/**
 * Trains a small DRF model on {@code smalldata/junit/weights.csv} using the "weight" column
 * as row weights, then checks AUC, MSE and logloss both for the model's training metrics
 * and for metrics obtained by scoring the training frame again.
 */
@Test
public void testRowWeights() {
  Frame tfr = null;
  Frame pred = null;
  DRFModel drf = null;
  Scope.enter();
  try {
    tfr = parse_test_file("smalldata/junit/weights.csv");
    DKV.put(tfr);
    DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
    parms._train = tfr._key;
    parms._response_column = "response";
    parms._weights_column = "weight";
    parms._seed = 234;
    parms._min_rows = 1;
    parms._max_depth = 2;
    parms._ntrees = 3;
    // Build the model under test
    drf = new DRF(parms).trainModel().get();
    // OOB
    // Reduced number of rows changes the row sampling -> results differ
    ModelMetricsBinomial mm = (ModelMetricsBinomial) drf._output._training_metrics;
    assertEquals(1.0, mm.auc_obj()._auc, 1e-8);
    assertEquals(0.05823863636363636, mm.mse(), 1e-8);
    assertEquals(0.21035264541934587, mm.logloss(), 1e-6);
    // test set scoring (on the same dataset, but without normalizing the weights)
    pred = drf.score(parms.train());
    hex.ModelMetricsBinomial mm2 = hex.ModelMetricsBinomial.getFromDKV(drf, parms.train());
    // Non-OOB
    assertEquals(1, mm2.auc_obj()._auc, 1e-8);
    assertEquals(0.0154320987654321, mm2.mse(), 1e-8);
    assertEquals(0.08349430638608361, mm2.logloss(), 1e-8);
  } finally {
    // Remove the prediction frame here so a failed assertion does not leak it
    if (pred != null) {
      pred.remove();
    }
    if (tfr != null) {
      tfr.remove();
    }
    if (drf != null) {
      drf.delete();
    }
    Scope.exit();
  }
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class FramesV3 method fillFromImpl.
/**
 * Copies the request-level fields of {@code f} into this schema and, when {@code f.frames}
 * is present, wraps each frame in a {@link FrameV3} built with the requested row offset
 * and row count.
 *
 * @param f the implementation object to read from
 * @return this schema instance
 */
@Override
public FramesV3 fillFromImpl(Frames f) {
  this.frame_id = new KeyV3.FrameKeyV3(f.frame_id);
  // NOTE: this is needed for request handling, but isn't really part of state
  this.column = f.column;
  this.find_compatible_models = f.find_compatible_models;

  // Nothing to convert when the implementation carries no frames
  if (f.frames == null) {
    return this;
  }

  final int frameCount = f.frames.length;
  this.frames = new FrameV3[frameCount];
  for (int idx = 0; idx < frameCount; idx++) {
    this.frames[idx] = new FrameV3(f.frames[idx], f.row_offset, f.row_count);
  }
  return this;
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class ModelMetricsBaseV3 method fillFromImpl.
/**
 * Fills this schema from a {@code ModelMetrics} object: sets the model key, category and
 * checksum when a model is available, sets the frame key and checksum when a frame is
 * available, copies the remaining properties via {@code PojoUtils.copyProperties}, and
 * finally stores the RMSE.
 *
 * @param modelMetrics the metrics object to read from
 * @return this schema instance, cast to {@code S}
 */
@Override
public S fillFromImpl(ModelMetrics modelMetrics) {
  // Model-related fields are only populated when the metrics still reference a model
  final Model sourceModel = modelMetrics.model();
  if (sourceModel != null) {
    this.model = new ModelKeyV3(sourceModel._key);
    this.model_category = sourceModel._output.getModelCategory();
    this.model_checksum = sourceModel.checksum();
  }

  // Frame-related fields are only populated when the metrics still reference a frame
  final Frame sourceFrame = modelMetrics.frame();
  if (sourceFrame != null) {
    this.frame = new FrameKeyV3(sourceFrame._key);
    this.frame_checksum = sourceFrame.checksum();
  }

  // Names of the fields assigned by hand above; presumably excluded from the generic copy
  // (TODO confirm against PojoUtils.copyProperties)
  final String[] manuallyHandled = new String[] { "model", "model_category", "model_checksum", "frame", "frame_checksum" };
  PojoUtils.copyProperties(this, modelMetrics, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, manuallyHandled);

  RMSE = modelMetrics.rmse();
  return (S) this;
}
use of water.fvec.Frame in project h2o-3 by h2oai.
the class AstCut method apply.
/**
 * Bins a single non-categorical column into intervals delimited by cut points and returns a
 * one-column frame of bin indices whose domain is either the supplied labels or generated
 * interval names.
 *
 * Arguments are read from {@code asts}:
 * [1] the input frame, [2] the cut points (or a single number giving the number of pieces),
 * [3] optional labels, [4] flag: include the lowest cut value, [5] flag: intervals closed
 * on the right, [6] number of decimal digits used when rounding the cuts (capped at 12).
 *
 * @throws IllegalArgumentException if the frame does not have exactly one non-categorical
 *         column, if a single cut value below 2 is given, or if the number of labels does
 *         not match the number of bins
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
Frame fr = stk.track(asts[1].exec(env)).getFrame();
double[] cuts = check(asts[2]);
Arrays.sort(cuts);
String[] labels = check2(asts[3]);
final boolean lowest = asts[4].exec(env).getNum() == 1;
final boolean rite = asts[5].exec(env).getNum() == 1;
// cap at 12
final int digits = Math.min((int) asts[6].exec(env).getNum(), 12);
if (fr.vecs().length != 1 || fr.vecs()[0].isCategorical())
throw new IllegalArgumentException("First argument must be a numeric column vector");
double fmin = fr.anyVec().min();
double fmax = fr.anyVec().max();
// c(0,10,100) -> 2 bins (0,10] U (10, 100]
int nbins = cuts.length - 1;
// width is only assigned and used inside the single-number branch below
double width;
if (nbins == 0) {
if (cuts[0] < 2)
throw new IllegalArgumentException("The number of cuts must be >= 2. Got: " + cuts[0]);
// in this case, cut the vec into _cuts[0] many pieces of equal length
nbins = (int) Math.floor(cuts[0]);
width = (fmax - fmin) / nbins;
// NOTE(review): cuts gets nbins entries here, i.e. nbins - 1 intervals, while the domain
// array below is sized nbins; when labels is null the last domain entry stays unset.
// Confirm whether this is intended.
cuts = new double[nbins];
// The outermost cuts are pushed 0.1% of the data range beyond the column min and max
cuts[0] = fmin - 0.001 * (fmax - fmin);
for (int i = 1; i < cuts.length; ++i) cuts[i] = (i == cuts.length - 1) ? (fmax + 0.001 * (fmax - fmin)) : (fmin + i * width);
}
// if(width == 0) throw new IllegalArgumentException("Data vector is constant!");
if (labels != null && labels.length != nbins)
throw new IllegalArgumentException("`labels` vector does not match the number of cuts.");
// Construct domain names from _labels or bin intervals if _labels is null
// cutz refers to the same array as cuts, so the rounding below is also seen through cutz
final double[] cutz = cuts;
// first round _cuts to dig.lab decimals: example floor(2.676*100 + 0.5) / 100
for (int i = 0; i < cuts.length; ++i) cuts[i] = Math.floor(cuts[i] * Math.pow(10, digits) + 0.5) / Math.pow(10, digits);
String[][] domains = new String[1][nbins];
if (labels == null) {
// The first interval opens with "[" when the lowest value is included
domains[0][0] = (lowest ? "[" : left(rite)) + cuts[0] + "," + cuts[1] + rite(rite);
for (int i = 1; i < (cuts.length - 1); ++i) domains[0][i] = left(rite) + cuts[i] + "," + cuts[i + 1] + rite(rite);
} else
domains[0] = labels;
Frame fr2 = new MRTask() {
@Override
public void map(Chunk c, NewChunk nc) {
int rows = c._len;
for (int r = 0; r < rows; ++r) {
double x = c.atd(r);
// Emit NaN for NaN inputs and for values outside the outermost cuts; whether a value
// matching the first/last cut counts as outside depends on the lowest and rite flags
if (Double.isNaN(x) || (lowest && x < cutz[0]) || (!lowest && (x < cutz[0] || MathUtils.equalsWithinOneSmallUlp(x, cutz[0]))) || (rite && x > cutz[cutz.length - 1]) || (!rite && (x > cutz[cutz.length - 1] || MathUtils.equalsWithinOneSmallUlp(x, cutz[cutz.length - 1]))))
nc.addNum(Double.NaN);
else {
// Otherwise emit the index of the first interval whose upper cut bounds x
// (upper bound inclusive when rite is true, exclusive otherwise)
for (int i = 1; i < cutz.length; ++i) {
if (rite) {
if (x <= cutz[i]) {
nc.addNum(i - 1);
break;
}
} else if (x < cutz[i]) {
nc.addNum(i - 1);
break;
}
}
}
}
}
}.doAll(1, Vec.T_NUM, fr).outputFrame(fr.names(), domains);
return new ValFrame(fr2);
}
Aggregations