use of hex.quantile.Quantile in project h2o-3 by h2oai.
the class GainsLift method init.
/**
 * Validates the label / prediction / weight vectors and fills {@code _quantiles}
 * with the distinct prediction quantiles in descending order.
 *
 * @param job optional enclosing job; when non-null and not yet done, the quantile
 *            model is trained nested inside it, otherwise a standalone build is run
 * @throws IllegalArgumentException if labels or predictions are missing, differ in
 *         length, or have an unsupported type, or if the weights are not numeric
 */
private void init(Job job) throws IllegalArgumentException {
  // Check for missing inputs BEFORE touching _labels; the conversion below would
  // otherwise fail with a NullPointerException instead of the documented exception.
  if (_labels == null || _preds == null)
    throw new IllegalArgumentException("Missing actualLabels or predictedProbs!");
  _labels = _labels.toCategoricalVec();
  if (_labels.length() != _preds.length())
    throw new IllegalArgumentException("Both arguments must have the same length (" + _labels.length() + "!=" + _preds.length() + ")!");
  if (!_labels.isInt())
    throw new IllegalArgumentException("Actual column must be integer class labels!");
  if (_labels.cardinality() != -1 && _labels.cardinality() != 2)
    throw new IllegalArgumentException("Actual column must contain binary class labels, but found cardinality " + _labels.cardinality() + "!");
  if (_preds.isCategorical())
    throw new IllegalArgumentException("Predicted probabilities cannot be class labels, expect probabilities.");
  if (_weights != null && !_weights.isNumeric())
    throw new IllegalArgumentException("Observation weights must be numeric.");
  // The vectors are from different groups => align them, but properly delete it after computation
  if (!_labels.group().equals(_preds.group())) {
    _preds = _labels.align(_preds);
    Scope.track(_preds);
    if (_weights != null) {
      _weights = _labels.align(_weights);
      Scope.track(_weights);
    }
  }
  // The fast path is switched off; only the accurate branch below ever runs.
  boolean fast = false;
  if (fast) {
    // FAST VERSION: single-pass, only works with the specific pre-computed quantiles from rollupstats
    assert (_groups == 10);
    assert (Arrays.equals(Vec.PERCENTILES, // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15, 16
        new double[] { 0.001, 0.01, 0.1, 0.2, 0.25, 0.3, 1.0 / 3.0, 0.4, 0.5, 0.6, 2.0 / 3.0, 0.7, 0.75, 0.8, 0.9, 0.99, 0.999 }));
    //HACK: hardcoded quantiles for simplicity (0.9,0.8,...,0.1,0)
    //might do a full pass over the Vec
    double[] rq = _preds.pctiles();
    _quantiles = new double[] { rq[14], rq[13], rq[11], rq[9], rq[8], rq[7], rq[5], rq[3], rq[2], 0 };
  } else {
    // ACCURATE VERSION: multi-pass
    Frame fr = null;
    QuantileModel qm = null;
    try {
      QuantileModel.QuantileParameters qp = new QuantileModel.QuantileParameters();
      if (_weights == null) {
        fr = new Frame(Key.<Frame>make(), new String[] { "predictions" }, new Vec[] { _preds });
      } else {
        fr = new Frame(Key.<Frame>make(), new String[] { "predictions", "weights" }, new Vec[] { _preds, _weights });
        qp._weights_column = "weights";
      }
      // The quantile builder looks the training frame up by key, so publish it first.
      DKV.put(fr);
      qp._train = fr._key;
      if (_groups > 0) {
        qp._probs = new double[_groups];
        for (int i = 0; i < _groups; ++i) {
          // This is 0.9, 0.8, 0.7, 0.6, ..., 0.1, 0 for 10 groups
          qp._probs[i] = (_groups - i - 1.) / _groups;
        }
      } else {
        qp._probs = new double[] { 0.99, 0.98, 0.97, 0.96, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0 };
      }
      qm = job != null && !job.isDone() ? new Quantile(qp, job).trainModelNested(null) : new Quantile(qp).trainModel().get();
      _quantiles = qm._output._quantiles[0];
      // Collapse duplicate thresholds: the sorted set drops repeats and the
      // descending iterator restores the high-to-low order.
      TreeSet<Double> hs = new TreeSet<>();
      for (double d : _quantiles) hs.add(d);
      _quantiles = new double[hs.size()];
      Iterator<Double> it = hs.descendingIterator();
      int i = 0;
      while (it.hasNext()) _quantiles[i++] = it.next();
    } finally {
      // Always drop the temporary model and the frame key, even when training fails.
      if (qm != null)
        qm.remove();
      if (fr != null)
        DKV.remove(fr._key);
    }
  }
}
use of hex.quantile.Quantile in project h2o-3 by h2oai.
the class AstMedian method median.
/**
 * Computes the median of a frame by training a Quantile model at probability 0.5
 * and reading the first quantile of the first column.
 *
 * @param fr             frame to summarize; if it has no key, a temporary keyed copy
 *                       sharing the same names and vecs is published for the build
 * @param combine_method combine method handed to the quantile parameters
 * @return the value at {@code _output._quantiles[0][0]} of the trained model
 */
public static double median(Frame fr, QuantileModel.CombineMethod combine_method) {
  // Frame needs a Key for Quantile, might not have one from rapids
  Key tk = null;
  if (fr._key == null) {
    DKV.put(tk = Key.make(), fr = new Frame(tk, fr.names(), fr.vecs()));
  }
  QuantileModel q = null;
  try {
    // Quantiles to get the median
    QuantileModel.QuantileParameters parms = new QuantileModel.QuantileParameters();
    parms._probs = new double[] { 0.5 };
    parms._train = fr._key;
    parms._combine_method = combine_method;
    q = new Quantile(parms).trainModel().get();
    return q._output._quantiles[0][0];
  } finally {
    // Clean up even when training throws, so the temporary key never lingers in the DKV.
    if (q != null) {
      q.delete();
    }
    if (tk != null) {
      DKV.remove(tk);
    }
  }
}
use of hex.quantile.Quantile in project h2o-3 by h2oai.
the class AstQtile method apply.
/**
 * Computes quantiles of the frame produced by {@code asts[1]} and returns them as
 * a new frame: column 0 holds the probabilities, the following columns hold the
 * per-column quantiles (the optional weights column is skipped).
 *
 * Arguments are read as: {@code asts[2]} probabilities, {@code asts[3]} combine
 * method name, {@code asts[4]} weights column name ("_" meaning none).
 *
 * @throws IllegalArgumentException if a probability lies outside [0, 1] or the
 *         combine method name does not match a {@code CombineMethod} constant
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  QuantileModel.QuantileParameters parms = new QuantileModel.QuantileParameters();
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  // Validate every argument BEFORE publishing anything to the DKV, so a bad
  // argument cannot leave the temporary frame key behind.
  parms._probs = ((AstNumList) asts[2]).expand();
  for (double d : parms._probs) {
    if (d < 0 || d > 1)
      throw new IllegalArgumentException("Probability must be between 0 and 1: " + d);
  }
  String inter = asts[3].exec(env).getStr();
  // Locale.ROOT keeps the upper-casing stable (e.g. 'i' -> 'I') regardless of the JVM default locale.
  parms._combine_method = QuantileModel.CombineMethod.valueOf(inter.toUpperCase(java.util.Locale.ROOT));
  String weightsColumn = asts[4].str();
  parms._weights_column = weightsColumn.equals("_") ? null : weightsColumn;
  // Force a bogus Key for Quantiles ModelBuilder
  Frame fr_wkey = new Frame(fr);
  DKV.put(fr_wkey);
  parms._train = fr_wkey._key;
  Job j = null;
  QuantileModel q = null;
  try {
    // Compute Quantiles
    j = new Quantile(parms).trainModel();
    q = (QuantileModel) j.get();
    // Reshape all outputs as a Frame, with probs in col 0 and the
    // quantiles in cols 1 thru fr.numCols() - except the optional weights vec
    int ncols = fr.numCols();
    if (parms._weights_column != null)
      ncols--;
    Vec[] vecs = new Vec[1 + /*1 more for the probs themselves*/ ncols];
    String[] names = new String[vecs.length];
    vecs[0] = Vec.makeCon(null, parms._probs);
    names[0] = "Probs";
    // Becomes 1 once the weights column has been reached, shifting later name lookups past it.
    int w = 0;
    for (int i = 0; i < vecs.length - 1; ++i) {
      if (fr._names[i].equals(parms._weights_column))
        w = 1;
      vecs[i + 1] = Vec.makeCon(null, q._output._quantiles[i]);
      names[i + 1] = fr._names[w + i] + "Quantiles";
    }
    return new ValFrame(new Frame(names, vecs));
  } finally {
    // Release the model, the job key and the bogus frame key on every exit path.
    if (q != null)
      q.delete();
    if (j != null)
      DKV.remove(j._key);
    DKV.remove(fr_wkey._key);
  }
}
use of hex.quantile.Quantile in project h2o-3 by h2oai.
the class WorkFlowTest method testWorkFlow.
// End-to-end workflow test:
// 1- load set of files, train, test, holdout
// 2- light data munging
// 3- build model on train; using test as validation
// 4- score on holdout set
//
// If files are missing, silently fail - as the files are big and this is not
// yet a junit test
/**
 * Drives the whole workflow on the given input files inside a single Scope.
 * Returns immediately (after leaving the Scope) when {@code load_files} yields null.
 *
 * @param files input files handed to {@code load_files}
 */
private void testWorkFlow(String[] files) {
  try {
    Scope.enter();

    // ---- Step 1: load the raw data ----
    final Frame rawData = load_files("data.hex", files);
    if (rawData == null) {
      return;
    }

    // ---- Step 2: light munging ----
    // Append the columns produced by TimeSplit from the start-time column.
    final Vec startTimes = rawData.vec("starttime");
    rawData.add(new TimeSplit().doIt(startTimes));

    // Large group-by: count bike starts per station per day, and time it.
    final Vec dayColumn = rawData.vec("Days");
    final long groupByStart = System.currentTimeMillis();
    final Frame bikesPerDay = new CountBikes(dayColumn)
        .doAll(dayColumn, rawData.vec("start station name"))
        .makeFrame(Key.make("bph.hex"));
    final long groupByMillis = System.currentTimeMillis() - groupByStart;
    System.out.println("Groupby took " + groupByMillis);
    System.out.println(bikesPerDay);
    System.out.println(bikesPerDay.toString(10000, 20));
    rawData.remove();

    // Quantiles over the grouped frame, printed and then discarded.
    final QuantileModel.QuantileParameters quantileParams = new QuantileModel.QuantileParameters();
    quantileParams._train = bikesPerDay._key;
    final Job<QuantileModel> quantileJob = new Quantile(quantileParams).trainModel();
    final QuantileModel quantileModel = quantileJob.get();
    quantileJob.remove();
    System.out.println(Arrays.deepToString(quantileModel._output._quantiles));
    quantileModel.remove();

    // Shuffle-split into train / test / holdout frames.
    final Key[] splitKeys = { Key.make("train.hex"), Key.make("test.hex"), Key.make("hold.hex") };
    final double[] splitRatios = { 0.6, 0.3, 0.1 };
    final Frame[] splits = ShuffleSplitFrame.shuffleSplitFrame(bikesPerDay, splitKeys, splitRatios, 1234567689L);
    final Frame train = splits[0];
    final Frame test = splits[1];
    final Frame hold = splits[2];
    bikesPerDay.remove();
    System.out.println(train);
    System.out.println(test);

    // ---- Step 3: build models on train, validating on test ----
    // Gradient Boosting Machine
    final GBMModel.GBMParameters gbmParams = new GBMModel.GBMParameters();
    gbmParams._train = train._key;                           // base Model.Parameters
    gbmParams._valid = test._key;
    gbmParams._score_each_iteration = false;                 // default is false
    gbmParams._response_column = "bikes";                    // SupervisedModel.Parameters
    gbmParams._ntrees = 500;                                 // default is 50, 1000 is 0.90, 10000 is 0.91
    gbmParams._max_depth = 6;                                // default is 5
    gbmParams._min_rows = 10;                                // default
    gbmParams._nbins = 20;                                   // default
    gbmParams._distribution = DistributionFamily.gaussian;   // default
    gbmParams._learn_rate = 0.1f;                            // default
    // Train and block until the model is available.
    final Job<GBMModel> gbmJob = new GBM(gbmParams).trainModel();
    final GBMModel gbmModel = gbmJob.get();
    gbmJob.remove();

    // Generalized Linear Model on the same split
    final GLMModel.GLMParameters glmParams = new GLMModel.GLMParameters(GLMModel.GLMParameters.Family.gaussian);
    glmParams._train = train._key;                           // base Model.Parameters
    glmParams._valid = test._key;
    glmParams._score_each_iteration = false;                 // default is false
    glmParams._response_column = "bikes";                    // SupervisedModel.Parameters
    glmParams._use_all_factor_levels = true;                 // GLMModel.Parameters
    // Train and block until the model is available.
    final Job<GLMModel> glmJob = new GLM(glmParams).trainModel();
    final GLMModel glmModel = glmJob.get();
    glmJob.remove();

    // ---- Step 4: score and discard the predictions ----
    // NOTE(review): both models are scored on the training frame here, not on
    // the holdout frame - confirm whether that is intended.
    gbmModel.score(train).remove();
    glmModel.score(train).remove();

    // Cleanup of the split frames
    train.remove();
    test.remove();
    hold.remove();
  } finally {
    Scope.exit();
  }
}
use of hex.quantile.Quantile in project h2o-3 by h2oai.
the class MathUtils method computeWeightedQuantile.
/**
 * Computes a single (optionally weighted) quantile of {@code values} by training
 * a Quantile model on a temporary frame.
 *
 * @param weight observation weights, or {@code null} for an unweighted quantile
 * @param values vec whose quantile is computed
 * @param alpha  probability passed as the only entry of the quantile probs
 * @return the value at {@code _output._quantiles[0][0]} of the trained model
 */
public static double computeWeightedQuantile(Vec weight, Vec values, double alpha) {
  QuantileModel.QuantileParameters parms = new QuantileModel.QuantileParameters();
  // Column "y" carries the values; column "w" is only present when weights are supplied.
  Frame tempFrame = weight == null
      ? new Frame(Key.<Frame>make(), new String[] { "y" }, new Vec[] { values })
      : new Frame(Key.<Frame>make(), new String[] { "y", "w" }, new Vec[] { values, weight });
  DKV.put(tempFrame);
  Job<QuantileModel> job = null;
  QuantileModel model = null;
  try {
    parms._train = tempFrame._key;
    parms._probs = new double[] { alpha };
    parms._weights_column = weight == null ? null : "w";
    job = new Quantile(parms).trainModel();
    model = job.get();
    double value = model._output._quantiles[0][0];
    assert (!Double.isNaN(value));
    Log.debug("weighted " + alpha + "-quantile: " + value);
    return value;
  } finally {
    // Runs on failure too (training error or a tripped assertion), so the job,
    // the model and the temporary frame key are never left behind.
    if (job != null)
      job.remove();
    if (model != null)
      model.remove();
    DKV.remove(tempFrame._key);
  }
}
Aggregations