Search in sources :

Example 1 with Quantile

use of hex.quantile.Quantile in project h2o-3 by h2oai.

the class GainsLift method init.

/**
 * Validates the label, prediction and (optional) weight vectors and computes the
 * distinct quantile thresholds of the predictions, stored in {@code _quantiles}
 * in descending order.
 *
 * @param job optional enclosing job; when non-null and not yet done, the quantile
 *            model is trained nested inside it, otherwise as a stand-alone job
 * @throws IllegalArgumentException if labels or predictions are missing, differ in
 *         length, or have an unsupported type/cardinality, or if the weights are
 *         not numeric
 */
private void init(Job job) throws IllegalArgumentException {
    // Validate presence BEFORE dereferencing _labels: the original converted first,
    // so a missing label vector surfaced as a NullPointerException instead of the
    // intended IllegalArgumentException.
    if (_labels == null || _preds == null)
        throw new IllegalArgumentException("Missing actualLabels or predictedProbs!");
    _labels = _labels.toCategoricalVec();
    // Keep the original post-conversion guard in case the conversion yields null.
    if (_labels == null)
        throw new IllegalArgumentException("Missing actualLabels or predictedProbs!");
    if (_labels.length() != _preds.length())
        throw new IllegalArgumentException("Both arguments must have the same length (" + _labels.length() + "!=" + _preds.length() + ")!");
    if (!_labels.isInt())
        throw new IllegalArgumentException("Actual column must be integer class labels!");
    if (_labels.cardinality() != -1 && _labels.cardinality() != 2)
        throw new IllegalArgumentException("Actual column must contain binary class labels, but found cardinality " + _labels.cardinality() + "!");
    if (_preds.isCategorical())
        throw new IllegalArgumentException("Predicted probabilities cannot be class labels, expect probabilities.");
    if (_weights != null && !_weights.isNumeric())
        throw new IllegalArgumentException("Observation weights must be numeric.");
    // The vectors are from different groups => align them, but properly delete it after computation
    if (!_labels.group().equals(_preds.group())) {
        _preds = _labels.align(_preds);
        Scope.track(_preds);
        if (_weights != null) {
            _weights = _labels.align(_weights);
            Scope.track(_weights);
        }
    }
    // The single-pass path is hard-wired off; it is kept for reference only.
    boolean fast = false;
    if (fast) {
        // FAST VERSION: single-pass, only works with the specific pre-computed quantiles from rollupstats
        assert (_groups == 10);
        // Index of each entry in Vec.PERCENTILES:
        //   0      1     2    3    4     5    6          7    8    9    10         11   12    13   14   15    16
        assert (Arrays.equals(Vec.PERCENTILES,
        new double[] { 0.001, 0.01, 0.1, 0.2, 0.25, 0.3, 1.0 / 3.0, 0.4, 0.5, 0.6, 2.0 / 3.0, 0.7, 0.75, 0.8, 0.9, 0.99, 0.999 }));
        //HACK: hardcoded quantiles for simplicity (0.9,0.8,...,0.1,0)
        //might do a full pass over the Vec
        double[] rq = _preds.pctiles();
        _quantiles = new double[] { rq[14], rq[13], rq[11], rq[9], rq[8], rq[7], rq[5], rq[3], rq[2], 0 };
    } else {
        // ACCURATE VERSION: multi-pass
        Frame fr = null;
        QuantileModel qm = null;
        try {
            QuantileModel.QuantileParameters qp = new QuantileModel.QuantileParameters();
            if (_weights == null) {
                fr = new Frame(Key.<Frame>make(), new String[] { "predictions" }, new Vec[] { _preds });
            } else {
                fr = new Frame(Key.<Frame>make(), new String[] { "predictions", "weights" }, new Vec[] { _preds, _weights });
                qp._weights_column = "weights";
            }
            // The temporary frame is published to DKV and referenced by key as the training frame.
            DKV.put(fr);
            qp._train = fr._key;
            if (_groups > 0) {
                qp._probs = new double[_groups];
                for (int i = 0; i < _groups; ++i) {
                    // This is 0.9, 0.8, 0.7, 0.6, ..., 0.1, 0 for 10 groups
                    qp._probs[i] = (_groups - i - 1.) / _groups;
                }
            } else {
                // No group count given: use a fixed grid that is finer near the top.
                qp._probs = new double[] { 0.99, 0.98, 0.97, 0.96, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0 };
            }
            qm = job != null && !job.isDone() ? new Quantile(qp, job).trainModelNested(null) : new Quantile(qp).trainModel().get();
            _quantiles = qm._output._quantiles[0];
            // Collapse duplicate thresholds: the sorted set de-duplicates, and the
            // descending iterator restores high-to-low order.
            TreeSet<Double> hs = new TreeSet<>();
            for (double d : _quantiles) hs.add(d);
            _quantiles = new double[hs.size()];
            Iterator<Double> it = hs.descendingIterator();
            int i = 0;
            while (it.hasNext()) _quantiles[i++] = it.next();
        } finally {
            // Always drop the temporary model and frame key, even if training failed.
            if (qm != null)
                qm.remove();
            if (fr != null)
                DKV.remove(fr._key);
        }
    }
}
Also used : Frame(water.fvec.Frame) PrettyPrint(water.util.PrettyPrint) QuantileModel(hex.quantile.QuantileModel) Vec(water.fvec.Vec) TreeSet(java.util.TreeSet) Quantile(hex.quantile.Quantile)

Example 2 with Quantile

use of hex.quantile.Quantile in project h2o-3 by h2oai.

the class AstMedian method median.

/**
 * Computes the median (0.5 quantile) of the first column of a frame by training a
 * temporary quantile model.
 *
 * @param fr             frame to summarize; if it has no key, a keyed wrapper over
 *                       the same names and vecs is put into DKV for the duration of the call
 * @param combine_method how the quantile model combines values, passed through unchanged
 * @return the value at {@code _quantiles[0][0]} of the trained model
 */
public static double median(Frame fr, QuantileModel.CombineMethod combine_method) {
    // Frame needs a Key for Quantile, might not have one from rapids
    Key tk = null;
    if (fr._key == null) {
        DKV.put(tk = Key.make(), fr = new Frame(tk, fr.names(), fr.vecs()));
    }
    QuantileModel q = null;
    try {
        // Quantiles to get the median
        QuantileModel.QuantileParameters parms = new QuantileModel.QuantileParameters();
        parms._probs = new double[] { 0.5 };
        parms._train = fr._key;
        parms._combine_method = combine_method;
        q = new Quantile(parms).trainModel().get();
        return q._output._quantiles[0][0];
    } finally {
        // Clean up on every exit path so a failed training run does not leak the
        // temporary model or the temporary frame key.
        if (q != null) {
            q.delete();
        }
        if (tk != null) {
            DKV.remove(tk);
        }
    }
}
Also used : Frame(water.fvec.Frame) Quantile(hex.quantile.Quantile) Key(water.Key) QuantileModel(hex.quantile.QuantileModel)

Example 3 with Quantile

use of hex.quantile.Quantile in project h2o-3 by h2oai.

the class AstQtile method apply.

/**
 * Computes the requested quantiles for the columns of the frame argument and returns
 * them as a new frame: column 0 holds the probabilities, the following columns hold
 * the per-column quantiles (the optional weights column is excluded).
 *
 * Arguments read from {@code asts}: [1] frame, [2] probabilities, [3] combine-method
 * name, [4] weights column name, where "_" means no weights.
 *
 * @throws IllegalArgumentException if a probability is outside [0, 1], or if the
 *         combine-method name does not match a {@code QuantileModel.CombineMethod} constant
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    QuantileModel.QuantileParameters parms = new QuantileModel.QuantileParameters();
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    // Force a bogus Key for Quantiles ModelBuilder
    Frame fr_wkey = new Frame(fr);
    DKV.put(fr_wkey);
    QuantileModel q;
    try {
        parms._train = fr_wkey._key;
        parms._probs = ((AstNumList) asts[2]).expand();
        for (double d : parms._probs) {
            if (d < 0 || d > 1)
                throw new IllegalArgumentException("Probability must be between 0 and 1: " + d);
        }
        String inter = asts[3].exec(env).getStr();
        parms._combine_method = QuantileModel.CombineMethod.valueOf(inter.toUpperCase());
        parms._weights_column = asts[4].str().equals("_") ? null : asts[4].str();
        // Compute Quantiles
        Job j = new Quantile(parms).trainModel();
        q = (QuantileModel) j.get();
        DKV.remove(j._key);
    } finally {
        // Remove bogus Key on every exit path; validation and training above can
        // throw, and the key was previously leaked in that case.
        DKV.remove(fr_wkey._key);
    }
    try {
        // Reshape all outputs as a Frame, with probs in col 0 and the
        // quantiles in cols 1 thru fr.numCols() - except the optional weights vec
        int ncols = fr.numCols();
        if (parms._weights_column != null)
            ncols--;
        // 1 more for the probs themselves
        Vec[] vecs = new Vec[1 + ncols];
        String[] names = new String[vecs.length];
        vecs[0] = Vec.makeCon(null, parms._probs);
        names[0] = "Probs";
        // Name offset: becomes 1 once the weights column has been passed, so that
        // output names skip over it.
        int w = 0;
        for (int i = 0; i < vecs.length - 1; ++i) {
            if (fr._names[i].equals(parms._weights_column))
                w = 1;
            vecs[i + 1] = Vec.makeCon(null, q._output._quantiles[i]);
            names[i + 1] = fr._names[w + i] + "Quantiles";
        }
        return new ValFrame(new Frame(names, vecs));
    } finally {
        // The model is only needed to read its output; delete it even if building
        // the result frame fails.
        q.delete();
    }
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) QuantileModel(hex.quantile.QuantileModel) ValFrame(water.rapids.vals.ValFrame) Vec(water.fvec.Vec) Job(water.Job) Quantile(hex.quantile.Quantile)

Example 4 with Quantile

use of hex.quantile.Quantile in project h2o-3 by h2oai.

the class WorkFlowTest method testWorkFlow.

// End-to-end workflow test:
// 1- load set of files into a single frame
// 2- light data munging (derive a "Days" column, group-by count, quantiles,
//    then split into train, test and holdout frames)
// 3- build GBM and GLM models on train; using test as validation
// 4- score the models
//
// If the files cannot be loaded (load_files returns null) the method returns
// silently - the files are big and this is not yet a junit test.
//
// NOTE(review): step 4 is labelled "score on holdout set", but the code scores
// on `train`; `hold` is only created and removed. Confirm which is intended.
private void testWorkFlow(String[] files) {
    try {
        Scope.enter();
        // 1- Load datasets
        Frame data = load_files("data.hex", files);
        if (data == null)
            return;
        // -------------------------------------------------
        // 2- light data munging
        // Convert start time to: Day since the Epoch
        Vec startime = data.vec("starttime");
        data.add(new TimeSplit().doIt(startime));
        // Now do a monster Group-By.  Count bike starts per-station per-day
        Vec days = data.vec("Days");
        long start = System.currentTimeMillis();
        Frame bph = new CountBikes(days).doAll(days, data.vec("start station name")).makeFrame(Key.make("bph.hex"));
        System.out.println("Groupby took " + (System.currentTimeMillis() - start));
        System.out.println(bph);
        System.out.println(bph.toString(10000, 20));
        // The raw data is no longer needed once the grouped frame exists
        data.remove();
        // Quantiles of the grouped frame, using default quantile parameters;
        // the job and the model are removed as soon as the result is printed
        QuantileModel.QuantileParameters quantile_parms = new QuantileModel.QuantileParameters();
        quantile_parms._train = bph._key;
        Job<QuantileModel> job2 = new Quantile(quantile_parms).trainModel();
        QuantileModel quantile = job2.get();
        job2.remove();
        System.out.println(Arrays.deepToString(quantile._output._quantiles));
        quantile.remove();
        // Split into train, test and holdout sets (60% / 30% / 10%, fixed seed)
        Key[] keys = new Key[] { Key.make("train.hex"), Key.make("test.hex"), Key.make("hold.hex") };
        double[] ratios = new double[] { 0.6, 0.3, 0.1 };
        Frame[] frs = ShuffleSplitFrame.shuffleSplitFrame(bph, keys, ratios, 1234567689L);
        Frame train = frs[0];
        Frame test = frs[1];
        Frame hold = frs[2];
        // The grouped frame has been split; only the three parts are used below
        bph.remove();
        System.out.println(train);
        System.out.println(test);
        // -------------------------------------------------
        // 3- build model on train; using test as validation
        // ---
        // Gradient Boosting Machine
        GBMModel.GBMParameters gbm_parms = new GBMModel.GBMParameters();
        // base Model.Parameters
        gbm_parms._train = train._key;
        gbm_parms._valid = test._key;
        // default is false
        gbm_parms._score_each_iteration = false;
        // SupervisedModel.Parameters
        gbm_parms._response_column = "bikes";
        // SharedTreeModel.Parameters
        // default is 50, 1000 is 0.90, 10000 is 0.91
        gbm_parms._ntrees = 500;
        // default is 5
        gbm_parms._max_depth = 6;
        // default
        gbm_parms._min_rows = 10;
        // default
        gbm_parms._nbins = 20;
        // GBMModel.Parameters
        // default
        gbm_parms._distribution = DistributionFamily.gaussian;
        // default
        gbm_parms._learn_rate = 0.1f;
        // Train model; block for results
        Job<GBMModel> job = new GBM(gbm_parms).trainModel();
        GBMModel gbm = job.get();
        job.remove();
        // ---
        // Build a GLM model also
        GLMModel.GLMParameters glm_parms = new GLMModel.GLMParameters(GLMModel.GLMParameters.Family.gaussian);
        // base Model.Parameters
        glm_parms._train = train._key;
        glm_parms._valid = test._key;
        // default is false
        glm_parms._score_each_iteration = false;
        // SupervisedModel.Parameters
        glm_parms._response_column = "bikes";
        // GLMModel.Parameters
        glm_parms._use_all_factor_levels = true;
        // Train model; block for results
        Job<GLMModel> glm_job = new GLM(glm_parms).trainModel();
        GLMModel glm = glm_job.get();
        glm_job.remove();
        // -------------------------------------------------
        // 4- Score & report; the scored frames are removed immediately
        // NOTE(review): scores on `train`, not on `hold` - confirm intent
        gbm.score(train).remove();
        glm.score(train).remove();
        // Cleanup of the three split frames
        train.remove();
        test.remove();
        hold.remove();
    } finally {
        // Runs on every exit path, including the early return when loading fails
        Scope.exit();
    }
}
Also used : ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) GLMModel(hex.glm.GLMModel) GLM(hex.glm.GLM) QuantileModel(hex.quantile.QuantileModel) GBMModel(hex.tree.gbm.GBMModel) GBM(hex.tree.gbm.GBM) Quantile(hex.quantile.Quantile)

Example 5 with Quantile

use of hex.quantile.Quantile in project h2o-3 by h2oai.

the class MathUtils method computeWeightedQuantile.

/**
 * Computes a single (optionally weighted) quantile of {@code values} by training a
 * temporary quantile model on a temporary frame.
 *
 * @param weight per-row weights, or {@code null} for an unweighted quantile
 * @param values the values whose quantile is computed
 * @param alpha  the probability passed to the quantile model as its only entry in {@code _probs}
 * @return the value at {@code _quantiles[0][0]} of the trained model
 */
public static double computeWeightedQuantile(Vec weight, Vec values, double alpha) {
    QuantileModel.QuantileParameters parms = new QuantileModel.QuantileParameters();
    // Column "y" holds the values; column "w" is only present when weights are given.
    Frame tempFrame = weight == null ? new Frame(Key.<Frame>make(), new String[] { "y" }, new Vec[] { values }) : new Frame(Key.<Frame>make(), new String[] { "y", "w" }, new Vec[] { values, weight });
    DKV.put(tempFrame);
    Job<QuantileModel> job = null;
    QuantileModel model = null;
    try {
        parms._train = tempFrame._key;
        parms._probs = new double[] { alpha };
        parms._weights_column = weight == null ? null : "w";
        job = new Quantile(parms).trainModel();
        model = job.get();
        double value = model._output._quantiles[0][0];
        assert (!Double.isNaN(value));
        Log.debug("weighted " + alpha + "-quantile: " + value);
        return value;
    } finally {
        // Clean up on every exit path so a failed training run (or a failed
        // assertion) does not leak the job, the model or the temporary frame key.
        if (job != null) {
            job.remove();
        }
        if (model != null) {
            model.remove();
        }
        DKV.remove(tempFrame._key);
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Quantile(hex.quantile.Quantile) QuantileModel(hex.quantile.QuantileModel)

Aggregations

Quantile (hex.quantile.Quantile): 6, QuantileModel (hex.quantile.QuantileModel): 6, Frame (water.fvec.Frame): 5, Vec (water.fvec.Vec): 3, GLM (hex.glm.GLM): 1, GLMModel (hex.glm.GLMModel): 1, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 1, GBM (hex.tree.gbm.GBM): 1, GBMModel (hex.tree.gbm.GBMModel): 1, TreeSet (java.util.TreeSet): 1, Job (water.Job): 1, Key (water.Key): 1, ValFrame (water.rapids.vals.ValFrame): 1, PrettyPrint (water.util.PrettyPrint): 1