Search in sources :

Example 1 with Submodel

use of hex.glm.GLMModel.Submodel in project h2o-2 by h2oai.

the class GLM2 method run.

/**
 * Starts the GLM computation asynchronously.
 *
 * <p>The work is chained through callbacks:
 * <ol>
 *   <li>a {@code YMUTask} is launched over the training frame; its callback validates the
 *       response, records {@code _ymu} / {@code _nobs} and, for the binomial family with a
 *       user-supplied prior, computes the intercept adjustment;</li>
 *   <li>a {@code GLMIterationTask} is then launched on the null-model coefficients; its callback
 *       derives {@code lambda_max} from the gradient, creates the destination model, sets up the
 *       lambda sequence, optionally launches the cross-validation tasks, and finally hands over
 *       to {@code nextLambda}.</li>
 * </ol>
 *
 * @param doLog if {@code true}, {@code logStart()} is called before anything else
 * @param cmp   completer handed to the first callback; its pending count is incremented before
 *              each follow-up task is launched
 * @throws IllegalArgumentException if lambda search is requested with {@code nlambdas <= 1};
 *         the callbacks additionally throw it for a constant response, for a dataset with no
 *         active rows, and when every user-supplied lambda exceeds {@code lambda_max}
 */
public void run(boolean doLog, H2OCountedCompleter cmp) {
    if (doLog)
        logStart();
    // just fork off the nfolds+1 tasks and wait for the results
    assert alpha.length == 1;
    start_time = System.currentTimeMillis();
    if (nlambdas == -1)
        nlambdas = 100;
    if (lambda_search && nlambdas <= 1)
        throw new IllegalArgumentException(LogInfo("GLM2: nlambdas must be > 1 when running with lambda search."));
    Futures fs = new Futures();
    Key dst = dest();
    new YMUTask(GLM2.this.self(), _srcDinfo, n_folds, new H2OCallback<YMUTask>(cmp) {

        @Override
        public String toString() {
            // Parenthesised: without the parentheses '+' binds tighter than the comparison, so the
            // null test was applied to the concatenated string instead of to the completer.
            return "YMUTask callback. completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
        }

        @Override
        public void callback(final YMUTask ymut) {
            if (ymut._ymin == ymut._ymax)
                throw new IllegalArgumentException(LogInfo("GLM2: attempted to run with constant response. Response == " + ymut._ymin + " for all rows in the training set."));
            if (ymut.nobs() == 0)
                throw new IllegalArgumentException(LogInfo("GLM2: got no active rows in the dataset after discarding rows with NAs"));
            _ymu = ymut.ymu();
            _nobs = ymut.nobs();
            // Only a binomial model with an explicit prior that differs from the observed mean gets
            // an intercept adjustment; in every other case the prior is set to the observed mean.
            if (_glm.family == Family.binomial && prior != -1 && prior != _ymu && !Double.isNaN(prior)) {
                _iceptAdjust = -Math.log(_ymu * (1 - prior) / (prior * (1 - _ymu)));
            } else
                prior = _ymu;
            H2OCountedCompleter cmp = (H2OCountedCompleter) getCompleter();
            cmp.addToPendingCount(1);
            // public GLMIterationTask(int noff, Key jobKey, DataInfo dinfo, GLMParams glm, boolean computeGram, boolean validate, boolean computeGradient, double [] beta, double ymu, double reg, float [] thresholds, H2OCountedCompleter cmp) {
            new GLMIterationTask(_noffsets, GLM2.this.self(), _srcDinfo, _glm, false, true, true, nullModelBeta(_srcDinfo, _ymu), _ymu, 1.0 / _nobs, thresholds, new H2OCallback<GLMIterationTask>(cmp) {

                @Override
                public String toString() {
                    // The null test was inverted: it dereferenced the completer exactly when it was null.
                    return "LMAXTask callback. completer = " + (getCompleter() == null ? "NULL" : getCompleter().toString());
                }

                @Override
                public void callback(final GLMIterationTask glmt) {
                    double[] beta = glmt._beta;
                    if (beta_start == null) {
                        beta_start = beta;
                    }
                    _nullDeviance = glmt._val.residualDeviance();
                    // lambda_max is the largest absolute gradient component, scaled by alpha
                    // (alpha is floored at 1e-3 to avoid dividing by zero).
                    _currentLambda = lambda_max = Math.max(Utils.maxValue(glmt._grad), -Utils.minValue(glmt._grad)) / Math.max(1e-3, alpha[0]);
                    _lastResult = makeIterationInfo(0, glmt, null, glmt.gradient(0, 0));
                    GLMModel model = new GLMModel(GLM2.this, dest(), _srcDinfo, _glm, glmt._val, beta_epsilon, alpha[0], lambda_max, _ymu, prior);
                    model.start_training(start_time);
                    if (lambda_search) {
                        assert !Double.isNaN(lambda_max) : LogInfo("running lambda_value search, but don't know what is the lambda_value max!");
                        model = addLmaxSubmodel(model, glmt._val, beta);
                        if (nlambdas == -1) {
                            lambda = null;
                        } else {
                            // Validate nlambdas before it is used as a divisor / array size.
                            if (nlambdas == 0)
                                throw new IllegalArgumentException("nlambdas must be > 0 when running lambda search.");
                            if (nlambdas == 1)
                                throw new IllegalArgumentException("Number of lambdas must be > 1 when running with lambda_search!");
                            if (lambda_min_ratio == -1)
                                lambda_min_ratio = _nobs > 25 * _srcDinfo.fullN() ? 1e-4 : 1e-2;
                            // Geometric sequence from lambda_max down to lambda_max * lambda_min_ratio.
                            final double d = Math.pow(lambda_min_ratio, 1.0 / (nlambdas - 1));
                            lambda = new double[nlambdas];
                            lambda[0] = lambda_max;
                            for (int i = 1; i < lambda.length; ++i) lambda[i] = lambda[i - 1] * d;
                            lambda_min = lambda[lambda.length - 1];
                            max_iter = MAX_ITERATIONS_PER_LAMBDA * nlambdas;
                        }
                        _runAllLambdas = false;
                    } else {
                        if (lambda == null || lambda.length == 0)
                            lambda = new double[] { DEFAULT_LAMBDA };
                        // Skip the leading user-supplied lambdas that exceed lambda_max.
                        int i = 0;
                        while (i < lambda.length && lambda[i] > lambda_max) ++i;
                        if (i == lambda.length)
                            throw new IllegalArgumentException("Given lambda(s) are all > lambda_max = " + lambda_max + ", have nothing to run with. lambda = " + Arrays.toString(lambda));
                        if (i > 0) {
                            model.addWarning("Removed " + i + " lambdas greater than lambda_max.");
                            lambda = Utils.append(new double[] { lambda_max }, Arrays.copyOfRange(lambda, i, lambda.length));
                            addLmaxSubmodel(model, glmt._val, beta);
                        }
                    }
                    model.delete_and_lock(self());
                    lambda_min = lambda[lambda.length - 1];
                    if (n_folds > 1) {
                        final H2OCountedCompleter futures = new H2OEmptyCompleter();
                        // Slot 0 is a clone for the full data; slots 1..n_folds are the folds.
                        final GLM2[] xvals = new GLM2[n_folds + 1];
                        futures.addToPendingCount(xvals.length - 2);
                        for (int i = 0; i < xvals.length; ++i) {
                            xvals[i] = (GLM2) GLM2.this.clone();
                            xvals[i].n_folds = 0;
                            xvals[i].standardize = standardize;
                            xvals[i].family = family;
                            xvals[i].link = link;
                            xvals[i].beta_epsilon = beta_epsilon;
                            xvals[i].max_iter = max_iter;
                            xvals[i].variable_importances = variable_importances;
                            if (i != 0) {
                                xvals[i]._srcDinfo = _srcDinfo.getFold(i - 1, n_folds);
                                xvals[i].destination_key = Key.make(dest().toString() + "_xval_" + i, (byte) 1, Key.HIDDEN_USER_KEY, H2O.SELF);
                                xvals[i]._nobs = ymut.nobs(i - 1);
                                xvals[i]._ymu = ymut.ymu(i - 1);
                                final int fi = i;
                                final double ymu = ymut.ymu(fi - 1);
                                new GLMIterationTask(_noffsets, self(), xvals[i]._srcDinfo, _glm, false, true, true, nullModelBeta(xvals[fi]._srcDinfo, ymu), ymu, 1.0 / ymut.nobs(fi - 1), thresholds, new H2OCallback<GLMIterationTask>(futures) {

                                    @Override
                                    public String toString() {
                                        // Parenthesised so that the null test applies to the completer only.
                                        return "Xval LMAXTask callback., completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
                                    }

                                    @Override
                                    public void callback(GLMIterationTask t) {
                                        xvals[fi].beta_start = t._beta;
                                        // NOTE(review): this reads the full-data task's gradient (glmt), not this
                                        // fold's (t). That appears to make the fold's lambda_max equal to the outer
                                        // lambda_max, so the "> lambda_max" branch below would never fire.
                                        // Confirm whether t._grad was intended before changing it.
                                        xvals[fi]._currentLambda = xvals[fi].lambda_max = Math.max(Utils.maxValue(glmt._grad), -Utils.minValue(glmt._grad)) / Math.max(1e-3, alpha[0]);
                                        assert xvals[fi].lambda_max > 0;
                                        xvals[fi]._lastResult = makeIterationInfo(0, t, null, t.gradient(alpha[0], 0));
                                        GLMModel m = new GLMModel(GLM2.this, xvals[fi].destination_key, xvals[fi]._srcDinfo, _glm, t._val, beta_epsilon, alpha[0], xvals[fi].lambda_max, xvals[fi]._ymu, prior);
                                        m.submodels = new Submodel[] { new Submodel(xvals[fi].lambda_max, t._beta, t._beta, 0, 0, t._beta.length >= sparseCoefThreshold) };
                                        m.submodels[0].validation = t._val;
                                        assert t._val != null;
                                        m.setSubmodelIdx(0);
                                        m.delete_and_lock(self());
                                        if (xvals[fi].lambda_max > lambda_max) {
                                            futures.addToPendingCount(1);
                                            new ParallelGLMs(GLM2.this, new GLM2[] { xvals[fi] }, lambda_max, 1, futures).fork();
                                        }
                                    }
                                }).asyncExec(xvals[i]._srcDinfo._adaptedFrame);
                            }
                        }
                        _xvals = xvals;
                        // Wait for the per-fold tasks before starting the lambda iterations.
                        futures.join();
                    }
                    getCompleter().addToPendingCount(1);
                    nextLambda(nextLambdaValue(), new LambdaIteration(getCompleter()));
                }
            }).asyncExec(_srcDinfo._adaptedFrame);
        }
    }).asyncExec(_srcDinfo._adaptedFrame);
}
Also used : Submodel(hex.glm.GLMModel.Submodel) YMUTask(hex.glm.GLMTask.YMUTask) GLMIterationTask(hex.glm.GLMTask.GLMIterationTask) H2OCallback(water.H2O.H2OCallback) H2OCountedCompleter(water.H2O.H2OCountedCompleter) H2OEmptyCompleter(water.H2O.H2OEmptyCompleter)

Example 2 with Submodel

use of hex.glm.GLMModel.Submodel in project h2o-2 by h2oai.

the class GLMModelView method toHTML.

/**
 * Renders the GLM model as HTML into {@code sb}.
 *
 * <p>Output order: parameter box and model key, predict/save links (when submodels exist),
 * an alert box with iteration count, run time and warnings, the model parameters, a per-lambda
 * summary table (when there is more than one submodel), and finally the coefficients,
 * validation and variable importances of the selected submodel.
 *
 * <p>The selected submodel is the one whose {@code lambda_value} equals {@code lambda}; when
 * {@code lambda} is NaN the model's {@code best_lambda_idx} submodel is used.
 *
 * @param sb builder the HTML is appended to
 * @return always {@code true}
 * @throws IllegalArgumentException if {@code lambda} is set but matches no submodel
 */
@Override
public boolean toHTML(StringBuilder sb) {
    if (glm_model == null) {
        sb.append("No model yet...");
        return true;
    }
    // Read the submodels once and decide up front whether there is anything to index into;
    // previously the array was null-checked in some places but dereferenced blindly in others.
    final Submodel[] submodels = glm_model.submodels;
    final boolean hasSubmodels = submodels != null && submodels.length > 0;
    final boolean binomial = glm_model.glm.family == Family.binomial;

    glm_model.get_params().makeJsonBox(sb);
    DocGen.HTML.paragraph(sb, "Model Key: " + glm_model._key);
    if (submodels != null) {
        DocGen.HTML.paragraph(sb, water.api.GLMPredict.link(glm_model._key, lambda, "Predict!"));
        DocGen.HTML.paragraph(sb, UIUtils.qlink(SaveModel.class, "model", glm_model._key, "Save model"));
    }

    // Status box: green when there are no warnings, yellow otherwise.
    final boolean hasWarnings = glm_model.warnings != null && glm_model.warnings.length > 0;
    String succ = hasWarnings ? "alert-warning" : "alert-success";
    sb.append("<div class='alert " + succ + "'>");
    pprintTime(sb.append(glm_model.iteration() + " iterations computed in "), glm_model.run_time);
    if (hasWarnings) {
        sb.append("<ul>");
        for (String w : glm_model.warnings) sb.append("<li><b>Warning:</b>" + w + "</li>");
        sb.append("</ul>");
    }
    sb.append("</div>");

    // True when the viewed lambda is set and differs from the model's current best lambda.
    final boolean nonDefaultLambda = hasSubmodels && !Double.isNaN(lambda)
        && lambda != submodels[glm_model.best_lambda_idx].lambda_value;
    if (nonDefaultLambda) {
        // show button to permanently set lambda_value to this value
        sb.append("<div class='alert alert-warning'>\n");
        sb.append(GLMModelUpdate.link("Set lambda_value to current value!", _modelKey, lambda) + "\n");
        sb.append("</div>");
    }

    sb.append("<h4>Parameters</h4>");
    parm(sb, "family", glm_model.glm.family);
    parm(sb, "link", glm_model.glm.link);
    parm(sb, "&epsilon;<sub>&beta;</sub>", glm_model.beta_eps);
    parm(sb, "&alpha;", glm_model.alpha);
    if (!Double.isNaN(glm_model.lambda_max))
        parm(sb, "&lambda;<sub>max</sub>", DFORMAT2.format(glm_model.lambda_max));
    parm(sb, "&lambda;", DFORMAT2.format(lambda));

    if (hasSubmodels && submodels.length > 1) {
        // One column per submodel; rows are lambda, nonzeros, deviance explained and AUC/AIC.
        sb.append("\n<table class='table table-bordered table-condensed'>\n");
        StringBuilder firstRow = new StringBuilder("\t<tr><th>&lambda;</th>\n");
        StringBuilder secondRow = new StringBuilder("\t<tr><th>nonzeros</th>\n");
        StringBuilder thirdRow = new StringBuilder("\t<tr><th>Deviance Explained</th>\n");
        StringBuilder fourthRow = new StringBuilder("\t<tr><th>" + (binomial ? "AUC" : "AIC") + "</th>\n");
        final double nullDeviance = glm_model.null_validation.residualDeviance();
        for (int i = 0; i < submodels.length; ++i) {
            final Submodel sm = submodels[i];
            // Stop at the first submodel without a validation.
            if (sm.validation == null)
                break;
            if (sm.lambda_value == lambda)
                firstRow.append("\t\t<td><b>" + DFORMAT2.format(sm.lambda_value) + "</b></td>\n");
            else
                firstRow.append("\t\t<td>" + link(DFORMAT2.format(sm.lambda_value), glm_model._key, sm.lambda_value) + "</td>\n");
            // rank counts intercept, that's why -1 is there, however, intercept can be 0 as well, so just prevent -1
            secondRow.append("\t\t<td>" + Math.max(0, (sm.rank - 1)) + "</td>\n");
            if (sm.xvalidation != null) {
                // Cross-validated value first, training value in parentheses.
                thirdRow.append("\t\t<td>" + DFORMAT.format(1 - sm.xvalidation.residual_deviance / nullDeviance) + "<sub>x</sub>(" + DFORMAT.format(1 - sm.validation.residual_deviance / nullDeviance) + ")" + "</td>\n");
                fourthRow.append("\t\t<td>" + DFORMAT.format(binomial ? sm.xvalidation.auc : sm.xvalidation.aic) + "<sub>x</sub>(" + DFORMAT.format(binomial ? sm.validation.auc : sm.validation.aic) + ")</td>\n");
            } else {
                thirdRow.append("\t\t<td>" + DFORMAT.format(1 - sm.validation.residual_deviance / nullDeviance) + "</td>\n");
                fourthRow.append("\t\t<td>" + DFORMAT.format(binomial ? sm.validation.auc : sm.validation.aic) + "</td>\n");
            }
        }
        sb.append(firstRow.append("\t</tr>\n"));
        sb.append(secondRow.append("\t</tr>\n"));
        sb.append(thirdRow.append("\t</tr>\n"));
        sb.append(fourthRow.append("\t</tr>\n"));
        sb.append("</table>\n");
    }

    // Nothing more to show without at least one submodel.
    if (!hasSubmodels)
        return true;

    Submodel sm = submodels[glm_model.best_lambda_idx];
    if (nonDefaultLambda) {
        // Locate the submodel whose lambda exactly matches the requested one.
        int idx = 0;
        while (idx < submodels.length && submodels[idx].lambda_value != lambda) ++idx;
        if (idx == submodels.length)
            throw new IllegalArgumentException("Unexpected value of lambda '" + lambda + "'");
        sm = submodels[idx];
    }
    coefs2html(sm, sb);
    // Prefer the cross-validation results when they are available.
    if (sm.xvalidation != null)
        val2HTML(sm, sm.xvalidation, sb);
    else if (sm.validation != null)
        val2HTML(sm, sm.validation, sb);
    // Variable importance
    if (glm_model.varimp() != null) {
        glm_model.varimp().toHTML(glm_model, sb);
    }
    return true;
}
Also used : Submodel(hex.glm.GLMModel.Submodel) RString(water.util.RString)

Aggregations

Submodel (hex.glm.GLMModel.Submodel)2 GLMIterationTask (hex.glm.GLMTask.GLMIterationTask)1 YMUTask (hex.glm.GLMTask.YMUTask)1 H2OCallback (water.H2O.H2OCallback)1 H2OCountedCompleter (water.H2O.H2OCountedCompleter)1 H2OEmptyCompleter (water.H2O.H2OEmptyCompleter)1 RString (water.util.RString)1