Search in sources :

Example 1 with GLMIterationTask

use of hex.glm.GLMTask.GLMIterationTask in project h2o-2 by h2oai.

the class GLM2 method nextLambda.

/**
 * Starts the computation for the given lambda value.
 *
 * <p>If {@code currentLambda} exceeds {@code lambda_max} the run is flagged as done and
 * {@code cmp} is completed right away. With {@code n_folds > 1} the work is delegated to a
 * forked {@code ParallelGLMs}; otherwise the active columns are recomputed from the last
 * full gradient and a {@code GLMIterationTask} is launched asynchronously on the active data.
 *
 * @param currentLambda lambda value to compute next
 * @param cmp           completer to signal (directly or through the launched task's callback)
 */
void nextLambda(final double currentLambda, final H2OCountedCompleter cmp) {
    // Guard: lambda above lambda_max, nothing to compute.
    if (currentLambda > lambda_max) {
        _done = true;
        cmp.tryComplete();
        return;
    }

    if (_beta != null) {
        beta_start = _beta;
    }
    _iter1 = _iter;
    final double previousLambda = _currentLambda;
    LogInfo("starting computation of lambda = " + currentLambda + ", previous lambda = " + previousLambda);
    _done = false;
    _currentLambda = currentLambda;

    if (n_folds > 1) {
        // Cross-validated run: every fold must carry a full gradient from the previous lambda,
        // then the parallel GLMs are forked and the callback takes over.
        for (int fold = 0; fold < _xvals.length; ++fold) {
            if (_xvals[fold]._lastResult._fullGrad == null) {
                RuntimeException missingGrad = new RuntimeException(LogInfo("missing full gradient at lambda = " + previousLambda + " at fold " + fold));
                Log.err(missingGrad);
                throw missingGrad;
            }
        }
        new ParallelGLMs(this, _xvals, currentLambda, H2O.CLOUD.size(), new XvalidationCallback(cmp)).fork();
        return;
    }

    if (lambda_search) {
        // During lambda search only a limited number of iterations is allowed per lambda.
        max_iter = _iter + MAX_ITERATIONS_PER_LAMBDA;
    }
    final double[] grad = _lastResult.fullGrad(alpha[0], previousLambda);
    assert grad != null;
    activeCols(_currentLambda, previousLambda, grad);

    final boolean onlyOffsetsActive = _activeCols != null && _activeCols.length == _noffsets;
    if (onlyOffsetsActive) {
        // Nothing to fit: store the null model and report back.
        setSubmodel(_lastResult._glmt._beta, _lastResult._glmt._val, cmp);
        _done = true;
        cmp.tryComplete();
        return;
    }

    assert cmp.getPendingCount() == 0;
    // TODO: restore the shortcut that reused the previous gram when the active column set
    // did not change (Arrays.equals(_lastResult._activeCols, _activeCols) with a non-null gram).
    // Until then the beta is always resized to the new active set and a fresh iteration is run.
    new GLMIterationTask(
            _noffsets,
            GLM2.this.self(),
            _activeData,
            _glm,
            true,
            false,
            false,
            resizeVec(_lastResult._glmt._beta, _activeCols, _lastResult._activeCols),
            _ymu,
            1.0 / _nobs,
            thresholds,
            new Iteration(cmp))
        .asyncExec(_activeData._adaptedFrame);
}
Also used : GLMIterationTask(hex.glm.GLMTask.GLMIterationTask)

Example 2 with GLMIterationTask

use of hex.glm.GLMTask.GLMIterationTask in project h2o-2 by h2oai.

the class GLM2 method checkKKTAndComplete.

/**
 * Recomputes the gradient over all columns of {@code _srcDinfo} for the given solution and,
 * in the task's callback, either re-activates columns whose subgradient exceeds the tolerance
 * (and restarts the iteration on the enlarged active set) or stores the submodel and marks the
 * current lambda as done.
 *
 * @param cc               completer of the calling iteration; cast to {@code H2OCountedCompleter}
 * @param glmt             iteration task of the caller; receives the validation computed here and
 *                         supplies {@code _reg} when the iteration is restarted
 * @param newBeta          solution on the active columns, or {@code null} (a zero-filled full
 *                         vector is used in that case)
 * @param failedLineSearch when {@code true}, the column re-activation check is skipped
 */
protected void checkKKTAndComplete(final CountedCompleter cc, final GLMIterationTask glmt, final double[] newBeta, final boolean failedLineSearch) {
    H2OCountedCompleter cmp = (H2OCountedCompleter) cc;
    final double[] fullBeta = newBeta == null ? MemoryManager.malloc8d(_srcDinfo.fullN() + _intercept - _noffsets) : expandVec(newBeta, _activeCols);
    // now we need full gradient (on all columns) using this beta
    new GLMIterationTask(_noffsets, GLM2.this.self(), _srcDinfo, _glm, false, true, true, fullBeta, _ymu, 1.0 / _nobs, thresholds, new H2OCallback<GLMIterationTask>(cmp) {

        @Override
        public String toString() {
            // The ternary must be parenthesized: '+' binds tighter than '==', so without the
            // parentheses the concatenated string was compared against null (always false),
            // the label was dropped and a null completer caused a NullPointerException.
            return "checkKKTAndComplete.Callback, completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
        }

        @Override
        public void callback(final GLMIterationTask glmt2) {
            // first check KKT conditions!
            final double[] grad = glmt2.gradient(alpha[0], _currentLambda);
            if (Utils.hasNaNsOrInfs(grad)) {
                // NOTE(review): this sets the field _failedLineSearch, while the check below reads
                // the method parameter failedLineSearch - confirm that this is intended.
                _failedLineSearch = true;
            // TODO: add warning and break the lambda search? Or throw Exception?
            }
            glmt._val = glmt2._val;
            _lastResult = makeIterationInfo(_iter, glmt2, null, glmt2.gradient(alpha[0], 0));
            // check the KKT conditions and filter data for next lambda_value
            // check the gradient
            double[] subgrad = grad.clone();
            ADMMSolver.subgrad(alpha[0], _currentLambda, fullBeta, subgrad);
            double grad_eps = GLM_GRAD_EPS;
            if (!failedLineSearch && _activeCols != null) {
                // Widen the tolerance to the largest absolute subgradient seen on the active columns.
                // Both bounds read the same entry subgrad[_activeCols[c]]; the lower-bound test
                // previously read subgrad[c], which is a different column whenever
                // _activeCols[c] != c.
                for (int c = 0; c < _activeCols.length - _noffsets; ++c) {
                    final double g = subgrad[_activeCols[c]];
                    if (g > grad_eps)
                        grad_eps = g;
                    else if (g < -grad_eps)
                        grad_eps = -g;
                }
                // Collect inactive columns (the last gradient entry is skipped) whose subgradient
                // exceeds the tolerance in absolute value.
                int[] failedCols = new int[64];
                int fcnt = 0;
                for (int i = 0; i < grad.length - 1; ++i) {
                    if (Arrays.binarySearch(_activeCols, i) >= 0)
                        continue;
                    if (subgrad[i] > grad_eps || -subgrad[i] > grad_eps) {
                        if (fcnt == failedCols.length)
                            failedCols = Arrays.copyOf(failedCols, failedCols.length << 1);
                        failedCols[fcnt++] = i;
                    }
                }
                if (fcnt > 0) {
                    // Add the failed columns to the active set (kept sorted for binarySearch)
                    // and restart the iteration on the enlarged data.
                    final int n = _activeCols.length;
                    final int[] oldActiveCols = _activeCols;
                    _activeCols = Arrays.copyOf(_activeCols, _activeCols.length + fcnt);
                    for (int i = 0; i < fcnt; ++i) _activeCols[n + i] = failedCols[i];
                    Arrays.sort(_activeCols);
                    LogInfo(fcnt + " variables failed KKT conditions check! Adding them to the model and continuing computation.(grad_eps = " + grad_eps + ", activeCols = " + (_activeCols.length > 100 ? "lost" : Arrays.toString(_activeCols)));
                    _activeData = _srcDinfo.filterExpandedColumns(_activeCols);
                    // NOTE: tricky completer game here:
                    // We expect 0 pending in this method since this is the end-point, ( actually it's racy, can be 1 with pending 1 decrement from the original Iteration callback, end result is 0 though)
                    // while iteration expects pending count of 1, so we need to increase it here (Iteration itself adds 1 but 1 will be subtracted when we leave this method since we're in the callback which is called by onCompletion!
                    // [unlike at the start of nextLambda call when we're not inside onCompletion]))
                    getCompleter().addToPendingCount(1);
                    new GLMIterationTask(_noffsets, GLM2.this.self(), _activeData, _glm, true, true, true, resizeVec(newBeta, _activeCols, oldActiveCols), _ymu, glmt._reg, thresholds, new Iteration(getCompleter())).asyncExec(_activeData._adaptedFrame);
                    return;
                }
            }
            // No columns had to be added: account for the iterations not used for this lambda,
            // store the submodel and flag the lambda as done.
            int diff = MAX_ITERATIONS_PER_LAMBDA - _iter + _iter1;
            if (diff > 0)
                // update progress
                new GLM2_ProgressUpdate(diff).fork(_progressKey);
            GLM2.this.setSubmodel(newBeta, glmt2._val, (H2OCountedCompleter) getCompleter().getCompleter());
            _done = true;
            LogInfo("computation of current lambda done in " + (System.currentTimeMillis() - GLM2.this.start_time) + "ms");
            assert _lastResult._fullGrad != null;
        }
    }).asyncExec(_srcDinfo._adaptedFrame);
}
Also used : H2OCallback(water.H2O.H2OCallback) GLMIterationTask(hex.glm.GLMTask.GLMIterationTask) H2OCountedCompleter(water.H2O.H2OCountedCompleter)

Example 3 with GLMIterationTask

use of hex.glm.GLMTask.GLMIterationTask in project h2o-2 by h2oai.

the class GLM2 method run.

/**
 * Starts the GLM computation.
 *
 * <p>Runs a {@code YMUTask} over the source data; its callback validates the response, records
 * {@code _ymu}/{@code _nobs} and launches a {@code GLMIterationTask} on the null-model beta.
 * That task's callback derives {@code lambda_max} from the gradient, creates and locks the
 * model, prepares the lambda sequence, sets up the cross-validation clones when
 * {@code n_folds > 1}, and finally calls {@code nextLambda} for the first lambda value.
 *
 * @param doLog when {@code true}, {@code logStart()} is called first
 * @param cmp   completer passed to the first task's callback
 * @throws IllegalArgumentException if lambda search is requested with {@code nlambdas <= 1};
 *         the callbacks additionally throw it for a constant response, for zero usable rows,
 *         and when every user-supplied lambda exceeds {@code lambda_max}
 */
public void run(boolean doLog, H2OCountedCompleter cmp) {
    if (doLog)
        logStart();
    // just fork off the nfolds+1 tasks and wait for the results
    assert alpha.length == 1;
    start_time = System.currentTimeMillis();
    if (nlambdas == -1)
        nlambdas = 100;
    if (lambda_search && nlambdas <= 1)
        throw new IllegalArgumentException(LogInfo("GLM2: nlambdas must be > 1 when running with lambda search."));
    // NOTE(review): fs and dst are not read anywhere in this method; kept in case the calls matter.
    Futures fs = new Futures();
    Key dst = dest();
    new YMUTask(GLM2.this.self(), _srcDinfo, n_folds, new H2OCallback<YMUTask>(cmp) {

        @Override
        public String toString() {
            // Parenthesized and un-inverted: previously the concatenated string was compared
            // with null (always non-null), so this method always returned just "null".
            return "YMUTask callback. completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
        }

        @Override
        public void callback(final YMUTask ymut) {
            if (ymut._ymin == ymut._ymax)
                throw new IllegalArgumentException(LogInfo("GLM2: attempted to run with constant response. Response == " + ymut._ymin + " for all rows in the training set."));
            if (ymut.nobs() == 0)
                throw new IllegalArgumentException(LogInfo("GLM2: got no active rows in the dataset after discarding rows with NAs"));
            _ymu = ymut.ymu();
            _nobs = ymut.nobs();
            // Binomial with a user-supplied prior different from the observed mean: compute the
            // intercept adjustment; otherwise the prior defaults to the observed mean.
            if (_glm.family == Family.binomial && prior != -1 && prior != _ymu && !Double.isNaN(prior)) {
                _iceptAdjust = -Math.log(_ymu * (1 - prior) / (prior * (1 - _ymu)));
            } else
                prior = _ymu;
            H2OCountedCompleter cmp = (H2OCountedCompleter) getCompleter();
            cmp.addToPendingCount(1);
            // public GLMIterationTask(int noff, Key jobKey, DataInfo dinfo, GLMParams glm, boolean computeGram, boolean validate, boolean computeGradient, double [] beta, double ymu, double reg, float [] thresholds, H2OCountedCompleter cmp) {
            new GLMIterationTask(_noffsets, GLM2.this.self(), _srcDinfo, _glm, false, true, true, nullModelBeta(_srcDinfo, _ymu), _ymu, 1.0 / _nobs, thresholds, new H2OCallback<GLMIterationTask>(cmp) {

                @Override
                public String toString() {
                    // Null test un-inverted: previously a non-null completer printed "NULL"
                    // and a null completer threw a NullPointerException.
                    return "LMAXTask callback. completer = " + (getCompleter() == null ? "NULL" : getCompleter().toString());
                }

                @Override
                public void callback(final GLMIterationTask glmt) {
                    double[] beta = glmt._beta;
                    if (beta_start == null) {
                        beta_start = beta;
                    }
                    _nullDeviance = glmt._val.residualDeviance();
                    // lambda_max: largest absolute gradient entry, scaled by alpha (floored at 1e-3).
                    _currentLambda = lambda_max = Math.max(Utils.maxValue(glmt._grad), -Utils.minValue(glmt._grad)) / Math.max(1e-3, alpha[0]);
                    _lastResult = makeIterationInfo(0, glmt, null, glmt.gradient(0, 0));
                    GLMModel model = new GLMModel(GLM2.this, dest(), _srcDinfo, _glm, glmt._val, beta_epsilon, alpha[0], lambda_max, _ymu, prior);
                    model.start_training(start_time);
                    if (lambda_search) {
                        assert !Double.isNaN(lambda_max) : LogInfo("running lambda_value search, but don't know what is the lambda_value max!");
                        model = addLmaxSubmodel(model, glmt._val, beta);
                        if (nlambdas == -1) {
                            lambda = null;
                        } else {
                            if (lambda_min_ratio == -1)
                                lambda_min_ratio = _nobs > 25 * _srcDinfo.fullN() ? 1e-4 : 1e-2;
                            // Geometric sequence from lambda_max down to lambda_max * lambda_min_ratio.
                            final double d = Math.pow(lambda_min_ratio, 1.0 / (nlambdas - 1));
                            if (nlambdas == 0)
                                throw new IllegalArgumentException("nlambdas must be > 0 when running lambda search.");
                            lambda = new double[nlambdas];
                            lambda[0] = lambda_max;
                            if (nlambdas == 1)
                                throw new IllegalArgumentException("Number of lambdas must be > 1 when running with lambda_search!");
                            for (int i = 1; i < lambda.length; ++i) lambda[i] = lambda[i - 1] * d;
                            lambda_min = lambda[lambda.length - 1];
                            max_iter = MAX_ITERATIONS_PER_LAMBDA * nlambdas;
                        }
                        _runAllLambdas = false;
                    } else {
                        if (lambda == null || lambda.length == 0)
                            lambda = new double[] { DEFAULT_LAMBDA };
                        // Skip the leading user lambdas that are above lambda_max.
                        int i = 0;
                        while (i < lambda.length && lambda[i] > lambda_max) ++i;
                        if (i == lambda.length)
                            throw new IllegalArgumentException("Given lambda(s) are all > lambda_max = " + lambda_max + ", have nothing to run with. lambda = " + Arrays.toString(lambda));
                        if (i > 0) {
                            model.addWarning("Removed " + i + " lambdas greater than lambda_max.");
                            lambda = Utils.append(new double[] { lambda_max }, Arrays.copyOfRange(lambda, i, lambda.length));
                            addLmaxSubmodel(model, glmt._val, beta);
                        }
                    }
                    model.delete_and_lock(self());
                    lambda_min = lambda[lambda.length - 1];
                    if (n_folds > 1) {
                        final H2OCountedCompleter futures = new H2OEmptyCompleter();
                        final GLM2[] xvals = new GLM2[n_folds + 1];
                        // n_folds tasks are launched below with 'futures' as completer.
                        futures.addToPendingCount(xvals.length - 2);
                        for (int i = 0; i < xvals.length; ++i) {
                            xvals[i] = (GLM2) GLM2.this.clone();
                            xvals[i].n_folds = 0;
                            xvals[i].standardize = standardize;
                            xvals[i].family = family;
                            xvals[i].link = link;
                            xvals[i].beta_epsilon = beta_epsilon;
                            xvals[i].max_iter = max_iter;
                            xvals[i].variable_importances = variable_importances;
                            if (i != 0) {
                                // Index 0 keeps the source data; indices 1..n_folds get one fold each.
                                xvals[i]._srcDinfo = _srcDinfo.getFold(i - 1, n_folds);
                                xvals[i].destination_key = Key.make(dest().toString() + "_xval_" + i, (byte) 1, Key.HIDDEN_USER_KEY, H2O.SELF);
                                xvals[i]._nobs = ymut.nobs(i - 1);
                                xvals[i]._ymu = ymut.ymu(i - 1);
                                final int fi = i;
                                final double ymu = ymut.ymu(fi - 1);
                                // new GLMIterationTask(offset_cols.length,GLM2.this.self(), _srcDinfo, _glm, false, true, true,nullModelBeta(),_ymu,1.0/_nobs, thresholds, new H2OCallback<GLMIterationTask>(cmp){
                                new GLMIterationTask(_noffsets, self(), xvals[i]._srcDinfo, _glm, false, true, true, nullModelBeta(xvals[fi]._srcDinfo, ymu), ymu, 1.0 / ymut.nobs(fi - 1), thresholds, new H2OCallback<GLMIterationTask>(futures) {

                                    @Override
                                    public String toString() {
                                        // Ternary parenthesized; see the precedence note on the outer callbacks.
                                        return "Xval LMAXTask callback., completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
                                    }

                                    @Override
                                    public void callback(GLMIterationTask t) {
                                        xvals[fi].beta_start = t._beta;
                                        // Use this fold's own gradient (t), not the full-data task (glmt).
                                        // With glmt the fold's lambda_max was always identical to the outer
                                        // lambda_max, so the comparison further down could never be true.
                                        xvals[fi]._currentLambda = xvals[fi].lambda_max = Math.max(Utils.maxValue(t._grad), -Utils.minValue(t._grad)) / Math.max(1e-3, alpha[0]);
                                        assert xvals[fi].lambda_max > 0;
                                        xvals[fi]._lastResult = makeIterationInfo(0, t, null, t.gradient(alpha[0], 0));
                                        //.delete_and_lock(self());
                                        GLMModel m = new GLMModel(GLM2.this, xvals[fi].destination_key, xvals[fi]._srcDinfo, _glm, t._val, beta_epsilon, alpha[0], xvals[fi].lambda_max, xvals[fi]._ymu, prior);
                                        m.submodels = new Submodel[] { new Submodel(xvals[fi].lambda_max, t._beta, t._beta, 0, 0, t._beta.length >= sparseCoefThreshold) };
                                        m.submodels[0].validation = t._val;
                                        assert t._val != null;
                                        m.setSubmodelIdx(0);
                                        m.delete_and_lock(self());
                                        // Fold starts above the global lambda_max: run it at lambda_max first.
                                        if (xvals[fi].lambda_max > lambda_max) {
                                            futures.addToPendingCount(1);
                                            new ParallelGLMs(GLM2.this, new GLM2[] { xvals[fi] }, lambda_max, 1, futures).fork();
                                        }
                                    }
                                }).asyncExec(xvals[i]._srcDinfo._adaptedFrame);
                            }
                        }
                        _xvals = xvals;
                        // Wait for all fold initializations before starting the lambda iterations.
                        futures.join();
                    }
                    getCompleter().addToPendingCount(1);
                    nextLambda(nextLambdaValue(), new LambdaIteration(getCompleter()));
                }
            }).asyncExec(_srcDinfo._adaptedFrame);
        }
    }).asyncExec(_srcDinfo._adaptedFrame);
}
Also used : Submodel(hex.glm.GLMModel.Submodel) YMUTask(hex.glm.GLMTask.YMUTask) GLMIterationTask(hex.glm.GLMTask.GLMIterationTask) H2OCallback(water.H2O.H2OCallback) H2OCountedCompleter(water.H2O.H2OCountedCompleter) H2OEmptyCompleter(water.H2O.H2OEmptyCompleter)

Example 4 with GLMIterationTask

use of hex.glm.GLMTask.GLMIterationTask in project h2o-2 by h2oai.

the class GLMTest2 method testProximal.

/**
 * Fits a binomial GLM on the prostate data with beta constraints ({@code beta_given}/{@code rho}
 * columns) and checks that, at the fitted coefficients, gradient plus
 * {@code rho * (beta - beta_given)} is zero within 1e-8 - first on the raw data, then on
 * standardized data.
 */
@Test
public void testProximal() {
    //    glmnet's result:
    //    res2 <- glmnet(x=M,y=D$CAPSULE,lower.limits=-.5,upper.limits=.5,family='binomial')
    //    res2$beta[,58]
    //    AGE        RACE          DPROS       PSA         VOL         GLEASON
    //    -0.00616326 -0.50000000  0.50000000  0.03628192 -0.01249324  0.50000000 //    res2$a0[100]
    //    res2$a0[58]
    //    s57
    //    -4.155864
    //    lambda = 0.001108, null dev =  512.2888, res dev = 379.7597
    final Key parsedKey = Key.make("prostate_parsed");
    final Key modelKey = Key.make("prostate_model");
    GLMModel model = null;

    // Load the data, rebalance it into 64 chunks and continue with the rebalanced copy.
    Frame fr = getFrameForFile(parsedKey, "smalldata/logreg/prostate.csv", new String[] { "ID" }, "CAPSULE");
    final Key rebalancedKey = Key.make("rebalanced");
    H2O.submitTask(new RebalanceDataSet(fr, rebalancedKey, 64)).join();
    fr.delete();
    fr = DKV.get(rebalancedKey).get();
    fr.remove("ID");

    // Constraint rows follow the coefficient order:
    // [AGE, RACE, DPROS, DCAPS, PSA, VOL, GLEASON, Intercept]
    final Key constraintsKey = Key.make("beta_constraints");
    FVecTest.makeByteVec(constraintsKey, "names, beta_given, rho\n AGE, 0.1, 1\nRACE, -0.1, 1 \n DPROS, 10, 1 \n DCAPS, -10, 1 \n PSA, 0, 1\n VOL, 0, 1\nGLEASON, 0, 1\n Intercept, 0, 0 \n");
    Frame betaConstraints = ParseDataset2.parse(parsedKey, new Key[] { constraintsKey });

    try {
        // ---- non-standardized fit ----
        // H2O differs on intercept and race, same residual deviance though
        GLM2.Source src = new GLM2.Source((Frame) fr.clone(), fr.vec("CAPSULE"), false, true);
        new GLM2("GLM offset test on prostate.", Key.make(), modelKey, src, Family.binomial)
            .setNonNegative(false)
            .setRegularization(new double[] { 0 }, new double[] { 0.000 })
            .setBetaConstraints(betaConstraints)
            .setHighAccuracy()
            .doInit()
            .fork()
            .get();
        model = DKV.get(modelKey).get();
        // Move the response to the end of the frame.
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        DataInfo dinfo = new DataInfo(fr, 1, true, false, TransformType.NONE, DataInfo.TransformType.NONE);
        GLMIterationTask gradTask = new GLMTask.GLMIterationTask(0, null, dinfo, new GLMParams(Family.binomial), false, true, true, model.beta(), 0, 1.0 / 380, ModelUtils.DEFAULT_THRESHOLDS, null).doAll(dinfo._adaptedFrame);
        double[] coefs = model.beta();
        double[] gradient = gradTask.gradient(0, 0);
        for (int i = 0; i < coefs.length; ++i) {
            final double rho = betaConstraints.vec("rho").at(i);
            final double given = betaConstraints.vec("beta_given").at(i);
            Assert.assertEquals(0, gradient[i] + rho * (coefs[i] - given), 1e-8);
        }

        // ---- now standardized ----
        src = new GLM2.Source((Frame) fr.clone(), fr.vec("CAPSULE"), true, true);
        new GLM2("GLM offset test on prostate.", Key.make(), modelKey, src, Family.binomial)
            .setNonNegative(false)
            .setRegularization(new double[] { 0 }, new double[] { 0.000 })
            .setBetaConstraints(betaConstraints)
            .setHighAccuracy()
            .doInit()
            .fork()
            .get();
        model = DKV.get(modelKey).get();
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        dinfo = new DataInfo(fr, 1, true, false, TransformType.STANDARDIZE, DataInfo.TransformType.NONE);
        gradTask = new GLMTask.GLMIterationTask(0, null, dinfo, new GLMParams(Family.binomial), false, true, true, model.norm_beta(0), 0, 1.0 / 380, ModelUtils.DEFAULT_THRESHOLDS, null).doAll(dinfo._adaptedFrame);
        double[] stdCoefs = model.norm_beta(0);
        double[] stdGradient = gradTask.gradient(0, 0);
        // All coefficients except the last one (intercept); beta_given is scaled by the column sigma.
        for (int i = 0; i < coefs.length - 1; ++i) {
            final double rho = betaConstraints.vec("rho").at(i);
            final double given = betaConstraints.vec("beta_given").at(i);
            final double sigma = dinfo._adaptedFrame.vec(i).sigma();
            Assert.assertEquals("grad[" + i + "] != 0", 0, stdGradient[i] + rho * (stdCoefs[i] - given * sigma), 1e-8);
        }
        Assert.assertEquals("grad[intercept] != 0", 0, stdGradient[stdGradient.length - 1], 1e-8);
    } finally {
        fr.delete();
        if (model != null) {
            model.delete();
        }
    }
}
Also used : Source(hex.glm.GLM2.Source) DataInfo(hex.FrameTask.DataInfo) GLMIterationTask(hex.glm.GLMTask.GLMIterationTask) Source(hex.glm.GLM2.Source) GLMIterationTask(hex.glm.GLMTask.GLMIterationTask) Test(org.junit.Test)

Aggregations

GLMIterationTask (hex.glm.GLMTask.GLMIterationTask)4 H2OCallback (water.H2O.H2OCallback)2 H2OCountedCompleter (water.H2O.H2OCountedCompleter)2 DataInfo (hex.FrameTask.DataInfo)1 Source (hex.glm.GLM2.Source)1 Submodel (hex.glm.GLMModel.Submodel)1 YMUTask (hex.glm.GLMTask.YMUTask)1 Test (org.junit.Test)1 H2OEmptyCompleter (water.H2O.H2OEmptyCompleter)1