Search in sources :

Example 1 with GLMGradientInfo

use of hex.glm.GLM.GLMGradientInfo in project h2o-3 by h2oai.

From the class ComputationState, method applyStrongRules.

/**
 * Applies the strong rules to filter out predictors expected to be inactive
 * (i.e. to end up with a zero coefficient) at the new lambda value, and rebuilds
 * the active-set state (_activeData, _activeBC, _beta, _u, _ginfo, _gslvr) in place.
 *
 * @param lambdaNew the new (smaller) regularization strength on the lambda path
 * @param lambdaOld the previous regularization strength
 */
protected void applyStrongRules(double lambdaNew, double lambdaOld) {
    // Strong rules are only meaningful on the path below lambda_max.
    lambdaNew = Math.min(_lambdaMax, lambdaNew);
    lambdaOld = Math.min(_lambdaMax, lambdaOld);
    if (_parms._family == Family.multinomial) /* && _parms._solver != GLMParameters.Solver.L_BFGS */
    {
        applyStrongRulesMultinomial(lambdaNew, lambdaOld);
        return;
    }
    int P = _dinfo.fullN();
    _activeBC = _bc;
    _activeData = _activeData != null ? _activeData : _dinfo;
    // No l1 penalty (alpha*lambda == 0) or box constraints present => keep every predictor.
    _allIn = _allIn || _parms._alpha[0] * lambdaNew == 0 || _activeBC.hasBounds();
    if (!_allIn) {
        int newlySelected = 0;
        // Strong-rule threshold: predictor i is kept when |gradient_i| > alpha*(2*lambdaNew - lambdaOld).
        final double rhs = Math.max(0, _alpha * (2 * lambdaNew - lambdaOld));
        int[] newCols = MemoryManager.malloc4(P);
        int j = 0;
        // With no active columns yet, { P } serves as the intercept-only sentinel.
        int[] oldActiveCols = _activeData._activeCols == null ? new int[] { P } : _activeData.activeCols();
        for (int i = 0; i < P; ++i) {
            if (j < oldActiveCols.length && oldActiveCols[j] == i)
                j++;  // already active -> skip (kept via the merge below)
            else if (_ginfo._gradient[i] > rhs || -_ginfo._gradient[i] > rhs)
                newCols[newlySelected++] = i;  // |gradient| exceeds threshold -> newly active
        }
        // Enforce the max_active_predictors cap by keeping only the candidates with the
        // largest |gradient| (the -1/+1 offsets account for the intercept entry in oldActiveCols).
        if (_parms._max_active_predictors != -1 && (oldActiveCols.length + newlySelected - 1) > _parms._max_active_predictors) {
            Integer[] bigInts = ArrayUtils.toIntegers(newCols, 0, newlySelected);
            Arrays.sort(bigInts, new Comparator<Integer>() {

                @Override
                public int compare(Integer o1, Integer o2) {
                    // descending order by squared gradient magnitude
                    return (int) Math.signum(_ginfo._gradient[o2.intValue()] * _ginfo._gradient[o2.intValue()] - _ginfo._gradient[o1.intValue()] * _ginfo._gradient[o1.intValue()]);
                }
            });
            newCols = ArrayUtils.toInt(bigInts, 0, _parms._max_active_predictors - oldActiveCols.length + 1);
            Arrays.sort(newCols);
        } else
            newCols = Arrays.copyOf(newCols, newlySelected);
        // merge already active columns in
        newCols = ArrayUtils.sortedMerge(oldActiveCols, newCols);
        int active = newCols.length;
        _allIn = active == P;
        if (!_allIn) {
            int[] cols = newCols;
            // intercept is always selected, even if it is false (it's gonna be dropped later, it is needed for other stuff too)
            assert cols[active - 1] == P;
            // Project the solver state down onto the active columns.
            _beta = ArrayUtils.select(_beta, cols);
            if (_u != null)
                _u = ArrayUtils.select(_u, cols);
            _activeData = _dinfo.filterExpandedColumns(cols);
            assert _activeData.activeCols().length == _beta.length;
            assert _u == null || _activeData.activeCols().length == _u.length;
            _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal, ArrayUtils.select(_ginfo._gradient, cols));
            _activeBC = _bc.filterExpandedColumns(_activeData.activeCols());
            // NOTE(review): passes the unfiltered _bc here while checkKKTs passes _activeBC in the
            // analogous call — confirm this asymmetry is intentional.
            _gslvr = new GLMGradientSolver(_job, _parms, _activeData, (1 - _alpha) * _lambda, _bc);
            assert _beta.length == cols.length;
            return;
        }
    }
    // Everything is active: fall back to the full data info.
    _activeData = _dinfo;
}
Also used : GLMGradientInfo(hex.glm.GLM.GLMGradientInfo) GLMGradientSolver(hex.glm.GLM.GLMGradientSolver) BetaConstraint(hex.glm.GLM.BetaConstraint)

Example 2 with GLMGradientInfo

use of hex.glm.GLM.GLMGradientInfo in project h2o-3 by h2oai.

From the class ComputationState, method checkKKTs.

/**
 * Checks the KKT (optimality) conditions of the current solution over all columns
 * and, if any inactive column violates them, adds the failed columns to the active
 * set and re-projects the solver state onto it.
 *
 * @return true if the KKT conditions hold (solution is optimal over the full column
 *         set), false if failed columns were added and the model must be recomputed.
 */
protected boolean checkKKTs() {
    if (_parms._family == Family.multinomial)
        return checkKKTsMultinomial();
    double[] beta = _beta;
    double[] u = _u;
    if (_activeData._activeCols != null) {
        // Expand the active-subset solution back into full coefficient space (zeros elsewhere).
        beta = ArrayUtils.expandAndScatter(beta, _dinfo.fullN() + 1, _activeData._activeCols);
        if (_u != null)
            u = ArrayUtils.expandAndScatter(_u, _dinfo.fullN() + 1, _activeData._activeCols);
    }
    int[] activeCols = _activeData.activeCols();
    if (beta != _beta || _ginfo == null) {
        // Gradient info is stale (computed on the subset) or missing -> recompute on the full data.
        _gslvr = new GLMGradientSolver(_job, _parms, _dinfo, (1 - _alpha) * _lambda, _bc);
        _ginfo = _gslvr.getGradient(beta);
    }
    double[] grad = _ginfo._gradient.clone();
    double err = 1e-4;
    if (u != null && u != _u) {
        // fill in u for missing variables
        int k = 0;
        for (int i = 0; i < u.length; ++i) {
            if (_activeData._activeCols[k] == i) {
                ++k;  // active column: its u entry was scattered above
                continue;
            }
            assert u[i] == 0;
            u[i] = -grad[i];
        }
    }
    // Subtract the l1 subgradient; entries of active coordinates should end up near zero.
    ADMM.subgrad(_alpha * _lambda, beta, grad);
    // set the error tolerance to the highest gradient error of the included columns
    for (int c : activeCols)
        if (grad[c] > err)
            err = grad[c];
        else if (grad[c] < -err)
            err = -grad[c];
    _gradientErr = err;
    _beta = beta;
    _u = u;
    // NOTE(review): _activeBC is reset here and only rebuilt when columns fail below — confirm
    // callers do not read _activeBC between this point and the next applyStrongRules call.
    _activeBC = null;
    if (_parms._max_active_predictors == _activeData.fullN()) {
        Log.info("skipping KKT check, reached maximum number of active predictors (" + _parms._max_active_predictors + ")");
    } else if (!_allIn) {
        int[] failedCols = new int[64];
        int fcnt = 0;
        for (int i = 0; i < grad.length - 1; ++i) {
            // always include all previously active columns
            if (Arrays.binarySearch(activeCols, i) >= 0)
                continue;
            // An inactive column whose gradient exceeds the tolerance violates KKT.
            if (grad[i] > err || -grad[i] > err) {
                if (fcnt == failedCols.length)
                    failedCols = Arrays.copyOf(failedCols, failedCols.length << 1);  // grow on demand
                failedCols[fcnt++] = i;
            }
        }
        if (fcnt > 0) {
            Log.info(fcnt + " variables failed KKT conditions, adding them to the model and recomputing.");
            final int n = activeCols.length;
            int[] newCols = Arrays.copyOf(activeCols, activeCols.length + fcnt);
            for (int i = 0; i < fcnt; ++i) newCols[n + i] = failedCols[i];
            Arrays.sort(newCols);
            // Re-project the solver state onto the enlarged active set.
            _beta = ArrayUtils.select(beta, newCols);
            if (_u != null)
                _u = ArrayUtils.select(_u, newCols);
            _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal, ArrayUtils.select(_ginfo._gradient, newCols));
            _activeData = _dinfo.filterExpandedColumns(newCols);
            _activeBC = _bc.filterExpandedColumns(_activeData.activeCols());
            _gslvr = new GLMGradientSolver(_job, _parms, _activeData, (1 - _alpha) * _lambda, _activeBC);
            return false;
        }
    }
    return true;
}
Also used : GLMGradientSolver(hex.glm.GLM.GLMGradientSolver) GLMGradientInfo(hex.glm.GLM.GLMGradientInfo) BetaConstraint(hex.glm.GLM.BetaConstraint)

Example 3 with GLMGradientInfo

use of hex.glm.GLM.GLMGradientInfo in project h2o-3 by h2oai.

From the class ComputationState, method gslvrMultinomial.

/**
 * Builds a gradient solver restricted to class {@code c} of the multinomial model.
 * The returned solver scatters the per-class coefficients into a snapshot of the
 * full coefficient vector, evaluates the full-model gradient, and returns a view
 * restricted back to the active columns of class {@code c}.
 *
 * @param c index of the multinomial class the solver operates on
 * @return a solver whose gradient/objective are those of class {@code c} only
 */
public GradientSolver gslvrMultinomial(final int c) {
    final double[] fullBeta = _beta.clone();
    return new GradientSolver() {

        @Override
        public GradientInfo getGradient(double[] beta) {
            final int[] classCols = _activeDataMultinomial[c].activeCols();
            final int classStride = _activeData.fullN() + 1;
            // Write this class' coefficients into their slots of the full beta vector.
            fillSubRange(classStride, c, classCols, beta, fullBeta);
            // Full-model gradient, then a subset view for this class' columns.
            final GLMGradientInfo fullGinfo = _gslvr.getGradient(fullBeta);
            return new GLMSubsetGinfo(fullGinfo, classStride, c, classCols);
        }

        @Override
        public GradientInfo getObjective(double[] beta) {
            // The objective falls out of the gradient computation; reuse it.
            return getGradient(beta);
        }
    };
}
Also used : GLMGradientInfo(hex.glm.GLM.GLMGradientInfo) GradientSolver(hex.optimization.OptimizationUtils.GradientSolver) GLMGradientSolver(hex.glm.GLM.GLMGradientSolver)

Example 4 with GLMGradientInfo

use of hex.glm.GLM.GLMGradientInfo in project h2o-3 by h2oai.

From the class ComputationState, method adjustToNewLambda.

/**
 * Incrementally updates the cached gradient info when moving from {@code lambdaOld}
 * to {@code lambdaNew} instead of recomputing the gradient from scratch. Only the
 * l2 (ridge) part of the penalty depends on lambda, so
 * gradient += (lambdaNew - lambdaOld) * beta and
 * objective += (lambdaNew - lambdaOld) * 0.5 * ||beta||^2.
 *
 * @param lambdaNew the new regularization strength
 * @param lambdaOld the previous regularization strength
 */
private void adjustToNewLambda(double lambdaNew, double lambdaOld) {
    final double delta = lambdaNew - lambdaOld;
    // Nothing to adjust when lambda is unchanged or there is no l2 penalty.
    if (delta == 0 || l2pen() == 0)
        return;
    final double halfNormSq = .5 * ArrayUtils.l2norm2(_beta, true);
    if (halfNormSq > 0) {
        if (_parms._family == Family.multinomial) {
            // Coefficients are laid out per class: fullN() predictors followed by one
            // intercept, which is skipped (intercepts are not penalized).
            int offset = 0;
            for (int c = 0; c < _nclasses; ++c) {
                final DataInfo activeData = activeDataMultinomial(c);
                final int numPredictors = activeData.fullN();
                for (int i = 0; i < numPredictors; ++i)
                    _ginfo._gradient[offset + i] += delta * _beta[offset + i];
                offset += numPredictors + 1;
            }
        } else {
            for (int i = 0; i < _activeData.fullN(); ++i)
                _ginfo._gradient[i] += delta * _beta[i];
        }
    }
    // Rewrap with the lambda-adjusted objective; the gradient array was updated in place.
    _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal + delta * halfNormSq, _ginfo._gradient);
}
Also used : DataInfo(hex.DataInfo) GLMGradientInfo(hex.glm.GLM.GLMGradientInfo) BetaConstraint(hex.glm.GLM.BetaConstraint)

Aggregations

GLMGradientInfo (hex.glm.GLM.GLMGradientInfo)4 BetaConstraint (hex.glm.GLM.BetaConstraint)3 GLMGradientSolver (hex.glm.GLM.GLMGradientSolver)3 DataInfo (hex.DataInfo)1 GradientSolver (hex.optimization.OptimizationUtils.GradientSolver)1