Usage of hex.glm.GLM.GLMGradientInfo in the h2o-3 project by h2oai.
Class ComputationState, method applyStrongRules.
/**
 * Apply the strong rules to filter out predictors expected to be inactive (i.e. to end up with a
 * zero coefficient) at the new lambda value. Columns that survive the filter form the new active
 * set: {@code _activeData}, {@code _beta}, {@code _u}, {@code _ginfo}, {@code _activeBC} and
 * {@code _gslvr} are all shrunk to the selected columns. Sets {@code _allIn} when filtering is
 * disabled or every column is active.
 *
 * @param lambdaNew the lambda value being moved to (capped at {@code _lambdaMax})
 * @param lambdaOld the previous lambda value (capped at {@code _lambdaMax})
 */
protected void applyStrongRules(double lambdaNew, double lambdaOld) {
    lambdaNew = Math.min(_lambdaMax, lambdaNew);
    lambdaOld = Math.min(_lambdaMax, lambdaOld);
    // multinomial has its own dedicated filtering path
    if (_parms._family == Family.multinomial) /* && _parms._solver != GLMParameters.Solver.L_BFGS */
    {
        applyStrongRulesMultinomial(lambdaNew, lambdaOld);
        return;
    }
    int P = _dinfo.fullN();
    _activeBC = _bc;
    _activeData = _activeData != null ? _activeData : _dinfo;
    // no L1 penalty (alpha*lambda == 0) or bounded coefficients => strong rules don't apply, keep all columns
    _allIn = _allIn || _parms._alpha[0] * lambdaNew == 0 || _activeBC.hasBounds();
    if (!_allIn) {
        int newlySelected = 0;
        // strong-rule threshold: a column is kept when |gradient| exceeds alpha*(2*lambdaNew - lambdaOld)
        final double rhs = Math.max(0, _alpha * (2 * lambdaNew - lambdaOld));
        int[] newCols = MemoryManager.malloc4(P);
        int j = 0;
        // sentinel {P}: when nothing is active yet, treat the intercept (expanded index P) as the only active column
        int[] oldActiveCols = _activeData._activeCols == null ? new int[] { P } : _activeData.activeCols();
        for (int i = 0; i < P; ++i) {
            if (j < oldActiveCols.length && oldActiveCols[j] == i)
                // already active - skip
                j++;
            else if (_ginfo._gradient[i] > rhs || -_ginfo._gradient[i] > rhs)
                // gradient magnitude above the strong-rule threshold - newly selected
                newCols[newlySelected++] = i;
        }
        // enforce the cap on active predictors; the "- 1"/"+ 1" adjustments exclude the intercept
        // entry (index P) that oldActiveCols always carries from the predictor count
        if (_parms._max_active_predictors != -1 && (oldActiveCols.length + newlySelected - 1) > _parms._max_active_predictors) {
            Integer[] bigInts = ArrayUtils.toIntegers(newCols, 0, newlySelected);
            // sort candidates by squared gradient, descending, so the strongest ones are kept
            Arrays.sort(bigInts, new Comparator<Integer>() {
                @Override
                public int compare(Integer o1, Integer o2) {
                    return (int) Math.signum(_ginfo._gradient[o2.intValue()] * _ginfo._gradient[o2.intValue()] - _ginfo._gradient[o1.intValue()] * _ginfo._gradient[o1.intValue()]);
                }
            });
            newCols = ArrayUtils.toInt(bigInts, 0, _parms._max_active_predictors - oldActiveCols.length + 1);
            Arrays.sort(newCols);
        } else
            newCols = Arrays.copyOf(newCols, newlySelected);
        // merge already active columns in
        newCols = ArrayUtils.sortedMerge(oldActiveCols, newCols);
        int active = newCols.length;
        _allIn = active == P;
        if (!_allIn) {
            int[] cols = newCols;
            // intercept is always selected, even if it is false (it's gonna be dropped later, it is needed for other stuff too)
            assert cols[active - 1] == P;
            // shrink all per-column state down to the selected columns
            _beta = ArrayUtils.select(_beta, cols);
            if (_u != null)
                _u = ArrayUtils.select(_u, cols);
            _activeData = _dinfo.filterExpandedColumns(cols);
            assert _activeData.activeCols().length == _beta.length;
            assert _u == null || _activeData.activeCols().length == _u.length;
            _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal, ArrayUtils.select(_ginfo._gradient, cols));
            _activeBC = _bc.filterExpandedColumns(_activeData.activeCols());
            // NOTE(review): this passes the unfiltered _bc while the analogous path in checkKKTs
            // passes _activeBC - confirm whether that asymmetry is intentional
            _gslvr = new GLMGradientSolver(_job, _parms, _activeData, (1 - _alpha) * _lambda, _bc);
            assert _beta.length == cols.length;
            return;
        }
    }
    // everything is active - work on the full data info
    _activeData = _dinfo;
}
Usage of hex.glm.GLM.GLMGradientInfo in the h2o-3 project by h2oai.
Class ComputationState, method checkKKTs.
/**
 * Check the KKT (Karush-Kuhn-Tucker) optimality conditions on the full column set. Columns outside
 * the current active set whose (subgradient-adjusted) gradient exceeds the tolerance are added to
 * the active set.
 *
 * @return {@code true} if the conditions hold (or the active-predictor cap was reached);
 *         {@code false} when violating columns were added and the solution must be recomputed
 */
protected boolean checkKKTs() {
    if (_parms._family == Family.multinomial)
        return checkKKTsMultinomial();
    double[] beta = _beta;
    double[] u = _u;
    // expand the active-subset vectors back to the full expanded column space (zeros elsewhere)
    if (_activeData._activeCols != null) {
        beta = ArrayUtils.expandAndScatter(beta, _dinfo.fullN() + 1, _activeData._activeCols);
        if (_u != null)
            u = ArrayUtils.expandAndScatter(_u, _dinfo.fullN() + 1, _activeData._activeCols);
    }
    int[] activeCols = _activeData.activeCols();
    // gradient was computed on the active subset (or not at all) - recompute on the full data
    if (beta != _beta || _ginfo == null) {
        _gslvr = new GLMGradientSolver(_job, _parms, _dinfo, (1 - _alpha) * _lambda, _bc);
        _ginfo = _gslvr.getGradient(beta);
    }
    double[] grad = _ginfo._gradient.clone();
    double err = 1e-4;
    if (u != null && u != _u) {
        // fill in u for missing variables
        int k = 0;
        for (int i = 0; i < u.length; ++i) {
            if (_activeData._activeCols[k] == i) {
                ++k;
                continue;
            }
            assert u[i] == 0;
            u[i] = -grad[i];
        }
    }
    // subtract the L1 subgradient so that optimal coordinates have (near) zero adjusted gradient
    ADMM.subgrad(_alpha * _lambda, beta, grad);
    // raise the error tolerance to the highest absolute gradient error among the included columns
    for (int c : activeCols)
        if (grad[c] > err)
            err = grad[c];
        else if (grad[c] < -err)
            err = -grad[c];
    _gradientErr = err;
    _beta = beta;
    _u = u;
    _activeBC = null;
    if (_parms._max_active_predictors == _activeData.fullN()) {
        Log.info("skipping KKT check, reached maximum number of active predictors (" + _parms._max_active_predictors + ")");
    } else if (!_allIn) {
        // collect inactive columns violating the KKT conditions; array grows by doubling
        int[] failedCols = new int[64];
        int fcnt = 0;
        // grad.length - 1 excludes the intercept from the check
        for (int i = 0; i < grad.length - 1; ++i) {
            // always include all previously active columns
            if (Arrays.binarySearch(activeCols, i) >= 0)
                continue;
            if (grad[i] > err || -grad[i] > err) {
                if (fcnt == failedCols.length)
                    failedCols = Arrays.copyOf(failedCols, failedCols.length << 1);
                failedCols[fcnt++] = i;
            }
        }
        if (fcnt > 0) {
            Log.info(fcnt + " variables failed KKT conditions, adding them to the model and recomputing.");
            // merge the violating columns into the active set and shrink all state to it
            final int n = activeCols.length;
            int[] newCols = Arrays.copyOf(activeCols, activeCols.length + fcnt);
            for (int i = 0; i < fcnt; ++i) newCols[n + i] = failedCols[i];
            Arrays.sort(newCols);
            _beta = ArrayUtils.select(beta, newCols);
            if (_u != null)
                _u = ArrayUtils.select(_u, newCols);
            _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal, ArrayUtils.select(_ginfo._gradient, newCols));
            _activeData = _dinfo.filterExpandedColumns(newCols);
            _activeBC = _bc.filterExpandedColumns(_activeData.activeCols());
            _gslvr = new GLMGradientSolver(_job, _parms, _activeData, (1 - _alpha) * _lambda, _activeBC);
            return false;
        }
    }
    return true;
}
Usage of hex.glm.GLM.GLMGradientInfo in the h2o-3 project by h2oai.
Class ComputationState, method gslvrMultinomial.
/**
 * Builds a gradient solver restricted to a single multinomial class {@code c}. The returned
 * solver embeds the per-class coefficients it is given into a private copy of the full
 * coefficient vector, evaluates the full gradient via {@code _gslvr}, and hands back only the
 * slice relevant to class {@code c}.
 *
 * @param c index of the multinomial class to solve for
 * @return a {@link GradientSolver} operating on class {@code c}'s active columns only
 */
public GradientSolver gslvrMultinomial(final int c) {
    // snapshot of the full coefficient vector; the per-class sub-range is overwritten on each call
    final double[] combinedBeta = _beta.clone();
    return new GradientSolver() {
        @Override
        public GradientInfo getGradient(double[] beta) {
            final int classLen = _activeData.fullN() + 1;
            final int[] classCols = _activeDataMultinomial[c].activeCols();
            // splice this class's coefficients into the full vector, then evaluate the full gradient
            fillSubRange(classLen, c, classCols, beta, combinedBeta);
            final GLMGradientInfo fullGinfo = _gslvr.getGradient(combinedBeta);
            // expose only the slice belonging to class c
            return new GLMSubsetGinfo(fullGinfo, classLen, c, classCols);
        }

        @Override
        public GradientInfo getObjective(double[] beta) {
            // objective evaluation reuses the gradient computation
            return getGradient(beta);
        }
    };
}
Usage of hex.glm.GLM.GLMGradientInfo in the h2o-3 project by h2oai.
Class ComputationState, method adjustToNewLambda.
/**
 * Adjusts the cached gradient and objective value in {@code _ginfo} to reflect a change of the
 * L2 penalty from {@code lambdaOld} to {@code lambdaNew}, avoiding a full gradient recomputation.
 * No-op when the lambda values are equal or there is no L2 penalty.
 *
 * @param lambdaNew the new lambda value
 * @param lambdaOld the previous lambda value
 */
private void adjustToNewLambda(double lambdaNew, double lambdaOld) {
    final double delta = lambdaNew - lambdaOld;
    if (delta == 0 || l2pen() == 0)
        return;
    // ridge penalty term: 0.5 * ||beta||^2 (intercept excluded)
    final double penalty = .5 * ArrayUtils.l2norm2(_beta, true);
    if (penalty > 0) {
        if (_parms._family == Family.multinomial) {
            // per-class blocks: each spans fullN() predictors plus one intercept slot
            int offset = 0;
            for (int cls = 0; cls < _nclasses; ++cls) {
                final DataInfo classData = activeDataMultinomial(cls);
                for (int i = 0; i < classData.fullN(); ++i) {
                    _ginfo._gradient[offset + i] += delta * _beta[offset + i];
                }
                offset += classData.fullN() + 1;
            }
        } else {
            for (int i = 0; i < _activeData.fullN(); ++i) {
                _ginfo._gradient[i] += delta * _beta[i];
            }
        }
    }
    // shift the objective by the penalty change; gradient array was updated in place
    _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal + delta * penalty, _ginfo._gradient);
}
Aggregations