Use of water.H2O.H2OCallback in project h2o-2 by h2oai.
Class GLM2, method checkKKTAndComplete.
/**
 * Recomputes the gradient on the full (unfiltered) data for the given solution and checks the
 * KKT conditions on the columns that are currently not active.
 *
 * <p>If any inactive column violates the check, those columns are added to {@code _activeCols}
 * and a new {@link GLMIterationTask} is launched on the enlarged active set. Otherwise the
 * submodel for the current lambda is stored and {@code _done} is set.
 *
 * @param cc               completer of the calling task; cast to {@code H2OCountedCompleter}
 * @param glmt             iteration task whose validation ({@code _val}) is replaced by the
 *                         full-data validation and whose {@code _reg} is reused on restart
 * @param newBeta          solution on the active columns; when {@code null} a zero-filled full
 *                         vector is used for the gradient computation
 * @param failedLineSearch when {@code true} the KKT check is skipped and the current result is
 *                         accepted as is
 */
protected void checkKKTAndComplete(final CountedCompleter cc, final GLMIterationTask glmt, final double[] newBeta, final boolean failedLineSearch) {
  H2OCountedCompleter cmp = (H2OCountedCompleter) cc;
  final double[] fullBeta = newBeta == null ? MemoryManager.malloc8d(_srcDinfo.fullN() + _intercept - _noffsets) : expandVec(newBeta, _activeCols);
  // now we need full gradient (on all columns) using this beta
  new GLMIterationTask(_noffsets, GLM2.this.self(), _srcDinfo, _glm, false, true, true, fullBeta, _ymu, 1.0 / _nobs, thresholds, new H2OCallback<GLMIterationTask>(cmp) {
    @Override
    public String toString() {
      // Parentheses are required: '+' binds tighter than '==', so without them the null test
      // is applied to the concatenated string and a null completer is dereferenced.
      return "checkKKTAndComplete.Callback, completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
    }

    @Override
    public void callback(final GLMIterationTask glmt2) {
      // first check KKT conditions!
      final double[] grad = glmt2.gradient(alpha[0], _currentLambda);
      if (Utils.hasNaNsOrInfs(grad)) {
        _failedLineSearch = true;
        // TODO: add warning and break the lambda search? Or throw Exception?
      }
      glmt._val = glmt2._val;
      _lastResult = makeIterationInfo(_iter, glmt2, null, glmt2.gradient(alpha[0], 0));
      // check the KKT conditions and filter data for next lambda_value
      // check the gradient
      double[] subgrad = grad.clone();
      ADMMSolver.subgrad(alpha[0], _currentLambda, fullBeta, subgrad);
      double grad_eps = GLM_GRAD_EPS;
      if (!failedLineSearch && _activeCols != null) {
        // Widen the tolerance to the largest absolute subgradient seen on the active columns.
        // Both branches must look at the same (active) column; the explicit comparisons are
        // kept so that a NaN entry leaves grad_eps unchanged.
        for (int c = 0; c < _activeCols.length - _noffsets; ++c) {
          final double sg = subgrad[_activeCols[c]];
          if (sg > grad_eps) {
            grad_eps = sg;
          } else if (sg < -grad_eps) {
            grad_eps = -sg;
          }
        }
        // Collect inactive columns whose subgradient exceeds the tolerance.
        int[] failedCols = new int[64];
        int fcnt = 0;
        // NOTE(review): the last gradient entry is skipped - presumably the intercept; confirm.
        for (int i = 0; i < grad.length - 1; ++i) {
          if (Arrays.binarySearch(_activeCols, i) >= 0) {
            continue;
          }
          if (subgrad[i] > grad_eps || -subgrad[i] > grad_eps) {
            if (fcnt == failedCols.length) {
              failedCols = Arrays.copyOf(failedCols, failedCols.length << 1);
            }
            failedCols[fcnt++] = i;
          }
        }
        if (fcnt > 0) {
          final int n = _activeCols.length;
          final int[] oldActiveCols = _activeCols;
          _activeCols = Arrays.copyOf(_activeCols, _activeCols.length + fcnt);
          for (int i = 0; i < fcnt; ++i) {
            _activeCols[n + i] = failedCols[i];
          }
          // binarySearch above relies on _activeCols being sorted
          Arrays.sort(_activeCols);
          LogInfo(fcnt + " variables failed KKT conditions check! Adding them to the model and continuing computation.(grad_eps = " + grad_eps + ", activeCols = " + (_activeCols.length > 100 ? "lost" : Arrays.toString(_activeCols)));
          _activeData = _srcDinfo.filterExpandedColumns(_activeCols);
          // NOTE: tricky completer game here:
          // We expect 0 pending in this method since this is the end-point (actually it's racy,
          // it can be 1 with a pending decrement from the original Iteration callback; the end
          // result is 0 though), while Iteration expects a pending count of 1, so we need to
          // increase it here. Iteration itself adds 1, but 1 will be subtracted when we leave
          // this method since we're in the callback which is called by onCompletion
          // (unlike at the start of the nextLambda call, when we're not inside onCompletion).
          getCompleter().addToPendingCount(1);
          new GLMIterationTask(_noffsets, GLM2.this.self(), _activeData, _glm, true, true, true, resizeVec(newBeta, _activeCols, oldActiveCols), _ymu, glmt._reg, thresholds, new Iteration(getCompleter())).asyncExec(_activeData._adaptedFrame);
          return;
        }
      }
      // KKT check passed (or was skipped): account for the iterations not used for this lambda
      int diff = MAX_ITERATIONS_PER_LAMBDA - _iter + _iter1;
      if (diff > 0) {
        // update progress
        new GLM2_ProgressUpdate(diff).fork(_progressKey);
      }
      GLM2.this.setSubmodel(newBeta, glmt2._val, (H2OCountedCompleter) getCompleter().getCompleter());
      _done = true;
      LogInfo("computation of current lambda done in " + (System.currentTimeMillis() - GLM2.this.start_time) + "ms");
      assert _lastResult._fullGrad != null;
    }
  }).asyncExec(_srcDinfo._adaptedFrame);
}
Use of water.H2O.H2OCallback in project h2o-2 by h2oai.
Class GLM2, method run.
/**
 * Starts the GLM computation asynchronously.
 *
 * <p>The work is chained through callbacks: a {@code YMUTask} computes the response statistics,
 * then a {@link GLMIterationTask} on the null model yields {@code lambda_max} and the null
 * deviance, the lambda sequence is set up, optional cross-validation models are prepared, and
 * finally the first lambda iteration is launched via {@code nextLambda}.
 *
 * @param doLog when {@code true}, {@code logStart()} is called first
 * @param cmp   completer handed to the first callback; its pending count is incremented before
 *              each follow-up task is launched
 * @throws IllegalArgumentException if lambda search is requested with {@code nlambdas <= 1};
 *         the callbacks additionally throw it for a constant response, for a data set with no
 *         usable rows, and when all user-given lambdas exceed {@code lambda_max}
 */
public void run(boolean doLog, H2OCountedCompleter cmp) {
  if (doLog) {
    logStart();
  }
  // just fork off the nfolds+1 tasks and wait for the results
  assert alpha.length == 1;
  start_time = System.currentTimeMillis();
  if (nlambdas == -1) {
    nlambdas = 100;
  }
  if (lambda_search && nlambdas <= 1) {
    throw new IllegalArgumentException(LogInfo("GLM2: nlambdas must be > 1 when running with lambda search."));
  }
  Futures fs = new Futures();
  Key dst = dest();
  new YMUTask(GLM2.this.self(), _srcDinfo, n_folds, new H2OCallback<YMUTask>(cmp) {
    @Override
    public String toString() {
      // Parenthesised and with the test the right way round; previously this always
      // returned "null" because '+' binds tighter than '!='.
      return "YMUTask callback. completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
    }

    @Override
    public void callback(final YMUTask ymut) {
      if (ymut._ymin == ymut._ymax) {
        throw new IllegalArgumentException(LogInfo("GLM2: attempted to run with constant response. Response == " + ymut._ymin + " for all rows in the training set."));
      }
      if (ymut.nobs() == 0) {
        throw new IllegalArgumentException(LogInfo("GLM2: got no active rows in the dataset after discarding rows with NAs"));
      }
      _ymu = ymut.ymu();
      _nobs = ymut.nobs();
      if (_glm.family == Family.binomial && prior != -1 && prior != _ymu && !Double.isNaN(prior)) {
        // user-supplied prior differs from the observed mean: compute the intercept adjustment
        _iceptAdjust = -Math.log(_ymu * (1 - prior) / (prior * (1 - _ymu)));
      } else {
        prior = _ymu;
      }
      H2OCountedCompleter cmp = (H2OCountedCompleter) getCompleter();
      cmp.addToPendingCount(1);
      // Null-model pass over the full data: validation and gradient only, no Gram matrix.
      new GLMIterationTask(_noffsets, GLM2.this.self(), _srcDinfo, _glm, false, true, true, nullModelBeta(_srcDinfo, _ymu), _ymu, 1.0 / _nobs, thresholds, new H2OCallback<GLMIterationTask>(cmp) {
        @Override
        public String toString() {
          // The ternary was inverted: it printed "NULL" for a live completer and
          // dereferenced a null one.
          return "LMAXTask callback. completer = " + (getCompleter() == null ? "NULL" : getCompleter().toString());
        }

        @Override
        public void callback(final GLMIterationTask glmt) {
          double[] beta = glmt._beta;
          if (beta_start == null) {
            beta_start = beta;
          }
          _nullDeviance = glmt._val.residualDeviance();
          // lambda_max = largest absolute gradient entry, scaled by alpha (floored at 1e-3)
          _currentLambda = lambda_max = Math.max(Utils.maxValue(glmt._grad), -Utils.minValue(glmt._grad)) / Math.max(1e-3, alpha[0]);
          _lastResult = makeIterationInfo(0, glmt, null, glmt.gradient(0, 0));
          GLMModel model = new GLMModel(GLM2.this, dest(), _srcDinfo, _glm, glmt._val, beta_epsilon, alpha[0], lambda_max, _ymu, prior);
          model.start_training(start_time);
          if (lambda_search) {
            assert !Double.isNaN(lambda_max) : LogInfo("running lambda_value search, but don't know what is the lambda_value max!");
            model = addLmaxSubmodel(model, glmt._val, beta);
            // NOTE(review): run() already replaces nlambdas == -1 by 100 and rejects
            // nlambdas <= 1 under lambda search before this callback is created, so the
            // -1/0/1 checks below look unreachable unless nlambdas changes in between.
            if (nlambdas == -1) {
              lambda = null;
            } else {
              if (lambda_min_ratio == -1) {
                lambda_min_ratio = _nobs > 25 * _srcDinfo.fullN() ? 1e-4 : 1e-2;
              }
              // geometric sequence from lambda_max down to lambda_max * lambda_min_ratio
              final double d = Math.pow(lambda_min_ratio, 1.0 / (nlambdas - 1));
              if (nlambdas == 0) {
                throw new IllegalArgumentException("nlambdas must be > 0 when running lambda search.");
              }
              lambda = new double[nlambdas];
              lambda[0] = lambda_max;
              if (nlambdas == 1) {
                throw new IllegalArgumentException("Number of lambdas must be > 1 when running with lambda_search!");
              }
              for (int i = 1; i < lambda.length; ++i) {
                lambda[i] = lambda[i - 1] * d;
              }
              lambda_min = lambda[lambda.length - 1];
              max_iter = MAX_ITERATIONS_PER_LAMBDA * nlambdas;
            }
            _runAllLambdas = false;
          } else {
            if (lambda == null || lambda.length == 0) {
              lambda = new double[] { DEFAULT_LAMBDA };
            }
            // skip the leading lambdas that exceed lambda_max
            int i = 0;
            while (i < lambda.length && lambda[i] > lambda_max) {
              ++i;
            }
            if (i == lambda.length) {
              throw new IllegalArgumentException("Given lambda(s) are all > lambda_max = " + lambda_max + ", have nothing to run with. lambda = " + Arrays.toString(lambda));
            }
            if (i > 0) {
              model.addWarning("Removed " + i + " lambdas greater than lambda_max.");
              lambda = Utils.append(new double[] { lambda_max }, Arrays.copyOfRange(lambda, i, lambda.length));
              addLmaxSubmodel(model, glmt._val, beta);
            }
          }
          model.delete_and_lock(self());
          lambda_min = lambda[lambda.length - 1];
          if (n_folds > 1) {
            final H2OCountedCompleter futures = new H2OEmptyCompleter();
            final GLM2[] xvals = new GLM2[n_folds + 1];
            // n_folds tasks are launched below (index 0 launches none), hence n_folds - 1 pending
            futures.addToPendingCount(xvals.length - 2);
            for (int i = 0; i < xvals.length; ++i) {
              xvals[i] = (GLM2) GLM2.this.clone();
              xvals[i].n_folds = 0;
              xvals[i].standardize = standardize;
              xvals[i].family = family;
              xvals[i].link = link;
              xvals[i].beta_epsilon = beta_epsilon;
              xvals[i].max_iter = max_iter;
              xvals[i].variable_importances = variable_importances;
              if (i != 0) {
                xvals[i]._srcDinfo = _srcDinfo.getFold(i - 1, n_folds);
                xvals[i].destination_key = Key.make(dest().toString() + "_xval_" + i, (byte) 1, Key.HIDDEN_USER_KEY, H2O.SELF);
                xvals[i]._nobs = ymut.nobs(i - 1);
                xvals[i]._ymu = ymut.ymu(i - 1);
                final int fi = i;
                final double ymu = ymut.ymu(fi - 1);
                // Null-model pass on this fold's data to get the fold's own lambda_max.
                new GLMIterationTask(_noffsets, self(), xvals[i]._srcDinfo, _glm, false, true, true, nullModelBeta(xvals[fi]._srcDinfo, ymu), ymu, 1.0 / ymut.nobs(fi - 1), thresholds, new H2OCallback<GLMIterationTask>(futures) {
                  @Override
                  public String toString() {
                    // Parentheses are required: '+' binds tighter than '=='.
                    return "Xval LMAXTask callback., completer = " + (getCompleter() == null ? "null" : getCompleter().toString());
                  }

                  @Override
                  public void callback(GLMIterationTask t) {
                    xvals[fi].beta_start = t._beta;
                    // Use this fold's gradient (t), not the full-data task's (glmt); with the
                    // latter the fold's lambda_max always equals the global one and the
                    // comparison against lambda_max below can never be true.
                    xvals[fi]._currentLambda = xvals[fi].lambda_max = Math.max(Utils.maxValue(t._grad), -Utils.minValue(t._grad)) / Math.max(1e-3, alpha[0]);
                    assert xvals[fi].lambda_max > 0;
                    xvals[fi]._lastResult = makeIterationInfo(0, t, null, t.gradient(alpha[0], 0));
                    GLMModel m = new GLMModel(GLM2.this, xvals[fi].destination_key, xvals[fi]._srcDinfo, _glm, t._val, beta_epsilon, alpha[0], xvals[fi].lambda_max, xvals[fi]._ymu, prior);
                    m.submodels = new Submodel[] { new Submodel(xvals[fi].lambda_max, t._beta, t._beta, 0, 0, t._beta.length >= sparseCoefThreshold) };
                    m.submodels[0].validation = t._val;
                    assert t._val != null;
                    m.setSubmodelIdx(0);
                    m.delete_and_lock(self());
                    if (xvals[fi].lambda_max > lambda_max) {
                      // fold starts at a larger lambda than the main model: run it to lambda_max
                      futures.addToPendingCount(1);
                      new ParallelGLMs(GLM2.this, new GLM2[] { xvals[fi] }, lambda_max, 1, futures).fork();
                    }
                  }
                }).asyncExec(xvals[i]._srcDinfo._adaptedFrame);
              }
            }
            _xvals = xvals;
            futures.join();
          }
          getCompleter().addToPendingCount(1);
          nextLambda(nextLambdaValue(), new LambdaIteration(getCompleter()));
        }
      }).asyncExec(_srcDinfo._adaptedFrame);
    }
  }).asyncExec(_srcDinfo._adaptedFrame);
}
Use of water.H2O.H2OCallback in project h2o-2 by h2oai.
Class Order, method serve.
/**
 * Runs one {@code OrderTsk} per selected column, copies each task's {@code _ids} (shifted by
 * one when {@code add_one} is set) into a freshly created vector, publishes the vectors as a
 * frame under {@code destination_key} and redirects to the inspect page for that key.
 *
 * <p>Requests with {@code n > 10000} are rejected as unimplemented. When no destination key was
 * given, one is derived from the source frame's key with the suffix {@code ".order"}.
 *
 * @return redirect response pointing at the resulting frame
 */
@Override
protected Response serve() {
  // global order not supported
  if (n > 10000) {
    throw H2O.unimpl();
  }
  final long[] layout = { 0, n };
  final Vec[] orderVecs = new Vec(Vec.newKey(), layout).makeZeros(cols.length);
  // One completion per column; the completer starts with (number of columns - 1) pending.
  final H2OEmptyCompleter allDone = new H2OEmptyCompleter();
  allDone.setPendingCount(cols.length - 1);
  final int idOffset;
  if (add_one) {
    idOffset = 1;
  } else {
    idOffset = 0;
  }
  int col = 0;
  while (col < cols.length) {
    final int target = col;
    // Copies the finished task's ids into the output vector of the same column position.
    final H2OCallback<OrderTsk> writeBack = new H2OCallback<OrderTsk>(allDone) {
      @Override
      public void callback(OrderTsk ot) {
        Vec.Writer out = orderVecs[target].open();
        int row = 0;
        while (row < ot._ids.length) {
          out.set(row, ot._ids[row] + idOffset);
          ++row;
        }
        out.close();
      }
    };
    new OrderTsk(writeBack, n, rev).asyncExec(source.vec(cols[col]));
    ++col;
  }
  // wait until every column has been written
  allDone.join();
  final Futures pending = new Futures();
  if (destination_key == null) {
    destination_key = Key.make(source._key.toString() + ".order");
  }
  final Frame ordered = new Frame(destination_key, Utils.select(source.names(), cols), orderVecs);
  DKV.put(destination_key, ordered, pending);
  pending.blockForPending();
  return Inspect2.redirect(this, destination_key.toString());
}
Aggregations