use of hex.DataInfo in project h2o-3 by h2oai.
the class MakeGLMModelHandler method make_model.
/**
 * Creates a new GLM model from an existing one with some coefficients overridden.
 *
 * <p>The source model is looked up in the DKV via {@code args.model}. Its coefficient map is
 * used as the baseline; every {@code (args.names[i], args.beta[i])} pair is then written on top
 * of it. The resulting coefficient vector is assembled in the order given by the source model's
 * {@code coefficientNames()}, so names in {@code args.names} that the source model does not
 * list have no effect on the result.
 *
 * <p>Side effects: the predictor transform of the DataInfo returned by the source model's
 * {@code dinfo()} is set to {@code TransformType.NONE}, and the new model is stored in the DKV
 * under {@code args.dest} (or a freshly made key when {@code args.dest} is null).
 *
 * @param version REST API version (not used by this method)
 * @param args    request carrying the source model key, optional destination key, and the
 *                parallel {@code names}/{@code beta} arrays of coefficient overrides
 * @return schema filled from the newly created model
 * @throws IllegalArgumentException if the source model cannot be found in the DKV
 */
public GLMModelV3 make_model(int version, MakeGLMModelV3 args) {
  GLMModel source = DKV.getGet(args.model.key());
  if (source == null) {
    throw new IllegalArgumentException("missing source model " + args.model);
  }

  String[] coefOrder = source._output.coefficientNames();

  // Baseline coefficients by name, overlaid with the caller-supplied values.
  Map<String, Double> byName = source.coefficients();
  int pos = 0;
  for (String overrideName : args.names) {
    byName.put(overrideName, args.beta[pos++]);
  }

  // Re-materialize the coefficient vector in the source model's coefficient order.
  double[] beta = source.beta().clone();
  int slot = 0;
  while (slot < beta.length) {
    beta[slot] = byName.get(coefOrder[slot]);
    ++slot;
  }

  GLMModel created = new GLMModel(args.dest != null ? args.dest.key() : Key.make(), source._parms, null, source._ymu, Double.NaN, Double.NaN, -1);

  // NOTE(review): this mutates the source model's DataInfo, not a copy — confirm that is intended.
  source.dinfo().setPredictorTransform(TransformType.NONE);

  created._output = new GLMOutput(source.dinfo(), source._output._names, source._output._domains, source._output.coefficientNames(), source._output._binomial, beta);
  DKV.put(created._key, created);

  GLMModelV3 schema = new GLMModelV3();
  schema.fillFromImpl(created);
  return schema;
}
use of hex.DataInfo in project h2o-3 by h2oai.
the class MakeGLMModelHandler method computeGram.
/**
 * Computes the Gram matrix of a frame and publishes it as a new frame in the DKV.
 *
 * <p>The input frame {@code input.X} is shallow-copied (names and vec references only). If a
 * weight column is named in {@code input.W}, it is removed from the copy and registered as the
 * weights column of the DataInfo. The Gram matrix is obtained from a {@code Gram.GramTask} run
 * over the DataInfo's adapted frame, and each row of the resulting matrix becomes one Vec of the
 * output frame. Output columns are named after the DataInfo coefficient names plus a trailing
 * {@code "Intercept"}.
 *
 * <p>The output key is derived from the input key: a trailing {@code ".hex"} is dropped,
 * {@code "_gram"} is appended, followed by {@code "_" + weightColumn} when weights are used. If
 * that key is already taken, suffixes {@code _0 .. _999} are tried in turn.
 *
 * <p>The temporary DataInfo is put into the DKV for the duration of the computation and is
 * always removed again, including when the computation fails. The destination key is resolved
 * before any output Vec is allocated so that a key-collision failure does not orphan Vecs.
 *
 * @param v     REST API version (not used by this method)
 * @param input request schema; {@code destination_frame} is filled in on return
 * @return the same {@code input} instance with {@code destination_frame} set
 * @throws IllegalArgumentException if the input frame does not exist, the weight column is not
 *                                  found, or no unused destination key could be generated
 */
public GramV3 computeGram(int v, GramV3 input) {
  if (DKV.get(input.X.key()) == null)
    throw new IllegalArgumentException("Frame " + input.X.key() + " does not exist.");
  Frame fr = input.X.key().get();
  // Work on a shallow copy so that removing the weight column does not alter the user's frame.
  Frame frcpy = new Frame(fr._names.clone(), fr.vecs().clone());
  String wname = null;
  Vec weight = null;
  if (input.W != null && !input.W.column_name.isEmpty()) {
    wname = input.W.column_name;
    if (fr.find(wname) == -1)
      throw new IllegalArgumentException("Did not find weight vector " + wname);
    weight = frcpy.remove(wname);
  }
  // NOTE(review): the three flags after the response transform are presumably
  // skipMissing / imputeMissing / missingBucket (same constructor as in toEigen) — confirm.
  DataInfo dinfo = new DataInfo(frcpy, null, 0, input.use_all_factor_levels,
      input.standardize ? TransformType.STANDARDIZE : TransformType.NONE,
      TransformType.NONE,
      input.skip_missing,
      false,
      !input.skip_missing,
      /* weight */ false,
      /* offset */ false,
      /* fold */ false,
      /* intercept */ true);
  DKV.put(dinfo);
  double[][] gram;
  try {
    if (weight != null)
      dinfo.setWeights(wname, weight);
    Gram.GramTask gt = new Gram.GramTask(null, dinfo, false, true).doAll(dinfo._adaptedFrame);
    gram = gt._gram.getXX();
  } finally {
    // Always drop the temporary DataInfo from the DKV, even if the task failed.
    dinfo.remove();
  }
  String[] names = water.util.ArrayUtils.append(dinfo.coefNames(), "Intercept");

  // Resolve the destination key first: this step can fail, and failing before the Vecs are
  // allocated avoids leaving them behind.
  input.destination_frame = new KeyV3.FrameKeyV3();
  String keyname = input.X.key().toString();
  if (keyname.endsWith(".hex"))
    keyname = keyname.substring(0, keyname.lastIndexOf("."));
  keyname = keyname + "_gram";
  if (weight != null)
    keyname = keyname + "_" + wname;
  Key k = Key.make(keyname);
  if (DKV.get(k) != null) {
    int cnt = 0;
    while (cnt < 1000 && DKV.get(k = Key.make(keyname + "_" + cnt)) != null) cnt++;
    if (cnt == 1000)
      throw new IllegalArgumentException("unable to make unique key");
  }

  // One Vec per row of the Gram matrix.
  Vec[] vecs = new Vec[gram.length];
  Key[] keys = new VectorGroup().addVecs(vecs.length);
  for (int i = 0; i < vecs.length; ++i) vecs[i] = Vec.makeVec(gram[i], keys[i]);

  input.destination_frame.fillFromImpl(k);
  DKV.put(new Frame(k, names, vecs));
  return input;
}
use of hex.DataInfo in project h2o-3 by h2oai.
the class ComputationState method applyStrongRulesMultinomial.
/**
 * Apply strong rules to filter out expected inactive (with zero coefficient) predictors,
 * separately for each class of a multinomial model.
 *
 * <p>This method returns nothing. Its results are stored in fields: {@code _activeBC} and
 * {@code _activeData} are reset to the full {@code _bc} / {@code _dinfo}, and, unless
 * {@code _allIn} is already set, {@code _activeDataMultinomial[c]} is replaced for every class
 * {@code c} with a DataInfo filtered to that class's active columns, and {@code _allIn} is
 * recomputed.
 *
 * <p>A column that is not already active for a class is selected when the absolute value of its
 * gradient entry exceeds {@code _alpha * (2 * lambdaNew - lambdaOld)}. When
 * {@code _parms._max_active_predictors} is not -1 and too many columns were selected, only the
 * ones with the largest squared gradient are kept.
 *
 * @param lambdaNew the lambda value being moved to
 * @param lambdaOld the previous lambda value
 */
protected void applyStrongRulesMultinomial(double lambdaNew, double lambdaOld) {
// P = number of expanded predictor columns; each class owns a block of N = P + 1 gradient slots.
int P = _dinfo.fullN();
int N = P + 1;
int selected = 0;
_activeBC = _bc;
_activeData = _dinfo;
if (!_allIn) {
if (_activeDataMultinomial == null)
_activeDataMultinomial = new DataInfo[_nclasses];
// Strong-rule threshold on the gradient magnitude.
final double rhs = _alpha * (2 * lambdaNew - lambdaOld);
// Newly selected columns for all classes, stored as flat indices c * N + i.
int[] cols = MemoryManager.malloc4(N * _nclasses);
int oldActiveColsTotal = 0;
for (int c = 0; c < _nclasses; ++c) {
int j = 0;
// A class with no active set yet starts with only index P (the last slot of its block;
// presumably the intercept — confirm).
int[] oldActiveCols = _activeDataMultinomial[c] == null ? new int[] { P } : _activeDataMultinomial[c]._activeCols;
oldActiveColsTotal += oldActiveCols.length;
for (int i = 0; i < P; ++i) {
// Skip columns that are already active; this single-pass walk assumes oldActiveCols is
// sorted ascending — TODO confirm.
if (j < oldActiveCols.length && i == oldActiveCols[j]) {
++j;
} else if (_ginfo._gradient[c * N + i] > rhs || _ginfo._gradient[c * N + i] < -rhs) {
cols[selected++] = c * N + i;
}
}
}
// Enforce the active-predictor cap. NOTE(review): the "+ _nclasses" presumably offsets the one
// per-class entry (index P) that is included in oldActiveColsTotal — confirm.
if (_parms._max_active_predictors != -1 && _parms._max_active_predictors - oldActiveColsTotal + _nclasses < selected) {
Integer[] bigInts = ArrayUtils.toIntegers(cols, 0, selected);
// Order candidates by squared gradient, largest first.
Arrays.sort(bigInts, new Comparator<Integer>() {
@Override
public int compare(Integer o1, Integer o2) {
return (int) Math.signum(_ginfo._gradient[o2.intValue()] * _ginfo._gradient[o2.intValue()] - _ginfo._gradient[o1.intValue()] * _ginfo._gradient[o1.intValue()]);
}
});
// Keep the leading entries up to the remaining budget, then restore ascending index order so
// the per-class split below can consume them sequentially.
cols = ArrayUtils.toInt(bigInts, 0, _parms._max_active_predictors - oldActiveColsTotal + _nclasses);
Arrays.sort(cols);
selected = cols.length;
}
// i is a cursor into cols that is shared across all classes.
int i = 0;
// Scratch buffer reused for every class; the filled prefix is copied out before merging.
int[] cs = new int[P + 1];
int sum = 0;
for (int c = 0; c < _nclasses; ++c) {
int[] classcols = cs;
int[] oldActiveCols = _activeDataMultinomial[c] == null ? new int[] { P } : _activeDataMultinomial[c]._activeCols;
int k = 0;
// Take this class's flat indices (those below (c + 1) * N) and convert them to per-class indices.
while (i < selected && cols[i] < (c + 1) * N) classcols[k++] = cols[i++] - c * N;
// New active set for the class = previously active columns merged with the newly selected ones.
classcols = ArrayUtils.sortedMerge(oldActiveCols, Arrays.copyOf(classcols, k));
sum += classcols.length;
_activeDataMultinomial[c] = _dinfo.filterExpandedColumns(classcols);
}
assert _parms._max_active_predictors == -1 || sum <= _parms._max_active_predictors + _nclasses : "sum = " + sum + " max_active_preds = " + _parms._max_active_predictors + ", nclasses = " + _nclasses;
// Once every slot of every class is active there is nothing left to filter on later calls.
_allIn = sum == N * _nclasses;
}
}
use of hex.DataInfo in project h2o-3 by h2oai.
the class ComputationState method adjustToNewLambda.
/**
 * Adjusts the cached gradient info ({@code _ginfo}) for a change of lambda.
 *
 * <p>Does nothing when the two lambdas are equal or when {@code l2pen()} is zero. Otherwise,
 * if half the squared norm of {@code _beta} (as computed by {@code ArrayUtils.l2norm2(_beta, true)})
 * is positive, {@code (lambdaNew - lambdaOld) * _beta[i]} is added in place to the gradient
 * entries of the active columns. For the multinomial family this is done per class, skipping one
 * extra slot after each class's columns (presumably the intercept — confirm). Finally
 * {@code _ginfo} is replaced by a new instance with the same likelihood and gradient array and
 * with the objective value shifted by {@code (lambdaNew - lambdaOld)} times that half-norm.
 *
 * @param lambdaNew the lambda value being moved to
 * @param lambdaOld the previous lambda value
 */
private void adjustToNewLambda(double lambdaNew, double lambdaOld) {
  final double lambdaDelta = lambdaNew - lambdaOld;
  if (lambdaDelta == 0 || l2pen() == 0) {
    return;
  }

  final double halfSquaredNorm = 0.5 * ArrayUtils.l2norm2(_beta, true);
  if (halfSquaredNorm > 0) {
    if (_parms._family == Family.multinomial) {
      // Walk the per-class blocks of the flat gradient/beta arrays.
      int blockStart = 0;
      for (int cls = 0; cls < _nclasses; ++cls) {
        final int width = activeDataMultinomial(cls).fullN();
        for (int col = 0; col < width; ++col) {
          _ginfo._gradient[blockStart + col] += lambdaDelta * _beta[blockStart + col];
        }
        // Each block holds one more slot than the class has columns.
        blockStart += width + 1;
      }
    } else {
      final int width = _activeData.fullN();
      for (int col = 0; col < width; ++col) {
        _ginfo._gradient[col] += lambdaDelta * _beta[col];
      }
    }
  }

  _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal + lambdaDelta * halfSquaredNorm, _ginfo._gradient);
}
use of hex.DataInfo in project h2o-3 by h2oai.
the class LinearAlgebraUtils method toEigen.
/**
 * Converts a column into a single numeric column via {@code ProjectOntoEigenVector}.
 *
 * <p>The source Vec is wrapped in a one-column frame named {@code "enum"}. A temporary DataInfo
 * is built over it (all factor levels, no transforms, missing values imputed, no intercept) and
 * a {@code Gram.GramTask} is run over its adapted frame. The diagonal of the resulting Gram is
 * rounded to float precision to make results more reproducible, passed through
 * {@code multiple(rounded, nobs, 1)}, and the result parameterizes the projection task that
 * produces the output Vec.
 *
 * <p>The temporary DataInfo is put into the DKV for the Gram computation and is always removed
 * again, including when the computation fails.
 *
 * @param src the column to project
 * @return the single output Vec produced by the projection task
 */
public static Vec toEigen(Vec src) {
  Frame train = new Frame(Key.<Frame>make(), new String[] { "enum" }, new Vec[] { src });
  DataInfo dinfo = new DataInfo(train, null, 0,
      /* use_all_factor_levels */ true,
      DataInfo.TransformType.NONE,
      DataInfo.TransformType.NONE,
      /* skipMissing */ false,
      /* imputeMissing */ true,
      /* missingBucket */ false,
      /* weights */ false,
      /* offset */ false,
      /* fold */ false,
      /* intercept */ false);
  DKV.put(dinfo);
  Gram.GramTask gtsk;
  double[] rounded;
  try {
    gtsk = new Gram.GramTask(null, dinfo).doAll(dinfo._adaptedFrame);
    // Round the numbers to float precision to be more reproducible.
    rounded = new double[gtsk._gram._diag.length];
    for (int i = 0; i < rounded.length; ++i) rounded[i] = (float) gtsk._gram._diag[i];
  } finally {
    // Always drop the temporary DataInfo from the DKV, even if the Gram task failed.
    dinfo.remove();
  }
  // NOTE(review): (byte) 3 is the output Vec type code; presumably Vec.T_NUM — confirm.
  Vec v = new ProjectOntoEigenVector(multiple(rounded, (int) gtsk._nobs, 1)).doAll(1, (byte) 3, train).outputFrame().anyVec();
  return v;
}
Aggregations