Search in sources :

Example 6 with DataInfo

use of hex.DataInfo in project h2o-3 by h2oai.

the class MakeGLMModelHandler method make_model.

/**
 * Creates a new GLM model from an existing one with some coefficients overridden.
 *
 * <p>The source model is fetched from the DKV via {@code args.model}. Each
 * {@code args.names[i]} is paired with {@code args.beta[i]} and replaces the source
 * model's coefficient of that name. Names that do not match any coefficient of the
 * source model are stored in the lookup map but never read back, so they have no effect.
 * The new model is put into the DKV under {@code args.dest}, or under a freshly made key
 * when {@code args.dest} is {@code null}.
 *
 * @param version API version; not read by this method
 * @param args request carrying the source model key, the optional destination key and
 *     the coefficient overrides
 * @return a schema filled from the newly created model
 * @throws IllegalArgumentException if the source model is missing, if {@code args.names}
 *     or {@code args.beta} is {@code null}, or if fewer beta values than names are given
 */
public GLMModelV3 make_model(int version, MakeGLMModelV3 args) {
    GLMModel model = DKV.getGet(args.model.key());
    if (model == null) {
        throw new IllegalArgumentException("missing source model " + args.model);
    }
    // Fail fast with a descriptive message instead of an NPE / ArrayIndexOutOfBoundsException
    // part-way through the override loop below.
    if (args.names == null || args.beta == null) {
        throw new IllegalArgumentException("coefficient names and beta values must both be provided");
    }
    if (args.beta.length < args.names.length) {
        throw new IllegalArgumentException("got " + args.names.length + " coefficient names but only " + args.beta.length + " beta values");
    }
    String[] names = model._output.coefficientNames();
    Map<String, Double> coefs = model.coefficients();
    // Overlay the user-supplied values on top of the source model's coefficients.
    for (int i = 0; i < args.names.length; ++i) {
        coefs.put(args.names[i], args.beta[i]);
    }
    // Rebuild the beta vector in the source model's coefficient order.
    double[] beta = model.beta().clone();
    for (int i = 0; i < beta.length; ++i) {
        beta[i] = coefs.get(names[i]);
    }
    GLMModel m = new GLMModel(args.dest != null ? args.dest.key() : Key.make(), model._parms, null, model._ymu, Double.NaN, Double.NaN, -1);
    // NOTE(review): this mutates the DataInfo handed out by the source model; if
    // model.dinfo() returns the model's own instance, the source model is affected too - confirm.
    DataInfo dinfo = model.dinfo();
    dinfo.setPredictorTransform(TransformType.NONE);
    m._output = new GLMOutput(model.dinfo(), model._output._names, model._output._domains, model._output.coefficientNames(), model._output._binomial, beta);
    DKV.put(m._key, m);
    GLMModelV3 res = new GLMModelV3();
    res.fillFromImpl(m);
    return res;
}
Also used : DataInfo(hex.DataInfo) GLMOutput(hex.glm.GLMModel.GLMOutput) GLMModel(hex.glm.GLMModel)

Example 7 with DataInfo

use of hex.DataInfo in project h2o-3 by h2oai.

the class MakeGLMModelHandler method computeGram.

/**
 * Computes the gram matrix of a frame and stores it as a new frame in the DKV.
 *
 * <p>An optional weight column named by {@code input.W.column_name} is taken out of the
 * predictor set and registered as the weights of the {@code DataInfo}. The result frame
 * has one column per coefficient name plus a trailing {@code "Intercept"} column. Its key
 * is derived from the input frame key: a trailing {@code ".hex"} is stripped,
 * {@code "_gram"} is appended, then {@code "_" + weightName} when weights are used. If
 * that key is taken, suffixes {@code _0} through {@code _999} are tried.
 *
 * @param v API version; not read by this method
 * @param input request schema; its {@code destination_frame} is overwritten with the key
 *     of the result frame
 * @return the same {@code input} instance with {@code destination_frame} filled in
 * @throws IllegalArgumentException if the input frame does not exist, the weight column
 *     is not found in it, or no unused result key could be found
 */
public GramV3 computeGram(int v, GramV3 input) {
    if (DKV.get(input.X.key()) == null) {
        throw new IllegalArgumentException("Frame " + input.X.key() + " does not exist.");
    }
    Frame fr = input.X.key().get();
    // Work on a shallow copy so that removing the weight column leaves the input frame intact.
    Frame frcpy = new Frame(fr._names.clone(), fr.vecs().clone());
    String wname = null;
    Vec weight = null;
    if (input.W != null && !input.W.column_name.isEmpty()) {
        wname = input.W.column_name;
        if (fr.find(wname) == -1) {
            throw new IllegalArgumentException("Did not find weight vector " + wname);
        }
        weight = frcpy.remove(wname);
    }
    DataInfo dinfo = new DataInfo(frcpy, null, 0, input.use_all_factor_levels,
        input.standardize ? TransformType.STANDARDIZE : TransformType.NONE, TransformType.NONE,
        /* skipMissing */ input.skip_missing,
        /* imputeMissing */ false,
        /* missingBucket */ !input.skip_missing,
        /* weight */ false,
        /* offset */ false,
        /* fold */ false,
        /* intercept */ true);
    DKV.put(dinfo);
    final double[][] gram;
    try {
        if (weight != null) {
            dinfo.setWeights(wname, weight);
        }
        Gram.GramTask gt = new Gram.GramTask(null, dinfo, false, true).doAll(dinfo._adaptedFrame);
        gram = gt._gram.getXX();
    } finally {
        // Always drop the temporary DataInfo from the DKV, even when the gram task fails.
        dinfo.remove();
    }
    String[] names = water.util.ArrayUtils.append(dinfo.coefNames(), "Intercept");
    // One Vec per row of the gram matrix, all allocated in the same vector group.
    Vec[] vecs = new Vec[gram.length];
    Key[] keys = new VectorGroup().addVecs(vecs.length);
    for (int i = 0; i < vecs.length; ++i) {
        vecs[i] = Vec.makeVec(gram[i], keys[i]);
    }
    input.destination_frame = new KeyV3.FrameKeyV3();
    String keyname = input.X.key().toString();
    if (keyname.endsWith(".hex")) {
        keyname = keyname.substring(0, keyname.lastIndexOf("."));
    }
    keyname = keyname + "_gram";
    if (weight != null) {
        keyname = keyname + "_" + wname;
    }
    Key k = Key.make(keyname);
    if (DKV.get(k) != null) {
        // Base name is taken: probe numbered variants until a free one is found.
        int cnt = 0;
        while (cnt < 1000 && DKV.get(k = Key.make(keyname + "_" + cnt)) != null) {
            cnt++;
        }
        if (cnt == 1000) {
            throw new IllegalArgumentException("unable to make unique key");
        }
    }
    input.destination_frame.fillFromImpl(k);
    DKV.put(new Frame(k, names, vecs));
    return input;
}
Also used : DataInfo(hex.DataInfo) ValFrame(water.rapids.vals.ValFrame) KeyV3(water.api.schemas3.KeyV3) Gram(hex.gram.Gram) VectorGroup(water.fvec.Vec.VectorGroup) Key(water.Key)

Example 8 with DataInfo

use of hex.DataInfo in project h2o-3 by h2oai.

the class ComputationState method applyStrongRulesMultinomial.

/**
 * Apply strong rules to filter out expected inactive (with zero coefficient) predictors,
 * separately for each class of a multinomial model.
 *
 * <p>Nothing is returned. The outcome is stored in {@code _activeDataMultinomial} (one
 * filtered {@code DataInfo} per class) and in the {@code _allIn} flag. {@code _activeBC}
 * and {@code _activeData} are always reset to {@code _bc} and {@code _dinfo}.
 *
 * @param lambdaNew new lambda value; enters the selection threshold as
 *     {@code _alpha * (2 * lambdaNew - lambdaOld)}
 * @param lambdaOld previous lambda value
 */
protected void applyStrongRulesMultinomial(double lambdaNew, double lambdaOld) {
    int P = _dinfo.fullN();
    // Each class owns a block of N = P + 1 slots in the flat gradient array.
    // NOTE(review): slot P of every block is presumably the intercept - confirm.
    int N = P + 1;
    int selected = 0;
    _activeBC = _bc;
    _activeData = _dinfo;
    if (!_allIn) {
        if (_activeDataMultinomial == null)
            _activeDataMultinomial = new DataInfo[_nclasses];
        // A column is a candidate when the absolute value of its gradient exceeds this threshold.
        final double rhs = _alpha * (2 * lambdaNew - lambdaOld);
        // Newly selected columns, stored as flat indices (c * N + i) across all classes.
        int[] cols = MemoryManager.malloc4(N * _nclasses);
        int oldActiveColsTotal = 0;
        for (int c = 0; c < _nclasses; ++c) {
            int j = 0;
            // With no earlier active set for this class, only index P counts as already active.
            int[] oldActiveCols = _activeDataMultinomial[c] == null ? new int[] { P } : _activeDataMultinomial[c]._activeCols;
            oldActiveColsTotal += oldActiveCols.length;
            for (int i = 0; i < P; ++i) {
                // Columns that are already active are skipped
                // (this walk assumes oldActiveCols is sorted ascending - TODO confirm).
                if (j < oldActiveCols.length && i == oldActiveCols[j]) {
                    ++j;
                } else if (_ginfo._gradient[c * N + i] > rhs || _ginfo._gradient[c * N + i] < -rhs) {
                    cols[selected++] = c * N + i;
                }
            }
        }
        // More candidates than the active-predictor budget allows: keep only those with the largest gradients.
        if (_parms._max_active_predictors != -1 && _parms._max_active_predictors - oldActiveColsTotal + _nclasses < selected) {
            Integer[] bigInts = ArrayUtils.toIntegers(cols, 0, selected);
            // Order candidates by squared gradient, largest first.
            Arrays.sort(bigInts, new Comparator<Integer>() {

                @Override
                public int compare(Integer o1, Integer o2) {
                    return (int) Math.signum(_ginfo._gradient[o2.intValue()] * _ginfo._gradient[o2.intValue()] - _ginfo._gradient[o1.intValue()] * _ginfo._gradient[o1.intValue()]);
                }
            });
            cols = ArrayUtils.toInt(bigInts, 0, _parms._max_active_predictors - oldActiveColsTotal + _nclasses);
            // Back to ascending flat-index order so the per-class split below can walk it linearly.
            Arrays.sort(cols);
            selected = cols.length;
        }
        // Cursor into cols; it keeps advancing across classes.
        int i = 0;
        // Scratch buffer reused for every class; the used prefix is copied out before merging.
        int[] cs = new int[P + 1];
        int sum = 0;
        for (int c = 0; c < _nclasses; ++c) {
            int[] classcols = cs;
            int[] oldActiveCols = _activeDataMultinomial[c] == null ? new int[] { P } : _activeDataMultinomial[c]._activeCols;
            int k = 0;
            // Take this class's share of the flat indices and convert them to class-local column indices.
            while (i < selected && cols[i] < (c + 1) * N) classcols[k++] = cols[i++] - c * N;
            // New active set for the class = previously active columns merged with the newly selected ones.
            classcols = ArrayUtils.sortedMerge(oldActiveCols, Arrays.copyOf(classcols, k));
            sum += classcols.length;
            _activeDataMultinomial[c] = _dinfo.filterExpandedColumns(classcols);
        }
        assert _parms._max_active_predictors == -1 || sum <= _parms._max_active_predictors + _nclasses : "sum = " + sum + " max_active_preds = " + _parms._max_active_predictors + ", nclasses = " + _nclasses;
        // Once every slot of every class is active there is nothing left to filter.
        _allIn = sum == N * _nclasses;
    }
}
Also used : DataInfo(hex.DataInfo) BetaConstraint(hex.glm.GLM.BetaConstraint)

Example 9 with DataInfo

use of hex.DataInfo in project h2o-3 by h2oai.

the class ComputationState method adjustToNewLambda.

/**
 * Updates the cached gradient info in place for a change of lambda.
 *
 * <p>Does nothing when lambda is unchanged or {@code l2pen()} is zero. Otherwise, if half
 * of {@code ArrayUtils.l2norm2(_beta, true)} is positive, {@code (lambdaNew - lambdaOld) * beta}
 * is added to the first {@code fullN()} gradient entries of the active data. For the
 * multinomial family this is done per class, each class block being {@code fullN() + 1}
 * entries wide with the last entry left untouched. Finally {@code _ginfo} is replaced by
 * a new instance with the same likelihood and gradient array and the objective value
 * shifted by {@code (lambdaNew - lambdaOld)} times that half-norm.
 *
 * @param lambdaNew the lambda value being switched to
 * @param lambdaOld the lambda value the cached gradient info was computed for
 */
private void adjustToNewLambda(double lambdaNew, double lambdaOld) {
    final double lambdaDelta = lambdaNew - lambdaOld;
    if (lambdaDelta == 0 || l2pen() == 0) {
        return;
    }
    final double halfBetaNorm = .5 * ArrayUtils.l2norm2(_beta, true);
    if (halfBetaNorm > 0) {
        final double[] grad = _ginfo._gradient;
        if (_parms._family != Family.multinomial) {
            final int width = _activeData.fullN();
            for (int j = 0; j < width; ++j) {
                grad[j] += lambdaDelta * _beta[j];
            }
        } else {
            // Start of the current class's block inside the flat gradient / beta arrays.
            int base = 0;
            for (int cls = 0; cls < _nclasses; ++cls) {
                final int width = activeDataMultinomial(cls).fullN();
                for (int j = 0; j < width; ++j) {
                    grad[base + j] += lambdaDelta * _beta[base + j];
                }
                // Skip the one extra slot that follows the predictors of each class.
                base += width + 1;
            }
        }
    }
    _ginfo = new GLMGradientInfo(_ginfo._likelihood, _ginfo._objVal + lambdaDelta * halfBetaNorm, _ginfo._gradient);
}
Also used : DataInfo(hex.DataInfo) GLMGradientInfo(hex.glm.GLM.GLMGradientInfo) BetaConstraint(hex.glm.GLM.BetaConstraint)

Example 10 with DataInfo

use of hex.DataInfo in project h2o-3 by h2oai.

the class LinearAlgebraUtils method toEigen.

/**
 * Projects a single column onto a vector derived from the diagonal of its gram matrix.
 *
 * <p>The column is wrapped in a one-column frame named {@code "enum"}, expanded through a
 * {@code DataInfo} that uses all factor levels and no transformations, and run through a
 * {@code Gram.GramTask}. The gram diagonal is rounded to float precision and passed,
 * together with the observation count, through {@code multiple(...)} to a
 * {@code ProjectOntoEigenVector} task, which produces the output column.
 *
 * @param src the column to convert
 * @return the single output column produced by the projection task
 */
public static Vec toEigen(Vec src) {
    Frame train = new Frame(Key.<Frame>make(), new String[] { "enum" }, new Vec[] { src });
    DataInfo dinfo = new DataInfo(train, null, 0,
        /* use_all_factor_levels */ true,
        DataInfo.TransformType.NONE, DataInfo.TransformType.NONE,
        /* skipMissing */ false,
        /* imputeMissing */ true,
        /* missingBucket */ false,
        /* weights */ false,
        /* offset */ false,
        /* fold */ false,
        /* intercept */ false);
    DKV.put(dinfo);
    final double[] rounded;
    final int nobs;
    try {
        Gram.GramTask gtsk = new Gram.GramTask(null, dinfo).doAll(dinfo._adaptedFrame);
        // round the numbers to float precision to be more reproducible
        rounded = new double[gtsk._gram._diag.length];
        for (int i = 0; i < rounded.length; ++i) {
            rounded[i] = (float) gtsk._gram._diag[i];
        }
        nobs = (int) gtsk._nobs;
    } finally {
        // Always drop the temporary DataInfo from the DKV, even when the gram task fails.
        dinfo.remove();
    }
    // NOTE(review): (byte) 3 is the output vec type code, presumably numeric - confirm against Vec's type constants.
    return new ProjectOntoEigenVector(multiple(rounded, nobs, 1)).doAll(1, (byte) 3, train).outputFrame().anyVec();
}
Also used : DataInfo(hex.DataInfo) Frame(water.fvec.Frame) ToEigenVec(hex.ToEigenVec) Vec(water.fvec.Vec) Gram(hex.gram.Gram)

Aggregations

DataInfo (hex.DataInfo)13 Frame (water.fvec.Frame)6 Test (org.junit.Test)5 BetaConstraint (hex.glm.GLM.BetaConstraint)3 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)2 GLMGradientSolver (hex.glm.GLM.GLMGradientSolver)2 GLMParameters (hex.glm.GLMModel.GLMParameters)2 GLMWeightsFun (hex.glm.GLMModel.GLMWeightsFun)2 Gram (hex.gram.Gram)2 GradientInfo (hex.optimization.OptimizationUtils.GradientInfo)2 Vec (water.fvec.Vec)2 ValFrame (water.rapids.vals.ValFrame)2 FrameTask (hex.FrameTask)1 ModelMetricsRegression (hex.ModelMetricsRegression)1 ToEigenVec (hex.ToEigenVec)1 DistributionFamily (hex.genmodel.utils.DistributionFamily)1 GLMGradientInfo (hex.glm.GLM.GLMGradientInfo)1 GLMModel (hex.glm.GLMModel)1 GLMOutput (hex.glm.GLMModel.GLMOutput)1 GradientSolver (hex.optimization.OptimizationUtils.GradientSolver)1