Search in sources:

Example 1 with GlrmLoss

Use of hex.genmodel.algos.glrm.GlrmLoss in the project h2o-3 by h2oai.

From the class GLRM, method initLoss:

/**
 * Validates all loss-related parameters and fills in the {@code _lossFunc} array
 * (one loss function per column of the training frame).
 * <p>
 * Validation errors are reported via {@code error(...)} rather than thrown, so
 * multiple problems can be collected in one pass. Checks that do not need the
 * training frame run first; if {@code _train} is null the method returns after
 * those parameter-only checks.
 */
private void initLoss() {
    // Treat a null override array the same as an empty one.
    int num_loss_by_cols = _parms._loss_by_col == null ? 0 : _parms._loss_by_col.length;
    int num_loss_by_cols_idx = _parms._loss_by_col_idx == null ? 0 : _parms._loss_by_col_idx.length;
    // First validate the parameters that do not require access to the training frame
    if (_parms._period <= 0)
        error("_period", "_period must be a positive integer");
    if (!_parms._loss.isForNumeric())
        error("_loss", _parms._loss + " is not a numeric loss function");
    if (!_parms._multi_loss.isForCategorical())
        error("_multi_loss", _parms._multi_loss + " is not a multivariate loss function");
    // If both override arrays are given they must be parallel arrays of equal length.
    if (num_loss_by_cols != num_loss_by_cols_idx && num_loss_by_cols_idx > 0)
        error("_loss_by_col", "Sizes of arrays _loss_by_col and _loss_by_col_idx must be the same");
    // Remaining checks need the training frame; bail out if it is not available.
    if (_train == null)
        return;
    _binaryColumnIndices = new ArrayList<Integer>();
    // Initialize the default loss functions for each column
    // Note: right now for binary columns `.isCategorical()` returns true. It has the undesired consequence that
    // such variables will get categorical loss function, and will get expanded into 2 columns.
    _lossFunc = new GlrmLoss[_ncolA];
    for (int i = 0; i < _ncolA; i++) {
        Vec vi = _train.vec(i);
        // Default: categorical columns get _multi_loss, numeric columns get _loss.
        _lossFunc[i] = vi.isCategorical() ? _parms._multi_loss : _parms._loss;
    }
    // grab original frame column names before change
    String[] origColumnNames = _parms.train().names();
    ArrayList<String> newColumnNames = new ArrayList<String>(Arrays.asList(_train._names));
    // If _loss_by_col is provided, then override loss functions on the specified columns
    if (num_loss_by_cols > 0) {
        if (num_loss_by_cols_idx == 0) {
            // No index array: the override array must cover every original column.
            if (num_loss_by_cols == origColumnNames.length)
                assignLossByCol(num_loss_by_cols, newColumnNames, origColumnNames);
            else
                error("_loss_by_col", "Number of override loss functions should be the same as the " + "number of columns in the input frame; or otherwise an explicit _loss_by_col_idx should be " + "provided.");
        }
        // Index array given and consistent in length: apply the per-column overrides.
        // (assignLossByCol is defined elsewhere in this class; presumably it maps the
        // overrides onto _lossFunc using the original/new column names — not visible here.)
        if (num_loss_by_cols_idx == num_loss_by_cols)
            assignLossByCol(num_loss_by_cols, newColumnNames, origColumnNames);
    // Otherwise we have already reported an error at the start of this method
    }
    // Check that all loss functions correspond to their actual type
    for (int i = 0; i < _ncolA; i++) {
        Vec vi = _train.vec(i);
        GlrmLoss lossi = _lossFunc[i];
        if (vi.isNumeric()) {
            // numeric columns
            if (!vi.isBinary()) {
                // non-binary numeric columns
                if (!lossi.isForNumeric())
                    error("_loss_by_col", "Loss function " + lossi + " cannot be applied to numeric column " + i);
            } else {
                // binary numeric columns: either a binary or a numeric loss is acceptable
                if (!lossi.isForBinary() && !lossi.isForNumeric()) {
                    error("_loss_by_col", "Loss function " + lossi + " cannot be applied to binary column " + i);
                }
            }
        } else if (vi.isCategorical()) {
            // categorical columns
            if (vi.isBinary()) {
                // categorical binary columns: either a binary or a categorical loss is acceptable
                if (!lossi.isForBinary() && !lossi.isForCategorical())
                    error("_loss_by_col", "Loss function " + lossi + " cannot be applied to binary column " + i);
                else if (lossi.isForBinary())
                    // collect column indices storing binary columns with binary loss function.
                    _binaryColumnIndices.add(i);
            } else {
                // categorical non-binary columns
                if (!lossi.isForCategorical())
                    error("_loss_by_col", "Loss function " + lossi + " cannot be applied to categorical column " + i);
            }
        }
        // Periodic loss needs the user-specified period; note this sets state on the
        // (shared) enum value, so the same period applies to every periodic column.
        if (lossi == GlrmLoss.Periodic)
            lossi.setParameters(_parms._period);
    }
}
Also used : GlrmLoss(hex.genmodel.algos.glrm.GlrmLoss) Vec(water.fvec.Vec) ArrayList(java.util.ArrayList)

Example 2 with GlrmLoss

Use of hex.genmodel.algos.glrm.GlrmLoss in the project h2o-3 by h2oai.

From the class GlrmMojoWriter, method writeModelData:

/**
 * Serializes the GLRM model into the MOJO: scalar hyper-parameters as key/value
 * entries, the per-column loss functions as a text file, and the Y archetypes
 * as a flat big-endian blob of doubles (row-major).
 */
@Override
protected void writeModelData() throws IOException {
    // Training hyper-parameters.
    writekv("initialization", model._parms._init);
    writekv("regularizationX", model._parms._regularization_x);
    writekv("regularizationY", model._parms._regularization_y);
    writekv("gammaX", model._parms._gamma_x);
    writekv("gammaY", model._parms._gamma_y);
    writekv("ncolX", model._parms._k);
    // DataInfo mapping
    writekv("cols_permutation", model._output._permutation);
    writekv("num_categories", model._output._ncats);
    writekv("num_numeric", model._output._nnums);
    writekv("norm_sub", model._output._normSub);
    writekv("norm_mul", model._output._normMul);
    // Loss functions: one per column, written as lines of a text resource.
    writekv("ncolA", model._output._lossFunc.length);
    startWritingTextFile("losses");
    for (GlrmLoss lossFunction : model._output._lossFunc) {
        writeln(lossFunction.toString());
    }
    finishWritingTextFile();
    // Archetypes
    GLRM.Archetypes archetypes = model._output._archetypes_raw;
    writekv("ncolY", archetypes.nfeatures());
    writekv("nrowY", archetypes.rank());
    writekv("num_levels_per_category", archetypes._numLevels);
    int valueCount = archetypes.rank() * archetypes.nfeatures();
    ByteBuffer buffer = ByteBuffer.wrap(new byte[valueCount * Double.BYTES]);
    for (double[] row : archetypes.getY(false)) {
        for (double value : row) {
            buffer.putDouble(value);
        }
    }
    writeblob("archetypes", buffer.array());
}
Also used : GlrmLoss(hex.genmodel.algos.glrm.GlrmLoss) ByteBuffer(java.nio.ByteBuffer)

Example 3 with GlrmLoss

Use of hex.genmodel.algos.glrm.GlrmLoss in the project h2o-3 by h2oai.

From the class GLRMCategoricalTest, method testLosses:

/**
 * Trains a GLRM on the prostate dataset for every combination of numeric loss
 * and categorical (multi) loss, with randomly drawn regularizers and gammas.
 * Uses a fixed base seed so the per-run seeds are reproducible.
 * NOTE: the order of draws from {@code random} must stay exactly as written —
 * it determines which regularizers/gammas each combination gets.
 */
@Test
public void testLosses() throws InterruptedException, ExecutionException {
    long baseSeed = 0xDECAF;
    Random random = new Random(baseSeed);
    Frame trainFrame = null;
    // Categoricals: CAPSULE, RACE, DPROS, DCAPS
    final int[] categoricalCols = new int[] { 1, 3, 4, 5 };
    final GlrmRegularizer[] regularizers = new GlrmRegularizer[] { GlrmRegularizer.Quadratic, GlrmRegularizer.L1, GlrmRegularizer.NonNegative, GlrmRegularizer.OneSparse, GlrmRegularizer.UnitOneSparse, GlrmRegularizer.Simplex };
    Scope.enter();
    try {
        trainFrame = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
        // Convert the listed columns to categorical in place, tracking the new vecs.
        for (int col : categoricalCols) {
            Scope.track(trainFrame.replace(col, trainFrame.vec(col).toCategoricalVec()));
        }
        trainFrame.remove("ID").remove();
        DKV.put(trainFrame._key, trainFrame);
        for (GlrmLoss numericLoss : new GlrmLoss[] { GlrmLoss.Quadratic, GlrmLoss.Absolute, GlrmLoss.Huber, GlrmLoss.Poisson }) {
            for (GlrmLoss categoricalLoss : new GlrmLoss[] { GlrmLoss.Categorical, GlrmLoss.Ordinal }) {
                GLRMModel glrmModel = null;
                try {
                    Scope.enter();
                    long runSeed = random.nextLong();
                    Log.info("GLRM using seed = " + runSeed);
                    GLRMParameters params = new GLRMParameters();
                    params._train = trainFrame._key;
                    params._transform = DataInfo.TransformType.NONE;
                    params._k = 5;
                    params._loss = numericLoss;
                    params._multi_loss = categoricalLoss;
                    params._init = GlrmInitialization.SVD;
                    // Two regularizer draws, then two gamma draws — order matters.
                    params._regularization_x = regularizers[random.nextInt(regularizers.length)];
                    params._regularization_y = regularizers[random.nextInt(regularizers.length)];
                    params._gamma_x = Math.abs(random.nextDouble());
                    params._gamma_y = Math.abs(random.nextDouble());
                    params._recover_svd = false;
                    params._seed = runSeed;
                    params._verbose = false;
                    params._max_iterations = 500;
                    glrmModel = new GLRM(params).trainModel().get();
                    Log.info("Iteration " + glrmModel._output._iterations + ": Objective value = " + glrmModel._output._objective);
                    glrmModel.score(trainFrame).delete();
                    ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(glrmModel, trainFrame);
                    Log.info("Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr);
                } finally {
                    // Always release the model and scope, even on failure.
                    if (glrmModel != null)
                        glrmModel.delete();
                    Scope.exit();
                }
            }
        }
    } finally {
        if (trainFrame != null)
            trainFrame.delete();
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) GlrmLoss(hex.genmodel.algos.glrm.GlrmLoss) Random(java.util.Random) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) GlrmRegularizer(hex.genmodel.algos.glrm.GlrmRegularizer) Test(org.junit.Test)

Example 4 with GlrmLoss

Use of hex.genmodel.algos.glrm.GlrmLoss in the project h2o-3 by h2oai.

From the class GLRMTest, method checkLossbyCol:

/**
 * Asserts that the loss function the model assigned to each column matches the
 * expectation: the per-column override from {@code _loss_by_col} where an index
 * was given, otherwise {@code _multi_loss} for categorical columns (the first
 * {@code ncats} after permutation) and {@code _loss} for numeric columns.
 * No-op when no per-column overrides were supplied.
 */
public static void checkLossbyCol(GLRMParameters parms, GLRMModel model) {
    int ncats = model._output._ncats;
    GlrmLoss[] modelLosses = model._output._lossFunc;
    assert ncats >= 0 && ncats <= modelLosses.length;
    if (null == parms._loss_by_col || null == parms._loss_by_col_idx)
        return;
    Assert.assertEquals(parms._loss_by_col.length, parms._loss_by_col_idx.length);
    // Translate each original-frame column index into its adapted-frame position
    // via the model's permutation (linear search; -1 if not found).
    int[] adaptedIndices = new int[parms._loss_by_col_idx.length];
    for (int i = 0; i < adaptedIndices.length; i++) {
        adaptedIndices[i] = -1;
        int originalIdx = parms._loss_by_col_idx[i];
        for (int j = 0; j < model._output._permutation.length; j++) {
            if (model._output._permutation[j] == originalIdx) {
                adaptedIndices[i] = j;
                break;
            }
        }
    }
    // Sorted so binarySearch below can locate override positions.
    Arrays.sort(adaptedIndices);
    // Categorical columns occupy [0, ncats); numeric columns follow.
    for (int col = 0; col < modelLosses.length; col++) {
        int pos = Arrays.binarySearch(adaptedIndices, col);
        GlrmLoss expected;
        if (pos >= 0) {
            expected = parms._loss_by_col[pos];
        } else {
            expected = col < ncats ? parms._multi_loss : parms._loss;
        }
        Assert.assertEquals(expected, modelLosses[col]);
    }
}
Also used : GlrmLoss(hex.genmodel.algos.glrm.GlrmLoss)

Aggregations

GlrmLoss (hex.genmodel.algos.glrm.GlrmLoss)4 GlrmRegularizer (hex.genmodel.algos.glrm.GlrmRegularizer)1 GLRMParameters (hex.glrm.GLRMModel.GLRMParameters)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 Random (java.util.Random)1 Test (org.junit.Test)1 Frame (water.fvec.Frame)1 Vec (water.fvec.Vec)1