Search in sources :

Example 11 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class GLRM method initLoss.

/**
 * Validate all Loss-related parameters, and fill in the `_lossFunc` array.
 *
 * Parameter-only checks run first; if no training frame is attached the method stops there.
 * Otherwise each of the `_ncolA` columns gets a default loss (`_multi_loss` for categorical
 * columns, `_loss` for everything else), optional per-column overrides are applied through
 * `assignLossByCol`, and finally every column's loss is checked against the column type.
 * Categorical binary columns that end up with a binary loss are recorded in
 * `_binaryColumnIndices`.
 */
private void initLoss() {
    final int lossCount = (_parms._loss_by_col != null) ? _parms._loss_by_col.length : 0;
    final int lossIdxCount = (_parms._loss_by_col_idx != null) ? _parms._loss_by_col_idx.length : 0;

    // Checks that depend only on the parameters, not on the training frame.
    if (_parms._period <= 0) {
        error("_period", "_period must be a positive integer");
    }
    if (!_parms._loss.isForNumeric()) {
        error("_loss", _parms._loss + " is not a numeric loss function");
    }
    if (!_parms._multi_loss.isForCategorical()) {
        error("_multi_loss", _parms._multi_loss + " is not a multivariate loss function");
    }
    if (lossIdxCount > 0 && lossCount != lossIdxCount) {
        error("_loss_by_col", "Sizes of arrays _loss_by_col and _loss_by_col_idx must be the same");
    }

    // Everything below needs the training frame.
    if (_train == null) {
        return;
    }

    _binaryColumnIndices = new ArrayList<Integer>();

    // Default loss per column. Note: binary columns currently report `.isCategorical()` as true,
    // so by default they receive the categorical loss and get expanded into 2 columns.
    _lossFunc = new GlrmLoss[_ncolA];
    for (int col = 0; col < _ncolA; col++) {
        if (_train.vec(col).isCategorical()) {
            _lossFunc[col] = _parms._multi_loss;
        } else {
            _lossFunc[col] = _parms._loss;
        }
    }

    // Column names of the frame referenced by the parameters, and of the frame actually trained on.
    String[] origColumnNames = _parms.train().names();
    ArrayList<String> newColumnNames = new ArrayList<String>(Arrays.asList(_train._names));

    // Apply user-supplied per-column overrides, if any.
    if (lossCount > 0) {
        if (lossIdxCount == lossCount) {
            // Explicit index list of matching size.
            assignLossByCol(lossCount, newColumnNames, origColumnNames);
        } else if (lossIdxCount == 0) {
            // No index list: acceptable only when there is one override per original column.
            if (lossCount == origColumnNames.length) {
                assignLossByCol(lossCount, newColumnNames, origColumnNames);
            } else {
                error("_loss_by_col", "Number of override loss functions should be the same as the " + "number of columns in the input frame; or otherwise an explicit _loss_by_col_idx should be " + "provided.");
            }
        }
        // Any other size combination was already reported by the size check above.
    }

    // Make sure every column's loss function is compatible with the column's type.
    for (int col = 0; col < _ncolA; col++) {
        final Vec column = _train.vec(col);
        final GlrmLoss loss = _lossFunc[col];

        if (column.isNumeric()) {
            if (column.isBinary()) {
                // Binary numeric column: either a binary or a numeric loss is acceptable.
                if (!(lossIsForBinaryOrNumeric(loss))) {
                    error("_loss_by_col", "Loss function " + loss + " cannot be applied to binary column " + col);
                }
            } else if (!loss.isForNumeric()) {
                // Non-binary numeric column: only numeric losses are acceptable.
                error("_loss_by_col", "Loss function " + loss + " cannot be applied to numeric column " + col);
            }
        } else if (column.isCategorical()) {
            if (column.isBinary()) {
                if (loss.isForBinary()) {
                    // Remember categorical binary columns that use a binary loss.
                    _binaryColumnIndices.add(col);
                } else if (!loss.isForCategorical()) {
                    error("_loss_by_col", "Loss function " + loss + " cannot be applied to binary column " + col);
                }
            } else if (!loss.isForCategorical()) {
                // Non-binary categorical column: only categorical losses are acceptable.
                error("_loss_by_col", "Loss function " + loss + " cannot be applied to categorical column " + col);
            }
        }

        // The Periodic loss is handed the configured period.
        if (loss == GlrmLoss.Periodic) {
            loss.setParameters(_parms._period);
        }
    }
}

/** True when the loss can be used on a binary numeric column (binary or numeric loss). */
private static boolean lossIsForBinaryOrNumeric(GlrmLoss loss) {
    return loss.isForBinary() || loss.isForNumeric();
}
Also used : GlrmLoss(hex.genmodel.algos.glrm.GlrmLoss) Vec(water.fvec.Vec) ArrayList(java.util.ArrayList)

Example 12 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class Storage method toFrame.

/**
   * Helper to convert a Matrix into a Frame.
   * Allocates one constant-zero Vec per matrix column (each with as many rows as the matrix),
   * lets a FrameFiller task populate them from the matrix, and stores the result in DKV.
   * @param m Matrix
   * @param key Key for output Frame
   * @return Reference to Frame (which is also in DKV)
   */
static Frame toFrame(Matrix m, Key key) {
    final int ncols = m.cols();
    // Reduce the default log2 chunk size by floor(log2(#columns)), but never below 1.
    final double log2OfCols = Math.log(ncols) / Math.log(2.);
    final int chunkSizeLog2 = Math.max(1, FileVec.DFLT_LOG2_CHUNK_SIZE - (int) Math.floor(log2OfCols));

    final Vec[] columns = new Vec[ncols];
    for (int c = 0; c < columns.length; c++) {
        columns[c] = makeCon(0, m.rows(), chunkSizeLog2);
    }

    final FrameFiller filler = new FrameFiller(m);
    final Frame filled = filler.doAll(new Frame(key, columns, true))._fr;
    DKV.put(key, filled);
    return filled;
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) FileVec(water.fvec.FileVec)

Example 13 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AggregatorModel method scoreExemplarMembers.

/**
 * Builds a frame from the training frame using a 0/1 "predicate" column that marks the rows
 * whose exemplar assignment equals the gid of the exemplar at {@code exemplarIdx}.
 * The result is stored in DKV and is expected (asserted) to have {@code _counts[exemplarIdx]} rows.
 * NOTE(review): Frame.DeepSelect presumably keeps the rows whose trailing predicate is non-zero — confirm.
 *
 * @param destination_key key under which the resulting frame is created
 * @param exemplarIdx index into {@code _exemplars} of the exemplar of interest
 * @return the resulting frame (also put into DKV)
 */
@Override
public Frame scoreExemplarMembers(Key<Frame> destination_key, final int exemplarIdx) {
    // Temporary indicator column: 1 where the assignment matches the requested exemplar, else 0.
    Vec booleanCol = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            for (int i = 0; i < c._len; ++i) nc.addNum(c.at8(i) == _exemplars[exemplarIdx].gid ? 1 : 0, 0);
        }
    }.doAll(Vec.T_NUM, new Frame(new Vec[] { _exemplar_assignment_vec_key.get() })).outputFrame().anyVec();
    try {
        Frame orig = _parms.train();
        // Unkeyed working frame: the training columns plus the indicator as the last column.
        Frame ff = new Frame(orig.names(), orig.vecs());
        ff.add("predicate", booleanCol);
        Frame res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
        FrameUtils.shrinkDomainsToObservedSubset(res);
        DKV.put(res);
        assert (res.numRows() == _counts[exemplarIdx]);
        return res;
    } finally {
        // Always drop the temporary indicator column, even if selection or the assertion fails.
        booleanCol.remove();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk)

Example 14 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AggregatorModel method createFrameOfExemplars.

/**
 * Builds a frame from {@code orig} using a 0/1 "predicate" column that marks the rows whose
 * row number equals the gid of one of the exemplars, then appends a "counts" column filled
 * from {@code _counts}. The result is stored in DKV under {@code destination_key} and is
 * expected (asserted) to have exactly {@code _exemplars.length} rows.
 * NOTE(review): Frame.DeepSelect presumably keeps the rows whose trailing predicate is non-zero — confirm.
 *
 * @param orig frame to select the exemplar rows from
 * @param destination_key key for the output frame
 * @return the exemplar frame (also put into DKV)
 */
public Frame createFrameOfExemplars(Frame orig, Key destination_key) {
    final long[] keep = new long[_exemplars.length];
    for (int i = 0; i < keep.length; ++i) keep[i] = _exemplars[i].gid;
    Vec exAssignment = _exemplar_assignment_vec_key.get();
    // preserve the original row order
    Vec booleanCol = new MRTask() {

        @Override
        public void map(Chunk c, Chunk c2) {
            // Set the flag for every gid that falls inside this chunk's row range.
            for (int i = 0; i < keep.length; ++i) {
                if (keep[i] < c.start())
                    continue;
                if (keep[i] >= c.start() + c._len)
                    continue;
                c2.set((int) (keep[i] - c.start()), 1);
            }
        }
    }.doAll(new Frame(new Vec[] { exAssignment, exAssignment.makeZero() }))._fr.vec(1);
    Frame res;
    try {
        // Unkeyed working frame: the original columns plus the indicator as the last column.
        Frame ff = new Frame(orig.names(), orig.vecs());
        ff.add("predicate", booleanCol);
        res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
        FrameUtils.shrinkDomainsToObservedSubset(res);
    } finally {
        // Always drop the temporary indicator column, even if selection fails.
        booleanCol.remove();
    }
    assert (res.numRows() == _exemplars.length);
    // Append the per-exemplar counts as an extra column; row i receives _counts[i].
    Vec cnts = res.anyVec().makeZero();
    Vec.Writer vw = cnts.open();
    for (int i = 0; i < _counts.length; ++i) vw.set(i, _counts[i]);
    vw.close();
    res.add("counts", cnts);
    DKV.put(destination_key, res);
    return res;
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk)

Example 15 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class DHistogram method initialHist.

/**
 * The initial histogram bins are setup from the Vec rollups.
 *
 * For each of the first {@code ncols} columns of {@code fr}, fills {@code hs[c]} with a new
 * histogram, or with {@code null} when the column is all-NA, is constant (min == max), or
 * when building the histogram raises a StepOutOfRangeException (a warning is logged then).
 *
 * @return the same {@code hs} array that was passed in
 */
public static DHistogram[] initialHist(Frame fr, int ncols, int nbins, DHistogram[] hs, long seed, SharedTreeModel.SharedTreeParameters parms, Key[] globalQuantilesKey) {
    final Vec[] columns = fr.vecs();
    for (int col = 0; col < ncols; col++) {
        final Vec vec = columns[col];
        final boolean categorical = vec.isCategorical();

        final double minIn;  // inclusive vector min
        final double maxEx;  // smallest exclusive max
        final int typeCode;  // 2 = categorical, 1 = integer, 0 = otherwise
        if (categorical) {
            minIn = 0;
            maxEx = vec.domain().length;
            typeCode = 2;
        } else {
            minIn = Math.max(vec.min(), -Double.MAX_VALUE);
            // inclusive vector max
            final double maxIn = Math.min(vec.max(), Double.MAX_VALUE);
            typeCode = vec.isInt() ? 1 : 0;
            maxEx = find_maxEx(maxIn, typeCode);
        }

        final long rowCount = vec.length();
        try {
            final boolean nothingToSplit = vec.naCnt() == rowCount || vec.min() == vec.max();
            if (nothingToSplit) {
                hs[col] = null;
            } else {
                hs[col] = make(fr._names[col], nbins, (byte) typeCode, minIn, maxEx, seed, parms, globalQuantilesKey[col]);
            }
        } catch (StepOutOfRangeException e) {
            hs[col] = null;
            Log.warn("Column " + fr._names[col] + " with min = " + vec.min() + ", max = " + vec.max() + " has step out of range (" + e.getMessage() + ") and is ignored.");
        }
        assert (hs[col] == null || rowCount > 0);
    }
    return hs;
}
Also used : Vec(water.fvec.Vec)

Aggregations

Vec (water.fvec.Vec) 280, Frame (water.fvec.Frame) 213, Test (org.junit.Test) 82, NFSFileVec (water.fvec.NFSFileVec) 48, ValFrame (water.rapids.vals.ValFrame) 47, Chunk (water.fvec.Chunk) 30, Random (java.util.Random) 25, NewChunk (water.fvec.NewChunk) 23, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters) 22, Key (water.Key) 21, MRTask (water.MRTask) 17, Val (water.rapids.Val) 14, File (java.io.File) 11, ArrayList (java.util.ArrayList) 11, Futures (water.Futures) 11, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException) 11, ValNum (water.rapids.vals.ValNum) 11, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame) 10, BufferedString (water.parser.BufferedString) 10, AppendableVec (water.fvec.AppendableVec) 9