Search in sources :

Example 96 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstAsFactor method apply.

/**
 * Converts every column of the frame produced by {@code asts[1]} to a categorical column.
 *
 * <p>All columns are pre-screened first: each must be categorical, string, or numeric.
 * Conversion then proceeds column by column via {@code Vec.toCategoricalVec()}.
 *
 * @param env  execution environment passed to the argument AST
 * @param stk  stack helper used to track the evaluated argument
 * @param asts AST nodes; {@code asts[1]} must evaluate to a frame
 * @return a new {@code ValFrame} with the original column names and the converted vecs
 * @throws IllegalArgumentException if any column is not categorical, string, or numeric
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame ary = stk.track(asts[1].exec(env)).getFrame();
    Vec[] nvecs = new Vec[ary.numCols()];
    // Type check - prescreen for correct types before any conversion work is done
    for (Vec v : ary.vecs()) {
        if (!(v.isCategorical() || v.isString() || v.isNumeric())) {
            // Report the type of the offending column itself, not of an arbitrary column
            throw new IllegalArgumentException("asfactor() requires a string, categorical, or numeric column. " + "Received " + v.get_type_str() + ". Please convert column to a string or categorical first.");
        }
    }
    Vec vv;
    for (int c = 0; c < nvecs.length; ++c) {
        vv = ary.vec(c);
        try {
            nvecs[c] = vv.toCategoricalVec();
        } catch (Exception e) {
            // Presumably releases the first c already-converted vecs before propagating
            VecUtils.deleteVecs(nvecs, c);
            throw e;
        }
    }
    return new ValFrame(new Frame(ary._names, nvecs));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec)

Example 97 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstCBind method apply.

/**
 * Column-binds the values produced by {@code asts[1..]} into a single new frame.
 *
 * <p>Frame arguments contribute their columns; numeric scalars are expanded into a
 * constant column. All non-empty frame arguments must agree on row count. If no
 * non-empty frame is supplied, a temporary one-row vec is used as the template for
 * scalar columns and is removed again before this method exits, including when an
 * exception is thrown.
 *
 * @param env  execution environment passed to each argument AST
 * @param stk  stack helper used to track the evaluated arguments
 * @param asts AST nodes; arguments start at index 1
 * @return a {@code ValFrame} wrapping the combined frame
 * @throws IllegalArgumentException if two non-empty frames have different row counts
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // Compute the variable args.  Find the common row count
    Val[] vals = new Val[asts.length];
    Vec vec = null;
    for (int i = 1; i < asts.length; i++) {
        vals[i] = stk.track(asts[i].exec(env));
        if (vals[i].isFrame()) {
            Vec anyvec = vals[i].getFrame().anyVec();
            // Ignore the empty frame
            if (anyvec == null)
                continue;
            if (vec == null)
                vec = anyvec;
            else if (vec.length() != anyvec.length())
                throw new IllegalArgumentException("cbind frames must have all the same rows, found " + vec.length() + " and " + anyvec.length() + " rows.");
        }
    }
    // Default to length 1 when no non-empty frame fixed the row count
    boolean clean = false;
    if (vec == null) {
        vec = Vec.makeZero(1);
        clean = true;
    }
    // Populate the new Frame
    Frame fr = new Frame();
    try {
        for (int i = 1; i < asts.length; i++) {
            switch(vals[i].type()) {
                case Val.FRM:
                    fr.add(vals[i].getFrame().names(), fr.makeCompatible(vals[i].getFrame()));
                    break;
                case Val.FUN:
                    throw H2O.unimpl();
                case Val.STR:
                    throw H2O.unimpl();
                case Val.NUM:
                    // Auto-expand scalars to fill every row
                    double d = vals[i].getNum();
                    fr.add(Double.toString(d), vec.makeCon(d));
                    break;
                default:
                    throw H2O.unimpl();
            }
        }
    } finally {
        // Always drop the temporary template vec, even if an argument type was unsupported
        if (clean)
            vec.remove();
    }
    return new ValFrame(fr);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec)

Example 98 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstColNames method apply.

/**
 * Renames one or more columns of the frame produced by {@code asts[1]}, in place.
 *
 * <p>{@code asts[2]} selects the columns: either a number list (paired with a string
 * list of equal length in {@code asts[3]}) or a single number (paired with a single
 * string in {@code asts[3]}). If the frame has a key, the frame is written back to
 * the DKV after renaming.
 *
 * @param env  execution environment passed to the argument ASTs
 * @param stk  stack helper used to track the evaluated frame
 * @param asts AST nodes: frame, column selector, new name(s)
 * @return a {@code ValFrame} wrapping the same (renamed) frame
 * @throws IllegalArgumentException if the selector or names have the wrong AST kind,
 *         or the number of selected columns differs from the number of names
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame frame = stk.track(asts[1].exec(env)).getFrame();
    final AstRoot selector = asts[2];

    if (selector instanceof AstNumList) {
        // Multi-column form: number list + string list
        final AstRoot replacement = asts[3];
        if (!(replacement instanceof AstStrList)) {
            throw new IllegalArgumentException("Column naming requires a string-list, but found a " + replacement.getClass());
        }
        final AstNumList columnList = (AstNumList) selector;
        final AstStrList nameList = (AstStrList) replacement;
        final int[] indices = columnList.expand4();
        if (indices.length != nameList._strs.length) {
            throw new IllegalArgumentException("Must have the same number of column choices as names");
        }
        int pos = 0;
        for (int index : indices) {
            frame._names[index] = nameList._strs[pos];
            pos++;
        }
    } else if (selector instanceof AstNum) {
        // Single-column form: one number + one string
        final int index = (int) (selector.exec(env).getNum());
        final String newName = asts[3].exec(env).getStr();
        frame._names[index] = newName;
    } else {
        throw new IllegalArgumentException("Column naming requires a number-list, but found a " + selector.getClass());
    }

    // Update names in DKV
    if (frame._key != null) {
        DKV.put(frame);
    }
    return new ValFrame(frame);
}
Also used : AstStrList(water.rapids.ast.params.AstStrList) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) AstNumList(water.rapids.ast.params.AstNumList)

Example 99 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstColPySlice method apply.

/**
 * Selects columns from the value produced by {@code asts[1]} using the column
 * selector in {@code asts[2]}.
 *
 * <p>For a {@code ValRow} input the row is sliced directly. For a frame input, a new
 * frame is built from the selected columns; out-of-range indices in a list are
 * skipped, while an out-of-range single index is rejected.
 *
 * @param env  execution environment passed to the argument AST
 * @param stk  stack helper used to track the evaluated input
 * @param asts AST nodes: input value, column selector
 * @return the sliced row, or a {@code ValFrame} holding the selected columns
 * @throws IllegalArgumentException if a single-number selector is out of range
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Val input = stk.track(asts[1].exec(env));
    AstParameter selector = (AstParameter) asts[2];

    if (input instanceof ValRow) {
        ValRow row = (ValRow) input;
        int[] rowCols = selector.columns(row.getNames());
        return row.slice(rowCols);
    }

    Frame source = input.getFrame();
    int[] picked = selector.columns(source.names());
    Frame result = new Frame();

    // Empty inclusion list?
    if (picked.length == 0) {
        return new ValFrame(result);
    }

    // Negative cols have number of cols added.
    // NOTE(review): the shift is applied to every entry but is gated on the first
    // entry only - confirm this is intended for lists mixing signs.
    if (picked[0] < 0) {
        for (int i = 0; i < picked.length; i++) {
            picked[i] += source.numCols();
        }
    }

    // Singletons must be in-range
    boolean singleton = asts[2] instanceof AstNum;
    if (singleton && (picked[0] < 0 || picked[0] >= source.numCols())) {
        throw new IllegalArgumentException("Column must be an integer from 0 to " + (source.numCols() - 1));
    }

    // For all included columns, ignoring out-of-range ones
    for (int col : picked) {
        if (col < 0 || col >= source.numCols()) {
            continue;
        }
        result.add(source.names()[col], source.vecs()[col]);
    }
    return new ValFrame(result);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValRow(water.rapids.vals.ValRow) AstParameter(water.rapids.ast.AstParameter)

Example 100 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstVariance method array.

/**
 * Matrix covariance.  Compute covariance between all columns from each Frame
 * against each other.  Return a matrix of covariances which is frx.numCols
 * wide and fry.numCols tall.
 *
 * <p>For {@code Mode.Everything} and {@code Mode.AllObs}, column means are taken from
 * the vecs and one task per Y column is launched. Any other mode takes the branch
 * built on the {@code CoVarTaskCompleteObs*} tasks, which first count NAs and then
 * divide by {@code numRows - 1 - NACount}.
 *
 * @param frx       frame supplying the X columns
 * @param fry       frame supplying the Y columns; also supplies the result column names
 * @param mode      NA-handling mode
 * @param symmetric when true only {@code fry} is used for the pairwise computation and
 *                  the result is mirrored across the diagonal
 * @return a {@code ValNum} when the result is a single value, otherwise a
 *         {@code ValFrame} of covariances
 * @throws IllegalArgumentException if mode is {@code AllObs} and NAs are present
 */
private Val array(Frame frx, Frame fry, Mode mode, boolean symmetric) {
    Vec[] vecxs = frx.vecs();
    int ncolx = vecxs.length;
    Vec[] vecys = fry.vecs();
    int ncoly = vecys.length;
    if (mode.equals(Mode.Everything) || mode.equals(Mode.AllObs)) {
        if (mode.equals(Mode.AllObs)) {
            for (Vec v : vecxs) if (v.naCnt() != 0)
                throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
            if (!symmetric)
                for (Vec v : vecys) if (v.naCnt() != 0)
                    throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        }
        CoVarTaskEverything[] cvs = new CoVarTaskEverything[ncoly];
        double[] xmeans = new double[ncolx];
        // One mean per X column: iterate over ncolx (the length of xmeans and vecxs), not ncoly
        for (int x = 0; x < ncolx; x++) xmeans[x] = vecxs[x].mean();
        if (symmetric) {
            //1-col returns scalar
            if (ncoly == 1)
                return new ValNum(vecys[0].naCnt() == 0 ? vecys[0].sigma() * vecys[0].sigma() : Double.NaN);
            int[] idx = new int[ncoly];
            for (int y = 1; y < ncoly; y++) idx[y] = y;
            int[] first_index = new int[] { 0 };
            //compute covariances between column_i and column_i+1, column_i+2, ...
            Frame reduced_fr;
            for (int y = 0; y < ncoly - 1; y++) {
                // Drop the leading remaining index so each task sees one fewer X column
                idx = ArrayUtils.removeIds(idx, first_index);
                reduced_fr = new Frame(frx.vecs(idx));
                cvs[y] = new CoVarTaskEverything(vecys[y].mean(), xmeans).dfork(new Frame(vecys[y]).add(reduced_fr));
            }
            double[][] res_array = new double[ncoly][ncoly];
            //fill in the diagonals (variances) using sigma from rollupstats
            for (int y = 0; y < ncoly; y++) res_array[y][y] = vecys[y].naCnt() == 0 ? vecys[y].sigma() * vecys[y].sigma() : Double.NaN;
            //arrange the results into row y, columns y+1.. of res_array. each successive cvs is 1 smaller in length
            for (int y = 0; y < ncoly - 1; y++) System.arraycopy(ArrayUtils.div(cvs[y].getResult()._covs, (fry.numRows() - 1)), 0, res_array[y], y + 1, ncoly - y - 1);
            //mirror the filled entries res_array[y][x] (x > y) onto res_array[x][y]
            for (int y = 0; y < ncoly - 1; y++) {
                for (int x = y + 1; x < ncoly; x++) {
                    res_array[x][y] = res_array[y][x];
                }
            }
            //set Frame
            Vec[] res = new Vec[ncoly];
            Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
            for (int y = 0; y < ncoly; y++) {
                res[y] = Vec.makeVec(res_array[y], keys[y]);
            }
            return new ValFrame(new Frame(fry._names, res));
        }
        // Launch tasks; each does all Xs vs one Y
        for (int y = 0; y < ncoly; y++) cvs[y] = new CoVarTaskEverything(vecys[y].mean(), xmeans).dfork(new Frame(vecys[y]).add(frx));
        // 1-col returns scalar 
        if (ncolx == 1 && ncoly == 1) {
            return new ValNum(cvs[0].getResult()._covs[0] / (fry.numRows() - 1));
        }
        // Gather all the Xs-vs-Y covariance arrays; divide by rows
        Vec[] res = new Vec[ncoly];
        Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
        for (int y = 0; y < ncoly; y++) res[y] = Vec.makeVec(ArrayUtils.div(cvs[y].getResult()._covs, (fry.numRows() - 1)), keys[y]);
        return new ValFrame(new Frame(fry._names, res));
    } else {
        if (symmetric) {
            if (ncoly == 1)
                return new ValNum(vecys[0].sigma() * vecys[0].sigma());
            CoVarTaskCompleteObsMeanSym taskCompleteObsMeanSym = new CoVarTaskCompleteObsMeanSym().doAll(fry);
            long NACount = taskCompleteObsMeanSym._NACount;
            double[] ymeans = ArrayUtils.div(taskCompleteObsMeanSym._ysum, fry.numRows() - NACount);
            // 1 task with all Ys
            CoVarTaskCompleteObsSym cvs = new CoVarTaskCompleteObsSym(ymeans).doAll(new Frame(fry));
            double[][] res_array = new double[ncoly][ncoly];
            // Copy row y from the diagonal rightwards (columns y..ncoly-1)
            for (int y = 0; y < ncoly; y++) {
                System.arraycopy(ArrayUtils.div(cvs._covs[y], (fry.numRows() - 1 - NACount)), y, res_array[y], y, ncoly - y);
            }
            //mirror the filled entries res_array[y][x] (x > y) onto res_array[x][y]
            for (int y = 0; y < ncoly - 1; y++) {
                for (int x = y + 1; x < ncoly; x++) {
                    res_array[x][y] = res_array[y][x];
                }
            }
            //set Frame
            Vec[] res = new Vec[ncoly];
            Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
            for (int y = 0; y < ncoly; y++) {
                res[y] = Vec.makeVec(res_array[y], keys[y]);
            }
            return new ValFrame(new Frame(fry._names, res));
        }
        CoVarTaskCompleteObsMean taskCompleteObsMean = new CoVarTaskCompleteObsMean(ncoly, ncolx).doAll(new Frame(fry).add(frx));
        long NACount = taskCompleteObsMean._NACount;
        double[] ymeans = ArrayUtils.div(taskCompleteObsMean._ysum, fry.numRows() - NACount);
        double[] xmeans = ArrayUtils.div(taskCompleteObsMean._xsum, fry.numRows() - NACount);
        // 1 task with all Xs and Ys
        CoVarTaskCompleteObs cvs = new CoVarTaskCompleteObs(ymeans, xmeans).doAll(new Frame(fry).add(frx));
        // 1-col returns scalar 
        if (ncolx == 1 && ncoly == 1) {
            return new ValNum(cvs._covs[0][0] / (fry.numRows() - 1 - NACount));
        }
        // Gather all the Xs-vs-Y covariance arrays; divide by rows
        Vec[] res = new Vec[ncoly];
        Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
        for (int y = 0; y < ncoly; y++) res[y] = Vec.makeVec(ArrayUtils.div(cvs._covs[y], (fry.numRows() - 1 - NACount)), keys[y]);
        return new ValFrame(new Frame(fry._names, res));
    }
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) ValFrame(water.rapids.vals.ValFrame) Vec(water.fvec.Vec) Key(water.Key)

Aggregations

ValFrame (water.rapids.vals.ValFrame) 132, Frame (water.fvec.Frame) 98, Val (water.rapids.Val) 48, Vec (water.fvec.Vec) 43, Test (org.junit.Test) 38, MRTask (water.MRTask) 32, Chunk (water.fvec.Chunk) 24, NewChunk (water.fvec.NewChunk) 23, BufferedString (water.parser.BufferedString) 16, AstNumList (water.rapids.ast.params.AstNumList) 11, AstNum (water.rapids.ast.params.AstNum) 7, ValNum (water.rapids.vals.ValNum) 7, AstRoot (water.rapids.ast.AstRoot) 6, ValRow (water.rapids.vals.ValRow) 6, ArrayList (java.util.ArrayList) 5, Key (water.Key) 5, AstStrList (water.rapids.ast.params.AstStrList) 5, Futures (water.Futures) 4, AstParameter (water.rapids.ast.AstParameter) 4, Random (java.util.Random) 3