Search in sources :

Example 6 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstRowSlice method apply.

/**
 * Builds a new frame containing a subset of the rows of the frame produced by {@code asts[1]}.
 *
 * <p>The row selector {@code asts[2]} may be:
 * <ul>
 *   <li>an {@code AstNumList}: either non-negative row indices to keep, or negative values where
 *       {@code -(i + 1)} marks row {@code i} for exclusion (out-of-range exclusions are ignored);</li>
 *   <li>an {@code AstNum}: a single row index, handed to {@code Frame.deepSlice};</li>
 *   <li>an {@code AstExec} or {@code AstId}: evaluated to a single-column frame that is handed to
 *       {@code Frame.deepSlice} as the row predicate.</li>
 * </ul>
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper that tracks the intermediate values produced here
 * @param asts operator arguments; {@code asts[1]} is the frame, {@code asts[2]} the row selector
 * @return the selected rows wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if the selection is unsorted, mixes negative and positive
 *         indices, exceeds the row count, the predicate has more than one column, or the
 *         selector is of an unsupported AST type
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Frame returningFrame;
    long nrows = fr.numRows();
    if (asts[2] instanceof AstNumList) {
        final AstNumList nums = (AstNumList) asts[2];
        if (!nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
            throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");
        // Only explicit lists and negative selections are expanded; otherwise rows stays null
        // and membership is tested through nums.has() inside the task below.
        long[] rows = (nums._isList || nums.min() < 0) ? nums.expand8Sort() : null;
        if (rows != null) {
            if (rows.length == 0) {
            // Empty inclusion list?
            } else if (rows[0] >= 0) {
                // Positive (inclusion) list
                // NOTE(review): an index equal to nrows passes this check although the message
                // says the maximum is nrows - 1; kept as-is, such an index simply matches no row.
                if (rows[rows.length - 1] > nrows)
                    throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
            } else {
                // Negative (exclusion) list
                if (rows[rows.length - 1] >= 0)
                    throw new IllegalArgumentException("Cannot mix negative and positive row selection");
                // Invert the list to make a positive list, ignoring out-of-bounds values
                // NOTE(review): the int narrowing of nrows here and below limits this path to
                // frames whose row count fits in an int.
                BitSet bs = new BitSet((int) nrows);
                for (long row : rows) {
                    // The positive index. Range-check it as a long: narrowing to int first would
                    // wrap far-out-of-range selectors back onto valid rows and exclude them.
                    long idx = -row - 1;
                    if (idx >= 0 && idx < nrows)
                        // Set row to EXCLUDE
                        bs.set((int) idx);
                }
                // Every bit left clear is a row to keep.
                rows = new long[(int) nrows - bs.cardinality()];
                for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1)) rows[j++] = i;
            }
        }
        final long[] ls = rows;
        returningFrame = new MRTask() {

            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                if (nums.cnt() == 0)
                    return;
                if (ls != null && ls.length == 0)
                    return;
                long start = cs[0].start();
                long end = start + cs[0]._len;
                // exclusive max to inclusive max when stride == 1
                long min = ls == null ? (long) nums.min() : ls[0];
                long max = ls == null ? (long) nums.max() - 1 : ls[ls.length - 1];
                // Skip the chunk entirely when the selected range [min, max] lies wholly before
                // or wholly after it.
                if (!(max < start || min > end)) {
                    // Begin at the first selected row when it falls inside this chunk.
                    long startOffset = (min > start ? min : start);
                    for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) {
                        if ((ls == null && nums.has(start + i)) || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) {
                            // Copy the selected row column by column, keeping string/UUID/NA cells intact.
                            for (int c = 0; c < cs.length; ++c) {
                                if (cs[c] instanceof CStrChunk)
                                    ncs[c].addStr(cs[c], i);
                                else if (cs[c] instanceof C16Chunk)
                                    ncs[c].addUUID(cs[c], i);
                                else if (cs[c].isNA(i))
                                    ncs[c].addNA();
                                else
                                    ncs[c].addNum(cs[c].atd(i));
                            }
                        }
                    }
                }
            }
        }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
    } else if ((asts[2] instanceof AstNum)) {
        long[] rows = new long[] { (long) (((AstNum) asts[2]).getNum()) };
        returningFrame = fr.deepSlice(rows, null);
    } else if ((asts[2] instanceof AstExec) || (asts[2] instanceof AstId)) {
        Frame predVec = stk.track(asts[2].exec(env)).getFrame();
        if (predVec.numCols() != 1)
            throw new IllegalArgumentException("Conditional Row Slicing Expression evaluated to " + predVec.numCols() + " columns.  Must be a boolean Vec.");
        returningFrame = fr.deepSlice(predVec, null);
    } else
        throw new IllegalArgumentException("Row slicing requires a number-list as the last argument, but found a " + asts[2].getClass());
    return new ValFrame(returningFrame);
}
Also used : ValFrame(water.rapids.vals.ValFrame) AstExec(water.rapids.ast.AstExec) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) AstId(water.rapids.ast.params.AstId) AstNumList(water.rapids.ast.params.AstNumList)

Example 7 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstScale method apply.

/**
 * Centers and scales every column of the frame produced by {@code asts[1]}, in place.
 *
 * <p>Each cell is rewritten as {@code (value - shift[col]) * mult[col]}.
 * <ul>
 *   <li>{@code asts[2]} supplies the shifts: a number list with one entry per column, or a
 *       number where 0 means "no shift" and 1 means "use the frame's column means".</li>
 *   <li>{@code asts[3]} supplies the multipliers: a number list with one entry per column, a
 *       frame whose first vec is converted to an array, or a number where 0 means "multiply by
 *       one" and 1 means "use the frame's {@code mults()}".</li>
 * </ul>
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper that tracks the input frame
 * @param asts operator arguments as described above
 * @return the same frame, modified in place, wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if a number list does not match the column count or a
 *         numeric flag is neither 0 nor 1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame frame = stk.track(asts[1].exec(env)).getFrame();
    final int columnCount = frame.numCols();

    // Resolve the per-column shift (bias/mean).
    final double[] shifts;
    if (asts[2] instanceof AstNumList) {
        shifts = ((AstNumList) asts[2]).expand();
        if (shifts.length != columnCount) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        final double centerFlag = asts[2].exec(env).getNum();
        if (centerFlag == 0) {
            // Centering disabled: a zero-filled array leaves values unshifted.
            shifts = new double[columnCount];
        } else if (centerFlag == 1) {
            shifts = frame.means();
        } else {
            throw new IllegalArgumentException("Only true or false allowed");
        }
    }

    // Resolve the per-column multiplier (scale/stddev).
    final double[] factors;
    if (asts[3] instanceof AstNumList) {
        factors = ((AstNumList) asts[3]).expand();
        if (factors.length != columnCount) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        final Val scaleArg = asts[3].exec(env);
        if (scaleArg instanceof ValFrame) {
            factors = toArray(scaleArg.getFrame().anyVec());
        } else {
            final double scaleFlag = scaleArg.getNum();
            if (scaleFlag == 0) {
                // Scaling disabled: a one-filled array leaves values unscaled.
                factors = new double[columnCount];
                Arrays.fill(factors, 1.0);
            } else if (scaleFlag == 1) {
                factors = frame.mults();
            } else {
                throw new IllegalArgumentException("Only true or false allowed");
            }
        }
    }

    // Rewrite every cell of the input frame in place.
    new MRTask() {

        @Override
        public void map(Chunk[] chunks) {
            for (int col = 0; col < chunks.length; col++) {
                final Chunk chunk = chunks[col];
                for (int r = 0; r < chunk._len; r++) {
                    chunk.set(r, (chunk.atd(r) - shifts[col]) * factors[col]);
                }
            }
        }
    }.doAll(frame);
    return new ValFrame(frame);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) AstNumList(water.rapids.ast.params.AstNumList)

Example 8 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstSetDomain method apply.

/**
 * Replaces the domain (level names) of a single categorical column, in place.
 *
 * <p>When the number of replacement levels differs from the length of the column's current
 * domain, the levels actually collected from the data are used instead: their count must equal
 * the number of replacements, and every non-NA cell is rewritten to the position of its value
 * within the collected levels before the new domain is installed.
 *
 * @param env  execution environment used to evaluate {@code asts[1]}
 * @param stk  stack helper that tracks the input frame
 * @param asts operator arguments; {@code asts[1]} is the frame, {@code asts[2]} an
 *             {@code AstStrList} holding the replacement levels
 * @return the input frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if the frame is not a single categorical column, or the
 *         number of replacement levels matches neither the current domain nor the collected levels
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame frame = stk.track(asts[1].exec(env)).getFrame();
    final String[] newLevels = ((AstStrList) asts[2])._strs;
    if (frame.numCols() != 1) {
        throw new IllegalArgumentException("Must be a single column. Got: " + frame.numCols() + " columns.");
    }
    final Vec column = frame.anyVec();
    if (!column.isCategorical()) {
        throw new IllegalArgumentException("Vector must be a factor column. Got: " + column.get_type_str());
    }

    final boolean sizeMismatch = newLevels != null && newLevels.length != column.domain().length;
    if (sizeMismatch) {
        // Recollect the levels from the data and require their count to match the replacements.
        final VecUtils.CollectDomainFast collector = new VecUtils.CollectDomainFast((int) column.max());
        collector.doAll(column);
        final long[] usedLevels = collector.domain();
        if (usedLevels.length != newLevels.length) {
            throw new IllegalArgumentException("Number of replacement factors must equal current number of levels. Current number of levels: " + usedLevels.length + " != " + newLevels.length);
        }
        // Rewrite each stored value to its position within the collected levels.
        new MRTask() {

            @Override
            public void map(Chunk chunk) {
                for (int r = 0; r < chunk._len; ++r) {
                    if (chunk.isNA(r)) {
                        continue;
                    }
                    long position = Arrays.binarySearch(usedLevels, chunk.at8(r));
                    if (position < 0) {
                        throw new IllegalArgumentException("Could not find the categorical value!");
                    }
                    chunk.set(r, position);
                }
            }
        }.doAll(column);
    }

    column.setDomain(newLevels);
    DKV.put(column);
    return new ValFrame(frame);
}
Also used : AstStrList(water.rapids.ast.params.AstStrList) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) ValFrame(water.rapids.vals.ValFrame) VecUtils(water.util.VecUtils) Vec(water.fvec.Vec) MRTask(water.MRTask)

Example 9 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstBinOp method frame_op_frame.

/**
 * Applies this binary operator to two frames.
 *
 * <p>Dispatch rules, in order:
 * <ul>
 *   <li>different row counts: allowed only when one side has exactly one row and both sides
 *       have the same column count, in which case the work goes to {@code frame_op_row};</li>
 *   <li>a side with zero columns is returned unchanged;</li>
 *   <li>a single-column side paired with a multi-column side is widened through
 *       {@code vec_op_frame} / {@code frame_op_vec};</li>
 *   <li>otherwise the column counts must match and the operator is applied element by element,
 *       using {@code str_op} when the left column is a string column and {@code op} otherwise.</li>
 * </ul>
 *
 * @param lf left operand
 * @param rt right operand
 * @return the result of the operation
 * @throws IllegalArgumentException if the row or column counts are incompatible
 */
private ValFrame frame_op_frame(Frame lf, Frame rt) {
    final long leftRows = lf.numRows();
    final long rightRows = rt.numRows();
    final int leftCols = lf.numCols();
    final int rightCols = rt.numCols();

    if (leftRows != rightRows) {
        // Only a single row of data may be broadcast across the other frame.
        final boolean hasSingleRow = leftRows == 1 || rightRows == 1;
        if (!hasSingleRow) {
            throw new IllegalArgumentException("Frames must have same rows, found " + leftRows + " rows and " + rightRows + " rows.");
        }
        if (leftCols != rightCols) {
            throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
        }
        return frame_op_row(lf, rt);
    }

    if (leftCols == 0) {
        return new ValFrame(lf);
    }
    if (rightCols == 0) {
        return new ValFrame(rt);
    }
    if (leftCols == 1 && rightCols > 1) {
        return vec_op_frame(lf.vecs()[0], rt);
    }
    if (rightCols == 1 && leftCols > 1) {
        return frame_op_vec(lf, rt.vecs()[0]);
    }
    if (leftCols != rightCols) {
        throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
    }

    // Left columns followed by right columns, processed together chunk by chunk.
    final Frame combined = new Frame(lf).add(rt);
    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] inputs, NewChunk[] outputs) {
            BufferedString leftBuf = new BufferedString();
            BufferedString rightBuf = new BufferedString();
            final int width = outputs.length;
            assert inputs.length == (width << 1);
            for (int col = 0; col < width; col++) {
                Chunk left = inputs[col];
                Chunk right = inputs[col + width];
                NewChunk out = outputs[col];
                if (left.vec().isString()) {
                    for (int r = 0; r < left._len; r++) {
                        out.addNum(str_op(left.atStr(leftBuf, r), right.atStr(rightBuf, r)));
                    }
                } else {
                    for (int r = 0; r < left._len; r++) {
                        out.addNum(op(left.atd(r), right.atd(r)));
                    }
                }
            }
        }
    }.doAll(leftCols, Vec.T_NUM, combined).outputFrame(lf._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(lf, result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 10 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstBinOp method frame_op_vec.

/**
 * Applies this binary operator between every column of a frame (left operand) and a single
 * vec (right operand).
 *
 * <p>The caller has already verified matching row counts and a non-empty frame.
 *
 * @param fr  left operand; its column names are reused for the result
 * @param vec right operand, paired with each column of {@code fr}
 * @return the result after {@code cleanCategorical} post-processing
 */
private ValFrame frame_op_vec(Frame fr, Vec vec) {
    // Append the vec as an extra trailing column so each task sees it next to the frame's chunks.
    final Frame widened = new Frame(fr);
    widened.add("", vec);
    final Frame result = new MRTask() {

        @Override
        public void map(Chunk[] inputs, NewChunk[] outputs) {
            final int width = outputs.length;
            assert width == inputs.length - 1;
            final Chunk right = inputs[width];
            for (int col = 0; col < width; col++) {
                final Chunk left = inputs[col];
                final NewChunk out = outputs[col];
                for (int r = 0; r < left._len; r++) {
                    out.addNum(op(left.atd(r), right.atd(r)));
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, widened).outputFrame(fr._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(fr, result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Aggregations

ValFrame (water.rapids.vals.ValFrame)132 Frame (water.fvec.Frame)98 Val (water.rapids.Val)48 Vec (water.fvec.Vec)43 Test (org.junit.Test)38 MRTask (water.MRTask)32 Chunk (water.fvec.Chunk)24 NewChunk (water.fvec.NewChunk)23 BufferedString (water.parser.BufferedString)16 AstNumList (water.rapids.ast.params.AstNumList)11 AstNum (water.rapids.ast.params.AstNum)7 ValNum (water.rapids.vals.ValNum)7 AstRoot (water.rapids.ast.AstRoot)6 ValRow (water.rapids.vals.ValRow)6 ArrayList (java.util.ArrayList)5 Key (water.Key)5 AstStrList (water.rapids.ast.params.AstStrList)5 Futures (water.Futures)4 AstParameter (water.rapids.ast.AstParameter)4 Random (java.util.Random)3