Search in sources :

Example 41 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstIsNa method exec.

/**
 * Evaluates the NA test on the second argument ({@code args[1]}).
 *
 * <ul>
 *   <li>number: wraps {@code op(value)} in a {@link ValNum};</li>
 *   <li>string: yields 1 when the string is {@code null}, otherwise 0;</li>
 *   <li>frame: builds a numeric frame of the same width whose cells are 1 where the
 *       source cell is NA and 0 elsewhere, with columns named {@code isNA(<name>)};</li>
 *   <li>any other type: throws via {@code H2O.unimpl}.</li>
 * </ul>
 */
@Override
public Val exec(Val... args) {
    final Val arg = args[1];
    // Scalar-like inputs are answered immediately; only the frame case falls through.
    switch(arg.type()) {
        case Val.NUM:
            return new ValNum(op(arg.getNum()));
        case Val.STR:
            return new ValNum(arg.getStr() == null ? 1 : 0);
        case Val.FRM:
            break;
        default:
            throw H2O.unimpl("is.na unimpl: " + arg.getClass());
    }

    final Frame frame = arg.getFrame();
    final int width = frame.numCols();

    // One output column label per input column.
    final String[] labels = new String[width];
    int idx = 0;
    while (idx < labels.length) {
        labels[idx] = "isNA(" + frame.name(idx) + ")";
        idx++;
    }

    final Frame flags = new MRTask() {

        @Override
        public void map(Chunk[] cs, NewChunk[] ncs) {
            for (int col = 0; col < cs.length; col++) {
                final Chunk in = cs[col];
                final NewChunk out = ncs[col];
                final int rows = in._len;
                int row = 0;
                while (row < rows) {
                    out.addNum(in.isNA(row) ? 1 : 0);
                    row++;
                }
            }
        }
    }.doAll(width, Vec.T_NUM, frame).outputFrame(labels, null);
    return new ValFrame(flags);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 42 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstNaOmit method apply.

/**
 * Produces a copy of the input frame (the result of executing {@code asts[1]})
 * that keeps only the rows in which no column is NA. Column types, names and
 * domains are carried over from the input frame.
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame input = stk.track(asts[1].exec(env)).getFrame();
    final Frame kept = new MRTask() {

        @Override
        public void map(Chunk[] cs, NewChunk[] ncs) {
            final int rows = cs[0]._len;
            for (int r = 0; r < rows; r++) {
                if (!rowHasNA(cs, r)) {
                    appendRow(cs, ncs, r);
                }
            }
        }

        /** Returns true as soon as any column reports NA at the given row. */
        private boolean rowHasNA(Chunk[] cs, int r) {
            for (Chunk c : cs) {
                if (c.isNA(r)) {
                    return true;
                }
            }
            return false;
        }

        /** Appends one source row to the outputs, choosing the add-call by chunk kind. */
        private void appendRow(Chunk[] cs, NewChunk[] ncs, int r) {
            for (int k = 0; k < cs.length; k++) {
                final Chunk src = cs[k];
                final NewChunk dst = ncs[k];
                if (src instanceof CStrChunk) {
                    dst.addStr(src, r);
                } else if (src instanceof C16Chunk) {
                    dst.addUUID(src, r);
                } else if (src.hasFloat()) {
                    dst.addNum(src.atd(r));
                } else {
                    dst.addNum(src.at8(r), 0);
                }
            }
        }
    }.doAll(input.types(), input).outputFrame(input.names(), input.domains());
    return new ValFrame(kept);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 43 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstBinOp method scalar_op_frame.

/**
 * Applies {@code op(d, x)} to every element {@code x} of the frame, with the
 * scalar as the left operand, producing a numeric frame with the same column
 * names. The result is then passed through {@code cleanCategorical} together
 * with the input frame.
 */
private ValFrame scalar_op_frame(final double d, Frame fr) {
    final Frame computed = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            int col = 0;
            for (Chunk in : chks) {
                final NewChunk out = cress[col++];
                final int rows = in._len;
                for (int r = 0; r < rows; r++) {
                    out.addNum(op(d, in.atd(r)));
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(fr, computed);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 44 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstIfElse method apply.

/**
 * Evaluates an if-else expression: {@code asts[1]} is the test, {@code asts[2]} the
 * "true" branch and {@code asts[3]} the "false" branch.
 *
 * <p>Three shapes of test value are handled:
 * <ul>
 *   <li>number: NaN yields a NaN {@link ValNum}; otherwise only the selected branch is
 *       executed, and a frame result is reduced to its first cell;</li>
 *   <li>row: both branches are executed and handed to {@code row_ifelse};</li>
 *   <li>frame: a numeric frame with one column per test column is built, picking per cell
 *       from the false value (test == 0), the true value (test != 0) or NA (test is NA).</li>
 * </ul>
 *
 * <p>In the frame case a branch may be a number, a string or a frame. String branches turn
 * the output into categorical columns, and the domains are compacted at the end.
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Val val = stk.track(asts[1].exec(env));
    if (val.isNum()) {
        // Scalar test, scalar result
        double d = val.getNum();
        if (Double.isNaN(d))
            return new ValNum(Double.NaN);
        // exec only 1 of false and true
        Val res = stk.track(asts[d == 0 ? 3 : 2].exec(env));
        // A frame-valued branch is collapsed to the value at column 0, row 0.
        return res.isFrame() ? new ValNum(res.getFrame().vec(0).at(0)) : res;
    }
    // Frame test.  Frame result.
    if (val.type() == Val.ROW)
        return row_ifelse((ValRow) val, asts[2].exec(env), asts[3].exec(env));
    Frame tst = val.getFrame();
    // If all zero's, return false and never execute true.
    // NOTE(review): fr starts as a copy of tst and is passed to exec_check; presumably
    // exec_check appends a frame-valued branch's columns to it (the assert in map() below
    // expects chks to hold tst's columns followed by the branch columns) - confirm.
    Frame fr = new Frame(tst);
    Val tval = null;
    for (Vec vec : tst.vecs()) if (vec.min() != 0 || vec.max() != 0) {
        tval = exec_check(env, stk, tst, asts[2], fr);
        break;
    }
    // Classify the true-branch value: frame, string constant, or numeric constant.
    final boolean has_tfr = tval != null && tval.isFrame();
    final String ts = (tval != null && tval.isStr()) ? tval.getStr() : null;
    final double td = (tval != null && tval.isNum()) ? tval.getNum() : Double.NaN;
    // Per-column categorical level index used for the true string, filled in below.
    final int[] tsIntMap = new int[tst.numCols()];
    // If all nonzero's (or NA's), then never execute false.
    Val fval = null;
    for (Vec vec : tst.vecs()) if (vec.nzCnt() + vec.naCnt() < vec.length()) {
        fval = exec_check(env, stk, tst, asts[3], fr);
        break;
    }
    // Classify the false-branch value the same way.
    final boolean has_ffr = fval != null && fval.isFrame();
    final String fs = (fval != null && fval.isStr()) ? fval.getStr() : null;
    final double fd = (fval != null && fval.isNum()) ? fval.getNum() : Double.NaN;
    // Per-column categorical level index used for the false string, filled in below.
    final int[] fsIntMap = new int[tst.numCols()];
    String[][] domains = null;
    // maps[i] translates level indices of a frame branch's column i into the merged domain;
    // entries stay null when no merged domain is built for that column.
    final int[][] maps = new int[tst.numCols()][];
    if (fs != null || ts != null) {
        // time to build domains...
        domains = new String[tst.numCols()][];
        if (fs != null && ts != null) {
            for (int i = 0; i < tst.numCols(); ++i) {
                // false => 0; truth => 1
                domains[i] = new String[] { fs, ts };
                fsIntMap[i] = 0;
                tsIntMap[i] = 1;
            }
        } else if (ts != null) {
            // True branch is a string; the false branch must be a categorical frame whose
            // domain is extended with the true string and re-sorted.
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_ffr) {
                    Vec v = fr.vec(i + tst.numCols() + (has_tfr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = ts;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    tsIntMap[i] = ArrayUtils.find(dom, ts);
                    domains[i] = dom;
                } else
                    throw H2O.unimpl();
            }
        } else {
            // fs!=null
            // Mirror image of the case above: false branch is a string, the true branch
            // must be a categorical frame.
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_tfr) {
                    Vec v = fr.vec(i + tst.numCols() + (has_ffr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = fs;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    fsIntMap[i] = ArrayUtils.find(dom, fs);
                    domains[i] = dom;
                } else
                    throw H2O.unimpl();
            }
        }
    }
    // Now pick from left-or-right in the new frame
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] nchks) {
            // Input layout: test columns first, then the true-frame columns (if any),
            // then the false-frame columns (if any).
            assert nchks.length + (has_tfr ? nchks.length : 0) + (has_ffr ? nchks.length : 0) == chks.length;
            for (int i = 0; i < nchks.length; i++) {
                Chunk ctst = chks[i];
                NewChunk res = nchks[i];
                for (int row = 0; row < ctst._len; row++) {
                    double d;
                    if (ctst.isNA(row))
                        d = Double.NaN;
                    else if (ctst.atd(row) == 0)
                        // False value: frame cell (passed through domainMap), else string level, else number.
                        d = has_ffr ? domainMap(chks[i + nchks.length + (has_tfr ? nchks.length : 0)].atd(row), maps[i]) : fs != null ? fsIntMap[i] : fd;
                    else
                        // True value: frame cell (passed through domainMap), else string level, else number.
                        d = has_tfr ? domainMap(chks[i + nchks.length].atd(row), maps[i]) : ts != null ? tsIntMap[i] : td;
                    res.addNum(d);
                }
            }
        }
    }.doAll(tst.numCols(), Vec.T_NUM, fr).outputFrame(null, domains);
    // flatten domains since they may be larger than needed
    if (domains != null) {
        for (int i = 0; i < res.numCols(); ++i) {
            if (res.vec(i).domain() != null) {
                // dom holds the level indices collected from the column by CollectDomainFast;
                // newDomain keeps just the labels for those indices.
                final long[] dom = new VecUtils.CollectDomainFast((int) res.vec(i).max()).doAll(res.vec(i)).domain();
                String[] newDomain = new String[dom.length];
                for (int l = 0; l < dom.length; ++l) newDomain[l] = res.vec(i).domain()[(int) dom[l]];
                // Rewrite each non-NA cell in place to its position within dom.
                new MRTask() {

                    @Override
                    public void map(Chunk c) {
                        for (int i = 0; i < c._len; ++i) {
                            if (!c.isNA(i))
                                c.set(i, ArrayUtils.find(dom, c.at8(i)));
                        }
                    }
                }.doAll(res.vec(i));
                // needs a DKVput?
                res.vec(i).setDomain(newDomain);
            }
        }
    }
    return new ValFrame(res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) ValRow(water.rapids.vals.ValRow) Vec(water.fvec.Vec) MRTask(water.MRTask)

Example 45 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstMad method mad.

/**
 * Computes the median absolute deviation of a frame's data, scaled by {@code constant}:
 * {@code constant * median(|x - median(x)|)}.
 *
 * <p>The absolute deviations are computed from the values read as doubles, so fractional
 * data is not truncated. A key is only removed from the DKV for the input frame when this
 * method created that key itself; a frame that arrives already keyed keeps its mapping.
 *
 * @param f        frame holding the data; the deviation task emits a single output column
 * @param cm       combine method forwarded to {@code AstMedian.median}
 * @param constant scale factor applied to the resulting median of absolute deviations
 * @return {@code constant} times the median of the absolute deviations from the median
 */
public static double mad(Frame f, QuantileModel.CombineMethod cm, double constant) {
    // need Frames everywhere because of QuantileModel demanding a Frame...
    // Remember whether the key is minted here so that only a temporary mapping is dropped later.
    final boolean tempInputKey = f._key == null;
    if (tempInputKey) {
        Key tk = Key.make();
        DKV.put(tk, f = new Frame(tk, f.names(), f.vecs()));
    }
    final double median = AstMedian.median(f, cm);
    Frame abs_dev = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            // Read as double: a long read would drop the fractional part before subtracting.
            // NOTE(review): an NA cell now contributes NaN to the output rather than going
            // through the long accessor - confirm callers filter NAs beforehand.
            for (int i = 0; i < c._len; ++i) {
                nc.addNum(Math.abs(c.atd(i) - median));
            }
        }
    }.doAll(1, Vec.T_NUM, f).outputFrame();
    if (abs_dev._key == null) {
        Key devKey = Key.make();
        DKV.put(devKey, abs_dev = new Frame(devKey, abs_dev.names(), abs_dev.vecs()));
    }
    double mad = AstMedian.median(abs_dev, cm);
    // drp mapping, keep vec - but only for the mapping created above, never the caller's own key
    if (tempInputKey) {
        DKV.remove(f._key);
    }
    // NOTE(review): this drops only the frame mapping of the intermediate result; its
    // vec presumably stays in the DKV - confirm whether it should be deleted as well.
    DKV.remove(abs_dev._key);
    return constant * mad;
}
Also used : Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) Key(water.Key) NewChunk(water.fvec.NewChunk)

Aggregations

MRTask (water.MRTask)55 ValFrame (water.rapids.vals.ValFrame)37 Chunk (water.fvec.Chunk)33 Frame (water.fvec.Frame)33 NewChunk (water.fvec.NewChunk)23 Vec (water.fvec.Vec)17 BufferedString (water.parser.BufferedString)9 ValNum (water.rapids.vals.ValNum)6 Val (water.rapids.Val)5 AstRoot (water.rapids.ast.AstRoot)4 AstNumList (water.rapids.ast.params.AstNumList)4 Key (water.Key)3 Test (org.junit.Test)2 Futures (water.Futures)2 AstNum (water.rapids.ast.params.AstNum)2 AstStr (water.rapids.ast.params.AstStr)2 AstStrList (water.rapids.ast.params.AstStrList)2 AstGroup (water.rapids.ast.prims.mungers.AstGroup)2 ValRow (water.rapids.vals.ValRow)2 DataInfo (hex.DataInfo)1