Search in sources :

Example 66 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstIsNa method exec.

/**
 * Evaluates the NA test against the single operand found in {@code args[1]}.
 *
 * <ul>
 *   <li>Number: the scalar is passed through {@code op} and wrapped in a {@code ValNum}.</li>
 *   <li>String: yields 1 when the string is {@code null}, otherwise 0.</li>
 *   <li>Frame: yields a frame of the same width whose columns are named
 *       {@code isNA(<original name>)} and hold 1 where the source cell is NA, 0 elsewhere.</li>
 * </ul>
 *
 * @param args call arguments; only index 1 is read here
 * @return the scalar or frame result described above
 */
@Override
public Val exec(Val... args) {
    final Val operand = args[1];
    switch (operand.type()) {
        case Val.NUM: {
            return new ValNum(op(operand.getNum()));
        }
        case Val.STR: {
            return new ValNum(operand.getStr() == null ? 1 : 0);
        }
        case Val.FRM: {
            final Frame source = operand.getFrame();
            final int width = source.numCols();
            // One output column per input column, labelled after the column it was derived from.
            final String[] labels = new String[width];
            int idx = 0;
            while (idx < width) {
                labels[idx] = "isNA(" + source.name(idx) + ")";
                idx++;
            }
            Frame flagged = new MRTask() {

                @Override
                public void map(Chunk[] inputs, NewChunk[] outputs) {
                    for (int col = 0; col < inputs.length; col++) {
                        final Chunk in = inputs[col];
                        final NewChunk out = outputs[col];
                        for (int row = 0; row < in._len; row++) {
                            if (in.isNA(row)) {
                                out.addNum(1);
                            } else {
                                out.addNum(0);
                            }
                        }
                    }
                }
            }.doAll(width, Vec.T_NUM, source).outputFrame(labels, null);
            return new ValFrame(flagged);
        }
        default:
            throw H2O.unimpl("is.na unimpl: " + operand.getClass());
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 67 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstBinOp method scalar_op_frame.

/**
 * Applies the binary operator between a scalar (left operand) and every cell of a frame
 * (right operand), producing a numeric frame with the same column names.
 *
 * @param d  the scalar used as the first argument to {@code op}
 * @param fr the frame whose cells are used as the second argument to {@code op}
 * @return the result of handing the input frame and the computed frame to {@code cleanCategorical}
 */
private ValFrame scalar_op_frame(final double d, Frame fr) {
    Frame computed = new MRTask() {

        @Override
        public void map(Chunk[] inputs, NewChunk[] outputs) {
            int col = 0;
            for (Chunk in : inputs) {
                final NewChunk out = outputs[col++];
                final int rows = in._len;
                for (int row = 0; row < rows; row++) {
                    out.addNum(op(d, in.atd(row)));
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    // Post-process against the original frame to clean up categorical misuse.
    return cleanCategorical(fr, computed);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 68 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstIfElse method apply.

/**
 * Evaluates an if/else expression: {@code asts[1]} is the test, {@code asts[2]} the
 * "true" branch and {@code asts[3]} the "false" branch.
 *
 * <p>Three shapes of test value are handled:
 * <ul>
 *   <li>Numeric scalar: a NaN test yields NaN; otherwise exactly one branch is executed.
 *       If that branch produced a frame, only element 0 of its first column is returned.</li>
 *   <li>Row: both branches are executed and the work is delegated to {@code row_ifelse}.</li>
 *   <li>Frame: a result frame with one column per test column is built. Per cell, an NA test
 *       gives NA, a zero test takes the false value and anything else takes the true value.
 *       A branch is executed only if at least one test cell could select it.</li>
 * </ul>
 *
 * <p>String branch values are encoded as categorical levels; see the domain-building
 * section below.
 *
 * @param env  evaluation environment passed to each sub-expression
 * @param stk  stack helper used to track intermediate values
 * @param asts the expression's operands (indices 1..3 are read here)
 * @return a {@code ValNum}, the executed branch's value, the {@code row_ifelse} result, or a {@code ValFrame}
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Val val = stk.track(asts[1].exec(env));
    if (val.isNum()) {
        // Scalar test, scalar result
        double d = val.getNum();
        if (Double.isNaN(d))
            return new ValNum(Double.NaN);
        // exec only 1 of false and true
        Val res = stk.track(asts[d == 0 ? 3 : 2].exec(env));
        // A frame-valued branch is collapsed to its first cell so the result stays scalar.
        return res.isFrame() ? new ValNum(res.getFrame().vec(0).at(0)) : res;
    }
    // Frame test.  Frame result.
    // NOTE(review): unlike the other paths, the two branch results here are not passed
    // through stk.track -- confirm row_ifelse does not need them tracked.
    if (val.type() == Val.ROW)
        return row_ifelse((ValRow) val, asts[2].exec(env), asts[3].exec(env));
    Frame tst = val.getFrame();
    // If all zero's, return false and never execute true.
    // 'fr' starts as a copy of the test frame; the indexing further down reads branch
    // columns from it at offsets past tst.numCols(), so exec_check presumably appends a
    // frame-valued branch's columns to it -- verify against exec_check.
    Frame fr = new Frame(tst);
    Val tval = null;
    // The true branch is needed as soon as any test column has a non-zero min or max.
    for (Vec vec : tst.vecs()) if (vec.min() != 0 || vec.max() != 0) {
        tval = exec_check(env, stk, tst, asts[2], fr);
        break;
    }
    // Classify the true-branch value: frame, string (ts) or number (td).
    final boolean has_tfr = tval != null && tval.isFrame();
    final String ts = (tval != null && tval.isStr()) ? tval.getStr() : null;
    final double td = (tval != null && tval.isNum()) ? tval.getNum() : Double.NaN;
    // Per-column categorical code used for ts when the true branch is a string.
    final int[] tsIntMap = new int[tst.numCols()];
    // If all nonzero's (or NA's), then never execute false.
    Val fval = null;
    for (Vec vec : tst.vecs()) if (vec.nzCnt() + vec.naCnt() < vec.length()) {
        fval = exec_check(env, stk, tst, asts[3], fr);
        break;
    }
    // Classify the false-branch value: frame, string (fs) or number (fd).
    final boolean has_ffr = fval != null && fval.isFrame();
    final String fs = (fval != null && fval.isStr()) ? fval.getStr() : null;
    final double fd = (fval != null && fval.isNum()) ? fval.getNum() : Double.NaN;
    // Per-column categorical code used for fs when the false branch is a string.
    final int[] fsIntMap = new int[tst.numCols()];
    String[][] domains = null;
    // Per-column remapping from a frame branch's original level indices into the
    // extended domain; entries stay null when no remapping was computed.
    final int[][] maps = new int[tst.numCols()][];
    if (fs != null || ts != null) {
        // time to build domains...
        domains = new String[tst.numCols()][];
        if (fs != null && ts != null) {
            // Both branches are strings: every column gets the two-level domain {fs, ts}.
            for (int i = 0; i < tst.numCols(); ++i) {
                // false => 0; truth => 1
                domains[i] = new String[] { fs, ts };
                fsIntMap[i] = 0;
                tsIntMap[i] = 1;
            }
        } else if (ts != null) {
            // Only the true branch is a string: the false branch must be a categorical frame
            // whose domain is extended with ts.
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_ffr) {
                    // has_tfr is false here (tval is a string), so this addresses the first
                    // block of columns after the test columns.
                    Vec v = fr.vec(i + tst.numCols() + (has_tfr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    // New domain = existing levels plus ts, sorted.
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = ts;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    tsIntMap[i] = ArrayUtils.find(dom, ts);
                    domains[i] = dom;
                } else
                    // String true branch combined with a numeric or never-executed false branch.
                    throw H2O.unimpl();
            }
        } else {
            // fs!=null
            // Mirror image of the case above: the true branch must be a categorical frame
            // whose domain is extended with fs.
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_tfr) {
                    // has_ffr is false here (fval is a string), so this addresses the first
                    // block of columns after the test columns.
                    Vec v = fr.vec(i + tst.numCols() + (has_ffr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = fs;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    fsIntMap[i] = ArrayUtils.find(dom, fs);
                    domains[i] = dom;
                } else
                    // String false branch combined with a numeric or never-executed true branch.
                    throw H2O.unimpl();
            }
        }
    }
    // Now pick from left-or-right in the new frame
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] nchks) {
            // Input chunk layout: test columns, then true-frame columns (if any), then
            // false-frame columns (if any); each group is nchks.length wide.
            assert nchks.length + (has_tfr ? nchks.length : 0) + (has_ffr ? nchks.length : 0) == chks.length;
            for (int i = 0; i < nchks.length; i++) {
                Chunk ctst = chks[i];
                NewChunk res = nchks[i];
                for (int row = 0; row < ctst._len; row++) {
                    double d;
                    if (ctst.isNA(row))
                        // NA test => NA result
                        d = Double.NaN;
                    else if (ctst.atd(row) == 0)
                        // False: frame cell (passed through domainMap), else string code, else scalar.
                        d = has_ffr ? domainMap(chks[i + nchks.length + (has_tfr ? nchks.length : 0)].atd(row), maps[i]) : fs != null ? fsIntMap[i] : fd;
                    else
                        // True: frame cell (passed through domainMap), else string code, else scalar.
                        d = has_tfr ? domainMap(chks[i + nchks.length].atd(row), maps[i]) : ts != null ? tsIntMap[i] : td;
                    res.addNum(d);
                }
            }
        }
    }.doAll(tst.numCols(), Vec.T_NUM, fr).outputFrame(null, domains);
    // flatten domains since they may be larger than needed
    if (domains != null) {
        for (int i = 0; i < res.numCols(); ++i) {
            if (res.vec(i).domain() != null) {
                // 'dom' is used below as the list of old level indices to keep, in new-index
                // order -- presumably the levels that actually occur; verify against CollectDomainFast.
                final long[] dom = new VecUtils.CollectDomainFast((int) res.vec(i).max()).doAll(res.vec(i)).domain();
                String[] newDomain = new String[dom.length];
                for (int l = 0; l < dom.length; ++l) newDomain[l] = res.vec(i).domain()[(int) dom[l]];
                // Rewrite each non-NA cell in place from its old level index to its position in 'dom'.
                new MRTask() {

                    @Override
                    public void map(Chunk c) {
                        for (int i = 0; i < c._len; ++i) {
                            if (!c.isNA(i))
                                c.set(i, ArrayUtils.find(dom, c.at8(i)));
                        }
                    }
                }.doAll(res.vec(i));
                // needs a DKVput?
                res.vec(i).setDomain(newDomain);
            }
        }
    }
    return new ValFrame(res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) ValRow(water.rapids.vals.ValRow) Vec(water.fvec.Vec) MRTask(water.MRTask)

Example 69 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstMad method mad.

/**
 * Computes the median absolute deviation of a frame: the median of
 * {@code |x - median(x)|}, multiplied by {@code constant}.
 *
 * <p>The median computation requires keyed frames, so an unkeyed input (and the intermediate
 * deviation frame) is registered in the DKV under a fresh key for the duration of the call.
 * Only keys installed here are removed again; a frame that arrived with its own key keeps
 * its DKV mapping.
 *
 * @param f        the frame to summarize; the deviation task writes a single output column
 * @param cm       how the median combines the two middle values
 * @param constant scale factor applied to the raw MAD
 * @return {@code constant} times the median absolute deviation
 */
public static double mad(Frame f, QuantileModel.CombineMethod cm, double constant) {
    // need Frames everywhere because of QuantileModel demanding a Frame...
    final boolean installedInputKey = f._key == null;
    if (installedInputKey) {
        Key tk = Key.make();
        f = new Frame(tk, f.names(), f.vecs());
        DKV.put(tk, f);
    }
    Frame abs_dev = null;
    try {
        final double median = AstMedian.median(f, cm);
        abs_dev = new MRTask() {

            @Override
            public void map(Chunk c, NewChunk nc) {
                // Read as double: an integer read would truncate fractional values
                // before the deviation is taken.
                for (int i = 0; i < c._len; ++i) {
                    nc.addNum(Math.abs(c.atd(i) - median));
                }
            }
        }.doAll(1, Vec.T_NUM, f).outputFrame();
        if (abs_dev._key == null) {
            Key devKey = Key.make();
            abs_dev = new Frame(devKey, abs_dev.names(), abs_dev.vecs());
            DKV.put(devKey, abs_dev);
        }
        return constant * AstMedian.median(abs_dev, cm);
    } finally {
        // drop mapping, keep vec -- and only for the wrapper frame created above, so a
        // caller-owned keyed frame is never evicted from the DKV.
        if (installedInputKey) {
            DKV.remove(f._key);
        }
        // NOTE(review): only the frame mapping is removed; the temporary deviation column
        // itself looks like it stays allocated -- confirm whether it should be deleted too.
        if (abs_dev != null && abs_dev._key != null) {
            DKV.remove(abs_dev._key);
        }
    }
}
Also used : Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) Key(water.Key) NewChunk(water.fvec.NewChunk)

Example 70 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstDiffLag1 method apply.

/**
 * Computes the lag-1 difference of a single numeric column: each output row holds the
 * current value minus the previous row's value, and the very first row is NA.
 *
 * <p>The first row of every chunk after chunk 0 needs the last value of the preceding
 * chunk, so those boundary values are fetched via {@code GetLastElemPerChunkTask} before
 * the per-chunk pass runs.
 *
 * @param env  evaluation environment passed to the operand expression
 * @param stk  stack helper used to track the operand frame
 * @param asts the expression's operands; index 1 is the frame to difference
 * @return a frame with the input's names, types and domains holding the differences
 * @throws IllegalArgumentException if the frame does not have exactly one column, or that column is not numeric
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame input = stk.track(asts[1].exec(env).getFrame());
    final int columnCount = input.numCols();
    if (columnCount != 1) {
        throw new IllegalArgumentException("Expected a single column for diff. Got: " + columnCount + " columns.");
    }
    if (!input.anyVec().isNumeric()) {
        throw new IllegalArgumentException("Expected a numeric column for diff. Got: " + input.anyVec().get_type_str());
    }
    final double[] lastElemPerChk = GetLastElemPerChunkTask.get(input.anyVec());
    Frame differenced = new MRTask() {

        @Override
        public void map(Chunk in, NewChunk out) {
            final int chunkIdx = in.cidx();
            if (chunkIdx != 0) {
                // Bridge the chunk boundary using the previous chunk's final value.
                out.addNum(in.atd(0) - lastElemPerChk[chunkIdx - 1]);
            } else {
                // Nothing precedes the first row of the column.
                out.addNA();
            }
            int row = 1;
            while (row < in._len) {
                out.addNum(in.atd(row) - in.atd(row - 1));
                ++row;
            }
        }
    }.doAll(input.types(), input).outputFrame(input.names(), input.domains());
    return new ValFrame(differenced);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Aggregations

Chunk (water.fvec.Chunk)74 Frame (water.fvec.Frame)50 NewChunk (water.fvec.NewChunk)36 MRTask (water.MRTask)33 Vec (water.fvec.Vec)30 ValFrame (water.rapids.vals.ValFrame)26 C0DChunk (water.fvec.C0DChunk)7 BufferedString (water.parser.BufferedString)7 Random (java.util.Random)6 Test (org.junit.Test)5 MRTask2 (water.MRTask2)4 Val (water.rapids.Val)4 Key (water.Key)3 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)3 AstRoot (water.rapids.ast.AstRoot)3 AstNumList (water.rapids.ast.params.AstNumList)3 File (java.io.File)2 IOException (java.io.IOException)2 ValNum (water.rapids.vals.ValNum)2 PrettyPrint (water.util.PrettyPrint)2