Search in sources :

Example 31 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstTable method fast_table.

// -------------------------------------------------------------------------
// Fast-path for 1 integer column
private ValFrame fast_table(Vec v1, int ncols, String colname) {
    if (ncols != 1 || !v1.isInt())
        return null;
    long spanl = (long) v1.max() - (long) v1.min() + 1;
    // Cap at decent array size, for performance
    if (spanl > 1000000)
        return null;
    // First fast-pass counting
    AstTable.FastCnt fastCnt = new AstTable.FastCnt((long) v1.min(), (int) spanl).doAll(v1);
    final long[] cnts = fastCnt._cnts;
    final long minVal = fastCnt._min;
    // Second pass to build the result frame, skipping zeros
    Vec dataLayoutVec = Vec.makeCon(0, cnts.length);
    Frame fr = new MRTask() {

        @Override
        public void map(Chunk[] cs, NewChunk nc0, NewChunk nc1) {
            final Chunk c = cs[0];
            for (int i = 0; i < c._len; ++i) {
                int idx = (int) (i + c.start());
                if (cnts[idx] > 0) {
                    nc0.addNum(idx + minVal);
                    nc1.addNum(cnts[idx]);
                }
            }
        }
    }.doAll(new byte[] { Vec.T_NUM, Vec.T_NUM }, dataLayoutVec).outputFrame(new String[] { colname, "Count" }, new String[][] { v1.domain(), null });
    dataLayoutVec.remove();
    return new ValFrame(fr);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 32 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstIsNa method exec.

@Override
public Val exec(Val... args) {
    Val val = args[1];
    switch(val.type()) {
        case Val.NUM:
            return new ValNum(op(val.getNum()));
        case Val.FRM:
            Frame fr = val.getFrame();
            String[] newNames = new String[fr.numCols()];
            for (int i = 0; i < newNames.length; i++) {
                newNames[i] = "isNA(" + fr.name(i) + ")";
            }
            return new ValFrame(new MRTask() {

                @Override
                public void map(Chunk[] cs, NewChunk[] ncs) {
                    for (int col = 0; col < cs.length; col++) {
                        Chunk c = cs[col];
                        NewChunk nc = ncs[col];
                        for (int i = 0; i < c._len; i++) nc.addNum(c.isNA(i) ? 1 : 0);
                    }
                }
            }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(newNames, null));
        case Val.STR:
            return new ValNum(val.getStr() == null ? 1 : 0);
        default:
            throw H2O.unimpl("is.na unimpl: " + val.getClass());
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 33 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstLevels method apply.

@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame f = stk.track(asts[1].exec(env)).getFrame();
    Futures fs = new Futures();
    Key[] keys = Vec.VectorGroup.VG_LEN1.addVecs(f.numCols());
    Vec[] vecs = new Vec[keys.length];
    // compute the longest vec... that's the one with the most domain levels
    int max = 0;
    for (int i = 0; i < f.numCols(); ++i) if (f.vec(i).isCategorical())
        if (max < f.vec(i).domain().length)
            max = f.vec(i).domain().length;
    final int rowLayout = Vec.ESPC.rowLayout(keys[0], new long[] { 0, max });
    for (int i = 0; i < f.numCols(); ++i) {
        AppendableVec v = new AppendableVec(keys[i], Vec.T_NUM);
        NewChunk nc = new NewChunk(v, 0);
        String[] dom = f.vec(i).domain();
        int numToPad = dom == null ? max : max - dom.length;
        if (dom != null)
            for (int j = 0; j < dom.length; ++j) nc.addNum(j);
        for (int j = 0; j < numToPad; ++j) nc.addNA();
        nc.close(0, fs);
        vecs[i] = v.close(rowLayout, fs);
        vecs[i].setDomain(dom);
    }
    fs.blockForPending();
    Frame fr2 = new Frame(vecs);
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Futures(water.Futures) Vec(water.fvec.Vec) AppendableVec(water.fvec.AppendableVec) AppendableVec(water.fvec.AppendableVec) Key(water.Key) NewChunk(water.fvec.NewChunk)

Example 34 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstBinOp method scalar_op_frame.

/**
   * Auto-widen the scalar to every element of the frame
   */
private ValFrame scalar_op_frame(final double d, Frame fr) {
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            for (int c = 0; c < chks.length; c++) {
                Chunk chk = chks[c];
                NewChunk cres = cress[c];
                for (int i = 0; i < chk._len; i++) cres.addNum(op(d, chk.atd(i)));
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(fr, res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 35 with NewChunk

use of water.fvec.NewChunk in project h2o-3 by h2oai.

the class AstIfElse method apply.

@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Val val = stk.track(asts[1].exec(env));
    if (val.isNum()) {
        // Scalar test, scalar result
        double d = val.getNum();
        if (Double.isNaN(d))
            return new ValNum(Double.NaN);
        // exec only 1 of false and true
        Val res = stk.track(asts[d == 0 ? 3 : 2].exec(env));
        return res.isFrame() ? new ValNum(res.getFrame().vec(0).at(0)) : res;
    }
    // Frame test.  Frame result.
    if (val.type() == Val.ROW)
        return row_ifelse((ValRow) val, asts[2].exec(env), asts[3].exec(env));
    Frame tst = val.getFrame();
    // If all zero's, return false and never execute true.
    Frame fr = new Frame(tst);
    Val tval = null;
    for (Vec vec : tst.vecs()) if (vec.min() != 0 || vec.max() != 0) {
        tval = exec_check(env, stk, tst, asts[2], fr);
        break;
    }
    final boolean has_tfr = tval != null && tval.isFrame();
    final String ts = (tval != null && tval.isStr()) ? tval.getStr() : null;
    final double td = (tval != null && tval.isNum()) ? tval.getNum() : Double.NaN;
    final int[] tsIntMap = new int[tst.numCols()];
    // If all nonzero's (or NA's), then never execute false.
    Val fval = null;
    for (Vec vec : tst.vecs()) if (vec.nzCnt() + vec.naCnt() < vec.length()) {
        fval = exec_check(env, stk, tst, asts[3], fr);
        break;
    }
    final boolean has_ffr = fval != null && fval.isFrame();
    final String fs = (fval != null && fval.isStr()) ? fval.getStr() : null;
    final double fd = (fval != null && fval.isNum()) ? fval.getNum() : Double.NaN;
    final int[] fsIntMap = new int[tst.numCols()];
    String[][] domains = null;
    final int[][] maps = new int[tst.numCols()][];
    if (fs != null || ts != null) {
        // time to build domains...
        domains = new String[tst.numCols()][];
        if (fs != null && ts != null) {
            for (int i = 0; i < tst.numCols(); ++i) {
                // false => 0; truth => 1
                domains[i] = new String[] { fs, ts };
                fsIntMap[i] = 0;
                tsIntMap[i] = 1;
            }
        } else if (ts != null) {
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_ffr) {
                    Vec v = fr.vec(i + tst.numCols() + (has_tfr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = ts;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    tsIntMap[i] = ArrayUtils.find(dom, ts);
                    domains[i] = dom;
                } else
                    throw H2O.unimpl();
            }
        } else {
            // fs!=null
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_tfr) {
                    Vec v = fr.vec(i + tst.numCols() + (has_ffr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = fs;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    fsIntMap[i] = ArrayUtils.find(dom, fs);
                    domains[i] = dom;
                } else
                    throw H2O.unimpl();
            }
        }
    }
    // Now pick from left-or-right in the new frame
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] nchks) {
            assert nchks.length + (has_tfr ? nchks.length : 0) + (has_ffr ? nchks.length : 0) == chks.length;
            for (int i = 0; i < nchks.length; i++) {
                Chunk ctst = chks[i];
                NewChunk res = nchks[i];
                for (int row = 0; row < ctst._len; row++) {
                    double d;
                    if (ctst.isNA(row))
                        d = Double.NaN;
                    else if (ctst.atd(row) == 0)
                        d = has_ffr ? domainMap(chks[i + nchks.length + (has_tfr ? nchks.length : 0)].atd(row), maps[i]) : fs != null ? fsIntMap[i] : fd;
                    else
                        d = has_tfr ? domainMap(chks[i + nchks.length].atd(row), maps[i]) : ts != null ? tsIntMap[i] : td;
                    res.addNum(d);
                }
            }
        }
    }.doAll(tst.numCols(), Vec.T_NUM, fr).outputFrame(null, domains);
    // flatten domains since they may be larger than needed
    if (domains != null) {
        for (int i = 0; i < res.numCols(); ++i) {
            if (res.vec(i).domain() != null) {
                final long[] dom = new VecUtils.CollectDomainFast((int) res.vec(i).max()).doAll(res.vec(i)).domain();
                String[] newDomain = new String[dom.length];
                for (int l = 0; l < dom.length; ++l) newDomain[l] = res.vec(i).domain()[(int) dom[l]];
                new MRTask() {

                    @Override
                    public void map(Chunk c) {
                        for (int i = 0; i < c._len; ++i) {
                            if (!c.isNA(i))
                                c.set(i, ArrayUtils.find(dom, c.at8(i)));
                        }
                    }
                }.doAll(res.vec(i));
                // needs a DKVput?
                res.vec(i).setDomain(newDomain);
            }
        }
    }
    return new ValFrame(res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) ValRow(water.rapids.vals.ValRow) Vec(water.fvec.Vec) MRTask(water.MRTask)

Aggregations

NewChunk (water.fvec.NewChunk)42 Frame (water.fvec.Frame)34 Chunk (water.fvec.Chunk)32 ValFrame (water.rapids.vals.ValFrame)23 MRTask (water.MRTask)22 Vec (water.fvec.Vec)19 AppendableVec (water.fvec.AppendableVec)8 BufferedString (water.parser.BufferedString)6 Key (water.Key)5 Futures (water.Futures)4 MRTask2 (water.MRTask2)4 ValNum (water.rapids.vals.ValNum)4 C0DChunk (water.fvec.C0DChunk)3 Val (water.rapids.Val)2 ValRow (water.rapids.vals.ValRow)2 ConfusionMatrix (hex.ConfusionMatrix)1 MersenneTwisterRNG (hex.rng.MersenneTwisterRNG)1 DataInputStream (java.io.DataInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1