Search in sources :

Example 51 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstEntropy method entropyStringCol.

/**
 * Computes a per-row entropy value for a string column.
 *
 * <p>Runs an MRTask over {@code vec} that emits one numeric output value per
 * input row, and returns the single resulting Vec.
 *
 * @param vec the column to process; chunks that are not {@code C0DChunk} are
 *            cast to {@code CStrChunk}
 * @return the numeric Vec produced by the task
 */
private Vec entropyStringCol(Vec vec) {
    return new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            // A constant chunk here means the whole chunk is NA: emit one NA per row.
            if (chk instanceof C0DChunk) {
                newChk.addNAs(chk.len());
                return;
            }
            CStrChunk strChk = (CStrChunk) chk;
            // Fast path: pure-ASCII chunks delegate to the chunk's own implementation.
            if (strChk._isAllASCII) {
                strChk.asciiEntropy(newChk);
                return;
            }
            // Non-ASCII (UTF) content: go through java.lang.String row by row.
            BufferedString scratch = new BufferedString();
            for (int row = 0; row < chk._len; row++) {
                if (chk.isNA(row)) {
                    newChk.addNA();
                    continue;
                }
                String text = chk.atStr(scratch, row).toString();
                newChk.addNum(calcEntropy(text));
            }
        }
    }.doAll(new byte[] { Vec.T_NUM }, vec).outputFrame().anyVec();
}
Also used : MRTask(water.MRTask) BufferedString(water.parser.BufferedString)

Example 52 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstReLevel method apply.

/**
 * Re-levels a single categorical column so that the requested level becomes
 * index 0 of the domain; the remaining levels keep their relative order.
 *
 * <p>{@code asts[1]} evaluates to the input frame and {@code asts[2]} to the
 * level name.
 *
 * @return a frame holding the re-coded column with the reordered domain, or a
 *         plain copy of the column when the level is already first
 * @throws IllegalArgumentException if the frame does not have exactly one
 *         column, the column has no domain, or the level is not in the domain
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    // NOTE(review): the message says `setLevel` although this is AstReLevel — confirm intended wording.
    if (fr.numCols() != 1)
        throw new IllegalArgumentException("`setLevel` works on a single column at a time.");
    // Null-check the domain BEFORE using it: previously domain().clone() was
    // called first, so a non-factor column raised a NullPointerException and
    // the check below could never fire.
    final String[] srcDom = fr.anyVec().domain();
    if (srcDom == null)
        throw new IllegalArgumentException("Cannot set the level on a non-factor column!");
    String lvl = asts[2].exec(env).getStr();
    final int idx = Arrays.asList(srcDom).indexOf(lvl);
    if (idx == -1)
        throw new IllegalArgumentException("Did not find level `" + lvl + "` in the column.");
    // Already the first level: nothing to remap, just hand back a copy.
    if (idx == 0)
        return new ValFrame(new Frame(fr.names(), new Vec[] { fr.anyVec().makeCopy() }));
    // New domain: chosen level first, then every other level in original order.
    final String[] dom = new String[srcDom.length];
    dom[0] = srcDom[idx];
    int j = 1;
    for (int i = 0; i < srcDom.length; ++i) {
        if (i != idx)
            dom[j++] = srcDom[i];
    }
    return new ValFrame(new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            int[] vals = new int[c._len];
            // -1 is passed as the last argument and is treated as NA below.
            c.getIntegers(vals, 0, c._len, -1);
            for (int i = 0; i < vals.length; ++i) {
                if (vals[i] == -1)
                    nc.addNA();
                else if (vals[i] == idx)
                    nc.addNum(0);
                else
                    // Levels that sat before idx shift up by one; later ones keep their code.
                    nc.addNum(vals[i] + (vals[i] < idx ? 1 : 0));
            }
        }
    }.doAll(1, Vec.T_CAT, fr).outputFrame(fr.names(), new String[][] { dom }));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 53 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstSetLevel method apply.

/**
 * Produces a new single-column frame in which every row is set to the index
 * of the requested level within the input column's domain.
 *
 * <p>{@code asts[1]} evaluates to the input frame and {@code asts[2]} to the
 * level name.
 *
 * @return a frame with the input's names and domains whose rows all hold the
 *         level's index
 * @throws IllegalArgumentException if the frame does not have exactly one
 *         column, the column has no domain, or the level is not in the domain
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    if (fr.numCols() != 1)
        throw new IllegalArgumentException("`setLevel` works on a single column at a time.");
    // Null-check the domain BEFORE using it: previously domain().clone() was
    // called first, so a non-factor column raised a NullPointerException and
    // the check below could never fire.
    final String[] doms = fr.anyVec().domain();
    if (doms == null)
        throw new IllegalArgumentException("Cannot set the level on a non-factor column!");
    String lvl = asts[2].exec(env).getStr();
    final int idx = Arrays.asList(doms).indexOf(lvl);
    if (idx == -1)
        throw new IllegalArgumentException("Did not find level `" + lvl + "` in the column.");
    // Build a fresh frame instead of writing into the input (original note: COW semantics).
    Frame fr2 = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            // Every output row gets the same level index.
            for (int i = 0; i < c._len; ++i) {
                nc.addNum(idx);
            }
        }
    }.doAll(new byte[] { Vec.T_NUM }, fr.anyVec()).outputFrame(null, fr.names(), fr.domains());
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 54 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstBinOp method frame_op_scalar.

/**
 * Applies this operator between every element of {@code fr} and the string
 * scalar {@code str}, widening the scalar across the whole frame.
 *
 * @param fr  the input frame; one numeric output column is produced per input column
 * @param str the string scalar operand
 * @return a frame of numeric results carrying the input frame's column names
 */
private ValFrame frame_op_scalar(Frame fr, final String str) {
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            // Reusable scratch buffer for reading string cells.
            BufferedString cell = new BufferedString();
            for (int col = 0; col < chks.length; col++) {
                Chunk in = chks[col];
                NewChunk out = cress[col];
                Vec colVec = in.vec();
                if (colVec.isString()) {
                    // String columns: compare as BufferedStrings via str_op.
                    final BufferedString scalar = new BufferedString(str);
                    for (int row = 0; row < in._len; row++) {
                        out.addNum(str_op(in.atStr(cell, row), scalar));
                    }
                } else if (colVec.isCategorical()) {
                    // Categorical columns: translate the scalar into its domain
                    // index and apply the numeric op (not str_op).  One could
                    // argue for applying str_op to the level names instead, but
                    // this only makes sense for EQ/NE and comparing doubles is
                    // much faster than comparing strings.  If the scalar is not
                    // in the domain, find yields -1, which never equals a
                    // categorical's dense integer code (always 0+).
                    final double level = (double) ArrayUtils.find(colVec.domain(), str);
                    for (int row = 0; row < in._len; row++) {
                        out.addNum(op(in.atd(row), level));
                    }
                } else {
                    // Numeric column against a string scalar: the result is a
                    // constant (false or true only), so evaluate the op once on
                    // two unequal values and fill the column with it.
                    final double constant = op(1, 2);
                    for (int row = 0; row < in._len; row++) {
                        out.addNum(constant);
                    }
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 55 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstGroupedPermute method buildOutput.

/**
 * Flattens the 3-D array {@code a} into a 5-column numeric frame.
 *
 * <p>A temporary sequence frame with {@code a.length} rows drives the task.
 * For each driver row, the value read from it indexes into {@code a}, and
 * every {@code double[]} found there is appended as one output row (element
 * {@code k} goes to output column {@code k}).
 *
 * @param a       the data to emit; assumes each innermost array has exactly
 *                5 entries to match the 5 output columns — TODO confirm with caller
 * @param names   column names for the output frame
 * @param domains column domains for the output frame
 * @return the assembled output frame
 */
private static Frame buildOutput(final double[][][] a, String[] names, String[][] domains) {
    Frame dVec = new Frame(Vec.makeSeq(0, a.length));
    try {
        long s = System.currentTimeMillis();
        Frame res = new MRTask() {

            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                for (int i = 0; i < cs[0]._len; ++i) {
                    for (double[] anAa : a[(int) cs[0].at8(i)]) {
                        for (int k = 0; k < anAa.length; ++k) {
                            ncs[k].addNum(anAa[k]);
                        }
                    }
                }
            }
        }.doAll(5, Vec.T_NUM, dVec).outputFrame(null, names, domains);
        Log.info("Elapsed time: " + (System.currentTimeMillis() - s) / 1000. + "s");
        return res;
    } finally {
        // Always remove the temporary driver frame, even if the task throws;
        // previously it was only deleted on the success path.
        dVec.delete();
    }
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask)

Aggregations

MRTask (water.MRTask): 55, ValFrame (water.rapids.vals.ValFrame): 37, Chunk (water.fvec.Chunk): 33, Frame (water.fvec.Frame): 33, NewChunk (water.fvec.NewChunk): 23, Vec (water.fvec.Vec): 17, BufferedString (water.parser.BufferedString): 9, ValNum (water.rapids.vals.ValNum): 6, Val (water.rapids.Val): 5, AstRoot (water.rapids.ast.AstRoot): 4, AstNumList (water.rapids.ast.params.AstNumList): 4, Key (water.Key): 3, Test (org.junit.Test): 2, Futures (water.Futures): 2, AstNum (water.rapids.ast.params.AstNum): 2, AstStr (water.rapids.ast.params.AstStr): 2, AstStrList (water.rapids.ast.params.AstStrList): 2, AstGroup (water.rapids.ast.prims.mungers.AstGroup): 2, ValRow (water.rapids.vals.ValRow): 2, DataInfo (hex.DataInfo): 1