Search in sources :

Example 46 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstMean method rowwiseMean.

/**
 * Row-wise mean: produces a frame holding one column named "mean" whose i-th entry is the mean of row i
 * of {@code fr}.
 *
 * <p>Only numeric columns take part in the computation when at least one numeric column exists; otherwise
 * only time columns are used. All other columns are left out of the computation frame.
 *
 * @param fr    input frame
 * @param na_rm when true, NaN cells are skipped within a row; when false, a single NaN cell (or any
 *              column excluded from the computation) makes the result NaN
 * @return a ValFrame wrapping the single-column result, or an empty frame if {@code fr} has no columns
 */
private ValFrame rowwiseMean(Frame fr, final boolean na_rm) {
    String[] resultNames = { "mean" };
    Key<Frame> resultKey = Key.make();

    // Tally the numeric and the time columns of the input
    int numericCount = 0;
    int timeCount = 0;
    for (Vec column : fr.vecs()) {
        if (column.isNumeric()) {
            numericCount++;
        }
        if (column.isTime()) {
            timeCount++;
        }
    }

    // Numeric wins as soon as one numeric column is present; a TIME result is only possible otherwise
    final boolean useNumeric = numericCount > 0;
    byte resType = useNumeric ? Vec.T_NUM : Vec.T_TIME;

    // Collect the columns that actually participate in the mean
    Frame compFrame = new Frame();
    for (int c = 0; c < fr.numCols(); c++) {
        Vec column = fr.vec(c);
        boolean participates = useNumeric ? column.isNumeric() : column.isTime();
        if (participates) {
            compFrame.add(fr.name(c), column);
        }
    }

    Vec templateVec = compFrame.anyVec();
    if (templateVec == null) {
        // Nothing to average over
        Frame naOrEmpty = new Frame(resultKey);
        templateVec = fr.anyVec();
        if (templateVec != null) {
            // The input has columns, but none of a usable type -> a column of NAs
            naOrEmpty.add("mean", templateVec.makeCon(Double.NaN));
        }
        // otherwise the input itself was empty and so is the answer
        return new ValFrame(naOrEmpty);
    }

    if (!na_rm && numericCount < fr.numCols() && timeCount < fr.numCols()) {
        // Some columns were excluded and NAs are not being removed -> every row is NA
        Vec[] naColumn = { templateVec.makeCon(Double.NaN) };
        return new ValFrame(new Frame(resultKey, resultNames, naColumn));
    }

    // General case: one pass over the participating columns
    final int numCols = compFrame.numCols();
    MRTask meanTask = new MRTask() {

        @Override
        public void map(Chunk[] cs, NewChunk nc) {
            for (int row = 0; row < cs[0]._len; row++) {
                double sum = 0;
                int naCount = 0;
                for (int col = 0; col < numCols; col++) {
                    double cell = cs[col].atd(row);
                    if (Double.isNaN(cell)) {
                        naCount++;
                    } else {
                        sum += cell;
                    }
                }
                // With na_rm a row needs at least one non-NA cell; without it, no NA cell at all
                boolean hasMean = na_rm ? naCount < numCols : naCount == 0;
                nc.addNum(hasMean ? sum / (numCols - naCount) : Double.NaN);
            }
        }
    };
    Frame means = meanTask.doAll(1, resType, compFrame).outputFrame(resultKey, resultNames, null);
    return new ValFrame(means);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 47 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstDiffLag1 method apply.

/**
 * Lag-1 difference of a single numeric column.
 *
 * <p>The first row of the first chunk becomes NA. Every other row is the current value minus the value
 * in the previous row; for the first row of a later chunk the previous value is taken from
 * {@code lastElemPerChk[cidx - 1]} (presumably the last element of the preceding chunk, as supplied by
 * GetLastElemPerChunkTask - confirm against that class).
 *
 * @throws IllegalArgumentException if the frame does not have exactly one column or the column is not numeric
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env).getFrame());
    if (fr.numCols() != 1) {
        throw new IllegalArgumentException("Expected a single column for diff. Got: " + fr.numCols() + " columns.");
    }
    if (!fr.anyVec().isNumeric()) {
        throw new IllegalArgumentException("Expected a numeric column for diff. Got: " + fr.anyVec().get_type_str());
    }
    final double[] lastElemPerChk = GetLastElemPerChunkTask.get(fr.anyVec());
    MRTask diffTask = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            final int chunkIdx = c.cidx();
            // Head of the chunk: nothing precedes row 0 of chunk 0, later chunks borrow from their predecessor
            if (chunkIdx != 0) {
                nc.addNum(c.atd(0) - lastElemPerChk[chunkIdx - 1]);
            } else {
                nc.addNA();
            }
            // Remaining rows only need their in-chunk neighbour
            int row = 1;
            while (row < c._len) {
                nc.addNum(c.atd(row) - c.atd(row - 1));
                ++row;
            }
        }
    };
    Frame diffed = diffTask.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
    return new ValFrame(diffed);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 48 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class TypedFrame method makeVec.

/**
 * Builds the backing Vec: starts from the Vec returned by {@code buildZeroVec()} and overwrites every row
 * with {@code function.apply(globalRowIndex)}, writing through the chunk wrapper produced by
 * {@code factory}.
 *
 * @return the first Vec of the frame the task ran over
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
protected Vec makeVec() throws IOException {
    Vec zeros = buildZeroVec();
    MRTask filler = new MRTask() {

        @Override
        public void map(Chunk[] cs) {
            for (int k = 0; k < cs.length; k++) {
                Chunk c = cs[k];
                DataChunk<X> typed = factory.apply(c);
                for (int r = 0; r < c._len; r++) {
                    // chunk-local row -> row index within the whole Vec
                    long globalRow = r + c.start();
                    typed.set(r, function.apply(globalRow));
                }
            }
        }
    };
    return filler.doAll(zeros)._fr.vecs()[0];
}
Also used : Vec(water.fvec.Vec) MRTask(water.MRTask) Chunk(water.fvec.Chunk)

Example 49 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstMktime method apply.

/**
 * Converts (year, month, day, hour, minute, second, msec) into epoch milliseconds.
 *
 * <p>Each of the seven arguments is either a constant or a single-column frame. If all are constants a
 * single number is returned; otherwise constants are expanded into constant Vecs aligned with one of the
 * frame arguments and the conversion is done row by row. The month and day arguments are incremented by
 * one before being passed to Joda's MutableDateTime.
 *
 * <p>The temporary constant Vecs are removed in a {@code finally} block so that they do not outlive this
 * call when argument validation or the conversion task throws.
 *
 * @return a ValNum for all-constant input, otherwise a ValFrame with a single "msec" column
 * @throws IllegalArgumentException if a frame argument does not have exactly one column
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // Seven args, all required.  See if any are arrays.
    Frame[] fs = new Frame[nargs() - 1];
    int[] is = new int[nargs() - 1];
    // Sample frame (for auto-expanding constants)
    Frame x = null;
    for (int i = 1; i < nargs(); i++) {
        if (asts[i] instanceof AstId || asts[i] instanceof AstExec) {
            fs[i - 1] = x = stk.track(asts[i].exec(env)).getFrame();
        } else {
            is[i - 1] = (int) asts[i].exec(env).getNum();
        }
    }
    if (x == null) {
        // Single point
        long msec = new MutableDateTime(
                is[0],     // year
                is[1] + 1, // month
                is[2] + 1, // day
                is[3],     // hour
                is[4],     // minute
                is[5],     // second
                is[6])     // msec
                .getMillis();
        return new ValNum(msec);
    }
    Vec[] vecs = new Vec[7];
    Frame fr2;
    try {
        // Make constant Vecs for the constant args.  Commonly, they'll all be zero
        for (int i = 0; i < 7; i++) {
            if (fs[i] == null) {
                vecs[i] = x.anyVec().makeCon(is[i]);
            } else {
                if (fs[i].numCols() != 1) {
                    throw new IllegalArgumentException("Expect single column");
                }
                vecs[i] = fs[i].anyVec();
            }
        }
        // Convert whole column to epoch msec
        fr2 = new MRTask() {

            @Override
            public void map(Chunk[] chks, NewChunk[] nchks) {
                MutableDateTime dt = new MutableDateTime(0);
                NewChunk n = nchks[0];
                int rlen = chks[0]._len;
                for (int r = 0; r < rlen; r++) {
                    dt.setDateTime(
                            (int) chks[0].at8(r),     // year
                            (int) chks[1].at8(r) + 1, // month
                            (int) chks[2].at8(r) + 1, // day
                            (int) chks[3].at8(r),     // hour
                            (int) chks[4].at8(r),     // minute
                            (int) chks[5].at8(r),     // second
                            (int) chks[6].at8(r));    // msec
                    n.addNum(dt.getMillis());
                }
            }
        }.doAll(new byte[] { Vec.T_NUM }, vecs).outputFrame(new String[] { "msec" }, null);
    } finally {
        // Clean up the constants, including on failure. Checking vecs[i] first means slots that were
        // never filled (because the loop above aborted) are skipped without touching fs[i].
        for (int i = 0; i < vecs.length; i++) {
            if (vecs[i] != null && fs[i] == null) {
                vecs[i].remove();
            }
        }
    }
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) AstExec(water.rapids.ast.AstExec) MutableDateTime(org.joda.time.MutableDateTime) ValNum(water.rapids.vals.ValNum) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) AstId(water.rapids.ast.params.AstId) Vec(water.fvec.Vec) MRTask(water.MRTask)

Example 50 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstWhich method apply.

/**
 * Returns the positions of the non-zero entries of the argument.
 *
 * <p>Two input shapes are handled: a single row spanning several columns (the result lists the column
 * indices whose value in that row is non-zero), and a single integer column (the result lists the row
 * indices whose value is non-zero).
 *
 * @throws IllegalArgumentException if the input is neither a one-row frame nor a single integer column
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame f = stk.track(asts[1].exec(env)).getFrame();

    // One row, many columns: scan across the columns
    if (f.numRows() == 1 && f.numCols() > 1) {
        AppendableVec hits = new AppendableVec(Vec.VectorGroup.VG_LEN1.addVec(), Vec.T_NUM);
        NewChunk out = new NewChunk(hits, 0);
        Vec[] columns = f.vecs();
        for (int col = 0; col < f.numCols(); col++) {
            if (columns[col].at8(0) != 0) {
                out.addNum(col);
            }
        }
        Futures pending = out.close(0, new Futures());
        Vec closed = hits.layout_and_close(pending);
        pending.blockForPending();
        return new ValFrame(new Frame(closed));
    }

    // One column: scan down the rows
    Vec column = f.anyVec();
    boolean singleIntColumn = f.numCols() <= 1 && column.isInt();
    if (!singleIntColumn) {
        throw new IllegalArgumentException("which requires a single integer column");
    }
    MRTask scan = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            // offset of this chunk's first row within the Vec
            long start = c.start();
            for (int i = 0; i < c._len; ++i) {
                if (c.at8(i) != 0) {
                    nc.addNum(start + i);
                }
            }
        }
    };
    Frame rows = scan.doAll(new byte[] { Vec.T_NUM }, column).outputFrame();
    return new ValFrame(rows);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Futures(water.Futures) MRTask(water.MRTask)

Aggregations

MRTask (water.MRTask): 55, ValFrame (water.rapids.vals.ValFrame): 37, Chunk (water.fvec.Chunk): 33, Frame (water.fvec.Frame): 33, NewChunk (water.fvec.NewChunk): 23, Vec (water.fvec.Vec): 17, BufferedString (water.parser.BufferedString): 9, ValNum (water.rapids.vals.ValNum): 6, Val (water.rapids.Val): 5, AstRoot (water.rapids.ast.AstRoot): 4, AstNumList (water.rapids.ast.params.AstNumList): 4, Key (water.Key): 3, Test (org.junit.Test): 2, Futures (water.Futures): 2, AstNum (water.rapids.ast.params.AstNum): 2, AstStr (water.rapids.ast.params.AstStr): 2, AstStrList (water.rapids.ast.params.AstStrList): 2, AstGroup (water.rapids.ast.prims.mungers.AstGroup): 2, ValRow (water.rapids.vals.ValRow): 2, DataInfo (hex.DataInfo): 1