Search in sources :

Example 11 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstBinOp method scalar_op_frame.

/**
   * Combine one scalar string with every cell of {@code fr}, producing a numeric frame.
   * <p>
   * Each input column yields one output column; how a cell is evaluated depends on the column type:
   * <ul>
   *   <li>string columns: {@code str_op} is applied to the scalar and the cell's string value;</li>
   *   <li>categorical columns: {@code op} is applied to the position of {@code str} in the column's
   *       domain (as returned by {@code ArrayUtils.find}) and the cell's numeric value;</li>
   *   <li>all other columns: every cell receives the constant {@code op(1, 2)}.</li>
   * </ul>
   *
   * @param str the scalar string operand
   * @param fr  the frame whose cells are combined with {@code str}
   * @return the per-cell results, wrapped in a frame that reuses the column names of {@code fr}
   */
private ValFrame scalar_op_frame(final String str, Frame fr) {
    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            // Scratch buffer reused for every string cell read in this map call
            BufferedString scratch = new BufferedString();
            for (int col = 0; col < chks.length; col++) {
                final Chunk in = chks[col];
                final NewChunk out = cress[col];
                final Vec column = in.vec();
                final int rows = in._len;

                if (column.isString()) {
                    // String column: compare as BufferedStrings via str_op
                    final BufferedString scalar = new BufferedString(str);
                    for (int row = 0; row < rows; row++) {
                        out.addNum(str_op(scalar, in.atStr(scratch, row)));
                    }
                    continue;
                }

                if (column.isCategorical()) {
                    // Categorical column: translate the string into its domain position once and
                    // use the numeric op rather than str_op. One could argue for applying str_op
                    // to the domain strings instead, but this operation only really makes sense
                    // for EQ/NE, and comparing doubles is much cheaper than comparing strings.
                    final double level = (double) ArrayUtils.find(column.domain(), str);
                    for (int row = 0; row < rows; row++) {
                        out.addNum(op(level, in.atd(row)));
                    }
                    continue;
                }

                // String scalar against a non-string, non-categorical column: the answer is the
                // same for every cell (false or true only), so compute it once from two unequal
                // numbers and fill the column with it.
                final double constant = op(1, 2);
                for (int row = 0; row < rows; row++) {
                    out.addNum(constant);
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 12 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstSumAxis method rowwiseSum.

/**
   * Sum the frame across its columns, one result per row.
   * <p>
   * Only numeric columns take part when at least one is present; otherwise only time columns do.
   * The result is a frame holding a single column named "sum".
   *
   * @param fr    the frame to sum row by row
   * @param na_rm when true, NA cells are skipped and a row is NA only if all its summed cells are NA;
   *              when false, any NA cell makes the row's sum NA
   * @return a frame with one "sum" column, or an empty frame when {@code fr} has no columns
   */
private ValFrame rowwiseSum(Frame fr, final boolean na_rm) {
    final String[] newnames = { "sum" };
    final Key<Frame> newkey = Key.make();

    // Count the numeric and the time columns
    int n_numeric = 0;
    int n_time = 0;
    for (Vec column : fr.vecs()) {
        if (column.isNumeric()) {
            n_numeric++;
        }
        if (column.isTime()) {
            n_time++;
        }
    }

    // Any numeric column makes the result numeric; with none, the result is a time column.
    final boolean sumNumerics = n_numeric > 0;
    final byte resType = sumNumerics ? Vec.T_NUM : Vec.T_TIME;

    // Collect the columns that actually take part in the sum
    final int totalCols = fr.numCols();
    Frame compFrame = new Frame();
    for (int i = 0; i < totalCols; i++) {
        Vec column = fr.vec(i);
        boolean participates = sumNumerics ? column.isNumeric() : column.isTime();
        if (participates) {
            compFrame.add(fr.name(i), column);
        }
    }

    // Corner case: nothing to sum
    Vec anyvec = compFrame.anyVec();
    if (anyvec == null) {
        Frame empty = new Frame(newkey);
        Vec template = fr.anyVec();
        if (template != null) {
            // The input has columns but none is summable: answer with a column of NAs
            empty.add("sum", template.makeCon(Double.NaN));
        }
        // Otherwise the input itself is empty and so is the answer
        return new ValFrame(empty);
    }

    // Corner case: without NA removal, a column outside the summed type makes every row NA
    boolean hasOtherColumns = n_numeric < totalCols && n_time < totalCols;
    if (!na_rm && hasOtherColumns) {
        Vec allNa = anyvec.makeCon(Double.NaN);
        return new ValFrame(new Frame(newkey, newnames, new Vec[] { allNa }));
    }

    // General case: accumulate each row over the participating columns
    final int numCols = compFrame.numCols();
    Frame sums = new MRTask() {

        @Override
        public void map(Chunk[] cs, NewChunk nc) {
            final int rows = cs[0]._len;
            for (int row = 0; row < rows; row++) {
                double total = 0;
                int naCount = 0;
                for (int col = 0; col < numCols; col++) {
                    double cell = cs[col].atd(row);
                    if (Double.isNaN(cell)) {
                        naCount++;
                    } else {
                        total += cell;
                    }
                }
                // With na_rm a row needs at least one non-NA cell; without it, no NA at all
                boolean defined = na_rm ? naCount < numCols : naCount == 0;
                nc.addNum(defined ? total : Double.NaN);
            }
        }
    }.doAll(1, resType, compFrame).outputFrame(newkey, newnames, null);
    return new ValFrame(sums);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 13 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstRepLen method apply.

/**
 * Repeat the first argument out to the length given by the second argument.
 * <ul>
 *   <li>A non-frame value becomes a single constant column of {@code length} rows.</li>
 *   <li>A one-column frame becomes a single column of {@code length} rows whose cells are read
 *       from the source column at the row indices produced by {@code Vec.makeRepSeq}.</li>
 *   <li>A multi-column frame becomes a frame of {@code length} columns that cycles through the
 *       source columns in order.</li>
 * </ul>
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper used to track the evaluated frame
 * @param asts {@code asts[1]} is the value to repeat, {@code asts[2]} the target length
 * @return the repeated data wrapped in a {@code ValFrame}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Val v = asts[1].exec(env);
    long length = (long) asts[2].exec(env).getNum();

    // Scalar input: a constant column of the requested length
    if (!(v instanceof ValFrame)) {
        return new ValFrame(new Frame(Vec.makeCon(v.getNum(), length)));
    }

    final Frame fr = stk.track(v).getFrame();

    // Anything other than a single column: cycle through the source columns
    if (fr.numCols() != 1) {
        Frame cycled = new Frame();
        for (int k = 0; k < length; ++k) {
            cycled.add(Frame.defaultColName(cycled.numCols()), fr.vec(k % fr.numCols()));
        }
        return new ValFrame(cycled);
    }

    // Single column: start from a vec of row indices (presumably 0..numRows-1 repeating, per
    // makeRepSeq - confirm), then overwrite each index with the source value found at that row.
    Vec vec = Vec.makeRepSeq(length, fr.numRows());
    new MRTask() {

        @Override
        public void map(Chunk c) {
            final int rows = c._len;
            for (int row = 0; row < rows; ++row) {
                long sourceRow = (long) c.atd(row);
                c.set(row, fr.anyVec().at(sourceRow));
            }
        }
    }.doAll(vec);
    // Carry the source column's domain over to the result
    vec.setDomain(fr.anyVec().domain());
    return new ValFrame(new Frame(vec));
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) MRTask(water.MRTask) Chunk(water.fvec.Chunk)

Example 14 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstAsDate method apply.

/**
 * Parse a single categorical or string column into epoch milliseconds using a strptime-style
 * format string.
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper used to track the evaluated frame
 * @param asts {@code asts[1]} evaluates to the input frame, {@code asts[2]} to the format string
 * @return a one-column numeric frame of parsed timestamps; NA inputs stay NA
 * @throws IllegalArgumentException if the frame does not have exactly one column, if that column
 *         is neither categorical nor string, or if the format string is empty
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    // Check the column count BEFORE indexing into vecs(): a zero-column frame must produce the
    // argument error below rather than an ArrayIndexOutOfBoundsException.
    if (fr.vecs().length != 1)
        throw new IllegalArgumentException("as.Date requires a single column of factors or strings");
    Vec vec = fr.vecs()[0];
    if (!(vec.isCategorical() || vec.isString()))
        throw new IllegalArgumentException("as.Date requires a single column of factors or strings");
    final String format = asts[2].exec(env).getStr();
    if (format.isEmpty())
        throw new IllegalArgumentException("as.Date requires a non-empty format string");
    // check the format string more?
    final String[] dom = vec.domain();
    // A column without a domain must be a string column; otherwise values are looked up in dom
    final boolean isStr = dom == null && vec.isString();
    assert isStr || dom != null : "as.Date error: domain is null, but vec is not String";
    Frame fr2 = new MRTask() {

        // Rebuilt in setupLocal() instead of being shipped with the task
        private transient DateTimeFormatter _fmt;

        @Override
        public void setupLocal() {
            // done on each node in lieu of rewriting DateTimeFormatter as Iced
            _fmt = ParseTime.forStrptimePattern(format).withZone(ParseTime.getTimezone());
        }

        @Override
        public void map(Chunk c, NewChunk nc) {
            String date;
            // Scratch buffer reused for every string cell read in this chunk
            BufferedString tmpStr = new BufferedString();
            for (int i = 0; i < c._len; ++i) {
                if (!c.isNA(i)) {
                    if (isStr)
                        date = c.atStr(tmpStr, i).toString();
                    else
                        // categorical: the cell holds an index into the domain
                        date = dom[(int) c.at8(i)];
                    nc.addNum(DateTime.parse(date, _fmt).getMillis(), 0);
                } else
                    nc.addNA();
            }
        }
    }.doAll(1, Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) Vec(water.fvec.Vec) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Example 15 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstReplaceAll method replaceAllStringCol.

/**
 * Apply a regular-expression replace-all to every value of a string column.
 *
 * @param vec the column to transform
 * @param pat the regular expression to search for
 * @param rep the replacement text, with the semantics of {@link String#replaceAll(String, String)}
 * @param ic  when true, each value is lower-cased ({@code Locale.ENGLISH}) before matching; the
 *            pattern is used as given, and the emitted value is the lower-cased one
 * @return a new string column with the replacements applied; NA values stay NA
 */
private Vec replaceAllStringCol(Vec vec, String pat, String rep, boolean ic) {
    final String pattern = pat;
    final String replacement = rep;
    final boolean ignoreCase = ic;
    return new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            // all NAs
            if (chk instanceof C0DChunk) {
                for (int i = 0; i < chk._len; i++) {
                    newChk.addNA();
                }
                return;
            }
            // NOTE: an ASCII-only fast path on CStrChunk was sketched here but is not enabled;
            // Java string methods are used for every chunk.
            BufferedString tmpStr = new BufferedString();
            // Compiled on first use and reused for the rest of the chunk: String.replaceAll would
            // recompile the same regex for every row. Compiling lazily, after the value is read,
            // keeps the original failure behavior for chunks that contain only NAs.
            java.util.regex.Pattern compiled = null;
            for (int i = 0; i < chk._len; i++) {
                if (chk.isNA(i)) {
                    newChk.addNA();
                    continue;
                }
                String value = chk.atStr(tmpStr, i).toString();
                if (ignoreCase) {
                    value = value.toLowerCase(Locale.ENGLISH);
                }
                if (compiled == null) {
                    compiled = java.util.regex.Pattern.compile(pattern);
                }
                newChk.addStr(compiled.matcher(value).replaceAll(replacement));
            }
        }
    }.doAll(new byte[] { Vec.T_STR }, vec).outputFrame().anyVec();
}
Also used : MRTask(water.MRTask) BufferedString(water.parser.BufferedString) BufferedString(water.parser.BufferedString)

Aggregations

MRTask (water.MRTask): 55, ValFrame (water.rapids.vals.ValFrame): 37, Chunk (water.fvec.Chunk): 33, Frame (water.fvec.Frame): 33, NewChunk (water.fvec.NewChunk): 23, Vec (water.fvec.Vec): 17, BufferedString (water.parser.BufferedString): 9, ValNum (water.rapids.vals.ValNum): 6, Val (water.rapids.Val): 5, AstRoot (water.rapids.ast.AstRoot): 4, AstNumList (water.rapids.ast.params.AstNumList): 4, Key (water.Key): 3, Test (org.junit.Test): 2, Futures (water.Futures): 2, AstNum (water.rapids.ast.params.AstNum): 2, AstStr (water.rapids.ast.params.AstStr): 2, AstStrList (water.rapids.ast.params.AstStrList): 2, AstGroup (water.rapids.ast.prims.mungers.AstGroup): 2, ValRow (water.rapids.vals.ValRow): 2, DataInfo (hex.DataInfo): 1