Search in sources :

Example 16 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstReplaceFirst method replaceFirstStringCol.

/**
 * Replaces the first match of a regular expression in every value of a string column.
 *
 * <p>NA rows stay NA. When {@code ic} is true, each value is lower-cased with
 * {@code Locale.ENGLISH} before matching, so the emitted string is lower-cased too;
 * the pattern itself is used exactly as given.
 * NOTE(review): this is "lower-case the input", not a case-insensitive regex match —
 * confirm that callers expect this.
 *
 * @param vec column whose values are read with {@code atStr}
 * @param pat regular expression, same syntax as {@link String#replaceFirst(String, String)}
 * @param rep replacement text, same syntax as {@link String#replaceFirst(String, String)}
 * @param ic  whether to lower-case each value before matching
 * @return the single {@code Vec.T_STR} output column produced by the task
 */
private Vec replaceFirstStringCol(Vec vec, String pat, String rep, boolean ic) {
    final String pattern = pat;
    final String replacement = rep;
    final boolean ignoreCase = ic;
    return new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            if (chk instanceof C0DChunk) {
                // all NAs
                for (int i = 0; i < chk.len(); i++) {
                    newChk.addNA();
                }
                return;
            }
            // An ASCII fast path (CStrChunk.asciiReplaceFirst) was sketched here but is
            // disabled; UTF requires Java string methods for accuracy.
            BufferedString tmpStr = new BufferedString();
            // Compiled on first use and reused for the rest of the chunk; String.replaceFirst
            // would recompile the regex for every row. Kept as a local (not a task field) and
            // created lazily so a chunk with no non-NA rows never compiles the pattern.
            java.util.regex.Pattern compiled = null;
            for (int i = 0; i < chk._len; i++) {
                if (chk.isNA(i)) {
                    newChk.addNA();
                    continue;
                }
                String value = chk.atStr(tmpStr, i).toString();
                if (ignoreCase) {
                    value = value.toLowerCase(Locale.ENGLISH);
                }
                if (compiled == null) {
                    compiled = java.util.regex.Pattern.compile(pattern);
                }
                newChk.addStr(compiled.matcher(value).replaceFirst(replacement));
            }
        }
    }.doAll(new byte[] { Vec.T_STR }, vec).outputFrame().anyVec();
}
Also used : MRTask(water.MRTask) BufferedString(water.parser.BufferedString) BufferedString(water.parser.BufferedString)

Example 17 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstStrSplit method strSplitCategoricalCol.

/**
 * Splits the level string of each row of a categorical column on a regular expression
 * and writes one categorical output column per split position.
 *
 * <p>For a non-NA row, the k-th piece of its level is looked up in {@code new_domains[k]};
 * the index found is written to output column k, or NA if the piece is not present.
 * Output columns beyond the number of pieces (and all columns for an NA row) receive NA.
 *
 * @param vec        categorical column; its {@code domain()} supplies the level strings
 * @param splitRegEx regular expression passed to {@link String#split(String)}
 * @return the output columns, {@code new_domains.length} of them, typed {@code Vec.T_CAT}
 */
private Vec[] strSplitCategoricalCol(Vec vec, String splitRegEx) {
    final String[] old_domains = vec.domain();
    final String[][] new_domains = newDomains(old_domains, splitRegEx);
    final String regex = splitRegEx;
    return new MRTask() {

        @Override
        public void map(Chunk[] cs, NewChunk[] ncs) {
            Chunk c = cs[0];
            // The split pieces and their new-domain indices depend only on the level index,
            // so compute them once per level seen in this chunk instead of once per row.
            java.util.Map<Integer, int[]> codesByLevel = new java.util.HashMap<Integer, int[]>();
            for (int i = 0; i < c._len; ++i) {
                int cnt = 0;
                if (!c.isNA(i)) {
                    int idx = (int) c.at8(i);
                    int[] codes = codesByLevel.get(idx);
                    if (codes == null) {
                        String[] ss = old_domains[idx].split(regex);
                        codes = new int[ss.length];
                        for (int k = 0; k < ss.length; k++) {
                            // -1 when the piece is not a level of the k-th new domain
                            codes[k] = Arrays.asList(new_domains[k]).indexOf(ss[k]);
                        }
                        codesByLevel.put(idx, codes);
                    }
                    for (int code : codes) {
                        if (code == -1) {
                            ncs[cnt++].addNA();
                        } else {
                            ncs[cnt++].addNum(code);
                        }
                    }
                }
                // Pad the remaining output columns for this row with NA.
                for (; cnt < ncs.length; ++cnt) {
                    ncs[cnt].addNA();
                }
            }
        }
    }.doAll(new_domains.length, Vec.T_CAT, new Frame(vec)).outputFrame(null, null, new_domains).vecs();
}
Also used : ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask) BufferedString(water.parser.BufferedString)

Example 18 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstSetTimeZone method apply.

/**
 * Reads a time zone id from {@code asts[1]}, rejects it unless it is one of
 * {@code DateTimeZone.getAvailableIDs()}, and then runs a task via {@code doAllNodes()}
 * whose {@code setupLocal()} calls {@code ParseTime.setTimezone} with that id.
 *
 * @return a {@code ValNum} wrapping {@code Double.NaN}
 * @throws IllegalArgumentException if the id is not among the available ids
 */
@Override
public ValNum apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final String zoneId = asts[1].exec(env).getStr();
    final boolean known = DateTimeZone.getAvailableIDs().contains(zoneId);
    if (!known) {
        throw new IllegalArgumentException("Unacceptable timezone " + zoneId + " given.  For a list of acceptable names, use listTimezone().");
    }
    new MRTask() {

        @Override
        public void setupLocal() {
            ParseTime.setTimezone(zoneId);
        }
    }.doAllNodes();
    return new ValNum(Double.NaN);
}
Also used : MRTask(water.MRTask) ValNum(water.rapids.vals.ValNum)

Example 19 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstBinOp method vec_op_frame.

/**
 * Applies {@code op(vecValue, frameValue)} row by row between a single column and every
 * column of a frame, producing a numeric frame with the same column names as {@code fr}.
 *
 * <p>The column is appended as the last column of a copy of {@code fr} so that the task
 * sees it as the final input chunk; the result is then passed through
 * {@code cleanCategorical}.
 *
 * @param vec left operand column
 * @param fr  frame supplying the right operand columns
 */
private ValFrame vec_op_frame(Vec vec, Frame fr) {
    // Already checked for same rows, non-zero frame
    Frame withVec = new Frame(fr);
    withVec.add("", vec);
    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            final int outCols = cress.length;
            assert outCols == chks.length - 1;
            // The appended column is the last input chunk.
            final Chunk left = chks[outCols];
            final int rows = left._len;
            for (int col = 0; col < outCols; col++) {
                final Chunk right = chks[col];
                final NewChunk out = cress[col];
                int row = 0;
                while (row < rows) {
                    out.addNum(op(left.atd(row), right.atd(row)));
                    row++;
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, withVec).outputFrame(fr._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(fr, result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 20 with MRTask

use of water.MRTask in project h2o-3 by h2oai.

the class AstBinOp method frame_op_row.

/**
 * Applies {@code op(frameValue, rowValue)} between every cell of a frame and the
 * matching column of a one-row frame, producing a numeric frame with the column names
 * of {@code lf}; the result is then passed through {@code cleanCategorical}.
 *
 * <p>Row values are read from row 0 of each column of {@code row}; columns that are
 * neither numeric nor time contribute {@code NaN}. String columns of {@code lf} yield
 * {@code NaN} for every row.
 *
 * @param lf  left operand frame
 * @param row frame whose first row supplies the right operand for each column
 */
private ValFrame frame_op_row(Frame lf, Frame row) {
    final double[] rawRow = new double[row.numCols()];
    for (int i = 0; i < rawRow.length; ++i) {
        // is numberlike, if not then NaN
        rawRow[i] = row.vec(i).isNumeric() || row.vec(i).isTime() ? row.vec(i).at(0) : Double.NaN;
    }
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            for (int c = 0; c < cress.length; c++) {
                Chunk clf = chks[c];
                NewChunk cres = cress[c];
                // The column type is the same for every row of the chunk, so test it once
                // per column rather than once per row.
                if (clf.vec().isString()) {
                    // TODO: improve
                    for (int r = 0; r < clf._len; ++r) {
                        cres.addNum(Double.NaN);
                    }
                } else {
                    for (int r = 0; r < clf._len; ++r) {
                        cres.addNum(op(clf.atd(r), rawRow[c]));
                    }
                }
            }
        }
    }.doAll(lf.numCols(), Vec.T_NUM, lf).outputFrame(lf._names, null);
    return cleanCategorical(lf, res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Aggregations

MRTask (water.MRTask): 55, ValFrame (water.rapids.vals.ValFrame): 37, Chunk (water.fvec.Chunk): 33, Frame (water.fvec.Frame): 33, NewChunk (water.fvec.NewChunk): 23, Vec (water.fvec.Vec): 17, BufferedString (water.parser.BufferedString): 9, ValNum (water.rapids.vals.ValNum): 6, Val (water.rapids.Val): 5, AstRoot (water.rapids.ast.AstRoot): 4, AstNumList (water.rapids.ast.params.AstNumList): 4, Key (water.Key): 3, Test (org.junit.Test): 2, Futures (water.Futures): 2, AstNum (water.rapids.ast.params.AstNum): 2, AstStr (water.rapids.ast.params.AstStr): 2, AstStrList (water.rapids.ast.params.AstStrList): 2, AstGroup (water.rapids.ast.prims.mungers.AstGroup): 2, ValRow (water.rapids.vals.ValRow): 2, DataInfo (hex.DataInfo): 1