Search in sources :

Example 6 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstMatch method apply.

/**
 * Matches the values of a single categorical/string column against a set of
 * candidate values and returns the result as a one-column numeric frame.
 *
 * <p>{@code asts[1]} evaluates to the input frame, {@code asts[2]} is the set of
 * candidates (a number list, a single number, a string list or a single string)
 * and {@code asts[3]} evaluates to the number handed to the match task as the
 * "no match" value.
 *
 * @throws IllegalArgumentException if the frame is not a single categorical/string
 *         column, or if {@code asts[2]} is neither numbers nor strings
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    if ((fr.numCols() != 1) || !(fr.anyVec().isCategorical() || fr.anyVec().isString()))
        throw new IllegalArgumentException("can only match on a single categorical/string column.");
    final MRTask<?> matchTask;
    double noMatch = asts[3].exec(env).getNum();
    if (asts[2] instanceof AstNumList) {
        matchTask = new NumMatchTask(((AstNumList) asts[2]).sort().expand(), noMatch);
    } else if (asts[2] instanceof AstNum) {
        matchTask = new NumMatchTask(new double[] { asts[2].exec(env).getNum() }, noMatch);
    } else if (asts[2] instanceof AstStrList) {
        // Sort a private copy: Arrays.sort works in place and would otherwise
        // reorder the array owned by the AST node as a side effect of evaluation.
        // NOTE(review): presumably the match tasks rely on sorted input - confirm.
        String[] values = ((AstStrList) asts[2])._strs.clone();
        Arrays.sort(values);
        matchTask = fr.anyVec().isString() ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
    } else if (asts[2] instanceof AstStr) {
        String[] values = new String[] { asts[2].exec(env).getStr() };
        matchTask = fr.anyVec().isString() ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
    } else
        throw new IllegalArgumentException("Expected numbers/strings. Got: " + asts[2].getClass());
    // The task emits a single numeric output column over the input vec.
    Frame result = matchTask.doAll(Vec.T_NUM, fr.anyVec()).outputFrame();
    return new ValFrame(result);
}
Also used : AstStrList(water.rapids.ast.params.AstStrList) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) BufferedString(water.parser.BufferedString) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) AstStr(water.rapids.ast.params.AstStr) AstNumList(water.rapids.ast.params.AstNumList)

Example 7 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstSubstring method apply.

/**
 * Applies a substring operation to every column of a frame and returns the
 * transformed columns as a new frame.
 *
 * <p>{@code asts[1]} evaluates to the input frame and {@code asts[2]} to the start
 * index (negative values are clamped to 0). {@code asts[3]} gives the end index;
 * when it is an {@link AstNumList} the end index is {@code Integer.MAX_VALUE}.
 *
 * @throws IllegalArgumentException if any column is neither string nor categorical
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    int startIndex = (int) asts[2].exec(env).getNum();
    if (startIndex < 0)
        startIndex = 0;
    int endIndex = asts[3] instanceof AstNumList ? Integer.MAX_VALUE : (int) asts[3].exec(env).getNum();
    // Type check: validate every column up front, before any output vec is built.
    for (Vec v : fr.vecs()) {
        if (!(v.isCategorical() || v.isString()))
            // Report the type of the offending column itself rather than that of
            // an arbitrary column of the frame.
            throw new IllegalArgumentException("substring() requires a string or categorical column. " + "Received " + v.get_type_str() + ". Please convert column to a string or categorical first.");
    }
    // Transform each vec
    Vec[] nvs = new Vec[fr.numCols()];
    int i = 0;
    for (Vec v : fr.vecs()) {
        if (v.isCategorical())
            nvs[i] = substringCategoricalCol(v, startIndex, endIndex);
        else
            nvs[i] = substringStringCol(v, startIndex, endIndex);
        i++;
    }
    return new ValFrame(new Frame(nvs));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) AstNumList(water.rapids.ast.params.AstNumList)

Example 8 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class RapidsTest method astNumList_ok.

/**
 * Asserts that {@code expr} parses to an {@link AstNumList} and, when
 * {@code expected} is non-null, that its expansion equals {@code expected}
 * within a tolerance of 1e-10.
 *
 * @param expr     Rapids expression to parse
 * @param expected expected expanded values, or {@code null} to check the type only
 */
private static void astNumList_ok(String expr, double[] expected) {
    final AstRoot parsed = Rapids.parse(expr);
    assertTrue(parsed instanceof AstNumList);
    if (expected == null) {
        // Nothing more to verify: the caller only cares about the parsed type.
        return;
    }
    final double[] actual = ((AstNumList) parsed).expand();
    assertArrayEquals(expected, actual, 1e-10);
}
Also used : AstRoot(water.rapids.ast.AstRoot) AstNumList(water.rapids.ast.params.AstNumList)

Example 9 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstRectangleAssign method apply.

/**
 * Assigns a scalar (number or string) or a frame into a rectangular slice of a
 * destination frame and returns the updated destination.
 *
 * <p>{@code asts[1]} evaluates to the destination frame, {@code asts[2]} to the
 * source value, {@code asts[3]} is the column selector and {@code asts[4]} selects
 * the rows: either explicit row numbers ({@link AstNum} / {@link AstNumList}) or
 * an expression that evaluates to a frame.
 *
 * @throws IllegalArgumentException if the source is of an unsupported type
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame dst = stk.track(asts[1].exec(env)).getFrame();
    final Val source = stk.track(asts[2].exec(env));
    final AstParameter colSpec = (AstParameter) asts[3];

    // Resolve the column selector against the destination's column names.
    // For assignment an empty selection stands for "every column".
    AstNumList colList = new AstNumList(colSpec.columns(dst.names()));
    if (colList.isEmpty()) {
        colList = new AstNumList(0, dst.numCols());
    }
    // R-like expansion of the number list: negative column numbers mean exclusion.
    final int[] colIdx = AstColSlice.col_select(dst.names(), colList);

    // COW-optimized paths swap Vecs inside dst._vecs and therefore need a
    // defensive copy; update-in-place paths touch Chunks only and do not.
    // Copying unconditionally keeps both cases simple.
    dst = new Frame(dst._names, dst.vecs().clone());

    final AstRoot rowSpec = asts[4];
    final boolean explicitRows = rowSpec instanceof AstNum || rowSpec instanceof AstNumList;

    if (!explicitRows) {
        // Rows are selected by a frame produced from evaluating the row expression.
        final Frame rowMask = stk.track(rowSpec.exec(env)).getFrame();
        switch (source.type()) {
            case Val.NUM:
                assign_frame_scalar(dst, colIdx, rowMask, nanToNull(source.getNum()), env._ses);
                break;
            case Val.STR:
                assign_frame_scalar(dst, colIdx, rowMask, source.getStr(), env._ses);
                break;
            case Val.FRM:
                throw H2O.unimpl();
            default:
                throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + source.getClass());
        }
        return new ValFrame(dst);
    }

    // Explicitly named rows: wrap a single number into a list.
    AstNumList rowList;
    if (rowSpec instanceof AstNum) {
        rowList = new AstNumList(((AstNum) rowSpec).getNum());
    } else {
        rowList = (AstNumList) rowSpec;
    }
    // An empty row list stands for "every row".
    if (rowList.isEmpty()) {
        rowList = new AstNumList(0, dst.numRows());
    }
    switch (source.type()) {
        case Val.NUM:
            assign_frame_scalar(dst, colIdx, rowList, nanToNull(source.getNum()), env._ses);
            break;
        case Val.STR:
            assign_frame_scalar(dst, colIdx, rowList, source.getStr(), env._ses);
            break;
        case Val.FRM:
            assign_frame_frame(dst, colIdx, rowList, source.getFrame(), env._ses);
            break;
        default:
            throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + source.getClass());
    }
    return new ValFrame(dst);
}
Also used : ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) AstParameter(water.rapids.ast.AstParameter) AstNumList(water.rapids.ast.params.AstNumList)

Example 10 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstHist method apply.

/**
 * Computes histogram data for a single, non-constant numeric column and returns
 * it as a four-column frame named {@code breaks}, {@code counts},
 * {@code mids_true} and {@code mids}.
 *
 * <p>{@code asts[1]} evaluates to the input frame. {@code asts[2]} controls the
 * binning and may be:
 * <ul>
 *   <li>a string naming the rule ({@code sturges}, {@code rice}, {@code sqrt},
 *       {@code doane}, {@code scott}, {@code fd}; any other string falls back to
 *       {@code sturges}),</li>
 *   <li>a number list of explicit breaks, or</li>
 *   <li>a single number giving the number of breaks.</li>
 * </ul>
 *
 * <p>In the result, row 0 carries only the first break (the other three columns
 * are NA); every later row {@code r} carries break {@code r} together with the
 * count and mid-points at index {@code r - 1}.
 *
 * @throws IllegalArgumentException if the frame is not a single numeric column or
 *         the column is constant
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // stack is [ ..., ary, breaks]
    // handle the breaks
    Frame fr2;
    Frame f = stk.track(asts[1].exec(env)).getFrame();
    if (f.numCols() != 1)
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    Vec vec = f.anyVec();
    if (!vec.isNumeric())
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    //TODO Add case when vec is a constant numeric
    if (vec.isConst())
        throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");
    AstRoot a = asts[2];
    String algo = null;
    int numBreaks = -1;
    double[] breaks = null;
    if (a instanceof AstStr)
        // Locale-independent lower-casing: with the default locale, e.g. a Turkish
        // locale maps "I" to a dotless i, so "RICE" would not match "rice" below.
        algo = a.str().toLowerCase(java.util.Locale.ROOT);
    else if (a instanceof AstNumList)
        breaks = ((AstNumList) a).expand();
    else if (a instanceof AstNum)
        numBreaks = (int) a.exec(env).getNum();
    // NOTE(review): if asts[2] is none of the above, numBreaks stays -1 and is
    // passed to computeCuts below - confirm whether that case can occur.
    AstHist.HistTask t;
    double h;
    double x1 = vec.max();
    double x0 = vec.min();
    if (breaks != null)
        // Explicit breaks: no bin width / origin is supplied to the task.
        t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
    else if (algo != null) {
        switch(algo) {
            case "sturges":
                numBreaks = sturges(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "rice":
                numBreaks = rice(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "sqrt":
                numBreaks = sqrt(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "doane":
                numBreaks = doane(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "scott":
                // special bin width computation
                h = scotts_h(vec);
                numBreaks = scott(vec, h);
                break;
            case "fd":
                // special bin width computation
                h = fds_h(vec);
                numBreaks = fd(vec, h);
                break;
            default:
                // just do sturges even if junk passed in
                numBreaks = sturges(vec);
                h = (x1 - x0) / numBreaks;
        }
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    } else {
        h = (x1 - x0) / numBreaks;
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    }
    // wanna make a new frame here [breaks,counts,mids]
    final double[] brks = t._breaks;
    final long[] cnts = t._counts;
    final double[] mids_true = t._mids;
    // Geometric mid-point of each pair of adjacent breaks.
    final double[] mids = new double[t._breaks.length - 1];
    for (int i = 1; i < brks.length; ++i) mids[i - 1] = .5 * (t._breaks[i - 1] + t._breaks[i]);
    // Temporary vec with one row per break; it only drives the row layout of the output.
    Vec layoutVec = Vec.makeZero(brks.length);
    try {
        fr2 = new MRTask() {

            @Override
            public void map(Chunk[] c, NewChunk[] nc) {
                int start = (int) c[0].start();
                for (int i = 0; i < c[0]._len; ++i) {
                    nc[0].addNum(brks[i + start]);
                    // Only the very first row of the whole vec has no preceding bin.
                    // Testing the global row (i + start) rather than the chunk-local
                    // index keeps later chunks from emitting NAs on their first row.
                    if (i + start == 0) {
                        nc[1].addNA();
                        nc[2].addNA();
                        nc[3].addNA();
                    } else {
                        nc[1].addNum(cnts[(i - 1) + start]);
                        nc[2].addNum(mids_true[(i - 1) + start]);
                        nc[3].addNum(mids[(i - 1) + start]);
                    }
                }
            }
        }.doAll(4, Vec.T_NUM, new Frame(layoutVec)).outputFrame(null, new String[] { "breaks", "counts", "mids_true", "mids" }, null);
    } finally {
        // Always drop the temporary vec, even when the task fails.
        layoutVec.remove();
    }
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) Vec(water.fvec.Vec) AstStr(water.rapids.ast.params.AstStr) MRTask(water.MRTask) AstRoot(water.rapids.ast.AstRoot) AstNumList(water.rapids.ast.params.AstNumList)

Aggregations

AstNumList (water.rapids.ast.params.AstNumList): 12; ValFrame (water.rapids.vals.ValFrame): 11; Frame (water.fvec.Frame): 8; AstNum (water.rapids.ast.params.AstNum): 6; MRTask (water.MRTask): 4; Chunk (water.fvec.Chunk): 3; AstRoot (water.rapids.ast.AstRoot): 3; AstStr (water.rapids.ast.params.AstStr): 3; AstStrList (water.rapids.ast.params.AstStrList): 3; Vec (water.fvec.Vec): 2; Val (water.rapids.Val): 2; QuantileModel (hex.quantile.QuantileModel): 1; HashSet (java.util.HashSet): 1; Set (java.util.Set): 1; NewChunk (water.fvec.NewChunk): 1; BufferedString (water.parser.BufferedString): 1; AstExec (water.rapids.ast.AstExec): 1; AstFrame (water.rapids.ast.AstFrame): 1; AstParameter (water.rapids.ast.AstParameter): 1; AstId (water.rapids.ast.params.AstId): 1