Search in sources :

Example 1 with AstNum

use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.

the class AstRowSlice method apply.

/**
 * Row-slices the frame produced by {@code asts[1]} using the selector in {@code asts[2]}.
 *
 * <p>Supported selectors, as handled below:
 * <ul>
 *   <li>{@code AstNumList}: a sorted inclusion list/range of non-negative row indices, or a
 *       list of negative values meaning exclusion (row {@code -k} excludes index {@code k - 1});</li>
 *   <li>{@code AstNum}: a single row, delegated to {@code Frame.deepSlice(long[], null)};</li>
 *   <li>{@code AstExec} / {@code AstId}: evaluated to a frame that must have exactly one column,
 *       delegated to {@code Frame.deepSlice(Frame, null)}.</li>
 * </ul>
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track intermediate values
 * @param asts {@code asts[1]} is the source frame, {@code asts[2]} the row selector
 * @return the sliced frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if an inclusion list is unsorted, an index is outside
 *         {@code 0 .. nrows-1}, negative and positive indices are mixed, the predicate has more
 *         than one column, or the selector type is unsupported
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Frame returningFrame;
    long nrows = fr.numRows();
    if (asts[2] instanceof AstNumList) {
        final AstNumList nums = (AstNumList) asts[2];
        if (!nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
            throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");
        // Only explicit lists and negative selections are expanded; other selections stay
        // in compact form (rows == null) and are tested with nums.has(...) below.
        long[] rows = (nums._isList || nums.min() < 0) ? nums.expand8Sort() : null;
        if (rows != null) {
            if (rows.length == 0) {
                // Empty inclusion list: nothing to validate, the task below emits no rows.
            } else if (rows[0] >= 0) {
                // Positive (inclusion) list: the largest index must be a valid row, i.e.
                // strictly less than nrows, as the error message states.
                if (rows[rows.length - 1] >= nrows)
                    throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
            } else {
                // Negative (exclusion) list
                if (rows[rows.length - 1] >= 0)
                    throw new IllegalArgumentException("Cannot mix negative and positive row selection");
                // Invert the list to make a positive list, ignoring out-of-bounds values
                BitSet bs = new BitSet((int) nrows);
                for (long row : rows) {
                    // The positive index; range-checked as a long BEFORE narrowing so that a
                    // huge negative value cannot wrap around into a valid int index.
                    long idx = -row - 1;
                    if (idx >= 0 && idx < nrows)
                        // Mark row as EXCLUDED
                        bs.set((int) idx);
                }
                rows = new long[(int) nrows - bs.cardinality()];
                for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1)) rows[j++] = i;
            }
        }
        final long[] ls = rows;
        returningFrame = new MRTask() {

            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                if (nums.cnt() == 0)
                    return;
                if (ls != null && ls.length == 0)
                    return;
                // This chunk holds the global rows [start, end).
                long start = cs[0].start();
                long end = start + cs[0]._len;
                // exclusive max to inclusive max when stride == 1
                long min = ls == null ? (long) nums.min() : ls[0], max = ls == null ? (long) nums.max() - 1 : ls[ls.length - 1];
                // Skip the chunk entirely when the selection lies wholly before or after it.
                if (!(max < start || min > end)) {
                    // Begin scanning at the first row that can possibly be selected.
                    long startOffset = (min > start ? min : start);
                    for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) {
                        if ((ls == null && nums.has(start + i)) || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) {
                            // Copy the selected row, column by column, using the add-method
                            // that matches the chunk's storage type.
                            for (int c = 0; c < cs.length; ++c) {
                                if (cs[c] instanceof CStrChunk)
                                    ncs[c].addStr(cs[c], i);
                                else if (cs[c] instanceof C16Chunk)
                                    ncs[c].addUUID(cs[c], i);
                                else if (cs[c].isNA(i))
                                    ncs[c].addNA();
                                else
                                    ncs[c].addNum(cs[c].atd(i));
                            }
                        }
                    }
                }
            }
        }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
    } else if ((asts[2] instanceof AstNum)) {
        long[] rows = new long[] { (long) (((AstNum) asts[2]).getNum()) };
        returningFrame = fr.deepSlice(rows, null);
    } else if ((asts[2] instanceof AstExec) || (asts[2] instanceof AstId)) {
        Frame predVec = stk.track(asts[2].exec(env)).getFrame();
        if (predVec.numCols() != 1)
            throw new IllegalArgumentException("Conditional Row Slicing Expression evaluated to " + predVec.numCols() + " columns.  Must be a boolean Vec.");
        returningFrame = fr.deepSlice(predVec, null);
    } else
        throw new IllegalArgumentException("Row slicing requires a number-list as the last argument, but found a " + asts[2].getClass());
    return new ValFrame(returningFrame);
}
Also used : ValFrame(water.rapids.vals.ValFrame) AstExec(water.rapids.ast.AstExec) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) AstId(water.rapids.ast.params.AstId) AstNumList(water.rapids.ast.params.AstNumList)

Example 2 with AstNum

use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.

the class AstMatch method apply.

/**
 * Matches the values of a single categorical/string column against a set of numbers or strings.
 *
 * <p>{@code asts[1]} is the frame to match on, {@code asts[2]} the values to look for (a number
 * list, a single number, a string list or a single string) and {@code asts[3]} the number
 * passed to the match task as its "no match" value.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track intermediate values
 * @param asts primitive arguments as described above
 * @return a {@code ValFrame} wrapping the numeric output frame of the match task
 * @throws IllegalArgumentException if the frame is not a single categorical/string column, or
 *         {@code asts[2]} is not one of the supported value types
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    if ((fr.numCols() != 1) || !(fr.anyVec().isCategorical() || fr.anyVec().isString()))
        throw new IllegalArgumentException("can only match on a single categorical/string column.");
    // Validated above: exactly one column, so this is the column being matched.
    final Vec vec = fr.anyVec();
    final MRTask<?> matchTask;
    double noMatch = asts[3].exec(env).getNum();
    if (asts[2] instanceof AstNumList) {
        matchTask = new NumMatchTask(((AstNumList) asts[2]).sort().expand(), noMatch);
    } else if (asts[2] instanceof AstNum) {
        matchTask = new NumMatchTask(new double[] { asts[2].exec(env).getNum() }, noMatch);
    } else if (asts[2] instanceof AstStrList) {
        // Sort a private copy: sorting _strs in place would reorder the AST node's own
        // array as a side effect of evaluating it.
        String[] values = ((AstStrList) asts[2])._strs.clone();
        Arrays.sort(values);
        matchTask = vec.isString() ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
    } else if (asts[2] instanceof AstStr) {
        String[] values = new String[] { asts[2].exec(env).getStr() };
        matchTask = vec.isString() ? new StrMatchTask(values, noMatch) : new CatMatchTask(values, noMatch);
    } else
        throw new IllegalArgumentException("Expected numbers/strings. Got: " + asts[2].getClass());
    Frame result = matchTask.doAll(Vec.T_NUM, vec).outputFrame();
    return new ValFrame(result);
}
Also used : AstStrList(water.rapids.ast.params.AstStrList) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) BufferedString(water.parser.BufferedString) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) AstStr(water.rapids.ast.params.AstStr) AstNumList(water.rapids.ast.params.AstNumList)

Example 3 with AstNum

use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.

the class AstRectangleAssign method apply.

/**
 * Assigns a scalar or a frame into a rectangular (columns x rows) region of a frame.
 *
 * <p>{@code asts[1]} is the destination frame, {@code asts[2]} the source value,
 * {@code asts[3]} the column selection and {@code asts[4]} the row selection. The row
 * selection is either an explicit number / number-list, or any other expression that is
 * evaluated to a frame and handed to the frame-based {@code assign_frame_scalar} overload.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track intermediate values
 * @param asts primitive arguments as described above
 * @return a {@code ValFrame} wrapping the (shallow-copied) destination frame
 * @throws IllegalArgumentException if the source value is not a number, string or frame
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame dst = stk.track(asts[1].exec(env)).getFrame();
    Val vsrc = stk.track(asts[2].exec(env));
    AstParameter colParam = (AstParameter) asts[3];

    // Resolve the column selection; for assignment an "empty" selection means "all columns".
    AstNumList colSelection = new AstNumList(colParam.columns(dst.names()));
    if (colSelection.isEmpty())
        colSelection = new AstNumList(0, dst.numCols());
    // R-like number list expansion: negative column numbers mean exclusion.
    int[] cols = AstColSlice.col_select(dst.names(), colSelection);

    // Paths that swap Vecs inside dst._vecs need a defensive copy of the Vec array, while
    // paths that update Chunks in place do not. Copying unconditionally keeps things simple.
    dst = new Frame(dst._names, dst.vecs().clone());

    final boolean explicitRows = asts[4] instanceof AstNum || asts[4] instanceof AstNumList;
    if (!explicitRows) {
        // Row selection given as an expression: evaluate it to a frame.
        Frame rowFrame = stk.track(asts[4].exec(env)).getFrame();
        switch(vsrc.type()) {
            case Val.NUM:
                assign_frame_scalar(dst, cols, rowFrame, nanToNull(vsrc.getNum()), env._ses);
                break;
            case Val.STR:
                assign_frame_scalar(dst, cols, rowFrame, vsrc.getStr(), env._ses);
                break;
            case Val.FRM:
                throw H2O.unimpl();
            default:
                throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + vsrc.getClass());
        }
        return new ValFrame(dst);
    }

    // Explicitly named row assignment: normalise a single number to a one-element list.
    AstNumList rowList;
    if (asts[4] instanceof AstNum)
        rowList = new AstNumList(((AstNum) asts[4]).getNum());
    else
        rowList = (AstNumList) asts[4];
    // Empty rows really means: all rows.
    if (rowList.isEmpty())
        rowList = new AstNumList(0, dst.numRows());
    switch(vsrc.type()) {
        case Val.NUM:
            assign_frame_scalar(dst, cols, rowList, nanToNull(vsrc.getNum()), env._ses);
            break;
        case Val.STR:
            assign_frame_scalar(dst, cols, rowList, vsrc.getStr(), env._ses);
            break;
        case Val.FRM:
            assign_frame_frame(dst, cols, rowList, vsrc.getFrame(), env._ses);
            break;
        default:
            throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + vsrc.getClass());
    }
    return new ValFrame(dst);
}
Also used : ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) AstParameter(water.rapids.ast.AstParameter) AstNumList(water.rapids.ast.params.AstNumList)

Example 4 with AstNum

use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.

the class AstHist method apply.

/**
 * Computes a histogram of a single, non-constant numeric column.
 *
 * <p>{@code asts[1]} is the frame (exactly one numeric column) and {@code asts[2]} selects the
 * binning: a string naming an algorithm ({@code "sturges"}, {@code "rice"}, {@code "sqrt"},
 * {@code "doane"}, {@code "scott"}, {@code "fd"}; anything else falls back to Sturges), a
 * number list of explicit breaks, or a single number giving the number of breaks.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track intermediate values
 * @param asts primitive arguments as described above
 * @return a {@code ValFrame} with columns {@code breaks}, {@code counts}, {@code mids_true}
 *         and {@code mids}; the first row carries only a break, the other three cells are NA
 * @throws IllegalArgumentException if the input is not a single, non-constant numeric column
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // stack is [ ..., ary, breaks]
    Frame f = stk.track(asts[1].exec(env)).getFrame();
    if (f.numCols() != 1)
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    Vec vec = f.anyVec();
    if (!vec.isNumeric())
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    //TODO Add case when vec is a constant numeric
    if (vec.isConst())
        throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");
    // handle the breaks
    AstRoot a = asts[2];
    String algo = null;
    int numBreaks = -1;
    double[] breaks = null;
    if (a instanceof AstStr)
        // Locale.ROOT: the algorithm names are ASCII keywords, so lower-casing must not depend
        // on the JVM default locale (e.g. Turkish dotless-i would break "RICE").
        algo = a.str().toLowerCase(java.util.Locale.ROOT);
    else if (a instanceof AstNumList)
        breaks = ((AstNumList) a).expand();
    else if (a instanceof AstNum)
        numBreaks = (int) a.exec(env).getNum();
    // NOTE(review): if asts[2] is none of the three types above, numBreaks stays -1 and is
    // passed to computeCuts as-is — confirm whether that case should be rejected explicitly.
    AstHist.HistTask t;
    double h;
    double x1 = vec.max();
    double x0 = vec.min();
    if (breaks != null)
        t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
    else if (algo != null) {
        switch(algo) {
            case "rice":
                numBreaks = rice(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "sqrt":
                numBreaks = sqrt(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "doane":
                numBreaks = doane(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "scott":
                // special bin width computation
                h = scotts_h(vec);
                numBreaks = scott(vec, h);
                break;
            case "fd":
                // special bin width computation
                h = fds_h(vec);
                numBreaks = fd(vec, h);
                break;
            case "sturges":
            default:
                // just do sturges even if junk passed in
                numBreaks = sturges(vec);
                h = (x1 - x0) / numBreaks;
        }
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    } else {
        h = (x1 - x0) / numBreaks;
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    }
    // wanna make a new frame here [breaks,counts,mids]
    final double[] brks = t._breaks;
    final long[] cnts = t._counts;
    final double[] mids_true = t._mids;
    // Geometric midpoint of every pair of adjacent breaks.
    final double[] mids = new double[brks.length - 1];
    for (int i = 1; i < brks.length; ++i) mids[i - 1] = .5 * (brks[i - 1] + brks[i]);
    // Temporary vec that only provides the row layout (one row per break) for the task.
    Vec layoutVec = Vec.makeZero(brks.length);
    Frame fr2;
    try {
        fr2 = new MRTask() {

            @Override
            public void map(Chunk[] c, NewChunk[] nc) {
                int start = (int) c[0].start();
                for (int i = 0; i < c[0]._len; ++i) {
                    // Global row index; the per-bin arrays are offset by one against breaks.
                    int row = i + start;
                    nc[0].addNum(brks[row]);
                    // Only the very first row of the frame has no bin to its left. Testing the
                    // global index (not the chunk-local one) keeps this correct should the
                    // layout vec ever span more than one chunk.
                    if (row == 0) {
                        nc[1].addNA();
                        nc[2].addNA();
                        nc[3].addNA();
                    } else {
                        nc[1].addNum(cnts[row - 1]);
                        nc[2].addNum(mids_true[row - 1]);
                        nc[3].addNum(mids[row - 1]);
                    }
                }
            }
        }.doAll(4, Vec.T_NUM, new Frame(layoutVec)).outputFrame(null, new String[] { "breaks", "counts", "mids_true", "mids" }, null);
    } finally {
        // Always release the temporary layout vec, even if the task fails.
        layoutVec.remove();
    }
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) Vec(water.fvec.Vec) AstStr(water.rapids.ast.params.AstStr) MRTask(water.MRTask) AstRoot(water.rapids.ast.AstRoot) AstNumList(water.rapids.ast.params.AstNumList)

Example 5 with AstNum

use of water.rapids.ast.params.AstNum in project h2o-3 by h2oai.

the class AstImpute method apply.

// (h2o.impute data col method combine_method groupby groupByFrame values)
/**
 * Imputes (fills in) missing values of a frame in place.
 *
 * <p>Arguments: {@code asts[1]} the frame, {@code asts[2]} the column index ({@code -1} means
 * all columns), {@code asts[3]} the method name, {@code asts[4]} the median combine method,
 * {@code asts[5]} the group-by columns (number, number list or string list), {@code asts[6]}
 * an optional pre-computed group-by frame ({@code "_"} means none) and {@code asts[7]} optional
 * explicit fill values.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track intermediate values
 * @param asts primitive arguments as described above
 * @return without grouping, a {@code ValNums} holding the per-column fill values; with
 *         grouping, a {@code ValFrame} wrapping the per-group imputation frame
 * @throws IllegalArgumentException if the column index is out of range, the method name is
 *         unknown, the group-by argument has an unsupported type, or a supplied group-by frame
 *         is ambiguous
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // Argument parsing and sanity checking
    // Whole frame being imputed
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    // Column within frame being imputed; -1 selects all columns, anything below is invalid
    final int col = (int) asts[2].exec(env).getNum();
    if (col < -1 || col >= fr.numCols())
        throw new IllegalArgumentException("Column not -1 or in range 0 to " + fr.numCols());
    final boolean doAllVecs = col == -1;
    final Vec vec = doAllVecs ? null : fr.vec(col);
    // Technique used for imputation
    AstRoot method = null;
    boolean ffill0 = false, bfill0 = false;
    // Locale.ROOT: the keywords are ASCII, so upper-casing must not depend on the JVM default
    // locale (a Turkish locale maps 'i' to a dotted capital and would break "median"/"ffill").
    switch(asts[3].exec(env).getStr().toUpperCase(java.util.Locale.ROOT)) {
        case "MEAN":
            method = new AstMean();
            break;
        case "MEDIAN":
            method = new AstMedian();
            break;
        case "MODE":
            method = new AstMode();
            break;
        case "FFILL":
            ffill0 = true;
            break;
        case "BFILL":
            bfill0 = true;
            break;
        default:
            throw new IllegalArgumentException("Method must be one of mean, median or mode");
    }
    // Only for median, how is the median computed on even sample sizes?
    QuantileModel.CombineMethod combine = QuantileModel.CombineMethod.valueOf(asts[4].exec(env).getStr().toUpperCase(java.util.Locale.ROOT));
    // Group-by columns.  Empty is allowed, and perfectly normal.
    AstRoot ast = asts[5];
    AstNumList by2;
    if (ast instanceof AstNumList)
        by2 = (AstNumList) ast;
    else if (ast instanceof AstNum)
        by2 = new AstNumList(((AstNum) ast).getNum());
    else if (ast instanceof AstStrList) {
        // Translate column names into column indices, kept sorted.
        String[] names = ((AstStrList) ast)._strs;
        double[] list = new double[names.length];
        int i = 0;
        for (String name : names) list[i++] = fr.find(name);
        Arrays.sort(list);
        by2 = new AstNumList(list);
    } else
        throw new IllegalArgumentException("Requires a number-list, but found a " + ast.getClass());
    Frame groupByFrame = asts[6].str().equals("_") ? null : stk.track(asts[6].exec(env)).getFrame();
    // Optional user-supplied fill values; null when not given as a number / number list.
    AstRoot vals = asts[7];
    AstNumList values;
    if (vals instanceof AstNumList)
        values = (AstNumList) vals;
    else if (vals instanceof AstNum)
        values = new AstNumList(((AstNum) vals).getNum());
    else
        values = null;
    boolean doGrpBy = !by2.isEmpty() || groupByFrame != null;
    // Compute the imputed value per-group.  Empty groups are allowed and OK.
    IcedHashMap<AstGroup.G, Freezable[]> group_impute_map;
    if (!doGrpBy) {
        // Skip the grouping work
        if (ffill0 || bfill0) {
            // TODO: forward/backward fill requires chk.previousNonNA and chk.nextNonNA style
            // methods (which may go across chunk boundaries).
            throw H2O.unimpl("No ffill or bfill imputation supported");
        } else {
            final double[] res = values == null ? new double[fr.numCols()] : values.expand();
            if (values == null) {
                // fill up res if no values supplied user, common case
                if (doAllVecs) {
                    // NOTE(review): res is zero-initialised here, so columns that are neither
                    // numeric nor categorical keep 0.0 (not NaN) and are not skipped by the
                    // task below — confirm whether they should be filled with NaN first.
                    for (int i = 0; i < res.length; ++i) if (fr.vec(i).isNumeric() || fr.vec(i).isCategorical())
                        res[i] = fr.vec(i).isNumeric() ? fr.vec(i).mean() : ArrayUtils.maxIndex(fr.vec(i).bins());
                } else {
                    // NaN marks "leave this column alone"; only the chosen column gets a value.
                    Arrays.fill(res, Double.NaN);
                    if (method instanceof AstMean)
                        res[col] = vec.mean();
                    if (method instanceof AstMedian)
                        res[col] = AstMedian.median(new Frame(vec), combine);
                    if (method instanceof AstMode)
                        res[col] = AstMode.mode(vec);
                }
            }
            new MRTask() {

                @Override
                public void map(Chunk[] cs) {
                    int len = cs[0]._len;
                    // run down each chk, overwriting NAs in every column that has a fill value
                    for (int c = 0; c < cs.length; ++c) if (!Double.isNaN(res[c]))
                        for (int row = 0; row < len; ++row) if (cs[c].isNA(row))
                            cs[c].set(row, res[c]);
                }
            }.doAll(fr);
            return new ValNums(res);
        }
    } else {
        Frame imputes = groupByFrame;
        if (imputes == null) {
            // Build and run a GroupBy command
            AstGroup ast_grp = new AstGroup();
            // simple case where user specified a column... col == -1 means do all columns
            if (doAllVecs) {
                // Sized for the worst case: one (agg, column, na-handling) triple per
                // non-group-by column.
                AstRoot[] aggs = new AstRoot[(int) (3 + 3 * (fr.numCols() - by2.cnt()))];
                aggs[0] = ast_grp;
                aggs[1] = new AstFrame(fr);
                aggs[2] = by2;
                int c = 3;
                for (int i = 0; i < fr.numCols(); ++i) {
                    if (!by2.has(i) && (fr.vec(i).isCategorical() || fr.vec(i).isNumeric())) {
                        aggs[c] = fr.vec(i).isNumeric() ? new AstMean() : new AstMode();
                        aggs[c + 1] = new AstNumList(i, i + 1);
                        aggs[c + 2] = new AstStr("rm");
                        c += 3;
                    }
                }
                // Columns that are neither numeric nor categorical are skipped above, which
                // would leave trailing null slots; trim them so the group-by never sees nulls.
                if (c < aggs.length)
                    aggs = Arrays.copyOf(aggs, c);
                imputes = ast_grp.apply(env, stk, aggs).getFrame();
            } else
                imputes = ast_grp.apply(env, stk, new AstRoot[] { ast_grp, new AstFrame(fr), by2,
                method, new AstNumList(col, col + 1), new AstStr("rm") }).getFrame();
        }
        // >2 makes it ambiguous which columns are groupby cols and which are aggs, throw IAE
        if (by2.isEmpty() && imputes.numCols() > 2)
            throw new IllegalArgumentException("Ambiguous group-by frame. Supply the `by` columns to proceed.");
        final int[] bycols0 = ArrayUtils.seq(0, Math.max((int) by2.cnt(), 1));
        group_impute_map = new Gather(by2.expand4(), bycols0, fr.numCols(), col).doAll(imputes)._group_impute_map;
        // Now walk over the data, replace NAs with the imputed results
        final IcedHashMap<AstGroup.G, Freezable[]> final_group_impute_map = group_impute_map;
        if (by2.isEmpty()) {
            // Group-by frame supplied without `by` columns: derive them by looking up the
            // names of all but the last column of the imputation frame in the data frame.
            int[] byCols = new int[imputes.numCols() - 1];
            for (int i = 0; i < byCols.length; ++i) byCols[i] = fr.find(imputes.name(i));
            by2 = new AstNumList(byCols);
        }
        final int[] bycols = by2.expand4();
        new MRTask() {

            @Override
            public void map(Chunk[] cs) {
                Set<Integer> _bycolz = new HashSet<>();
                for (int b : bycols) _bycolz.add(b);
                AstGroup.G g = new AstGroup.G(bycols.length, null);
                // For every NA in a non-group-by column, look up the row's group and write
                // that group's imputed value for the column.
                // NOTE(review): the map lookup is dereferenced without a null check — confirm
                // every group present in the data is guaranteed to be in the map.
                for (int row = 0; row < cs[0]._len; row++) for (int c = 0; c < cs.length; ++c) if (!_bycolz.contains(c))
                    if (cs[c].isNA(row))
                        cs[c].set(row, ((IcedDouble) final_group_impute_map.get(g.fill(row, cs, bycols))[c])._val);
            }
        }.doAll(fr);
        return new ValFrame(imputes);
    }
}
Also used : AstStrList(water.rapids.ast.params.AstStrList) IcedDouble(water.util.IcedDouble) Frame(water.fvec.Frame) ValFrame(water.rapids.vals.ValFrame) AstFrame(water.rapids.ast.AstFrame) HashSet(java.util.HashSet) Set(java.util.Set) ValFrame(water.rapids.vals.ValFrame) AstGroup(water.rapids.ast.prims.mungers.AstGroup) MRTask(water.MRTask) AstStr(water.rapids.ast.params.AstStr) AstRoot(water.rapids.ast.AstRoot) AstNumList(water.rapids.ast.params.AstNumList) Chunk(water.fvec.Chunk) QuantileModel(hex.quantile.QuantileModel) AstMedian(water.rapids.ast.prims.reducers.AstMedian) AstNum(water.rapids.ast.params.AstNum) AstFrame(water.rapids.ast.AstFrame) Vec(water.fvec.Vec) ValNums(water.rapids.vals.ValNums) AstMean(water.rapids.ast.prims.reducers.AstMean)

Aggregations

AstNum (water.rapids.ast.params.AstNum)7 ValFrame (water.rapids.vals.ValFrame)7 Frame (water.fvec.Frame)6 AstNumList (water.rapids.ast.params.AstNumList)6 AstStr (water.rapids.ast.params.AstStr)3 AstStrList (water.rapids.ast.params.AstStrList)3 MRTask (water.MRTask)2 Chunk (water.fvec.Chunk)2 Vec (water.fvec.Vec)2 AstParameter (water.rapids.ast.AstParameter)2 AstRoot (water.rapids.ast.AstRoot)2 QuantileModel (hex.quantile.QuantileModel)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 NewChunk (water.fvec.NewChunk)1 BufferedString (water.parser.BufferedString)1 Val (water.rapids.Val)1 AstExec (water.rapids.ast.AstExec)1 AstFrame (water.rapids.ast.AstFrame)1 AstId (water.rapids.ast.params.AstId)1