Search in sources :

Example 1 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstDdply method apply.

/**
 * Splits a frame into groups and runs a function on each group.
 *
 * <p>{@code asts[1]} is evaluated to the input frame, {@code asts[2]} is checked as the
 * group-by column list (via {@code AstGroup.check}) and {@code asts[3]} is evaluated to the
 * function to run per group. The work happens in three passes: find the groups, build one
 * Vec of row numbers per group, then run the function on every group through a
 * {@code RemoteRapids} call. The output frame has one row per group: the group key columns
 * followed by result columns named {@code ddply_C1}, {@code ddply_C2}, ...
 *
 * @param env  execution environment; its {@code _scope} is handed to every remote task
 * @param stk  stack helper used to track the input frame
 * @param asts argument ASTs as described above
 * @return the per-group results wrapped in a {@code ValFrame}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    int ncols = fr.numCols();
    AstNumList groupby = AstGroup.check(ncols, asts[2]);
    int[] gbCols = groupby.expand4();
    AstRoot fun = asts[3].exec(env).getFun();
    // Current execution scope; needed to lookup variables
    AstFunction scope = env._scope;
    // Pass 1: Find all the groups (and count rows-per-group)
    IcedHashMap<AstGroup.G, String> gss = AstGroup.doGroups(fr, gbCols, AstGroup.aggNRows());
    final AstGroup.G[] grps = gss.keySet().toArray(new AstGroup.G[gss.size()]);
    // Sort the groups by their key values so that group numbering and output
    // row order do not depend on hash-map iteration order.
    // Positions into G._gs to order by; presumably 0..gbCols.length-1 (TODO confirm
    // the AstNumList(0, n) constructor semantics).
    final int[] ordCols = new AstNumList(0, gbCols.length).expand4();
    Arrays.sort(grps, new java.util.Comparator<AstGroup.G>() {

        // Compare 2 groups.  Iterate down _gs over the positions in ordCols and
        // stop at the first position where the two keys differ.  NaN is treated
        // as least; two NaNs at the same position fall through to the next one.
        @Override
        public int compare(AstGroup.G g1, AstGroup.G g2) {
            for (int i : ordCols) {
                if (Double.isNaN(g1._gs[i]) && !Double.isNaN(g2._gs[i]))
                    return -1;
                if (!Double.isNaN(g1._gs[i]) && Double.isNaN(g2._gs[i]))
                    return 1;
                if (g1._gs[i] != g2._gs[i])
                    return g1._gs[i] < g2._gs[i] ? -1 : 1;
            }
            return 0;
        }

        // I do not believe sort() calls equals() at this time, so no need to implement
        @Override
        public boolean equals(Object o) {
            throw H2O.unimpl();
        }
    });
    // Uniquely number the groups: the sorted position is stored in _dss[0][0]
    for (int gnum = 0; gnum < grps.length; gnum++) grps[gnum]._dss[0][0] = gnum;
    // Pass 2: Build all the groups, building 1 Vec per-group, with exactly the
    // same Chunk layout, except each Chunk will be the filter rows numbers; a
    // list of the Chunk-relative row-numbers for that group in an original
    // data Chunk.  Each Vec will have a *different* number of rows.
    Vec[] vgrps = new BuildGroup(gbCols, gss).doAll(gss.size(), Vec.T_NUM, fr).close();
    // Pass 3: For each group, build a full frame for the group, run the
    // function on it and tear the frame down.
    // gather up the remote tasks...
    final RemoteRapids[] remoteTasks = new RemoteRapids[gss.size()];
    Futures fs = new Futures();
    // One RPC per group, sent to the home node of that group's Vec key
    for (int i = 0; i < remoteTasks.length; i++) fs.add(RPC.call(vgrps[i]._key.home_node(), remoteTasks[i] = new RemoteRapids(fr, vgrps[i]._key, fun, scope)));
    // Wait for every group's task before reading any _result
    fs.blockForPending();
    // Build the output!
    // The first group's result fixes the number of result columns for all groups.
    // NOTE(review): assumes at least one group; remoteTasks[0] throws
    // ArrayIndexOutOfBoundsException when gss is empty -- confirm callers never
    // pass a frame that produces no groups.
    final double[] res0 = remoteTasks[0]._result;
    String[] fcnames = new String[res0.length];
    for (int i = 0; i < res0.length; i++) fcnames[i] = "ddply_C" + (i + 1);
    MRTask mrfill = new MRTask() {

        // Fills one output row per group: group key values first, then the
        // first res0.length entries of that group's result.
        @Override
        public void map(Chunk[] c, NewChunk[] ncs) {
            int start = (int) c[0].start();
            for (int i = 0; i < c[0]._len; ++i) {
                // One Group per row
                AstGroup.G g = grps[i + start];
                int j;
                for (// The Group Key, as a row
                j = 0; // The Group Key, as a row
                j < g._gs.length; // The Group Key, as a row
                j++) ncs[j].addNum(g._gs[j]);
                double[] res = remoteTasks[i + start]._result;
                // j continues from the key columns into the result columns
                for (int a = 0; a < res0.length; a++) ncs[j++].addNum(res[a]);
            }
        }
    };
    Frame f = AstGroup.buildOutput(gbCols, res0.length, fr, fcnames, gss.size(), mrfill);
    return new ValFrame(f);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) AstNumList(water.rapids.ast.params.AstNumList) water.util(water.util)

Example 2 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstGroup method apply.

/**
 * Runs a group-by with one or more aggregates.
 *
 * <p>{@code asts[1]} is evaluated to the input frame and {@code asts[2]} is checked as the
 * group-by column list. Everything from {@code asts[3]} onwards is read in triples of
 * (aggregate function, aggregate column, NA handling). The output frame has one row per
 * group: the group key columns followed by one column per aggregate, named
 * {@code <function>_<column name>}.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the input frame
 * @param asts argument ASTs as described above
 * @return the grouped and aggregated frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if an aggregate names more than one column, if
 *         {@code mode} is requested on a non-categorical column, or if a function or
 *         NA-handling name does not match an enum constant
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    int ncols = fr.numCols();
    AstNumList groupby = check(ncols, asts[2]);
    final int[] gbCols = groupby.expand4();
    // Count of aggregates; knock off the first 3 ASTs (GB data [group-by]),
    // then count by triples.
    int naggs = (asts.length - 3) / 3;
    final AGG[] aggs = new AGG[naggs];
    for (int idx = 3; idx < asts.length; idx += 3) {
        Val v = asts[idx].exec(env);
        // The aggregate may be given either as a function value or as its name
        String fn = v instanceof ValFun ? v.getFun().str() : v.getStr();
        FCN fcn = FCN.valueOf(fn);
        AstNumList col = check(ncols, asts[idx + 1]);
        if (col.cnt() != 1)
            throw new IllegalArgumentException("Group-By functions take only a single column");
        // Aggregate column
        int agg_col = (int) col.min();
        if (fcn == FCN.mode && !fr.vec(agg_col).isCategorical())
            throw new IllegalArgumentException("Mode only allowed on categorical columns");
        // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM
        // default locale (a Turkish locale would turn "ignore" into "İGNORE").
        NAHandling na = NAHandling.valueOf(asts[idx + 2].exec(env).getStr().toUpperCase(java.util.Locale.ROOT));
        aggs[(idx - 3) / 3] = new AGG(fcn, agg_col, na, (int) fr.vec(agg_col).max() + 1);
    }
    // do the group by work now
    IcedHashMap<G, String> gss = doGroups(fr, gbCols, aggs);
    final G[] grps = gss.keySet().toArray(new G[gss.size()]);
    // Sort the groups by their key values so output row order is deterministic
    if (gbCols.length > 0) {
        Arrays.sort(grps, new java.util.Comparator<G>() {

            // Compare 2 groups.  Iterate down _gs and stop at the first position
            // where the two keys differ.  NaN is treated as least.
            @Override
            public int compare(G g1, G g2) {
                for (int i = 0; i < gbCols.length; i++) {
                    if (Double.isNaN(g1._gs[i]) && !Double.isNaN(g2._gs[i]))
                        return -1;
                    if (!Double.isNaN(g1._gs[i]) && Double.isNaN(g2._gs[i]))
                        return 1;
                    if (g1._gs[i] != g2._gs[i])
                        return g1._gs[i] < g2._gs[i] ? -1 : 1;
                }
                return 0;
            }

            // I do not believe sort() calls equals() at this time, so no need to implement
            @Override
            public boolean equals(Object o) {
                throw H2O.unimpl();
            }
        });
    }
    // Build the output!
    String[] fcnames = new String[aggs.length];
    for (int i = 0; i < aggs.length; i++) fcnames[i] = aggs[i]._fcn.toString() + "_" + fr.name(aggs[i]._col);
    MRTask mrfill = new MRTask() {

        // Fills one output row per group: group key values first, then each
        // aggregate's post-processed value.
        @Override
        public void map(Chunk[] c, NewChunk[] ncs) {
            int start = (int) c[0].start();
            for (int i = 0; i < c[0]._len; ++i) {
                // One Group per row
                G g = grps[i + start];
                int j;
                // The Group Key, as a row
                for (j = 0; j < g._gs.length; j++) ncs[j].addNum(g._gs[j]);
                // j continues from the key columns into the aggregate columns
                for (int a = 0; a < aggs.length; a++) ncs[j++].addNum(aggs[a]._fcn.postPass(g._dss[a], g._ns[a]));
            }
        }
    };
    Frame f = buildOutput(gbCols, naggs, fr, fcnames, grps.length, mrfill);
    return new ValFrame(f);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValFrame(water.rapids.vals.ValFrame) ValFun(water.rapids.vals.ValFun) MRTask(water.MRTask) AstNumList(water.rapids.ast.params.AstNumList)

Example 3 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstGroupedPermute method apply.

/**
 * Builds a grouped permutation of a frame.
 *
 * <p>Arguments are read in order: {@code asts[1]} the frame, {@code asts[2]} the column to
 * permute, {@code asts[3]} the group-by column list, {@code asts[4]} the permute-by column
 * and {@code asts[5]} the column to keep. The output carries the group-by columns followed
 * by the four columns {@code In}, {@code Out}, {@code InAmnt} and {@code OutAmnt}.
 * Elapsed time of the two work phases is logged at info level.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the input frame
 * @param asts argument ASTs as described above
 * @return the permuted output wrapped in a {@code ValFrame}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    final int permCol = (int) asts[2].exec(env).getNum();
    final int[] gbCols = AstGroup.check(fr.numCols(), asts[3]).expand4();
    final int permuteBy = (int) asts[4].exec(env).getNum();
    final int keepCol = (int) asts[5].exec(env).getNum();

    // Output column names: group-by columns first, then the four fixed columns
    final int nGroupCols = gbCols.length;
    final String[] names = new String[nGroupCols + 4];
    for (int c = 0; c < nGroupCols; c++) {
        names[c] = fr.name(gbCols[c]);
    }
    names[nGroupCols] = "In";
    names[nGroupCols + 1] = "Out";
    names[nGroupCols + 2] = "InAmnt";
    names[nGroupCols + 3] = "OutAmnt";

    // Output domains follow the same layout: In/Out take the permuted column's
    // domain, InAmnt/OutAmnt take the kept column's domain
    final String[][] domains = new String[names.length][];
    for (int c = 0; c < nGroupCols; c++) {
        domains[c] = fr.domains()[gbCols[c]];
    }
    domains[nGroupCols] = fr.domains()[permCol];
    domains[nGroupCols + 1] = fr.domains()[permCol];
    domains[nGroupCols + 2] = fr.domains()[keepCol];
    domains[nGroupCols + 3] = fr.domains()[keepCol];

    // Phase 1: build the groups across the frame
    long phaseStart = System.currentTimeMillis();
    BuildGroups t = new BuildGroups(gbCols, permuteBy, permCol, keepCol).doAll(fr);
    double buildSeconds = (System.currentTimeMillis() - phaseStart) / 1000.;
    Log.info("Elapsed time: " + buildSeconds + "s");

    // Phase 2: smash the groups together, waiting for the submitted task to finish
    phaseStart = System.currentTimeMillis();
    SmashGroups sg = new SmashGroups(t._grps);
    H2O.submitTask(sg).join();
    double smashSeconds = (System.currentTimeMillis() - phaseStart) / 1000.;
    Log.info("Elapsed time: " + smashSeconds + "s");

    double[][][] results = sg._res.values().toArray(new double[0][][]);
    return new ValFrame(buildOutput(results, names, domains));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) AstNumList(water.rapids.ast.params.AstNumList)

Example 4 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstRowSlice method apply.

/**
 * Selects rows from a frame.
 *
 * <p>{@code asts[1]} is evaluated to the frame; {@code asts[2]} chooses the rows and may be:
 * <ul>
 *   <li>an {@code AstNumList}: non-negative entries select rows to keep (0-based), negative
 *       entries select rows to drop, where {@code -1} refers to row 0;</li>
 *   <li>an {@code AstNum}: a single row, delegated to {@code Frame.deepSlice};</li>
 *   <li>an {@code AstExec} or {@code AstId}: evaluated to a single-column predicate frame
 *       and delegated to {@code Frame.deepSlice}.</li>
 * </ul>
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track evaluated frames
 * @param asts argument ASTs as described above
 * @return the selected rows wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if a non-negative selection is unsorted, if an explicit
 *         row index is outside {@code 0..nrows-1}, if negative and non-negative indices are
 *         mixed, if the predicate has more than one column, or if {@code asts[2]} is of an
 *         unsupported kind
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Frame returningFrame;
    long nrows = fr.numRows();
    if (asts[2] instanceof AstNumList) {
        final AstNumList nums = (AstNumList) asts[2];
        if (!nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
            throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");
        // Explicit lists and negative selections are expanded into a sorted array;
        // otherwise rows stays null and nums itself is queried per row.
        long[] rows = (nums._isList || nums.min() < 0) ? nums.expand8Sort() : null;
        if (rows != null) {
            if (rows.length == 0) {
                // Empty inclusion list: nothing to validate
            } else if (rows[0] >= 0) {
                // Positive (inclusion) list.  Rows are 0-based, so the largest valid
                // index is nrows - 1; the array is sorted, so checking the last entry
                // is enough.
                if (rows[rows.length - 1] >= nrows)
                    throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
            } else {
                // Negative (exclusion) list
                if (rows[rows.length - 1] >= 0)
                    throw new IllegalArgumentException("Cannot mix negative and postive row selection");
                // Invert the list to make a positive list, ignoring out-of-bounds values
                BitSet bs = new BitSet((int) nrows);
                for (long row : rows) {
                    // The positive index: -1 maps to row 0, -2 to row 1, ...
                    int idx = (int) (-row - 1);
                    if (idx >= 0 && idx < nrows)
                        // Mark the row to EXCLUDE
                        bs.set(idx);
                }
                // Every clear bit is a row to keep
                rows = new long[(int) nrows - bs.cardinality()];
                for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1)) rows[j++] = i;
            }
        }
        final long[] ls = rows;
        returningFrame = new MRTask() {

            // Copies every selected row of this chunk into the output chunks.
            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                if (nums.cnt() == 0)
                    return;
                if (ls != null && ls.length == 0)
                    return;
                long start = cs[0].start();
                long end = start + cs[0]._len;
                // Smallest and largest selected row.  For the range case nums.max()
                // is converted from exclusive max to inclusive max (stride == 1).
                long min = ls == null ? (long) nums.min() : ls[0];
                long max = ls == null ? (long) nums.max() - 1 : ls[ls.length - 1];
                // Skip the chunk when the selection lies entirely before or after it
                if (!(max < start || min > end)) {
                    // Begin at the first selected row if it falls inside this chunk,
                    // otherwise at the start of the chunk
                    long startOffset = (min > start ? min : start);
                    for (int i = (int) (startOffset - start); i < cs[0]._len; ++i) {
                        if ((ls == null && nums.has(start + i)) || (ls != null && Arrays.binarySearch(ls, start + i) >= 0)) {
                            for (int c = 0; c < cs.length; ++c) {
                                if (cs[c] instanceof CStrChunk)
                                    ncs[c].addStr(cs[c], i);
                                else if (cs[c] instanceof C16Chunk)
                                    ncs[c].addUUID(cs[c], i);
                                else if (cs[c].isNA(i))
                                    ncs[c].addNA();
                                else
                                    ncs[c].addNum(cs[c].atd(i));
                            }
                        }
                    }
                }
            }
        }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
    } else if ((asts[2] instanceof AstNum)) {
        long[] rows = new long[] { (long) (((AstNum) asts[2]).getNum()) };
        returningFrame = fr.deepSlice(rows, null);
    } else if ((asts[2] instanceof AstExec) || (asts[2] instanceof AstId)) {
        Frame predVec = stk.track(asts[2].exec(env)).getFrame();
        if (predVec.numCols() != 1)
            throw new IllegalArgumentException("Conditional Row Slicing Expression evaluated to " + predVec.numCols() + " columns.  Must be a boolean Vec.");
        returningFrame = fr.deepSlice(predVec, null);
    } else
        throw new IllegalArgumentException("Row slicing requires a number-list as the last argument, but found a " + asts[2].getClass());
    return new ValFrame(returningFrame);
}
Also used : ValFrame(water.rapids.vals.ValFrame) AstExec(water.rapids.ast.AstExec) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) AstId(water.rapids.ast.params.AstId) AstNumList(water.rapids.ast.params.AstNumList)

Example 5 with AstNumList

use of water.rapids.ast.params.AstNumList in project h2o-3 by h2oai.

the class AstScale method apply.

/**
 * Centers and scales every column of a frame in place.
 *
 * <p>Each cell becomes {@code (value - mean) * mult}. The per-column means come from
 * {@code asts[2]} and the per-column multipliers from {@code asts[3]}. Either may be a
 * number list with one entry per column, or a flag: {@code 0} leaves the data unchanged
 * (means of zero, multipliers of one) and {@code 1} uses {@code fr.means()} or
 * {@code fr.mults()}. The multipliers may also be supplied as a frame, in which case its
 * {@code anyVec()} is converted with {@code toArray}.
 *
 * @param env  execution environment used to evaluate the argument ASTs
 * @param stk  stack helper used to track the input frame
 * @param asts argument ASTs as described above
 * @return the same frame, updated in place, wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if a number list does not have one entry per column,
 *         or a flag is neither 0 nor 1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame fr = stk.track(asts[1].exec(env)).getFrame();
    final int ncols = fr.numCols();

    // Bias/shift/mean, one value per column
    final double[] fmeans;
    if (asts[2] instanceof AstNumList) {
        fmeans = ((AstNumList) asts[2]).expand();
        if (fmeans.length != ncols) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        final double centerFlag = asts[2].exec(env).getNum();
        if (centerFlag == 0) {
            // No centering: a zero-filled array leaves the values untouched
            fmeans = new double[ncols];
        } else if (centerFlag == 1) {
            fmeans = fr.means();
        } else {
            throw new IllegalArgumentException("Only true or false allowed");
        }
    }

    // Scale/stddev multipliers, one value per column
    final double[] fmults;
    if (asts[3] instanceof AstNumList) {
        fmults = ((AstNumList) asts[3]).expand();
        if (fmults.length != ncols) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        final Val scaleArg = asts[3].exec(env);
        if (scaleArg instanceof ValFrame) {
            fmults = toArray(scaleArg.getFrame().anyVec());
        } else {
            final double scaleFlag = scaleArg.getNum();
            if (scaleFlag == 0) {
                // No scaling: a one-filled array leaves the values untouched
                fmults = new double[ncols];
                Arrays.fill(fmults, 1.0);
            } else if (scaleFlag == 1) {
                fmults = fr.mults();
            } else {
                throw new IllegalArgumentException("Only true or false allowed");
            }
        }
    }

    // Rewrite every cell of the frame in place
    new MRTask() {

        @Override
        public void map(Chunk[] cs) {
            for (int i = 0; i < cs.length; i++) {
                final Chunk column = cs[i];
                for (int row = 0; row < column._len; row++) {
                    column.set(row, (column.atd(row) - fmeans[i]) * fmults[i]);
                }
            }
        }
    }.doAll(fr);
    return new ValFrame(fr);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) AstNumList(water.rapids.ast.params.AstNumList)

Aggregations

AstNumList (water.rapids.ast.params.AstNumList): 12; ValFrame (water.rapids.vals.ValFrame): 11; Frame (water.fvec.Frame): 8; AstNum (water.rapids.ast.params.AstNum): 6; MRTask (water.MRTask): 4; Chunk (water.fvec.Chunk): 3; AstRoot (water.rapids.ast.AstRoot): 3; AstStr (water.rapids.ast.params.AstStr): 3; AstStrList (water.rapids.ast.params.AstStrList): 3; Vec (water.fvec.Vec): 2; Val (water.rapids.Val): 2; QuantileModel (hex.quantile.QuantileModel): 1; HashSet (java.util.HashSet): 1; Set (java.util.Set): 1; NewChunk (water.fvec.NewChunk): 1; BufferedString (water.parser.BufferedString): 1; AstExec (water.rapids.ast.AstExec): 1; AstFrame (water.rapids.ast.AstFrame): 1; AstParameter (water.rapids.ast.AstParameter): 1; AstId (water.rapids.ast.params.AstId): 1