Search in sources :

Example 1 with Group

Use of water.exec.ASTddply.Group in project h2o-2 by h2oai.

The serve method of the class Impute.

/**
 * Replaces the missing entries of {@code column} inside {@code source}, in place.
 *
 * <p>A row counts as missing when the chunk reports it as NA, or when the vec is
 * categorical and the row's level label is the literal string {@code "NA"}.
 *
 * <p>Without {@code group_by} a single replacement value is computed for the whole
 * column according to {@code method} (mean, the {@code QuantilesPage} result, or the
 * most frequent non-"NA" level). With {@code group_by} a {@code ddply} expression is
 * run through {@code Exec2} and each missing row is filled from the value recorded
 * for its group; rows whose group has no recorded value are left untouched.
 *
 * @return a redirect to the inspection page of {@code source}, or an error response
 *         wrapping whatever {@link Throwable} was raised while imputing
 */
@Override
protected Response serve() {
    if (init())
        return Inspect2.redirect(this, source._key.toString());
    final int targetIdx = source.find(column);
    final int[] groupCols = group_by;
    final Key ddplyKey = Key.make();
    try {
        if (group_by == null) {
            // Ungrouped: one replacement value derived from the whole column.
            double fill = 0;
            if (method == Method.mean) {
                fill = column.mean();
            } else if (method == Method.median) {
                // NOTE(review): relies on QuantilesPage's defaults yielding the median -- confirm.
                QuantilesPage quantiles = new QuantilesPage();
                quantiles.source_key = source;
                quantiles.column = column;
                quantiles.invoke();
                fill = quantiles.result;
            } else if (method == Method.mode) {
                String[] labels = column.domain();
                long[][] levels = new long[1][];
                levels[0] = new Vec.CollectDomain(column).doAll(new Frame(column)).domain();
                long[][] counts = new ASTTable.Tabularize(levels).doAll(column)._counts;
                long[] tally = counts[0];
                long best = -1;
                int bestLevel = -1;
                for (int lvl = 0; lvl < tally.length; lvl++) {
                    // Skip levels that do not beat the current best, and the "NA" label
                    // itself (corner case from R where "NA" is part of the domain).
                    if (tally[lvl] <= best || labels[lvl].equals("NA"))
                        continue;
                    best = tally[lvl];
                    bestLevel = lvl;
                }
                if (bestLevel != -1) {
                    fill = (double) bestLevel;
                } else {
                    // No usable level: fall back to the index of "NA" in the domain,
                    // which is -1 when the domain has no such label.
                    fill = (double) Arrays.asList(labels).indexOf("NA");
                }
                // Safe to test for -1 here because we are inside the "mode" branch.
                if (fill == -1)
                    fill = Double.NaN;
            }
            final double replacement = fill;
            new MRTask2() {

                @Override
                public void map(Chunk[] chunks) {
                    Chunk target = chunks[targetIdx];
                    int n = target.len();
                    for (int row = 0; row < n; row++) {
                        boolean missing = target.isNA0(row) || (target._vec.isEnum() && target._vec.domain()[(int) target.at0(row)].equals("NA"));
                        // A NaN replacement means "leave the row as NA".
                        if (missing && !Double.isNaN(replacement))
                            target.set0(row, replacement);
                    }
                }
            }.doAll(source);
        } else {
            // Grouped: define the per-group function, run ddply with it, then build
            // the group -> value lookup from the resulting frame.
            String fnDef = Key.make().toString() + " = anonymous <- function(x) \n{\n " + method + "(x[," + (targetIdx + 1) + "])\n}";
            water.exec.Exec2.exec(fnDef).remove_and_unlock();
            String ddplyExpr = ddplyKey.toString() + " = ddply(" + source._key.toString() + ", " + toAryString(groupCols) + ", anonymous)";
            Env env = water.exec.Exec2.exec(ddplyExpr);
            final Frame perGroup = new Frame(env.peekAry());
            env.remove_and_unlock();
            Log.info("GROUP TASK NUM COLS: " + perGroup.numCols());
            final GroupTask lookup = new GroupTask(perGroup.numCols() - 1).doAll(perGroup);
            new MRTask2() {

                @Override
                public void map(Chunk[] chunks) {
                    Chunk target = chunks[targetIdx];
                    int n = chunks[0].len();
                    for (int row = 0; row < n; row++) {
                        boolean missing = target.isNA0(row) || (target._vec.isEnum() && target._vec.domain()[(int) target.at0(row)].equals("NA"));
                        if (!missing)
                            continue;
                        Group grp = new Group(groupCols.length);
                        grp.fill(row, chunks, groupCols);
                        // Groups without a recorded value keep their missing rows.
                        if (lookup._grp2val.get(grp) == null)
                            continue;
                        double value = lookup._grp2val.get(grp);
                        target.set0(row, value);
                    }
                }
            }.doAll(source);
        }
        return Inspect2.redirect(this, source._key.toString());
    } catch (Throwable t) {
        return Response.error(t);
    } finally {
        // Drop whatever was stored under the ddply result key.
        UKV.remove(ddplyKey);
    }
}
Also used : Group(water.exec.ASTddply.Group) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) Env(water.exec.Env) Vec(water.fvec.Vec)

Aggregations

Group (water.exec.ASTddply.Group): 1, Env (water.exec.Env): 1, Chunk (water.fvec.Chunk): 1, Frame (water.fvec.Frame): 1, Vec (water.fvec.Vec): 1