Search in sources :

Example 1 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstCut method apply.

/**
 * Bins a single numeric column into intervals and returns the bin index of every row as a
 * one-column frame whose domain holds the interval labels.
 *
 * <p>Arguments, by position in {@code asts}:
 * [1] the frame to cut (exactly one non-categorical column);
 * [2] the break points, or a single number N &gt;= 2 meaning "N equal-width pieces";
 * [3] optional labels, one per bin;
 * [4] flag: the lowest break point belongs to the first bin;
 * [5] flag: intervals are closed on the right;
 * [6] number of decimal digits the break points are rounded to (capped at 12).
 *
 * <p>Rows that are NaN, or that fall outside the outermost break points, become NaN.
 *
 * @return a one-column frame of bin indices with the interval (or user) labels as its domain
 * @throws IllegalArgumentException if the frame is not a single numeric column, if no break
 *         point is given, if a single break value is below 2, or if the number of labels
 *         does not equal the number of bins
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    double[] cuts = check(asts[2]);
    Arrays.sort(cuts);
    String[] labels = check2(asts[3]);
    final boolean lowest = asts[4].exec(env).getNum() == 1;
    final boolean rite = asts[5].exec(env).getNum() == 1;
    // cap at 12
    final int digits = Math.min((int) asts[6].exec(env).getNum(), 12);
    if (fr.vecs().length != 1 || fr.vecs()[0].isCategorical())
        throw new IllegalArgumentException("First argument must be a numeric column vector");
    // Without this guard an empty cut list surfaced later as a NegativeArraySizeException.
    if (cuts.length == 0)
        throw new IllegalArgumentException("At least one cut point must be given.");
    double fmin = fr.anyVec().min();
    double fmax = fr.anyVec().max();
    // c(0,10,100) -> 2 bins (0,10] U (10, 100]
    int nbins = cuts.length - 1;
    if (nbins == 0) {
        if (cuts[0] < 2)
            throw new IllegalArgumentException("The number of cuts must be >= 2. Got: " + cuts[0]);
        // in this case, cut the vec into _cuts[0] many pieces of equal length
        nbins = (int) Math.floor(cuts[0]);
        double width = (fmax - fmin) / nbins;
        // nbins pieces need nbins + 1 break points. Allocating only nbins left one bin short,
        // made the last piece wider than the rest and left a null entry in the domain.
        cuts = new double[nbins + 1];
        // Push the outer break points slightly outwards so that fmin and fmax fall inside.
        cuts[0] = fmin - 0.001 * (fmax - fmin);
        for (int i = 1; i < cuts.length; ++i) {
            cuts[i] = (i == cuts.length - 1) ? (fmax + 0.001 * (fmax - fmin)) : (fmin + i * width);
        }
    }
    // NOTE(review): a constant column (fmax == fmin) is not rejected here; all generated
    // break points then coincide.
    if (labels != null && labels.length != nbins)
        throw new IllegalArgumentException("`labels` vector does not match the number of cuts.");
    // Construct domain names from _labels or bin intervals if _labels is null
    // NOTE(review): cutz is the same array as cuts, so the rounding below also moves the
    // boundaries used for binning, not only the ones printed in the labels - confirm intended.
    final double[] cutz = cuts;
    // first round _cuts to dig.lab decimals: example floor(2.676*100 + 0.5) / 100
    final double scale = Math.pow(10, digits);
    for (int i = 0; i < cuts.length; ++i) {
        cuts[i] = Math.floor(cuts[i] * scale + 0.5) / scale;
    }
    String[][] domains = new String[1][nbins];
    if (labels == null) {
        // left()/rite() are helpers defined elsewhere; presumably they return the bracket
        // character matching the open/closed side of the interval.
        domains[0][0] = (lowest ? "[" : left(rite)) + cuts[0] + "," + cuts[1] + rite(rite);
        for (int i = 1; i < (cuts.length - 1); ++i) {
            domains[0][i] = left(rite) + cuts[i] + "," + cuts[i + 1] + rite(rite);
        }
    } else {
        domains[0] = labels;
    }
    Frame fr2 = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            final int last = cutz.length - 1;
            int rows = c._len;
            for (int r = 0; r < rows; ++r) {
                double x = c.atd(r);
                // Below the range, or sitting on the lowest break point when it is excluded.
                boolean tooLow = x < cutz[0] || (!lowest && MathUtils.equalsWithinOneSmallUlp(x, cutz[0]));
                // Above the range, or sitting on the highest break point of a right-open interval.
                boolean tooHigh = x > cutz[last] || (!rite && MathUtils.equalsWithinOneSmallUlp(x, cutz[last]));
                if (Double.isNaN(x) || tooLow || tooHigh) {
                    nc.addNum(Double.NaN);
                    continue;
                }
                // First break point that bounds x from above decides the bin.
                for (int i = 1; i < cutz.length; ++i) {
                    if (rite ? x <= cutz[i] : x < cutz[i]) {
                        nc.addNum(i - 1);
                        break;
                    }
                }
            }
        }
    }.doAll(1, Vec.T_NUM, fr).outputFrame(fr.names(), domains);
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 2 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstDdply method apply.

/**
 * Groups the frame by the given columns, runs a function on every group and assembles the
 * results into a frame with one row per group: the group key followed by the function's
 * results.
 *
 * <p>Arguments, by position in {@code asts}: [1] the input frame, [2] the group-by column
 * list, [3] the function applied to each group.
 *
 * <p>The number of result columns is taken from the first group's result, so at least one
 * group must exist ({@code remoteTasks[0]} is read unconditionally).
 *
 * @return the frame built by {@code AstGroup.buildOutput}; result columns are named
 *         {@code ddply_C1}, {@code ddply_C2}, ...
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    int ncols = fr.numCols();
    AstNumList groupby = AstGroup.check(ncols, asts[2]);
    int[] gbCols = groupby.expand4();
    AstRoot fun = asts[3].exec(env).getFun();
    // Current execution scope; needed to lookup variables
    AstFunction scope = env._scope;
    // Pass 1: Find all the groups (and count rows-per-group)
    IcedHashMap<AstGroup.G, String> gss = AstGroup.doGroups(fr, gbCols, AstGroup.aggNRows());
    final AstGroup.G[] grps = gss.keySet().toArray(new AstGroup.G[gss.size()]);
    // apply an ORDER by here...
    final int[] ordCols = new AstNumList(0, gbCols.length).expand4();
    Arrays.sort(grps, new java.util.Comparator<AstGroup.G>() {

        // Compare 2 groups column by column, in the order given by ordCols, and stop at the
        // first column whose keys differ.  NaN is treated as least; two NaNs tie.
        @Override
        public int compare(AstGroup.G g1, AstGroup.G g2) {
            for (int i : ordCols) {
                boolean nan1 = Double.isNaN(g1._gs[i]);
                boolean nan2 = Double.isNaN(g2._gs[i]);
                if (nan1 || nan2) {
                    // Both NaN must count as equal in this column.  Falling through to the
                    // != test below would return 1 in both directions (NaN != NaN is true),
                    // which breaks the Comparator contract.
                    if (nan1 && nan2)
                        continue;
                    return nan1 ? -1 : 1;
                }
                if (g1._gs[i] != g2._gs[i])
                    return g1._gs[i] < g2._gs[i] ? -1 : 1;
            }
            return 0;
        }

        // I do not believe sort() calls equals() at this time, so no need to implement
        @Override
        public boolean equals(Object o) {
            throw H2O.unimpl();
        }
    });
    // Uniquely number the groups
    for (int gnum = 0; gnum < grps.length; gnum++) {
        grps[gnum]._dss[0][0] = gnum;
    }
    // Pass 2: Build all the groups, building 1 Vec per-group, with exactly the
    // same Chunk layout, except each Chunk will be the filter rows numbers; a
    // list of the Chunk-relative row-numbers for that group in an original
    // data Chunk.  Each Vec will have a *different* number of rows.
    Vec[] vgrps = new BuildGroup(gbCols, gss).doAll(gss.size(), Vec.T_NUM, fr).close();
    // Pass 3: For each group, build a full frame for the group, run the
    // function on it and tear the frame down.
    // gather up the remote tasks...
    final RemoteRapids[] remoteTasks = new RemoteRapids[gss.size()];
    Futures fs = new Futures();
    for (int i = 0; i < remoteTasks.length; i++) {
        // Each task is sent to the home node of its group's Vec key.
        fs.add(RPC.call(vgrps[i]._key.home_node(), remoteTasks[i] = new RemoteRapids(fr, vgrps[i]._key, fun, scope)));
    }
    fs.blockForPending();
    // Build the output!
    final double[] res0 = remoteTasks[0]._result;
    String[] fcnames = new String[res0.length];
    for (int i = 0; i < res0.length; i++) {
        fcnames[i] = "ddply_C" + (i + 1);
    }
    MRTask mrfill = new MRTask() {

        @Override
        public void map(Chunk[] c, NewChunk[] ncs) {
            int start = (int) c[0].start();
            for (int i = 0; i < c[0]._len; ++i) {
                // One Group per row
                AstGroup.G g = grps[i + start];
                int j;
                // The Group Key, as a row
                for (j = 0; j < g._gs.length; j++) {
                    ncs[j].addNum(g._gs[j]);
                }
                // Then this group's function results, in the columns after the key
                double[] res = remoteTasks[i + start]._result;
                for (int a = 0; a < res0.length; a++) {
                    ncs[j++].addNum(res[a]);
                }
            }
        }
    };
    Frame f = AstGroup.buildOutput(gbCols, res0.length, fr, fcnames, gss.size(), mrfill);
    return new ValFrame(f);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) AstNumList(water.rapids.ast.params.AstNumList) water.util(water.util)

Example 3 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstFlatten method apply.

/**
 * Turns a 1x1 frame into a scalar value; a frame of any other shape is handed back as a
 * frame value.
 *
 * <p>Argument {@code asts[1]} is the frame.  Numeric and time cells become a {@code ValNum},
 * string and categorical cells a {@code ValStr}.  A missing time cell yields NaN; a missing
 * string or categorical cell yields the string {@code "NA"}.
 *
 * @return the scalar value, or a {@code ValFrame} wrapping the input when it is not 1x1
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    // did not flatten
    if (fr.numCols() != 1 || fr.numRows() != 1)
        return new ValFrame(fr);
    Vec vec = fr.anyVec();
    switch(vec.get_type()) {
        case Vec.T_BAD:
        case Vec.T_NUM:
            return new ValNum(vec.at(0));
        case Vec.T_TIME:
            // check for missing values
            return vec.isNA(0) ? new ValNum(Double.NaN) : new ValNum(vec.at8(0));
        case Vec.T_STR:
            // check for missing values, like the time and categorical branches do.
            // Presumably atStr() yields null for a missing cell, which made the unguarded
            // toString() fail - TODO confirm against Vec.atStr.
            return vec.isNA(0) ? new ValStr("NA") : new ValStr(vec.atStr(new BufferedString(), 0).toString());
        case Vec.T_CAT:
            // check for missing values
            return vec.isNA(0) ? new ValStr("NA") : new ValStr(vec.factor(vec.at8(0)));
        default:
            throw H2O.unimpl("The type of vector: " + vec.get_type_str() + " is not supported by " + str());
    }
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) ValStr(water.rapids.vals.ValStr) BufferedString(water.parser.BufferedString) ValNum(water.rapids.vals.ValNum)

Example 4 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstGroup method apply.

/**
 * Groups the frame by the given columns and computes the requested aggregates per group.
 *
 * <p>Arguments, by position in {@code asts}: [1] the input frame, [2] the group-by column
 * list, then any number of (function, column, NA-handling) triples - one per aggregate.
 * Each aggregate works on exactly one column, and {@code mode} only on a categorical one.
 *
 * @return the frame built by {@code buildOutput}: one row per group holding the group key
 *         followed by one column per aggregate, named {@code <function>_<column name>}
 * @throws IllegalArgumentException if the aggregate arguments are not complete triples, if
 *         an aggregate names more than one column, if {@code mode} is requested on a
 *         non-categorical column, or if a function or NA-handling name is unknown
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    int ncols = fr.numCols();
    AstNumList groupby = check(ncols, asts[2]);
    final int[] gbCols = groupby.expand4();
    // Count of aggregates; knock off the first 3 ASTs (GB data [group-by]),
    // then count by triples.
    // An incomplete trailing triple used to surface as a bare ArrayIndexOutOfBoundsException.
    if ((asts.length - 3) % 3 != 0)
        throw new IllegalArgumentException("Group-By aggregates must be given as (function, column, NA-handling) triples");
    int naggs = (asts.length - 3) / 3;
    final AGG[] aggs = new AGG[naggs];
    for (int idx = 3; idx < asts.length; idx += 3) {
        Val v = asts[idx].exec(env);
        String fn = v instanceof ValFun ? v.getFun().str() : v.getStr();
        FCN fcn = FCN.valueOf(fn);
        AstNumList col = check(ncols, asts[idx + 1]);
        if (col.cnt() != 1)
            throw new IllegalArgumentException("Group-By functions take only a single column");
        // Aggregate column
        int agg_col = (int) col.min();
        if (fcn == FCN.mode && !fr.vec(agg_col).isCategorical())
            throw new IllegalArgumentException("Mode only allowed on categorical columns");
        // Upper-case with a fixed locale: the default-locale variant maps 'i' to a dotted
        // capital under e.g. a Turkish locale, so the enum lookup would fail.
        NAHandling na = NAHandling.valueOf(asts[idx + 2].exec(env).getStr().toUpperCase(java.util.Locale.ROOT));
        aggs[(idx - 3) / 3] = new AGG(fcn, agg_col, na, (int) fr.vec(agg_col).max() + 1);
    }
    // do the group by work now
    IcedHashMap<G, String> gss = doGroups(fr, gbCols, aggs);
    final G[] grps = gss.keySet().toArray(new G[gss.size()]);
    // apply an ORDER by here...
    if (gbCols.length > 0)
        Arrays.sort(grps, new java.util.Comparator<G>() {

            // Compare 2 groups column by column over the group-by columns and stop at the
            // first column whose keys differ.  NaN is treated as least; two NaNs tie.
            @Override
            public int compare(G g1, G g2) {
                for (int i = 0; i < gbCols.length; i++) {
                    boolean nan1 = Double.isNaN(g1._gs[i]);
                    boolean nan2 = Double.isNaN(g2._gs[i]);
                    if (nan1 || nan2) {
                        // Both NaN must count as equal in this column.  Falling through to
                        // the != test below would return 1 in both directions (NaN != NaN
                        // is true), which breaks the Comparator contract.
                        if (nan1 && nan2)
                            continue;
                        return nan1 ? -1 : 1;
                    }
                    if (g1._gs[i] != g2._gs[i])
                        return g1._gs[i] < g2._gs[i] ? -1 : 1;
                }
                return 0;
            }

            // I do not believe sort() calls equals() at this time, so no need to implement
            @Override
            public boolean equals(Object o) {
                throw H2O.unimpl();
            }
        });
    // Build the output!
    String[] fcnames = new String[aggs.length];
    for (int i = 0; i < aggs.length; i++) {
        fcnames[i] = aggs[i]._fcn.toString() + "_" + fr.name(aggs[i]._col);
    }
    MRTask mrfill = new MRTask() {

        @Override
        public void map(Chunk[] c, NewChunk[] ncs) {
            int start = (int) c[0].start();
            for (int i = 0; i < c[0]._len; ++i) {
                // One Group per row
                G g = grps[i + start];
                int j;
                // The Group Key, as a row
                for (j = 0; j < g._gs.length; j++) {
                    ncs[j].addNum(g._gs[j]);
                }
                // Then one finished aggregate per column after the key
                for (int a = 0; a < aggs.length; a++) {
                    ncs[j++].addNum(aggs[a]._fcn.postPass(g._dss[a], g._ns[a]));
                }
            }
        }
    };
    Frame f = buildOutput(gbCols, naggs, fr, fcnames, grps.length, mrfill);
    return new ValFrame(f);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValFrame(water.rapids.vals.ValFrame) ValFun(water.rapids.vals.ValFun) MRTask(water.MRTask) AstNumList(water.rapids.ast.params.AstNumList)

Example 5 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstGroupedPermute method apply.

/**
 * Builds the grouped-permute output frame.
 *
 * <p>Arguments, by position in {@code asts}: [1] the input frame, [2] the column to permute,
 * [3] the group-by column list, [4] the permute-by column, [5] the column to keep.
 *
 * <p>The output has the group-by columns followed by {@code In}, {@code Out},
 * {@code InAmnt} and {@code OutAmnt}.  {@code In}/{@code Out} reuse the domain of the
 * permuted column, {@code InAmnt}/{@code OutAmnt} the domain of the kept column.  The
 * elapsed time of each of the two phases is logged.
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    final int permCol = (int) asts[2].exec(env).getNum();
    final int[] gbCols = AstGroup.check(fr.numCols(), asts[3]).expand4();
    final int permuteBy = (int) asts[4].exec(env).getNum();
    final int keepCol = (int) asts[5].exec(env).getNum();

    // Output column names: the group-by columns first, then the four fixed columns.
    final int nGroupCols = gbCols.length;
    String[] names = new String[nGroupCols + 4];
    for (int c = 0; c < nGroupCols; c++) {
        names[c] = fr.name(gbCols[c]);
    }
    names[nGroupCols] = "In";
    names[nGroupCols + 1] = "Out";
    names[nGroupCols + 2] = "InAmnt";
    names[nGroupCols + 3] = "OutAmnt";

    // Matching domains, column for column.
    String[][] domains = new String[names.length][];
    for (int c = 0; c < nGroupCols; c++) {
        domains[c] = fr.domains()[gbCols[c]];
    }
    domains[nGroupCols] = fr.domains()[permCol];
    domains[nGroupCols + 1] = fr.domains()[permCol];
    domains[nGroupCols + 2] = fr.domains()[keepCol];
    domains[nGroupCols + 3] = fr.domains()[keepCol];

    // Phase 1: build the groups over the whole frame.
    long phaseStart = System.currentTimeMillis();
    BuildGroups groupsTask = new BuildGroups(gbCols, permuteBy, permCol, keepCol).doAll(fr);
    double buildSeconds = (System.currentTimeMillis() - phaseStart) / 1000.;
    Log.info("Elapsed time: " + buildSeconds + "s");

    // Phase 2: smash the groups and wait for the task to finish.
    phaseStart = System.currentTimeMillis();
    SmashGroups smash = new SmashGroups(groupsTask._grps);
    H2O.submitTask(smash).join();
    double smashSeconds = (System.currentTimeMillis() - phaseStart) / 1000.;
    Log.info("Elapsed time: " + smashSeconds + "s");

    double[][][] results = smash._res.values().toArray(new double[0][][]);
    return new ValFrame(buildOutput(results, names, domains));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) AstNumList(water.rapids.ast.params.AstNumList)

Aggregations

ValFrame (water.rapids.vals.ValFrame) 132, Frame (water.fvec.Frame) 98, Val (water.rapids.Val) 48, Vec (water.fvec.Vec) 43, Test (org.junit.Test) 38, MRTask (water.MRTask) 32, Chunk (water.fvec.Chunk) 24, NewChunk (water.fvec.NewChunk) 23, BufferedString (water.parser.BufferedString) 16, AstNumList (water.rapids.ast.params.AstNumList) 11, AstNum (water.rapids.ast.params.AstNum) 7, ValNum (water.rapids.vals.ValNum) 7, AstRoot (water.rapids.ast.AstRoot) 6, ValRow (water.rapids.vals.ValRow) 6, ArrayList (java.util.ArrayList) 5, Key (water.Key) 5, AstStrList (water.rapids.ast.params.AstStrList) 5, Futures (water.Futures) 4, AstParameter (water.rapids.ast.AstParameter) 4, Random (java.util.Random) 3