Search in sources :

Example 71 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstIfElse method apply.

/**
 * Evaluates an if-else expression: {@code asts[1]} is the test, {@code asts[2]} the "true"
 * branch and {@code asts[3]} the "false" branch.
 *
 * <p>Three shapes of test value are handled:
 * <ul>
 *   <li>Numeric scalar: a NaN test yields a NaN {@code ValNum}; otherwise exactly one branch is
 *       executed (the false branch when the test equals 0). A frame result is reduced to the
 *       value at row 0 of its first vec.</li>
 *   <li>Row: both branches are executed and handed to {@code row_ifelse}.</li>
 *   <li>Frame: a new frame with {@code tst.numCols()} columns is built by picking, per cell,
 *       NA when the test is NA, the false value when the test is 0, and the true value
 *       otherwise. A branch is only evaluated when the test's rollups show it can be needed.</li>
 * </ul>
 *
 * @param env  execution environment passed to each {@code exec} call
 * @param stk  stack helper used to track intermediate values
 * @param asts argument ASTs; indices 1, 2 and 3 are read here
 * @return a {@code ValNum}, the executed branch's value, the result of {@code row_ifelse},
 *         or a {@code ValFrame} wrapping the per-cell selection
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Val val = stk.track(asts[1].exec(env));
    if (val.isNum()) {
        // Scalar test, scalar result
        double d = val.getNum();
        if (Double.isNaN(d))
            return new ValNum(Double.NaN);
        // exec only 1 of false and true
        Val res = stk.track(asts[d == 0 ? 3 : 2].exec(env));
        // A frame-valued branch is collapsed to the single value at row 0 of its first vec.
        return res.isFrame() ? new ValNum(res.getFrame().vec(0).at(0)) : res;
    }
    // Frame test.  Frame result.
    // Row test: both branches are always executed here (their results are not passed to stk.track).
    if (val.type() == Val.ROW)
        return row_ifelse((ValRow) val, asts[2].exec(env), asts[3].exec(env));
    Frame tst = val.getFrame();
    // If all zero's, return false and never execute true.
    // fr starts as a copy of the test frame and is handed to exec_check for each branch.
    // NOTE(review): presumably exec_check appends a frame-valued branch's columns to fr
    // (the chunk-count assert in the MRTask below relies on that layout) - confirm.
    Frame fr = new Frame(tst);
    Val tval = null;
    // The true branch is evaluated at most once, and only if some test vec has min or max != 0.
    for (Vec vec : tst.vecs()) if (vec.min() != 0 || vec.max() != 0) {
        tval = exec_check(env, stk, tst, asts[2], fr);
        break;
    }
    // Classify the true-branch value: frame, string constant (ts) or numeric constant (td).
    final boolean has_tfr = tval != null && tval.isFrame();
    final String ts = (tval != null && tval.isStr()) ? tval.getStr() : null;
    final double td = (tval != null && tval.isNum()) ? tval.getNum() : Double.NaN;
    // Per-column categorical level index used for the true string constant.
    final int[] tsIntMap = new int[tst.numCols()];
    // If all nonzero's (or NA's), then never execute false.
    Val fval = null;
    // The false branch is evaluated at most once, and only if some test vec has rows that are
    // neither nonzero nor NA (nzCnt + naCnt < length).
    for (Vec vec : tst.vecs()) if (vec.nzCnt() + vec.naCnt() < vec.length()) {
        fval = exec_check(env, stk, tst, asts[3], fr);
        break;
    }
    // Classify the false-branch value the same way: frame, string constant (fs) or number (fd).
    final boolean has_ffr = fval != null && fval.isFrame();
    final String fs = (fval != null && fval.isStr()) ? fval.getStr() : null;
    final double fd = (fval != null && fval.isNum()) ? fval.getNum() : Double.NaN;
    // Per-column categorical level index used for the false string constant.
    final int[] fsIntMap = new int[tst.numCols()];
    // Output domains stay null unless at least one branch is a string constant.
    String[][] domains = null;
    // maps[i] is passed to domainMap for frame-valued cells of column i; entries stay null
    // unless a string constant is merged into a categorical column's domain below.
    final int[][] maps = new int[tst.numCols()][];
    if (fs != null || ts != null) {
        // time to build domains...
        domains = new String[tst.numCols()][];
        if (fs != null && ts != null) {
            // Both branches are string constants: every column gets the two-level domain {fs, ts}.
            for (int i = 0; i < tst.numCols(); ++i) {
                // false => 0; truth => 1
                domains[i] = new String[] { fs, ts };
                fsIntMap[i] = 0;
                tsIntMap[i] = 1;
            }
        } else if (ts != null) {
            // True is a string constant; false must be a frame of categorical columns.
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_ffr) {
                    // Column i of the false frame inside fr.
                    // NOTE(review): has_tfr looks always false on this path (ts != null), assuming a
                    // Val cannot be both a string and a frame - confirm.
                    Vec v = fr.vec(i + tst.numCols() + (has_tfr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    // Extend the column's domain with ts, sort it, and record both the mapping
                    // computed from the old domain to the new one and the index of ts.
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = ts;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    tsIntMap[i] = ArrayUtils.find(dom, ts);
                    domains[i] = dom;
                } else
                    // String constant combined with a non-frame false branch is not implemented.
                    throw H2O.unimpl();
            }
        } else {
            // fs!=null
            // False is a string constant; true must be a frame of categorical columns.
            for (int i = 0; i < tst.numCols(); ++i) {
                if (has_tfr) {
                    // Column i of the true frame inside fr.
                    // NOTE(review): the map() below reads true-frame chunks at i + nchks.length with
                    // no has_ffr offset; has_ffr looks always false here (fs != null) - confirm.
                    Vec v = fr.vec(i + tst.numCols() + (has_ffr ? tst.numCols() : 0));
                    if (!v.isCategorical())
                        throw H2O.unimpl("Column is not categorical.");
                    // Same domain extension as above, this time inserting fs.
                    String[] dom = Arrays.copyOf(v.domain(), v.domain().length + 1);
                    dom[dom.length - 1] = fs;
                    Arrays.sort(dom);
                    maps[i] = computeMap(v.domain(), dom);
                    fsIntMap[i] = ArrayUtils.find(dom, fs);
                    domains[i] = dom;
                } else
                    // String constant combined with a non-frame true branch is not implemented.
                    throw H2O.unimpl();
            }
        }
    }
    // Now pick from left-or-right in the new frame
    Frame res = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] nchks) {
            // Input chunk layout: test columns first, then true-frame columns (if any), then
            // false-frame columns (if any); each group is nchks.length wide.
            assert nchks.length + (has_tfr ? nchks.length : 0) + (has_ffr ? nchks.length : 0) == chks.length;
            for (int i = 0; i < nchks.length; i++) {
                Chunk ctst = chks[i];
                NewChunk res = nchks[i];
                for (int row = 0; row < ctst._len; row++) {
                    double d;
                    // NA test => NA result.
                    if (ctst.isNA(row))
                        d = Double.NaN;
                    else if (ctst.atd(row) == 0)
                        // Test is 0: take the false frame's cell (passed through domainMap), else the
                        // false string's level index, else the false numeric constant.
                        d = has_ffr ? domainMap(chks[i + nchks.length + (has_tfr ? nchks.length : 0)].atd(row), maps[i]) : fs != null ? fsIntMap[i] : fd;
                    else
                        // Test is nonzero: same selection using the true-branch values.
                        d = has_tfr ? domainMap(chks[i + nchks.length].atd(row), maps[i]) : ts != null ? tsIntMap[i] : td;
                    res.addNum(d);
                }
            }
        }
    }.doAll(tst.numCols(), Vec.T_NUM, fr).outputFrame(null, domains);
    // flatten domains since they may be larger than needed
    if (domains != null) {
        for (int i = 0; i < res.numCols(); ++i) {
            if (res.vec(i).domain() != null) {
                // Collect the level indices obtained from the column, build the reduced domain from
                // them, then rewrite each non-NA cell to its position within that collected array.
                final long[] dom = new VecUtils.CollectDomainFast((int) res.vec(i).max()).doAll(res.vec(i)).domain();
                String[] newDomain = new String[dom.length];
                for (int l = 0; l < dom.length; ++l) newDomain[l] = res.vec(i).domain()[(int) dom[l]];
                new MRTask() {

                    @Override
                    public void map(Chunk c) {
                        for (int i = 0; i < c._len; ++i) {
                            if (!c.isNA(i))
                                c.set(i, ArrayUtils.find(dom, c.at8(i)));
                        }
                    }
                }.doAll(res.vec(i));
                // needs a DKVput?
                res.vec(i).setDomain(newDomain);
            }
        }
    }
    return new ValFrame(res);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) ValRow(water.rapids.vals.ValRow) Vec(water.fvec.Vec) MRTask(water.MRTask)

Example 72 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstMean method rowwiseMean.

/**
   * Row-wise mean: produces a frame with one column named "mean" holding, for every row, the
   * average of that row's numeric columns (or of its time columns when the frame has no numeric
   * column at all).
   *
   * @param fr    frame whose rows are averaged
   * @param na_rm when true, NA cells are skipped; when false, any NA cell makes the row's mean NA
   * @return the single-column result frame wrapped in a ValFrame (a frame without columns if
   *         {@code fr} has no vec at all)
   */
private ValFrame rowwiseMean(Frame fr, final boolean na_rm) {
    final String[] outNames = { "mean" };
    final Key<Frame> outKey = Key.make();
    // Tally numeric and time columns of the input
    int numericCount = 0;
    int timeCount = 0;
    for (Vec v : fr.vecs()) {
        if (v.isNumeric()) {
            numericCount++;
        }
        if (v.isTime()) {
            timeCount++;
        }
    }
    // Numeric columns win whenever there is at least one of them; otherwise fall back to time columns.
    final boolean useNumeric = numericCount > 0;
    final byte outType = useNumeric ? Vec.T_NUM : Vec.T_TIME;
    // Gather only the columns that take part in the computation
    Frame selected = new Frame();
    for (int c = 0; c < fr.numCols(); c++) {
        Vec v = fr.vec(c);
        boolean keep = useNumeric ? v.isNumeric() : v.isTime();
        if (keep) {
            selected.add(fr.name(c), v);
        }
    }
    Vec template = selected.anyVec();
    if (template == null) {
        // Nothing to average: either every column is of another type (-> one column of NAs),
        // or the input frame has no vec at all (-> frame without columns).
        Frame fallback = new Frame(outKey);
        template = fr.anyVec();
        if (template != null) {
            fallback.add("mean", template.makeCon(Double.NaN));
        }
        return new ValFrame(fallback);
    }
    if (!na_rm && numericCount < fr.numCols() && timeCount < fr.numCols()) {
        // Mixed column types without NA removal: the whole result is NA
        Vec[] naColumn = { template.makeCon(Double.NaN) };
        return new ValFrame(new Frame(outKey, outNames, naColumn));
    }
    // Regular case: one pass over the selected columns
    final int width = selected.numCols();
    Frame means = new MRTask() {

        @Override
        public void map(Chunk[] cs, NewChunk nc) {
            final int rows = cs[0]._len;
            for (int r = 0; r < rows; r++) {
                double sum = 0;
                // number of non-NA cells seen in this row
                int present = 0;
                for (int c = 0; c < width; c++) {
                    double cell = cs[c].atd(r);
                    if (!Double.isNaN(cell)) {
                        sum += cell;
                        present++;
                    }
                }
                // With NA removal one valid cell is enough; without it every cell must be valid.
                boolean defined = na_rm ? present > 0 : present == width;
                nc.addNum(defined ? sum / present : Double.NaN);
            }
        }
    }.doAll(1, outType, selected).outputFrame(outKey, outNames, null);
    return new ValFrame(means);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 73 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstDiffLag1 method apply.

/**
 * Lag-1 difference of a single numeric column: each output row holds the current value minus
 * the previous one, and the very first row of the column is NA.
 *
 * @param env  execution environment used to evaluate the argument
 * @param stk  stack helper that tracks the input frame
 * @param asts argument ASTs; {@code asts[1]} must evaluate to a one-column numeric frame
 * @return the differenced column, with the input's names and domains, wrapped in a ValFrame
 * @throws IllegalArgumentException if the frame has more than one column or is not numeric
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env).getFrame());
    if (fr.numCols() != 1) {
        throw new IllegalArgumentException("Expected a single column for diff. Got: " + fr.numCols() + " columns.");
    }
    if (!fr.anyVec().isNumeric()) {
        throw new IllegalArgumentException("Expected a numeric column for diff. Got: " + fr.anyVec().get_type_str());
    }
    // Indexed by chunk index; supplies the value preceding each chunk's first row.
    final double[] lastElemPerChk = GetLastElemPerChunkTask.get(fr.anyVec());
    Frame diffed = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            final int chunkIdx = c.cidx();
            if (chunkIdx != 0) {
                // First row of a later chunk: subtract the entry recorded for the previous chunk.
                nc.addNum(c.atd(0) - lastElemPerChk[chunkIdx - 1]);
            } else {
                // Very first row of the column has no predecessor.
                nc.addNA();
            }
            int row = 1;
            while (row < c._len) {
                nc.addNum(c.atd(row) - c.atd(row - 1));
                row++;
            }
        }
    }.doAll(fr.types(), fr).outputFrame(fr.names(), fr.domains());
    return new ValFrame(diffed);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 74 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstIsax method apply.

/**
 * Runs the iSAX transformation on a numeric frame.
 *
 * <p>Arguments: {@code asts[1]} the input frame, {@code asts[2]} the number of words,
 * {@code asts[3]} the maximum cardinality, {@code asts[4]} a flag (equal to 1 to enable) that
 * asks for the cardinality of each word to be reduced to the tokens actually observed.
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper that tracks the input frame
 * @param asts argument ASTs as described above
 * @return a ValFrame whose first column is the string column "iSax_index", followed by the
 *         {@code numWords} numeric word columns named c0, c1, ...
 * @throws IllegalArgumentException if any input column is not numeric, or if the number of
 *         words or the maximum cardinality is not strictly positive
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    AstRoot n = asts[2];
    AstRoot mc = asts[3];
    boolean optm_card = asts[4].exec(env).getNum() == 1;
    // Check vecs are numeric
    for (Vec v : fr.vecs()) {
        if (!v.isNumeric()) {
            throw new IllegalArgumentException("iSax only applies to numeric columns!");
        }
    }
    int numWords = (int) n.exec(env).getNum();
    int maxCardinality = (int) mc.exec(env).getNum();
    // Check numWords and maxCardinality are > 0. Zero is rejected as well: it would produce a
    // frame without word columns, and the error messages already promise "greater than 0".
    if (numWords <= 0) {
        throw new IllegalArgumentException("numWords must be greater than 0!");
    }
    if (maxCardinality <= 0) {
        throw new IllegalArgumentException("maxCardinality must be greater than 0!");
    }
    ArrayList<String> columns = new ArrayList<>();
    for (int i = 0; i < numWords; i++) {
        columns.add("c" + i);
    }
    Frame fr2 = new AstIsax.IsaxTask(numWords, maxCardinality).doAll(numWords, Vec.T_NUM, fr).outputFrame(null, columns.toArray(new String[numWords]), null);
    int[] maxCards = new int[numWords];
    if (optm_card) {
        // NaN marks the unused tail of each word's token table.
        _domain_hm = new double[numWords][maxCardinality];
        for (double[] r : _domain_hm) Arrays.fill(r, Double.NaN);
        // see if we can reduce the cardinality by checking all unique tokens in all series in a word
        for (int i = 0; i < fr2.numCols(); i++) {
            // The categorical conversion is only needed for its domain; drop the temporary Vec
            // right away so it does not stay behind in the store.
            Vec catVec = fr2.vec(i).toCategoricalVec();
            String[] domains = catVec.domain();
            catVec.remove();
            for (int j = 0; j < domains.length; j++) {
                _domain_hm[i][j] = Double.parseDouble(domains[j]);
            }
        }
        // get the cardinalities of each word: count entries up to the first NaN
        for (int i = 0; i < numWords; i++) {
            int cnt = 0;
            for (double d : _domain_hm[i]) {
                if (Double.isNaN(d))
                    break;
                else
                    cnt++;
            }
            maxCards[i] = cnt;
        }
        Frame fr2_reduced = new AstIsax.IsaxReduceCard(_domain_hm, maxCardinality).doAll(numWords, Vec.T_NUM, fr2).outputFrame(null, columns.toArray(new String[numWords]), null);
        Frame fr3 = new AstIsax.IsaxStringTask(maxCards).doAll(1, Vec.T_STR, fr2_reduced).outputFrame(null, new String[] { "iSax_index" }, null);
        // Not needed anymore
        fr2.delete();
        fr3.add(fr2_reduced);
        return new ValFrame(fr3);
    }
    // No reduction requested: every word uses the full maximum cardinality.
    Arrays.fill(maxCards, maxCardinality);
    Frame fr3 = new AstIsax.IsaxStringTask(maxCards).doAll(1, Vec.T_STR, fr2).outputFrame(null, new String[] { "iSax_index" }, null);
    fr3.add(fr2);
    return new ValFrame(fr3);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ArrayList(java.util.ArrayList) ValFrame(water.rapids.vals.ValFrame) Vec(water.fvec.Vec) AstRoot(water.rapids.ast.AstRoot)

Example 75 with ValFrame

use of water.rapids.vals.ValFrame in project h2o-3 by h2oai.

the class AstToLower method apply.

/**
 * Lower-cases every column of the frame produced by {@code asts[1]}.
 *
 * <p>Categorical columns go through {@code toLowerCategoricalCol}, all other (string) columns
 * through {@code toLowerStringCol}; the results are assembled into a new frame in the same
 * column order.
 *
 * @param env  execution environment used to evaluate the argument
 * @param stk  stack helper that tracks the input value
 * @param asts argument ASTs; {@code asts[1]} must evaluate to a frame
 * @return the converted columns wrapped in a ValFrame
 * @throws IllegalArgumentException if any column is neither string nor categorical; the message
 *         names the type of the first offending column
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Vec[] vecs = fr.vecs();
    // Type check: report the type of the column that actually failed, not of an arbitrary one.
    for (Vec v : vecs) {
        if (!(v.isCategorical() || v.isString())) {
            throw new IllegalArgumentException("tolower() requires a string or categorical column. " + "Received " + v.get_type_str() + ". Please convert column to a string or categorical first.");
        }
    }
    // Transform each vec
    Vec[] nvs = new Vec[fr.numCols()];
    int i = 0;
    for (Vec v : vecs) {
        if (v.isCategorical()) {
            nvs[i] = toLowerCategoricalCol(v);
        } else {
            nvs[i] = toLowerStringCol(v);
        }
        i++;
    }
    return new ValFrame(new Frame(nvs));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame)

Aggregations

ValFrame (water.rapids.vals.ValFrame)132 Frame (water.fvec.Frame)98 Val (water.rapids.Val)48 Vec (water.fvec.Vec)43 Test (org.junit.Test)38 MRTask (water.MRTask)32 Chunk (water.fvec.Chunk)24 NewChunk (water.fvec.NewChunk)23 BufferedString (water.parser.BufferedString)16 AstNumList (water.rapids.ast.params.AstNumList)11 AstNum (water.rapids.ast.params.AstNum)7 ValNum (water.rapids.vals.ValNum)7 AstRoot (water.rapids.ast.AstRoot)6 ValRow (water.rapids.vals.ValRow)6 ArrayList (java.util.ArrayList)5 Key (water.Key)5 AstStrList (water.rapids.ast.params.AstStrList)5 Futures (water.Futures)4 AstParameter (water.rapids.ast.AstParameter)4 Random (java.util.Random)3