Search in sources :

Example 91 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AstScale method apply.

/**
 * Centers and/or scales every column of a frame, updating the frame in place.
 *
 * <p>{@code asts[1]} evaluates to the frame to modify. {@code asts[2]} chooses the per-column
 * value subtracted from each element: a number list with one entry per column, or a number
 * where 0 means "subtract nothing" and 1 means "use {@code fr.means()}". {@code asts[3]} chooses
 * the per-column multiplier applied after the subtraction: a number list with one entry per
 * column, a frame whose {@code anyVec()} is converted with {@code toArray}, or a number where
 * 0 means "multiply by 1" and 1 means "use {@code fr.mults()}".
 *
 * @param env  environment the argument ASTs are executed in
 * @param stk  stack helper used to track the input frame
 * @param asts operator arguments, laid out as described above
 * @return the same frame, wrapped in a {@code ValFrame}, after the in-place update
 * @throws IllegalArgumentException if a per-column array does not have exactly one entry per
 *         column of the frame, or if a numeric flag is neither 0 nor 1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    int ncols = fr.numCols();
    // Peel out the bias/shift/mean
    double[] means;
    if (asts[2] instanceof AstNumList) {
        means = ((AstNumList) asts[2]).expand();
        if (means.length != ncols) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        double d = asts[2].exec(env).getNum();
        if (d == 0) {
            // No change on means, so zero-filled
            means = new double[ncols];
        } else if (d == 1) {
            means = fr.means();
        } else {
            throw new IllegalArgumentException("Only true or false allowed");
        }
    }
    // Peel out the scale/stddev
    double[] mults;
    if (asts[3] instanceof AstNumList) {
        mults = ((AstNumList) asts[3]).expand();
        if (mults.length != ncols) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        Val v = asts[3].exec(env);
        if (v instanceof ValFrame) {
            mults = toArray(v.getFrame().anyVec());
            // Validate before touching any data: the update below indexes this array by column,
            // so a wrong-sized array would otherwise fail part-way through the in-place rewrite
            // (or silently ignore surplus entries).
            if (mults.length != ncols) {
                throw new IllegalArgumentException("Frame of multipliers must be the same length as the columns of the Frame");
            }
        } else {
            double d = v.getNum();
            if (d == 0) {
                // No change on mults, so one-filled
                mults = new double[ncols];
                Arrays.fill(mults, 1.0);
            } else if (d == 1) {
                mults = fr.mults();
            } else {
                throw new IllegalArgumentException("Only true or false allowed");
            }
        }
    }
    // Update in-place.
    // Final copies so the anonymous task below can capture them
    final double[] fmeans = means;
    final double[] fmults = mults;
    new MRTask() {

        @Override
        public void map(Chunk[] cs) {
            // For every column i and every row: x := (x - mean[i]) * mult[i]
            for (int i = 0; i < cs.length; i++) {
                for (int row = 0; row < cs[i]._len; row++) {
                    cs[i].set(row, (cs[i].atd(row) - fmeans[i]) * fmults[i]);
                }
            }
        }
    }.doAll(fr);
    return new ValFrame(fr);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) AstNumList(water.rapids.ast.params.AstNumList)

Example 92 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AstSetDomain method apply.

/**
 * Installs a new domain (array of level labels) on a single categorical column.
 *
 * <p>{@code asts[1]} evaluates to the frame holding the column and {@code asts[2]} is the string
 * list with the replacement labels. When the replacement has a different number of labels than
 * the column's current domain, the values actually present in the column are re-collected; their
 * count must equal the number of replacement labels, and every non-NA value is rewritten to its
 * position in the collected array before the new domain is set.
 *
 * @param env  environment the argument ASTs are executed in
 * @param stk  stack helper used to track the input frame
 * @param asts operator arguments, laid out as described above
 * @return the input frame wrapped in a {@code ValFrame}
 * @throws IllegalArgumentException if the frame is not exactly one column, the column is not
 *         categorical, the number of collected levels differs from the number of replacement
 *         labels, or a value cannot be located among the collected levels
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame frame = stk.track(asts[1].exec(env)).getFrame();
    final String[] newDomain = ((AstStrList) asts[2])._strs;

    final int columnCount = frame.numCols();
    if (columnCount != 1) {
        throw new IllegalArgumentException("Must be a single column. Got: " + columnCount + " columns.");
    }

    final Vec column = frame.anyVec();
    if (!column.isCategorical()) {
        throw new IllegalArgumentException("Vector must be a factor column. Got: " + column.get_type_str());
    }

    final boolean labelCountDiffers = newDomain != null && newDomain.length != column.domain().length;
    if (labelCountDiffers) {
        // Re-collect the values present in the column and make sure their count
        // matches the number of replacement labels.
        VecUtils.CollectDomainFast collector = new VecUtils.CollectDomainFast((int) column.max());
        collector.doAll(column);
        final long[] present = collector.domain();
        if (present.length != newDomain.length) {
            throw new IllegalArgumentException("Number of replacement factors must equal current number of levels. Current number of levels: " + present.length + " != " + newDomain.length);
        }
        new MRTask() {

            @Override
            public void map(Chunk chunk) {
                for (int row = 0; row < chunk._len; row++) {
                    if (chunk.isNA(row)) {
                        continue;
                    }
                    // binarySearch presumes the collected array is sorted ascending
                    long position = Arrays.binarySearch(present, chunk.at8(row));
                    if (position < 0) {
                        throw new IllegalArgumentException("Could not find the categorical value!");
                    }
                    chunk.set(row, position);
                }
            }
        }.doAll(column);
    }

    column.setDomain(newDomain);
    DKV.put(column);
    return new ValFrame(frame);
}
Also used : AstStrList(water.rapids.ast.params.AstStrList) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) ValFrame(water.rapids.vals.ValFrame) VecUtils(water.util.VecUtils) Vec(water.fvec.Vec) MRTask(water.MRTask)

Example 93 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AstBinOp method frame_op_frame.

/**
 * Applies this operator element-by-element to two frames.
 *
 * <p>Dispatch rules, in order:
 * <ul>
 *   <li>Row counts differ: if either frame has exactly one row (and the column counts match)
 *       the work is handed to {@code frame_op_row}; any other mismatch is an error.</li>
 *   <li>Either frame has zero columns: that frame is returned as-is.</li>
 *   <li>Exactly one frame has a single column while the other has several: the single column is
 *       widened across the other frame via {@code vec_op_frame} / {@code frame_op_vec}.</li>
 *   <li>Otherwise the column counts must match and columns are combined pairwise.</li>
 * </ul>
 *
 * @param lf left-hand frame
 * @param rt right-hand frame
 * @return the resulting frame, after {@code cleanCategorical} post-processing in the pairwise case
 * @throws IllegalArgumentException if the row or column counts are incompatible
 */
private ValFrame frame_op_frame(Frame lf, Frame rt) {
    final long leftRows = lf.numRows();
    final long rightRows = rt.numRows();
    final int leftCols = lf.numCols();
    final int rightCols = rt.numCols();

    if (leftRows != rightRows) {
        // Only a single row may be broadcast across the other frame
        if (leftRows != 1 && rightRows != 1) {
            throw new IllegalArgumentException("Frames must have same rows, found " + leftRows + " rows and " + rightRows + " rows.");
        }
        if (leftCols != rightCols) {
            throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
        }
        return frame_op_row(lf, rt);
    }

    // Zero-column short-cuts
    if (leftCols == 0) {
        return new ValFrame(lf);
    }
    if (rightCols == 0) {
        return new ValFrame(rt);
    }

    // Single column against many: widen the single column
    if (leftCols == 1 && rightCols > 1) {
        return vec_op_frame(lf.vecs()[0], rt);
    }
    if (rightCols == 1 && leftCols > 1) {
        return frame_op_vec(lf, rt.vecs()[0]);
    }

    if (leftCols != rightCols) {
        throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
    }

    // Left columns followed by right columns, so column c pairs with column c + leftCols
    final Frame sideBySide = new Frame(lf).add(rt);
    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            final BufferedString leftBuf = new BufferedString();
            final BufferedString rightBuf = new BufferedString();
            final int width = cress.length;
            assert (width << 1) == chks.length;
            for (int col = 0; col < width; col++) {
                final Chunk left = chks[col];
                final Chunk right = chks[col + width];
                final NewChunk out = cress[col];
                if (left.vec().isString()) {
                    // String column on the left: compare as strings
                    for (int row = 0; row < left._len; row++) {
                        out.addNum(str_op(left.atStr(leftBuf, row), right.atStr(rightBuf, row)));
                    }
                } else {
                    for (int row = 0; row < left._len; row++) {
                        out.addNum(op(left.atd(row), right.atd(row)));
                    }
                }
            }
        }
    }.doAll(leftCols, Vec.T_NUM, sideBySide).outputFrame(lf._names, null);

    // Cleanup categorical misuse
    return cleanCategorical(lf, result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 94 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AstBinOp method frame_op_vec.

/**
 * Applies this operator between every column of a frame (left operand) and one vec (right
 * operand), producing one numeric output column per frame column.
 *
 * @param fr  frame supplying the left-hand columns and the output column names
 * @param vec vec used as the right-hand operand for every column
 * @return the result after {@code cleanCategorical} post-processing
 */
private ValFrame frame_op_vec(Frame fr, Vec vec) {
    // Callers have already checked for same rows and a non-zero frame.
    // Work on a copy of the frame with the vec appended as the last column.
    final Frame widened = new Frame(fr);
    widened.add("", vec);
    final int outputCols = fr.numCols();

    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            final int width = cress.length;
            assert width == chks.length - 1;
            // The appended vec's chunk sits right after the frame's own columns
            final Chunk rhs = chks[width];
            for (int col = 0; col < width; col++) {
                final Chunk lhs = chks[col];
                final NewChunk out = cress[col];
                for (int row = 0; row < lhs._len; row++) {
                    out.addNum(op(lhs.atd(row), rhs.atd(row)));
                }
            }
        }
    }.doAll(outputCols, Vec.T_NUM, widened).outputFrame(fr._names, null);

    // Cleanup categorical misuse
    return cleanCategorical(fr, result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 95 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AstBinOp method scalar_op_frame.

/**
 * Applies this operator between a string scalar (left operand) and every element of a frame.
 *
 * <p>Per column: string columns are compared with {@code str_op}; categorical columns look the
 * scalar up in the column's domain and apply the numeric {@code op} to the resulting index;
 * every other column is filled with the constant {@code op(1, 2)}.
 *
 * @param str the string scalar
 * @param fr  the frame whose elements are combined with the scalar
 * @return a new numeric frame with the same column names as {@code fr}
 */
private ValFrame scalar_op_frame(final String str, Frame fr) {
    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] cress) {
            final BufferedString scratch = new BufferedString();
            for (int col = 0; col < chks.length; col++) {
                final Chunk in = chks[col];
                final NewChunk out = cress[col];
                final Vec column = in.vec();

                if (column.isString()) {
                    // String column: apply str_op on BufferedStrings, element by element
                    final BufferedString scalar = new BufferedString(str);
                    for (int row = 0; row < in._len; row++) {
                        out.addNum(str_op(scalar, in.atStr(scratch, row)));
                    }
                    continue;
                }

                if (column.isCategorical()) {
                    // Categorical column: translate the scalar into its domain index and apply
                    // op (not str_op). Arguably str_op on the level strings would also be
                    // defensible, but this only makes sense for EQ/NE and comparing doubles
                    // is much faster than comparing strings.
                    final double level = (double) ArrayUtils.find(column.domain(), str);
                    for (int row = 0; row < in._len; row++) {
                        out.addNum(op(level, in.atd(row)));
                    }
                    continue;
                }

                // Mixing string and numeric: the answer is a constant, false or true only
                final double constant = op(1, 2);
                for (int row = 0; row < in._len; row++) {
                    out.addNum(constant);
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(fr._names, null);
    return new ValFrame(result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Aggregations

Frame (water.fvec.Frame)782 Test (org.junit.Test)435 Vec (water.fvec.Vec)215 ValFrame (water.rapids.vals.ValFrame)132 NFSFileVec (water.fvec.NFSFileVec)66 Val (water.rapids.Val)65 SplitFrame (hex.SplitFrame)59 Key (water.Key)56 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)54 Chunk (water.fvec.Chunk)50 NewChunk (water.fvec.NewChunk)37 MRTask (water.MRTask)33 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)31 Ignore (org.junit.Ignore)28 Random (java.util.Random)26 File (java.io.File)25 BufferedString (water.parser.BufferedString)21 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)19 HashMap (java.util.HashMap)17 hex (hex)16