Search in sources :

Example 11 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstCut method apply.

/**
 * Bins a single numeric column into intervals and returns, for every row, the index of the
 * interval it falls into. The result is a one-column frame whose domain holds the interval names.
 *
 * <p>Arguments are read positionally from {@code asts}:
 * <ul>
 *   <li>{@code asts[1]} - the frame to cut; must have exactly one non-categorical column</li>
 *   <li>{@code asts[2]} - the cut points (sorted here), or a single number {@code N >= 2}
 *       asking for {@code N} equal-width bins spanning the column's min..max</li>
 *   <li>{@code asts[3]} - optional bin labels; when present there must be one per bin</li>
 *   <li>{@code asts[4]} - "include lowest" flag: {@code 1} keeps values equal to the first cut</li>
 *   <li>{@code asts[5]} - "right closed" flag: {@code 1} makes intervals closed on the right</li>
 *   <li>{@code asts[6]} - number of decimal digits the cut points are rounded to (capped at 12)</li>
 * </ul>
 *
 * <p>Rows that are NaN or fall outside the outermost cut points are emitted as NaN.
 *
 * @throws IllegalArgumentException if the frame is not a single numeric column, if a single
 *         cut count below 2 is given, or if the number of labels differs from the number of bins
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    double[] cuts = check(asts[2]);
    Arrays.sort(cuts);
    String[] labels = check2(asts[3]);
    final boolean lowest = asts[4].exec(env).getNum() == 1;
    final boolean rite = asts[5].exec(env).getNum() == 1;
    // cap at 12
    final int digits = Math.min((int) asts[6].exec(env).getNum(), 12);
    if (fr.vecs().length != 1 || fr.vecs()[0].isCategorical()) {
        throw new IllegalArgumentException("First argument must be a numeric column vector");
    }
    double fmin = fr.anyVec().min();
    double fmax = fr.anyVec().max();
    // c(0,10,100) -> 2 bins (0,10] U (10, 100]
    int nbins = cuts.length - 1;
    if (nbins == 0) {
        if (cuts[0] < 2) {
            throw new IllegalArgumentException("The number of cuts must be >= 2. Got: " + cuts[0]);
        }
        // in this case, cut the vec into cuts[0] many pieces of equal length
        nbins = (int) Math.floor(cuts[0]);
        // NOTE(review): a constant column (fmax == fmin) gives width == 0 and is not rejected here.
        final double width = (fmax - fmin) / nbins;
        // nbins intervals need nbins + 1 break points; allocating only nbins would yield
        // nbins - 1 intervals (the last one double-width) and leave the final domain entry null.
        cuts = new double[nbins + 1];
        // Push the outer break points slightly past min/max so the extremes land inside a bin.
        cuts[0] = fmin - 0.001 * (fmax - fmin);
        for (int i = 1; i < nbins; ++i) {
            cuts[i] = fmin + i * width;
        }
        cuts[nbins] = fmax + 0.001 * (fmax - fmin);
    }
    if (labels != null && labels.length != nbins) {
        throw new IllegalArgumentException("`labels` vector does not match the number of cuts.");
    }
    // cutz aliases the same array as cuts, so the rounding below also applies to the
    // break points used for binning inside the MRTask.
    final double[] cutz = cuts;
    // first round cuts to `digits` decimals: example floor(2.676*100 + 0.5) / 100
    final double scale = Math.pow(10, digits);
    for (int i = 0; i < cuts.length; ++i) {
        cuts[i] = Math.floor(cuts[i] * scale + 0.5) / scale;
    }
    // Construct domain names from labels, or from the bin intervals if labels is null
    String[][] domains = new String[1][nbins];
    if (labels == null) {
        domains[0][0] = (lowest ? "[" : left(rite)) + cuts[0] + "," + cuts[1] + rite(rite);
        for (int i = 1; i < (cuts.length - 1); ++i) {
            domains[0][i] = left(rite) + cuts[i] + "," + cuts[i + 1] + rite(rite);
        }
    } else {
        domains[0] = labels;
    }
    Frame fr2 = new MRTask() {

        @Override
        public void map(Chunk c, NewChunk nc) {
            int rows = c._len;
            for (int r = 0; r < rows; ++r) {
                double x = c.atd(r);
                // NaN, or outside the outermost break points (boundary handling depends on
                // the lowest/rite flags), maps to a missing value.
                if (Double.isNaN(x)
                        || (lowest && x < cutz[0])
                        || (!lowest && (x < cutz[0] || MathUtils.equalsWithinOneSmallUlp(x, cutz[0])))
                        || (rite && x > cutz[cutz.length - 1])
                        || (!rite && (x > cutz[cutz.length - 1] || MathUtils.equalsWithinOneSmallUlp(x, cutz[cutz.length - 1])))) {
                    nc.addNum(Double.NaN);
                } else {
                    // The first break point at or above x (strictly above when not right-closed)
                    // identifies the bin; bins are zero-based.
                    for (int i = 1; i < cutz.length; ++i) {
                        if (rite) {
                            if (x <= cutz[i]) {
                                nc.addNum(i - 1);
                                break;
                            }
                        } else if (x < cutz[i]) {
                            nc.addNum(i - 1);
                            break;
                        }
                    }
                }
            }
        }
    }.doAll(1, Vec.T_NUM, fr).outputFrame(fr.names(), domains);
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask)

Example 12 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstScale method apply.

/**
 * Shifts and scales every column of the frame in place: each cell becomes
 * {@code (value - shift[col]) * factor[col]}.
 *
 * <p>{@code asts[2]} selects the per-column shift and {@code asts[3]} the per-column factor.
 * Either may be a number list with one entry per column, or a 0/1 flag. A flag of 0 means
 * "no change" (shift 0, factor 1); a flag of 1 takes the values from the frame itself via
 * {@code means()} / {@code mults()}. {@code asts[3]} may also evaluate to a frame, whose
 * first vec is converted with {@code toArray}.
 *
 * @return the same frame that was passed in, after the in-place update
 * @throws IllegalArgumentException if a number list has the wrong length or a flag is neither 0 nor 1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame frame = stk.track(asts[1].exec(env)).getFrame();
    final int columnCount = frame.numCols();

    // Resolve the per-column amount to subtract.
    final double[] shifts;
    if (asts[2] instanceof AstNumList) {
        shifts = ((AstNumList) asts[2]).expand();
        if (shifts.length != columnCount) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        final double centerFlag = asts[2].exec(env).getNum();
        if (centerFlag == 0) {
            // Leave values uncentered: a zero-filled shift array.
            shifts = new double[columnCount];
        } else if (centerFlag == 1) {
            shifts = frame.means();
        } else {
            throw new IllegalArgumentException("Only true or false allowed");
        }
    }

    // Resolve the per-column multiplier.
    final double[] factors;
    if (asts[3] instanceof AstNumList) {
        factors = ((AstNumList) asts[3]).expand();
        if (factors.length != columnCount) {
            throw new IllegalArgumentException("Numlist must be the same length as the columns of the Frame");
        }
    } else {
        final Val scaleArg = asts[3].exec(env);
        if (scaleArg instanceof ValFrame) {
            factors = toArray(scaleArg.getFrame().anyVec());
        } else {
            final double scaleFlag = scaleArg.getNum();
            if (scaleFlag == 0) {
                // Leave values unscaled: a one-filled multiplier array.
                factors = new double[columnCount];
                Arrays.fill(factors, 1.0);
            } else if (scaleFlag == 1) {
                factors = frame.mults();
            } else {
                throw new IllegalArgumentException("Only true or false allowed");
            }
        }
    }

    // Rewrite every cell of every chunk in place.
    new MRTask() {

        @Override
        public void map(Chunk[] cs) {
            for (int col = 0; col < cs.length; col++) {
                final Chunk chunk = cs[col];
                for (int row = 0; row < chunk._len; row++) {
                    chunk.set(row, (chunk.atd(row) - shifts[col]) * factors[col]);
                }
            }
        }
    }.doAll(frame);
    return new ValFrame(frame);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) AstNumList(water.rapids.ast.params.AstNumList)

Example 13 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstSetDomain method apply.

/**
 * Replaces the domain (level names) of a single categorical column with the strings given
 * in {@code asts[2]}.
 *
 * <p>When the replacement has a different number of levels than the current domain, the
 * domain is re-collected from the data, the column's values are rewritten in place to their
 * positions within that collected domain, and only then is the new domain installed.
 *
 * @return the input frame, wrapping the updated vec
 * @throws IllegalArgumentException if the frame is not a single categorical column, or the
 *         replacement's length does not match the number of collected levels
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame frame = stk.track(asts[1].exec(env)).getFrame();
    final String[] replacement = ((AstStrList) asts[2])._strs;

    final int columnCount = frame.numCols();
    if (columnCount != 1) {
        throw new IllegalArgumentException("Must be a single column. Got: " + columnCount + " columns.");
    }
    final Vec column = frame.anyVec();
    if (!column.isCategorical()) {
        throw new IllegalArgumentException("Vector must be a factor column. Got: " + column.get_type_str());
    }

    final boolean levelCountDiffers = replacement != null && replacement.length != column.domain().length;
    if (levelCountDiffers) {
        // Re-collect the domain from the data and require its size to match the replacement.
        final VecUtils.CollectDomainFast collector = new VecUtils.CollectDomainFast((int) column.max());
        collector.doAll(column);
        final long[] collected = collector.domain();
        if (collected.length != replacement.length) {
            throw new IllegalArgumentException("Number of replacement factors must equal current number of levels. Current number of levels: " + collected.length + " != " + replacement.length);
        }
        new MRTask() {

            @Override
            public void map(Chunk c) {
                for (int row = 0; row < c._len; ++row) {
                    if (c.isNA(row)) {
                        continue;
                    }
                    // Remap the stored level to its index in the collected domain.
                    // NOTE(review): binarySearch assumes `collected` is sorted ascending - confirm.
                    final long position = Arrays.binarySearch(collected, c.at8(row));
                    if (position < 0) {
                        throw new IllegalArgumentException("Could not find the categorical value!");
                    }
                    c.set(row, position);
                }
            }
        }.doAll(column);
    }

    column.setDomain(replacement);
    DKV.put(column);
    return new ValFrame(frame);
}
Also used : AstStrList(water.rapids.ast.params.AstStrList) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) ValFrame(water.rapids.vals.ValFrame) VecUtils(water.util.VecUtils) Vec(water.fvec.Vec) MRTask(water.MRTask)

Example 14 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstBinOp method frame_op_frame.

/**
 * Applies the binary operator between two frames.
 *
 * <p>If the row counts differ, a single-row frame on either side is handed to
 * {@code frame_op_row} (column counts must match); any other row mismatch is an error.
 * A frame with zero columns is returned as-is. If exactly one side has a single column,
 * it is handed to {@code vec_op_frame} / {@code frame_op_vec} to be applied against every
 * column of the other. Otherwise the column counts must match and the operator is applied
 * column by column, using {@code str_op} for string columns of the left frame and
 * {@code op} for everything else.
 *
 * @throws IllegalArgumentException if the frames' row or column counts are incompatible
 */
private ValFrame frame_op_frame(Frame lf, Frame rt) {
    final int leftCols = lf.numCols();
    final int rightCols = rt.numCols();

    if (lf.numRows() != rt.numRows()) {
        // Only a single row of data may be spread across a frame of a different height.
        final boolean hasSingleRow = lf.numRows() == 1 || rt.numRows() == 1;
        if (!hasSingleRow) {
            throw new IllegalArgumentException("Frames must have same rows, found " + lf.numRows() + " rows and " + rt.numRows() + " rows.");
        }
        if (leftCols != rightCols) {
            throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
        }
        return frame_op_row(lf, rt);
    }

    if (leftCols == 0) {
        return new ValFrame(lf);
    }
    if (rightCols == 0) {
        return new ValFrame(rt);
    }
    if (leftCols == 1 && rightCols > 1) {
        return vec_op_frame(lf.vecs()[0], rt);
    }
    if (rightCols == 1 && leftCols > 1) {
        return frame_op_vec(lf, rt.vecs()[0]);
    }
    if (leftCols != rightCols) {
        throw new IllegalArgumentException("Frames must have same columns, found " + leftCols + " columns and " + rightCols + " columns.");
    }

    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] inputs, NewChunk[] outputs) {
            BufferedString leftBuf = new BufferedString();
            BufferedString rightBuf = new BufferedString();
            // The input holds the left frame's chunks followed by the right frame's chunks.
            final int width = outputs.length;
            assert width * 2 == inputs.length;
            for (int col = 0; col < width; col++) {
                Chunk left = inputs[col];
                Chunk right = inputs[col + width];
                NewChunk out = outputs[col];
                if (left.vec().isString()) {
                    for (int row = 0; row < left._len; row++) {
                        out.addNum(str_op(left.atStr(leftBuf, row), right.atStr(rightBuf, row)));
                    }
                } else {
                    for (int row = 0; row < left._len; row++) {
                        out.addNum(op(left.atd(row), right.atd(row)));
                    }
                }
            }
        }
    }.doAll(lf.numCols(), Vec.T_NUM, new Frame(lf).add(rt)).outputFrame(lf._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(lf, result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Example 15 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class AstBinOp method frame_op_vec.

/**
 * Applies the binary operator between every column of {@code fr} (left operand) and the
 * single vec {@code vec} (right operand), producing one numeric output column per input
 * column, named after {@code fr}'s columns.
 *
 * <p>The caller has already checked that the row counts match and that the frame is non-empty.
 */
private ValFrame frame_op_vec(Frame fr, Vec vec) {
    // Work on a copy of the frame with the right-hand vec appended as the last column.
    final Frame combined = new Frame(fr);
    combined.add("", vec);
    Frame result = new MRTask() {

        @Override
        public void map(Chunk[] inputs, NewChunk[] outputs) {
            final int width = outputs.length;
            assert width == inputs.length - 1;
            // The appended vec's chunk sits right after the frame's own chunks.
            Chunk right = inputs[width];
            for (int col = 0; col < width; col++) {
                Chunk left = inputs[col];
                NewChunk out = outputs[col];
                for (int row = 0; row < left._len; row++) {
                    out.addNum(op(left.atd(row), right.atd(row)));
                }
            }
        }
    }.doAll(fr.numCols(), Vec.T_NUM, combined).outputFrame(fr._names, null);
    // Cleanup categorical misuse
    return cleanCategorical(fr, result);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk)

Aggregations

Chunk (water.fvec.Chunk): 74, Frame (water.fvec.Frame): 50, NewChunk (water.fvec.NewChunk): 36, MRTask (water.MRTask): 33, Vec (water.fvec.Vec): 30, ValFrame (water.rapids.vals.ValFrame): 26, C0DChunk (water.fvec.C0DChunk): 7, BufferedString (water.parser.BufferedString): 7, Random (java.util.Random): 6, Test (org.junit.Test): 5, MRTask2 (water.MRTask2): 4, Val (water.rapids.Val): 4, Key (water.Key): 3, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 3, AstRoot (water.rapids.ast.AstRoot): 3, AstNumList (water.rapids.ast.params.AstNumList): 3, File (java.io.File): 2, IOException (java.io.IOException): 2, ValNum (water.rapids.vals.ValNum): 2, PrettyPrint (water.util.PrettyPrint): 2