Search in sources :

Example 61 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstVariance method scalar.

/**
 * Covariance of two single-row frames, where each column position supplies
 * one (x, y) observation taken from row 0 of the corresponding Vecs.
 *
 * @param frx  frame supplying the x values (row 0 of every column is read)
 * @param fry  frame supplying the y values; must have as many columns as {@code frx}
 * @param mode NA handling: {@code AllObs} rejects any NA, {@code Everything}
 *             yields NaN when any NA is present; any other mode skips pairs
 *             in which either value is NA
 * @return the sum of cross-deviations divided by (complete pairs - 1)
 * @throws IllegalArgumentException if the column counts differ, or if NAs are
 *                                  present while mode is {@code AllObs}
 */
private ValNum scalar(Frame frx, Frame fry, Mode mode) {
    final int width = frx.numCols();
    if (width != fry.numCols())
        throw new IllegalArgumentException("Single rows must have the same number of columns, found " + width + " and " + fry.numCols());

    final Vec[] xCols = frx.vecs();
    final Vec[] yCols = fry.vecs();

    // First pass: accumulate sums over complete pairs and count incomplete ones.
    int missing = 0;
    double xSum = 0;
    double ySum = 0;
    for (int c = 0; c < width; c++) {
        final double x = xCols[c].at(0);
        final double y = yCols[c].at(0);
        if (Double.isNaN(x) || Double.isNaN(y)) {
            missing++;
            continue;
        }
        xSum += x;
        ySum += y;
    }

    // NA policy is decided before any further arithmetic.
    if (missing != 0) {
        if (mode.equals(Mode.AllObs))
            throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        if (mode.equals(Mode.Everything))
            return new ValNum(Double.NaN);
    }

    final int complete = width - missing;
    final double xMean = xSum / complete;
    final double yMean = ySum / complete;

    // Second pass: sum of cross-deviations over the same complete pairs.
    double crossSum = 0;
    for (int c = 0; c < width; c++) {
        final double x = xCols[c].at(0);
        final double y = yCols[c].at(0);
        if (Double.isNaN(x) || Double.isNaN(y))
            continue;
        crossSum += (x - xMean) * (y - yMean);
    }
    return new ValNum(crossSum / (complete - 1));
}
Also used : Vec(water.fvec.Vec) ValNum(water.rapids.vals.ValNum)

Example 62 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstAppend method apply.

/**
 * Builds a new frame consisting of the columns of the frame produced by
 * {@code asts[1]} plus one extra column named by {@code asts[3]}.
 * The extra column comes from {@code asts[2]}: a number becomes a constant
 * column (via {@code makeCon} on one of the destination's Vecs), a
 * single-column frame contributes its Vec directly.
 *
 * @throws IllegalArgumentException if the source is a frame with more than one
 *                                  column, or is neither a frame nor a number
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame target = stk.track(asts[1].exec(env)).getFrame();
    final Val source = stk.track(asts[2].exec(env));
    final String newColName = asts[3].exec(env).getStr();
    final Vec template = target.anyVec();

    final Vec appended;
    final int kind = source.type();
    if (kind == Val.NUM) {
        // Scalar source: materialize a constant column shaped like the template.
        appended = template.makeCon(source.getNum());
    } else if (kind == Val.STR) {
        throw H2O.unimpl();
    } else if (kind == Val.FRM) {
        final Frame sourceFrame = source.getFrame();
        if (sourceFrame.numCols() != 1)
            throw new IllegalArgumentException("Can only append one column");
        appended = sourceFrame.anyVec();
    } else {
        throw new IllegalArgumentException("Source must be a Frame or Number, but found a " + source.getClass());
    }

    // Work on cloned name/Vec arrays so the add() below does not touch the input frame's arrays.
    final Frame result = new Frame(target._names.clone(), target.vecs().clone());
    result.add(newColName, appended);
    return new ValFrame(result);
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec)

Example 63 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstRectangleAssign method assign_frame_scalar.

/**
 * Boolean assignment with a scalar: writes {@code src} into the selected
 * columns of {@code dst}, using the first column of {@code rows} as the mask.
 *
 * @param dst  frame being assigned into
 * @param cols indices of the {@code dst} columns to update
 * @param rows frame whose first column is the mask; must have as many rows as {@code dst}
 * @param src  scalar value to assign
 * @param ses  session providing {@code copyOnWrite} for the partial-update path
 * @throws IllegalArgumentException if the row counts differ, or if {@code src}
 *                                  is not compatible with a target column
 */
private void assign_frame_scalar(Frame dst, int[] cols, Frame rows, Object src, Session ses) {
    Vec bool = rows.vec(0);
    if (dst.numRows() != rows.numRows()) {
        throw new IllegalArgumentException("Frame " + dst._key + " has different number of rows than frame " + rows._key + " (" + dst.numRows() + " vs " + rows.numRows() + ").");
    }
    // Fast path: the mask is a constant whose min truncates to 1 and the scalar is numeric,
    // so each selected column is replaced wholesale by one constant Vec.
    // Note: this skips "scalar to Vec" compatibility check because the whole Vec is overwritten
    if (bool.isConst() && ((int) bool.min() == 1) && (src instanceof Number)) {
        Vec anyVec = dst.anyVec();
        assert anyVec != null;
        // Convert through Number: a direct (double) cast on an Object is a cast to
        // Double plus unboxing, and throws ClassCastException for Integer, Long, etc.
        Vec vsrc = anyVec.makeCon(((Number) src).doubleValue());
        for (int col : cols) dst.replace(col, vsrc);
        if (dst._key != null)
            DKV.put(dst);
        return;
    }
    // Make sure the scalar value is compatible with the target vector
    for (int col : cols) {
        if (!isScalarCompatible(src, dst.vec(col))) {
            throw new IllegalArgumentException("Cannot assign value " + src + " into a vector of type " + dst.vec(col).get_type_str() + ".");
        }
    }
    Vec[] vecs = ses.copyOnWrite(dst, cols);
    // Just the selected columns get updated
    Vec[] vecs2 = new Vec[cols.length];
    for (int i = 0; i < cols.length; i++) vecs2[i] = vecs[cols[i]];
    ConditionalAssignTask.doAssign(vecs2, src, bool);
}
Also used : Vec(water.fvec.Vec)

Example 64 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstRectangleAssign method assign_frame_frame.

/**
 * Rectangular array copy from src into dst: column i of {@code src} is written
 * into column {@code cols[i]} of {@code dst}, at the rows listed in {@code rows}.
 *
 * Three paths are taken, in order:
 *  - whole-column replacement when {@code rows} is dense and covers every row of dst;
 *  - a serial element-by-element update for small assignments;
 *  - an {@code AssignFrameFrameTask} over the selected columns otherwise.
 *
 * @param dst  frame being assigned into
 * @param cols indices of the {@code dst} columns to update; length must equal {@code src.numCols()}
 * @param rows list of destination row numbers; its count must equal {@code src.numRows()}
 * @param src  frame providing the values
 * @param ses  session whose {@code copyOnWrite} is called before any in-place update
 * @throws IllegalArgumentException on a column/row count mismatch, a column type
 *                                  mismatch, or differing categorical domains
 */
private void assign_frame_frame(Frame dst, int[] cols, AstNumList rows, Frame src, Session ses) {
    // Sanity check
    if (cols.length != src.numCols())
        throw new IllegalArgumentException("Source and destination frames must have the same count of columns");
    long nrows = rows.cnt();
    if (src.numRows() != nrows)
        throw new IllegalArgumentException("Requires same count of rows in the number-list (" + nrows + ") as in the source (" + src.numRows() + ")");
    // Whole-column assignment: every row of dst is selected, so the source Vecs are
    // installed directly in place of the destination columns; no per-column type check is done here.
    if (dst.numRows() == nrows && rows.isDense()) {
        for (int i = 0; i < cols.length; i++) dst.replace(cols[i], src.vecs()[i]);
        // Only frames that carry a key are written back to the DKV.
        if (dst._key != null)
            DKV.put(dst);
        return;
    }
    // Partial update; needs to preserve type, and may need to copy to support
    // copy-on-write
    Vec[] dvecs = dst.vecs();
    final Vec[] svecs = src.vecs();
    // Validate every column pair before anything is modified.
    for (int col = 0; col < cols.length; col++) {
        int dtype = dvecs[cols[col]].get_type();
        if (dtype != svecs[col].get_type())
            throw new IllegalArgumentException("Columns must be the same type; " + "column " + col + ", \'" + dst._names[cols[col]] + "\', is of type " + dvecs[cols[col]].get_type_str() + " and the source is " + svecs[col].get_type_str());
        // Categorical columns additionally require identical domains.
        if ((dtype == Vec.T_CAT) && (!Arrays.equals(dvecs[cols[col]].domain(), svecs[col].domain())))
            throw new IllegalArgumentException("Cannot assign to a categorical column with a different domain; " + "source column " + src._names[col] + ", target column " + dst._names[cols[col]]);
    }
    // Handle fast small case: a single row, or at most 1000 cells in total, is updated serially.
    if (nrows <= 1 || (cols.length * nrows) <= 1000) {
        // Copy dst columns as-needed to allow update-in-place; dvecs is re-pointed
        // at the array returned by copyOnWrite.
        dvecs = ses.copyOnWrite(dst, cols);
        // Just these rows: the explicit destination row numbers.
        long[] rownums = rows.expand8();
        // Source row ridx is written to destination row rownums[ridx].
        // String columns are read through a reusable BufferedString and stored as String (or null).
        for (int col = 0; col < svecs.length; col++) if (svecs[col].get_type() == Vec.T_STR) {
            BufferedString bStr = new BufferedString();
            for (int ridx = 0; ridx < rownums.length; ridx++) {
                BufferedString s = svecs[col].atStr(bStr, ridx);
                dvecs[cols[col]].set(rownums[ridx], s != null ? s.toString() : null);
            }
        } else {
            // All other column types are copied as doubles.
            for (int ridx = 0; ridx < rownums.length; ridx++) dvecs[cols[col]].set(rownums[ridx], svecs[col].at(ridx));
        }
        return;
    }
    // Handle large case (more than 1000 cells): delegate to AssignFrameFrameTask.
    Vec[] vecs = ses.copyOnWrite(dst, cols);
    // Just the selected columns get updated
    Vec[] vecs2 = new Vec[cols.length];
    for (int i = 0; i < cols.length; i++) vecs2[i] = vecs[cols[i]];
    // Side-effect internal sort; needed for fast row lookup
    rows.sort();
    new AssignFrameFrameTask(rows, svecs).doAll(vecs2);
}
Also used : Vec(water.fvec.Vec) BufferedString(water.parser.BufferedString)

Example 65 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstHist method apply.

/**
 * Computes a histogram of a single numeric, non-constant column and returns it
 * as a 4-column frame: "breaks", "counts", "mids_true", "mids".
 *
 * {@code asts[1]} evaluates to the frame holding the column. {@code asts[2]}
 * selects the breaks: a string names a binning rule, a number list gives explicit
 * break points, and a single number gives the number of breaks.
 *
 * @throws IllegalArgumentException if the frame has more than one column, or the
 *                                  column is not numeric, or the column is constant
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // stack is [ ..., ary, breaks]
    // handle the breaks
    Frame fr2;
    Frame f = stk.track(asts[1].exec(env)).getFrame();
    if (f.numCols() != 1)
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    Vec vec = f.anyVec();
    if (!vec.isNumeric())
        throw new IllegalArgumentException("Hist only applies to single numeric columns.");
    //TODO Add case when vec is a constant numeric
    if (vec.isConst())
        throw new IllegalArgumentException("Hist does not apply to constant numeric columns.");
    // Decode the breaks argument; exactly one of algo / breaks / numBreaks is set
    // when the argument is one of the three recognized AST types.
    // NOTE(review): for any other AST type numBreaks stays -1 and the final else
    // branch below runs with it -- confirm callers never pass such an argument.
    AstRoot a = asts[2];
    String algo = null;
    int numBreaks = -1;
    double[] breaks = null;
    if (a instanceof AstStr)
        algo = a.str().toLowerCase();
    else if (a instanceof AstNumList)
        breaks = ((AstNumList) a).expand();
    else if (a instanceof AstNum)
        numBreaks = (int) a.exec(env).getNum();
    AstHist.HistTask t;
    // h is the bin width; x0 and x1 are the column minimum and maximum.
    double h;
    double x1 = vec.max();
    double x0 = vec.min();
    if (breaks != null)
        // Explicit break points: -1 is passed for both the width and the origin.
        t = new AstHist.HistTask(breaks, -1, -1).doAll(vec);
    else if (algo != null) {
        // Named rule: each case yields a break count and a bin width.
        switch(algo) {
            case "sturges":
                numBreaks = sturges(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "rice":
                numBreaks = rice(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "sqrt":
                numBreaks = sqrt(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "doane":
                numBreaks = doane(vec);
                h = (x1 - x0) / numBreaks;
                break;
            case "scott":
                // special bin width computation: width first, break count derived from it
                h = scotts_h(vec);
                numBreaks = scott(vec, h);
                break;
            case "fd":
                // special bin width computation: width first, break count derived from it
                h = fds_h(vec);
                numBreaks = fd(vec, h);
                break;
            default:
                numBreaks = sturges(vec);
                // just do sturges even if junk passed in
                h = (x1 - x0) / numBreaks;
        }
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    } else {
        // Break count given directly: equal-width bins across [x0, x1].
        h = (x1 - x0) / numBreaks;
        t = new AstHist.HistTask(computeCuts(vec, numBreaks), h, x0).doAll(vec);
    }
    // wanna make a new frame here [breaks,counts,mids]
    final double[] brks = t._breaks;
    final long[] cnts = t._counts;
    // mids_true is taken from the task; mids is the midpoint of each pair of adjacent breaks.
    final double[] mids_true = t._mids;
    final double[] mids = new double[t._breaks.length - 1];
    for (int i = 1; i < brks.length; ++i) mids[i - 1] = .5 * (t._breaks[i - 1] + t._breaks[i]);
    // Temporary Vec with one row per break; it only provides the row layout for the
    // task below and is removed afterwards.
    Vec layoutVec = Vec.makeZero(brks.length);
    fr2 = new MRTask() {

        @Override
        public void map(Chunk[] c, NewChunk[] nc) {
            // Offset of this chunk's first row, used to index the captured arrays.
            int start = (int) c[0].start();
            for (int i = 0; i < c[0]._len; ++i) {
                nc[0].addNum(brks[i + start]);
                // The first row of each chunk has NA in the other three columns;
                // later rows read index (i - 1) + start of counts/mids_true/mids.
                if (i == 0) {
                    nc[1].addNA();
                    nc[2].addNA();
                    nc[3].addNA();
                } else {
                    nc[1].addNum(cnts[(i - 1) + start]);
                    nc[2].addNum(mids_true[(i - 1) + start]);
                    nc[3].addNum(mids[(i - 1) + start]);
                }
            }
        }
    }.doAll(4, Vec.T_NUM, new Frame(layoutVec)).outputFrame(null, new String[] { "breaks", "counts", "mids_true", "mids" }, null);
    layoutVec.remove();
    return new ValFrame(fr2);
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) AstNum(water.rapids.ast.params.AstNum) Vec(water.fvec.Vec) AstStr(water.rapids.ast.params.AstStr) MRTask(water.MRTask) AstRoot(water.rapids.ast.AstRoot) AstNumList(water.rapids.ast.params.AstNumList)

Aggregations

Vec (water.fvec.Vec)280 Frame (water.fvec.Frame)213 Test (org.junit.Test)82 NFSFileVec (water.fvec.NFSFileVec)48 ValFrame (water.rapids.vals.ValFrame)47 Chunk (water.fvec.Chunk)30 Random (java.util.Random)25 NewChunk (water.fvec.NewChunk)23 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)22 Key (water.Key)21 MRTask (water.MRTask)17 Val (water.rapids.Val)14 File (java.io.File)11 ArrayList (java.util.ArrayList)11 Futures (water.Futures)11 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)11 ValNum (water.rapids.vals.ValNum)11 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)10 BufferedString (water.parser.BufferedString)10 AppendableVec (water.fvec.AppendableVec)9