Example 6 with ValNum

Use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

From the class AstSetTimeZone, method apply.

// Sets the time zone named by the second AST argument. The name is checked against
// DateTimeZone.getAvailableIDs() and an IllegalArgumentException is thrown for unknown IDs.
// The visible return value is always a ValNum wrapping NaN.
// NOTE(review): this excerpt is truncated -- the anonymous MRTask, its setupLocal() body and
// the closing braces are missing, so what the task does on each node cannot be seen here.
public ValNum apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // Evaluate asts[1] and read the result as the requested time-zone ID string.
    final String tz = asts[1].exec(env).getStr();
    // Only IDs contained in the available-ID set are accepted.
    Set<String> idSet = DateTimeZone.getAvailableIDs();
    if (!idSet.contains(tz))
        throw new IllegalArgumentException("Unacceptable timezone " + tz + " given.  For a list of acceptable names, use listTimezone().");
    // NOTE(review): body of the MRTask is not present in this excerpt.
    new MRTask() {

        public void setupLocal() {
    // No meaningful numeric result; NaN is returned as a placeholder value.
    return new ValNum(Double.NaN);
Also used : MRTask(water.MRTask) ValNum(water.rapids.vals.ValNum)

Example 7 with ValNum

Use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

From the class AstVariance, method scalar.

// Scalar covariance for 1 row: treats row 0 of frx and row 0 of fry as two samples whose
// observations are the columns, and returns their sample covariance as a ValNum.
// Throws IllegalArgumentException when the column counts differ, or when NAs were counted
// and mode is AllObs. Returns NaN when NAs were counted and mode is Everything.
// NOTE(review): this excerpt is missing several closing braces and at least one statement
// (see below); consult the full source before relying on the exact control flow.
private ValNum scalar(Frame frx, Frame fry, Mode mode) {
    if (frx.numCols() != fry.numCols())
        throw new IllegalArgumentException("Single rows must have the same number of columns, found " + frx.numCols() + " and " + fry.numCols());
    Vec[] vecxs = frx.vecs();
    Vec[] vecys = fry.vecs();
    // All accumulators (including the counts ncols and NACount) are kept as doubles.
    double xmean = 0, ymean = 0, ncols = frx.numCols(), NACount = 0, xval, yval, ss = 0;
    // First pass: sum the x and y values of every column pair where neither value is NaN.
    for (int r = 0; r < ncols; r++) {
        xval = vecxs[r].at(0);
        yval = vecys[r].at(0);
        if (Double.isNaN(xval) || Double.isNaN(yval))
        // NOTE(review): the statement for the NaN branch is missing from this excerpt;
        // presumably it increments NACount -- confirm against the full source.
        else {
            xmean += xval;
            ymean += yval;
    // Turn the sums into means over the non-NA pairs.
    xmean /= (ncols - NACount);
    ymean /= (ncols - NACount);
    // NA policy: AllObs rejects any NA, Everything propagates NaN; other modes fall through
    // and compute over the non-NA pairs only.
    if (NACount != 0) {
        if (mode.equals(Mode.AllObs))
            throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        if (mode.equals(Mode.Everything))
            return new ValNum(Double.NaN);
    // Second pass: accumulate the products of deviations from the means for non-NA pairs.
    for (int r = 0; r < ncols; r++) {
        xval = vecxs[r].at(0);
        yval = vecys[r].at(0);
        if (!(Double.isNaN(xval) || Double.isNaN(yval)))
            ss += (vecxs[r].at(0) - xmean) * (vecys[r].at(0) - ymean);
    // Sample covariance: divide by (number of non-NA pairs - 1).
    return new ValNum(ss / (ncols - NACount - 1));
Also used : Vec(water.fvec.Vec) ValNum(water.rapids.vals.ValNum)

Example 8 with ValNum

Use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

From the class AstUniOp, method exec.

// Applies this unary operator (op) to the value in args[1], dispatching on the value's type:
//   NUM -> a ValNum holding op(number)
//   FRM -> a ValFrame with op applied to every element; columns are renamed "<str()>(...)"
//   ROW -> a ValRow with op applied to every entry and the names copied
// A frame containing a non-numeric column is rejected with IllegalArgumentException.
// NOTE(review): this excerpt is damaged -- two string concatenations are incomplete, several
// closing braces are missing, and the label for the final throw is not visible.
public Val exec(Val... args) {
    // The operand is read from index 1 of the argument array.
    Val val = args[1];
    switch(val.type()) {
        case Val.NUM:
            return new ValNum(op(val.getNum()));
        case Val.FRM:
            Frame fr = val.getFrame();
            // Every column must be numeric before the operator is applied.
            // NOTE(review): the message expression below is cut off after the last '+'.
            for (int i = 0; i < fr.numCols(); i++) if (!fr.vec(i).isNumeric())
                throw new IllegalArgumentException("Operator " + str() + "() cannot be applied to non-numeric column " +;
            // Get length of columns in fr and append `op(colName)`. For example, a column named "income" that had
            // a log transformation would now be changed to `log(income)`.
            String[] newNames = new String[fr.numCols()];
            for (int i = 0; i < newNames.length; i++) {
                // NOTE(review): the operand between the two '+' signs is missing in this excerpt;
                // per the comment above it is presumably the original column name -- confirm.
                newNames[i] = str() + "(" + + ")";
            // Run a distributed task producing one numeric output column per input column.
            return new ValFrame(new MRTask() {

                public void map(Chunk[] cs, NewChunk[] ncs) {
                    // For each column chunk, write op(value) for every row into the matching output chunk.
                    for (int col = 0; col < cs.length; col++) {
                        Chunk c = cs[col];
                        NewChunk nc = ncs[col];
                        for (int i = 0; i < c._len; i++) nc.addNum(op(c.atd(i)));
            }.doAll(fr.numCols(), Vec.T_NUM, fr).outputFrame(newNames, null));
        case Val.ROW:
            // Apply op to each entry of the row; the names array is cloned, not shared.
            double[] ds = new double[val.getRow().length];
            for (int i = 0; i < ds.length; ++i) ds[i] = op(val.getRow()[i]);
            String[] names = ((ValRow) val).getNames().clone();
            return new ValRow(ds, names);
            // NOTE(review): as shown this throw directly follows a return; presumably a
            // `default:` label was lost in extraction -- confirm against the full source.
            throw H2O.unimpl("unop unimpl: " + val.getClass());
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) ValRow(water.rapids.vals.ValRow) MRTask(water.MRTask)

Example 9 with ValNum

Use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

From the class AstCorrelation, method scalar.

// Pearson Correlation for one row, which will return a scalar value.
// Row 0 of frx and row 0 of fry are treated as two samples whose observations are the
// columns; the result is cov(x, y) / (sd(x) * sd(y)) using sample (n - 1) denominators.
// Throws IllegalArgumentException when the column counts differ, or when NAs were counted
// and mode is AllObs. Returns NaN when NAs were counted and mode is Everything.
// NOTE(review): this excerpt is missing several closing braces and at least one statement
// (see below); consult the full source before relying on the exact control flow.
private ValNum scalar(Frame frx, Frame fry, Mode mode) {
    if (frx.numCols() != fry.numCols())
        throw new IllegalArgumentException("Single rows must have the same number of columns, found " + frx.numCols() + " and " + fry.numCols());
    Vec[] vecxs = frx.vecs();
    Vec[] vecys = fry.vecs();
    // Running sums that become the means of x and y.
    double xmean = 0;
    double ymean = 0;
    // Sums of squared deviations of x and y.
    double xvar = 0;
    double yvar = 0;
    double xsd;
    double ysd;
    // Counts are kept as doubles so the divisions below are floating-point.
    double ncols = fry.numCols();
    double NACount = 0;
    double xval;
    double yval;
    // Sum of products of deviations (numerator of the covariance).
    double ss = 0;
    // First pass: sum the x and y values of every column pair where neither value is NaN.
    for (int r = 0; r < ncols; r++) {
        xval = vecxs[r].at(0);
        yval = vecys[r].at(0);
        if (Double.isNaN(xval) || Double.isNaN(yval))
        // NOTE(review): the statement for the NaN branch is missing from this excerpt;
        // presumably it increments NACount -- confirm against the full source.
        else {
            xmean += xval;
            ymean += yval;
    // Turn the sums into means over the non-NA pairs.
    xmean /= (ncols - NACount);
    ymean /= (ncols - NACount);
    // Second pass: accumulate squared deviations and cross products for non-NA pairs.
    for (int r = 0; r < ncols; r++) {
        xval = vecxs[r].at(0);
        yval = vecys[r].at(0);
        if (!(Double.isNaN(xval) || Double.isNaN(yval))) {
            //Compute variance of x and y vars
            xvar += Math.pow((vecxs[r].at(0) - xmean), 2);
            yvar += Math.pow((vecys[r].at(0) - ymean), 2);
            //Compute sum of squares of x and y
            ss += (vecxs[r].at(0) - xmean) * (vecys[r].at(0) - ymean);
    //Sample Standard Deviation
    xsd = Math.sqrt(xvar / (ncols - 1 - NACount));
    //Sample Standard Deviation
    ysd = Math.sqrt(yvar / (ncols - 1 - NACount));
    //sd(x) * sd(y)
    double denom = xsd * ysd;
    // NA policy is applied only after the statistics above have been computed:
    // AllObs rejects any NA, Everything propagates NaN, other modes use the non-NA pairs.
    if (NACount != 0) {
        if (mode.equals(Mode.AllObs))
            throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        if (mode.equals(Mode.Everything))
            return new ValNum(Double.NaN);
    //Pearson's Correlation Coefficient
    return new ValNum((ss / (ncols - NACount - 1)) / denom);
Also used : ValNum(water.rapids.vals.ValNum)

Example 10 with ValNum

Use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

From the class AstCorrelation, method array.

// Matrix correlation.  Compute correlation between all columns from each Frame
// against each other.  Return a matrix of correlations which is frx.numCols
// wide and fry.numCols tall.
// When both frames have exactly one column a ValNum scalar is returned instead of a frame.
// Two paths exist: Everything/AllObs modes work on the input vecs directly; any other mode
// first builds a copy of the combined frame with NA rows removed and works on that copy.
// NOTE(review): this excerpt has lost many closing braces and some statements (flagged
// inline); consult the full source before relying on the exact control flow.
private Val array(Frame frx, Frame fry, Mode mode) {
    Vec[] vecxs = frx.vecs();
    int ncolx = vecxs.length;
    Vec[] vecys = fry.vecs();
    int ncoly = vecys.length;
    if (mode.equals(Mode.Everything) || mode.equals(Mode.AllObs)) {
        if (mode.equals(Mode.AllObs)) {
            // NOTE(review): only the x vecs are checked for NAs in the visible code; the
            // y vecs are not checked here -- confirm whether that is intended.
            for (Vec v : vecxs) if (v.naCnt() != 0)
                throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        //Set up CoVarTask
        CoVarTask[] cvs = new CoVarTask[ncoly];
        //Get mean of x vecs
        double[] xmeans = new double[ncolx];
        for (int x = 0; x < ncolx; x++) {
            xmeans[x] = vecxs[x].mean();
        //Set up double arrays to capture sd(x), sd(y) and sd(x) * sd(y)
        double[] sigmay = new double[ncoly];
        double[] sigmax = new double[ncolx];
        double[][] denom = new double[ncoly][ncolx];
        // Launch tasks; each does all Xs vs one Y
        for (int y = 0; y < ncoly; y++) {
            //Get covariance between x and y
            // The task runs over a frame whose first column is this y vec followed by all of frx.
            cvs[y] = new CoVarTask(vecys[y].mean(), xmeans).dfork(new Frame(vecys[y]).add(frx));
            //Get sigma of y vecs
            sigmay[y] = vecys[y].sigma();
        //Get sigma of x vecs
        for (int x = 0; x < ncolx; x++) {
            sigmax[x] = vecxs[x].sigma();
        //Denominator for correlation calculation is sigma_y * sigma_x (All x sigmas vs one Y)
        for (int y = 0; y < ncoly; y++) {
            for (int x = 0; x < ncolx; x++) {
                denom[y][x] = sigmay[y] * sigmax[x];
        // 1-col returns scalar
        // The task's _covs entry is divided by (rows - 1) and then by sd(x) * sd(y).
        if (ncolx == 1 && ncoly == 1) {
            return new ValNum((cvs[0].getResult()._covs[0] / (fry.numRows() - 1)) / denom[0][0]);
        //Gather final result, which is the correlation coefficient per column
        Vec[] res = new Vec[ncoly];
        Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
        for (int y = 0; y < ncoly; y++) {
            // Element-wise: _covs / (rows - 1) / denom[y] gives one output vec per y column.
            res[y] = Vec.makeVec(ArrayUtils.div(ArrayUtils.div(cvs[y].getResult()._covs, (fry.numRows() - 1)), denom[y]), keys[y]);
        return new ValFrame(new Frame(fry._names, res));
    } else {
        //if (mode.equals(Mode.CompleteObs))
        //Omit NA rows between X and Y.
        //This will help with cov, sigma & mean calculations later as we only want to calculate cov, sigma, & mean
        //for rows with no NAs
        Frame frxy_naomit = new MRTask() {

            // Copies one row from the input chunks to the output chunks, choosing the add
            // method by chunk kind.
            // NOTE(review): the last branch reads a hasFloat() chunk with at8(row) and a
            // zero second argument; lines appear to be missing from this excerpt here
            // (presumably a floating-point read plus a final else) -- confirm.
            private void copyRow(int row, Chunk[] cs, NewChunk[] ncs) {
                for (int i = 0; i < cs.length; ++i) {
                    if (cs[i] instanceof CStrChunk)
                        ncs[i].addStr(cs[i], row);
                    else if (cs[i] instanceof C16Chunk)
                        ncs[i].addUUID(cs[i], row);
                    else if (cs[i].hasFloat())
                        ncs[i].addNum(cs[i].at8(row), 0);

            // For each row, scan the columns for an NA; the row is copied only when the
            // scan index reaches cs.length.
            // NOTE(review): the statement executed when an NA is found is missing from
            // this excerpt (presumably a break out of the column loop) -- confirm.
            public void map(Chunk[] cs, NewChunk[] ncs) {
                int col;
                for (int row = 0; row < cs[0]._len; ++row) {
                    for (col = 0; col < cs.length; ++col) if (cs[col].isNA(row))
                    if (col == cs.length)
                        copyRow(row, cs, ncs);
        // The combined frame (frx columns followed by fry columns) is constructed four
        // separate times below to supply types, input data, names and domains.
        }.doAll(new Frame(frx).add(fry).types(), new Frame(frx).add(fry)).outputFrame(new Frame(frx).add(fry).names(), new Frame(frx).add(fry).domains());
        //Collect new vecs that do not contain NA rows
        // Columns [0, ncolx) of the filtered frame are the x vecs; the remainder are the y vecs.
        Vec[] vecxs_naomit = frxy_naomit.subframe(0, ncolx).vecs();
        int ncolx_naomit = vecxs_naomit.length;
        Vec[] vecys_naomit = frxy_naomit.subframe(ncolx, frxy_naomit.vecs().length).vecs();
        int ncoly_naomit = vecys_naomit.length;
        //Set up CoVarTask
        CoVarTask[] cvs = new CoVarTask[ncoly_naomit];
        //Get mean of X vecs
        double[] xmeans = new double[ncolx_naomit];
        for (int x = 0; x < ncolx_naomit; x++) {
            xmeans[x] = vecxs_naomit[x].mean();
        //Set up double arrays to capture sd(x), sd(y) and sd(x) * sd(y)
        double[] sigmay = new double[ncoly_naomit];
        double[] sigmax = new double[ncolx_naomit];
        double[][] denom = new double[ncoly_naomit][ncolx_naomit];
        // Launch tasks; each does all Xs vs one Y
        for (int y = 0; y < ncoly_naomit; y++) {
            //Get covariance between x and y
            cvs[y] = new CoVarTask(vecys_naomit[y].mean(), xmeans).dfork(new Frame(vecys_naomit[y]).add(frxy_naomit.subframe(0, ncolx)));
            //Get sigma of y vecs
            sigmay[y] = vecys_naomit[y].sigma();
        //Get sigma of x vecs
        for (int x = 0; x < ncolx_naomit; x++) {
            sigmax[x] = vecxs_naomit[x].sigma();
        //Denominator for correlation calculation is sigma_y * sigma_x (All x sigmas vs one Y)
        for (int y = 0; y < ncoly_naomit; y++) {
            for (int x = 0; x < ncolx_naomit; x++) {
                denom[y][x] = sigmay[y] * sigmax[x];
        // 1-col returns scalar
        // Row count here is that of the NA-filtered frame, not of the original inputs.
        if (ncolx_naomit == 1 && ncoly_naomit == 1) {
            return new ValNum((cvs[0].getResult()._covs[0] / (frxy_naomit.numRows() - 1)) / denom[0][0]);
        //Gather final result, which is the correlation coefficient per column
        Vec[] res = new Vec[ncoly_naomit];
        Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly_naomit);
        for (int y = 0; y < ncoly_naomit; y++) {
            res[y] = Vec.makeVec(ArrayUtils.div(ArrayUtils.div(cvs[y].getResult()._covs, (frxy_naomit.numRows() - 1)), denom[y]), keys[y]);
        // Output columns take the names of the y columns of the filtered frame.
        return new ValFrame(new Frame(frxy_naomit.subframe(ncolx, frxy_naomit.vecs().length)._names, res));
Also used : ValFrame(water.rapids.vals.ValFrame) ValNum(water.rapids.vals.ValNum) MRTask(water.MRTask) Key(water.Key)


