Search in sources :

Example 6 with ValNum

use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

the class AstSetTimeZone method apply.

/**
 * Validates a requested time zone id and hands it to {@code ParseTime.setTimezone}.
 *
 * <p>The id is the string value obtained by evaluating {@code asts[1]}. It must be one of the
 * ids reported by {@code DateTimeZone.getAvailableIDs()}; anything else is rejected before any
 * task is started. The setter itself is invoked from {@code MRTask.setupLocal()}, with the task
 * launched through {@code doAllNodes()}.
 *
 * @param env  environment used to evaluate the argument expression
 * @param stk  stack helper; not used by this method
 * @param asts call arguments; {@code asts[1]} must evaluate to the time zone id
 * @return a {@code ValNum} wrapping {@code Double.NaN}, i.e. no meaningful numeric result
 * @throws IllegalArgumentException if the id is not among the available time zone ids
 */
@Override
public ValNum apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final String zoneId = asts[1].exec(env).getStr();
    final boolean recognised = DateTimeZone.getAvailableIDs().contains(zoneId);
    if (!recognised) {
        final String message = "Unacceptable timezone " + zoneId + " given.  For a list of acceptable names, use listTimezone().";
        throw new IllegalArgumentException(message);
    }
    // The setter runs in setupLocal() rather than in map(), so no data frame is needed.
    new MRTask() {

        @Override
        public void setupLocal() {
            ParseTime.setTimezone(zoneId);
        }
    }.doAllNodes();
    return new ValNum(Double.NaN);
}
Also used : MRTask(water.MRTask) ValNum(water.rapids.vals.ValNum)

Example 7 with ValNum

use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

the class AstVariance method scalar.

/**
 * Scalar covariance for one row: treats row 0 of {@code frx} and row 0 of {@code fry} as two
 * samples (one observation per column) and returns their sample covariance.
 *
 * <p>A column position counts as missing when either frame holds NaN there. When at least one
 * position is missing, {@code Mode.AllObs} fails, {@code Mode.Everything} yields NaN, and any
 * other mode computes the covariance over the remaining complete pairs only.
 *
 * @param frx  frame supplying the x values (row 0 of every column is read)
 * @param fry  frame supplying the y values; must have as many columns as {@code frx}
 * @param mode policy for missing values
 * @return the covariance, i.e. the sum of centered products divided by
 *         (number of complete pairs - 1), wrapped in a {@code ValNum}
 * @throws IllegalArgumentException if the column counts differ, or if the mode is
 *         {@code Mode.AllObs} and a missing value is present
 */
private ValNum scalar(Frame frx, Frame fry, Mode mode) {
    if (frx.numCols() != fry.numCols())
        throw new IllegalArgumentException("Single rows must have the same number of columns, found " + frx.numCols() + " and " + fry.numCols());
    final Vec[] vecxs = frx.vecs();
    final Vec[] vecys = fry.vecs();
    final int ncols = frx.numCols();
    // Fetch every cell exactly once; both passes below work on these local copies instead of
    // going back to the Vecs for values that were already read.
    final double[] xs = new double[ncols];
    final double[] ys = new double[ncols];
    int naCount = 0;
    double xsum = 0;
    double ysum = 0;
    for (int c = 0; c < ncols; c++) {
        xs[c] = vecxs[c].at(0);
        ys[c] = vecys[c].at(0);
        if (Double.isNaN(xs[c]) || Double.isNaN(ys[c])) {
            naCount++;
        } else {
            xsum += xs[c];
            ysum += ys[c];
        }
    }
    if (naCount != 0) {
        if (mode.equals(Mode.AllObs))
            throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        if (mode.equals(Mode.Everything))
            return new ValNum(Double.NaN);
    }
    // Only complete pairs contribute to the means and to the degrees of freedom.
    final int complete = ncols - naCount;
    final double xmean = xsum / complete;
    final double ymean = ysum / complete;
    double ss = 0;
    for (int c = 0; c < ncols; c++) {
        if (Double.isNaN(xs[c]) || Double.isNaN(ys[c]))
            continue;
        ss += (xs[c] - xmean) * (ys[c] - ymean);
    }
    // Floating-point division: fewer than two complete pairs gives NaN/Infinity, not an exception.
    return new ValNum(ss / (complete - 1));
}
Also used : Vec(water.fvec.Vec) ValNum(water.rapids.vals.ValNum)

Example 8 with ValNum

use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

the class AstUniOp method exec.

/**
 * Applies this unary operator ({@code op}) to the second argument, element by element.
 *
 * <ul>
 *   <li>Number: returns a {@code ValNum} holding {@code op} of the number.</li>
 *   <li>Frame: every column must be numeric; returns a new frame with {@code op} applied to
 *       every cell and each column renamed to {@code str() + "(" + name + ")"}, e.g. a column
 *       "income" under a log operator becomes "log(income)".</li>
 *   <li>Row: returns a new {@code ValRow} with {@code op} applied to every element and a copy
 *       of the original names.</li>
 * </ul>
 *
 * @param args call arguments; the operand is {@code args[1]}
 * @return the transformed value, of the same kind as the operand
 * @throws IllegalArgumentException if a frame operand contains a non-numeric column
 */
@Override
public Val exec(Val... args) {
    final Val operand = args[1];
    switch(operand.type()) {
        case Val.NUM: {
            return new ValNum(op(operand.getNum()));
        }
        case Val.FRM: {
            final Frame frame = operand.getFrame();
            // Reject the whole frame up front if any column cannot be transformed.
            for (int c = 0; c < frame.numCols(); c++) {
                if (frame.vec(c).isNumeric())
                    continue;
                throw new IllegalArgumentException("Operator " + str() + "() cannot be applied to non-numeric column " + frame.name(c));
            }
            // Output columns are named after the operator wrapped around the source column name.
            final String[] wrappedNames = new String[frame.numCols()];
            for (int c = 0; c < wrappedNames.length; c++)
                wrappedNames[c] = str() + "(" + frame.name(c) + ")";
            return new ValFrame(new MRTask() {

                @Override
                public void map(Chunk[] cs, NewChunk[] ncs) {
                    // One output chunk per input chunk, filled cell by cell.
                    for (int col = 0; col < cs.length; col++) {
                        final Chunk in = cs[col];
                        final NewChunk out = ncs[col];
                        for (int row = 0; row < in._len; row++) {
                            out.addNum(op(in.atd(row)));
                        }
                    }
                }
            }.doAll(frame.numCols(), Vec.T_NUM, frame).outputFrame(wrappedNames, null));
        }
        case Val.ROW: {
            final double[] source = operand.getRow();
            final double[] mapped = new double[source.length];
            for (int i = 0; i < mapped.length; ++i)
                mapped[i] = op(source[i]);
            // Copy the names so the result does not share the array with the operand.
            final String[] copiedNames = ((ValRow) operand).getNames().clone();
            return new ValRow(mapped, copiedNames);
        }
        default:
            throw H2O.unimpl("unop unimpl: " + operand.getClass());
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) ValNum(water.rapids.vals.ValNum) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) ValFrame(water.rapids.vals.ValFrame) ValRow(water.rapids.vals.ValRow) MRTask(water.MRTask)

Example 9 with ValNum

use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

the class AstCorrelation method scalar.

/**
 * Pearson correlation for one row: treats row 0 of {@code frx} and row 0 of {@code fry} as two
 * samples (one observation per column) and returns their correlation coefficient as a scalar.
 *
 * <p>A column position counts as missing when either frame holds NaN there. When at least one
 * position is missing, {@code Mode.AllObs} fails, {@code Mode.Everything} yields NaN, and any
 * other mode computes the coefficient over the remaining complete pairs only.
 *
 * @param frx  frame supplying the x values (row 0 of every column is read)
 * @param fry  frame supplying the y values; must have as many columns as {@code frx}
 * @param mode policy for missing values
 * @return sample covariance divided by the product of the two sample standard deviations,
 *         wrapped in a {@code ValNum}
 * @throws IllegalArgumentException if the column counts differ, or if the mode is
 *         {@code Mode.AllObs} and a missing value is present
 */
private ValNum scalar(Frame frx, Frame fry, Mode mode) {
    if (frx.numCols() != fry.numCols())
        throw new IllegalArgumentException("Single rows must have the same number of columns, found " + frx.numCols() + " and " + fry.numCols());
    final Vec[] vecxs = frx.vecs();
    final Vec[] vecys = fry.vecs();
    final int ncols = fry.numCols();
    // Fetch every cell exactly once; both passes below work on these local copies instead of
    // going back to the Vecs for values that were already read.
    final double[] xs = new double[ncols];
    final double[] ys = new double[ncols];
    int naCount = 0;
    double xsum = 0;
    double ysum = 0;
    for (int c = 0; c < ncols; c++) {
        xs[c] = vecxs[c].at(0);
        ys[c] = vecys[c].at(0);
        if (Double.isNaN(xs[c]) || Double.isNaN(ys[c])) {
            naCount++;
        } else {
            xsum += xs[c];
            ysum += ys[c];
        }
    }
    // Settle the missing-value policy as soon as the NA count is known, so the failing and
    // NaN outcomes do not pay for a second pass whose result would be discarded.
    if (naCount != 0) {
        if (mode.equals(Mode.AllObs))
            throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        if (mode.equals(Mode.Everything))
            return new ValNum(Double.NaN);
    }
    // Only complete pairs contribute to the means and to the degrees of freedom.
    final int complete = ncols - naCount;
    final double xmean = xsum / complete;
    final double ymean = ysum / complete;
    double xvar = 0;
    double yvar = 0;
    double ss = 0;
    for (int c = 0; c < ncols; c++) {
        if (Double.isNaN(xs[c]) || Double.isNaN(ys[c]))
            continue;
        // Sums of squared deviations of x and of y
        xvar += Math.pow((xs[c] - xmean), 2);
        yvar += Math.pow((ys[c] - ymean), 2);
        // Sum of cross products of the deviations
        ss += (xs[c] - xmean) * (ys[c] - ymean);
    }
    final int degreesOfFreedom = complete - 1;
    // Sample standard deviations
    final double xsd = Math.sqrt(xvar / degreesOfFreedom);
    final double ysd = Math.sqrt(yvar / degreesOfFreedom);
    // sd(x) * sd(y)
    final double denom = xsd * ysd;
    // Pearson's correlation coefficient: cov(x, y) / (sd(x) * sd(y))
    return new ValNum((ss / degreesOfFreedom) / denom);
}
Also used : ValNum(water.rapids.vals.ValNum)

Example 10 with ValNum

use of water.rapids.vals.ValNum in project h2o-3 by h2oai.

the class AstCorrelation method array.

/**
 * Matrix correlation. Computes the correlation of every column of {@code frx} against every
 * column of {@code fry}.
 *
 * <p>Missing-value handling depends on {@code mode}:
 * <ul>
 *   <li>{@code Mode.AllObs}: fails if any column of either frame reports NAs.</li>
 *   <li>{@code Mode.Everything}: correlates the columns as they are.</li>
 *   <li>any other mode: first drops every row that has an NA in any X or Y column, then
 *       correlates the surviving rows.</li>
 * </ul>
 *
 * @param frx  frame holding the X columns
 * @param fry  frame holding the Y columns
 * @param mode policy for missing values
 * @return a {@code ValNum} when both sides have exactly one column, otherwise a
 *         {@code ValFrame} with one output column per Y column
 * @throws IllegalArgumentException if the mode is {@code Mode.AllObs} and NAs are present
 */
private Val array(Frame frx, Frame fry, Mode mode) {
    final Vec[] vecxs = frx.vecs();
    final int ncolx = vecxs.length;
    final Vec[] vecys = fry.vecs();
    if (mode.equals(Mode.Everything) || mode.equals(Mode.AllObs)) {
        if (mode.equals(Mode.AllObs)) {
            // 'all.obs' forbids missing observations, so Y has to be inspected as well as X.
            failOnNAs(vecxs);
            failOnNAs(vecys);
        }
        return pearsonMatrix(frx, vecxs, vecys, fry.numRows(), fry._names);
    }
    //if (mode.equals(Mode.CompleteObs))
    //Omit NA rows between X and Y.
    //This will help with cov, sigma & mean calculations later as we only want to calculate cov, sigma, & mean
    //for rows with no NAs
    // Build the combined X+Y frame once; its types, names and domains all describe the output.
    final Frame frxy = new Frame(frx).add(fry);
    final Frame frxy_naomit = new MRTask() {

        // Appends one input row to the output chunks, using the adder matching each chunk kind.
        private void copyRow(int row, Chunk[] cs, NewChunk[] ncs) {
            for (int i = 0; i < cs.length; ++i) {
                if (cs[i] instanceof CStrChunk)
                    ncs[i].addStr(cs[i], row);
                else if (cs[i] instanceof C16Chunk)
                    ncs[i].addUUID(cs[i], row);
                else if (cs[i].hasFloat())
                    ncs[i].addNum(cs[i].atd(row));
                else
                    ncs[i].addNum(cs[i].at8(row), 0);
            }
        }

        @Override
        public void map(Chunk[] cs, NewChunk[] ncs) {
            int col;
            for (int row = 0; row < cs[0]._len; ++row) {
                // Stop at the first NA; reaching cs.length means the row is complete.
                for (col = 0; col < cs.length; ++col) if (cs[col].isNA(row))
                    break;
                if (col == cs.length)
                    copyRow(row, cs, ncs);
            }
        }
    }.doAll(frxy.types(), frxy).outputFrame(frxy.names(), frxy.domains());
    //Collect new vecs that do not contain NA rows: the first ncolx columns are X, the rest are Y
    final Frame frx_naomit = frxy_naomit.subframe(0, ncolx);
    final Frame fry_naomit = frxy_naomit.subframe(ncolx, frxy_naomit.vecs().length);
    return pearsonMatrix(frx_naomit, frx_naomit.vecs(), fry_naomit.vecs(), frxy_naomit.numRows(), fry_naomit._names);
}

// Throws the 'all.obs' error if any of the given columns reports at least one NA.
private void failOnNAs(Vec[] vecs) {
    for (Vec v : vecs) if (v.naCnt() != 0)
        throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
}

// Correlates all X columns against each Y column: covariance sums from CoVarTask are divided by
// (nrows - 1) and by sigma_y * sigma_x. xFrame must hold exactly the columns in xs; names labels
// the output columns (one per Y column).
private Val pearsonMatrix(Frame xFrame, Vec[] xs, Vec[] ys, long nrows, String[] names) {
    final int ncolx = xs.length;
    final int ncoly = ys.length;
    //Set up CoVarTask
    CoVarTask[] cvs = new CoVarTask[ncoly];
    //Get mean of x vecs
    double[] xmeans = new double[ncolx];
    for (int x = 0; x < ncolx; x++) {
        xmeans[x] = xs[x].mean();
    }
    //Set up double arrays to capture sd(x), sd(y) and sd(x) * sd(y)
    double[] sigmay = new double[ncoly];
    double[] sigmax = new double[ncolx];
    double[][] denom = new double[ncoly][ncolx];
    // Launch tasks; each does all Xs vs one Y
    for (int y = 0; y < ncoly; y++) {
        //Get covariance between x and y
        cvs[y] = new CoVarTask(ys[y].mean(), xmeans).dfork(new Frame(ys[y]).add(xFrame));
        //Get sigma of y vecs
        sigmay[y] = ys[y].sigma();
    }
    //Get sigma of x vecs
    for (int x = 0; x < ncolx; x++) {
        sigmax[x] = xs[x].sigma();
    }
    //Denominator for correlation calculation is sigma_y * sigma_x (All x sigmas vs one Y)
    for (int y = 0; y < ncoly; y++) {
        for (int x = 0; x < ncolx; x++) {
            denom[y][x] = sigmay[y] * sigmax[x];
        }
    }
    // 1-col returns scalar
    if (ncolx == 1 && ncoly == 1) {
        return new ValNum((cvs[0].getResult()._covs[0] / (nrows - 1)) / denom[0][0]);
    }
    //Gather final result, which is the correlation coefficient per column
    Vec[] res = new Vec[ncoly];
    Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
    for (int y = 0; y < ncoly; y++) {
        res[y] = Vec.makeVec(ArrayUtils.div(ArrayUtils.div(cvs[y].getResult()._covs, (nrows - 1)), denom[y]), keys[y]);
    }
    return new ValFrame(new Frame(names, res));
}
Also used : ValFrame(water.rapids.vals.ValFrame) ValNum(water.rapids.vals.ValNum) ValFrame(water.rapids.vals.ValFrame) MRTask(water.MRTask) Key(water.Key)

Aggregations

ValNum (water.rapids.vals.ValNum): 20, Frame (water.fvec.Frame): 15, Vec (water.fvec.Vec): 11, ValFrame (water.rapids.vals.ValFrame): 8, MRTask (water.MRTask): 6, Key (water.Key): 4, NewChunk (water.fvec.NewChunk): 4, Val (water.rapids.Val): 4, Chunk (water.fvec.Chunk): 3, ValRow (water.rapids.vals.ValRow): 3, BufferedString (water.parser.BufferedString): 2, Model (hex.Model): 1, QuantileModel (hex.quantile.QuantileModel): 1, MutableDateTime (org.joda.time.MutableDateTime): 1, Iced (water.Iced): 1, Value (water.Value): 1, AstExec (water.rapids.ast.AstExec): 1, AstId (water.rapids.ast.params.AstId): 1, ValStr (water.rapids.vals.ValStr): 1