Search in sources :

Example 11 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class TimeSeriesTests method testIsax.

/**
 * Runs a Rapids {@code cumsum} over the frame held in field {@code f}, feeds the
 * result to {@code isax}, and compares the first string cell of the output
 * with a fixed expected iSAX word.
 */
@Test
public void testIsax() {
    // Step 1: cumulative sum; the result frame is put into DKV so that the
    // following Rapids expression can refer to it by key.
    final String cumsumExpr = "(cumsum " + f._key + " 1)";
    fr1 = Rapids.exec(cumsumExpr).getFrame();
    DKV.put(fr1);

    // Step 2: isax arguments are 10 words, 10 max cardinality, 0 optimize card.
    final String isaxExpr = "(isax " + fr1._key + " 10 10 0)";
    fr2 = Rapids.exec(isaxExpr).getFrame();

    // Step 3: row 0 of column 0 must match the known word.
    final String expectedWord = "0^10_0^10_0^10_0^10_5^10_7^10_8^10_9^10_9^10_8^10";
    final BufferedString firstCell = fr2.vec(0).atStr(new BufferedString(), 0);
    Assert.assertEquals(expectedWord, firstCell.toString());
}
Also used : Val(water.rapids.Val) BufferedString(water.parser.BufferedString) BufferedString(water.parser.BufferedString) Test(org.junit.Test)

Example 12 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class GLMTest method testBounds.

// Leaks xval keys
//  @Test public void testXval() {
//    GLMModel model = null;
//    Frame fr = parse_test_file("smalldata/glm_test/prostate_cat_replaced.csv");
//    Frame score = null;
//    try{
//      Scope.enter();
//      // R results
////      Coefficients:
////        (Intercept)           ID          AGE       RACER2       RACER3        DPROS        DCAPS          PSA          VOL      GLEASON
////          -8.894088     0.001588    -0.009589     0.231777    -0.459937     0.556231     0.556395     0.027854    -0.011355     1.010179
//      String [] cfs1 = new String [] {"Intercept","AGE", "RACE.R2","RACE.R3", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"};
//      double [] vals = new double [] {-8.14867, -0.01368, 0.32337, -0.38028, 0.55964, 0.49548, 0.02794, -0.01104, 0.97704};
//      GLMParameters params = new GLMParameters(Family.binomial);
//      params._n_folds = 10;
//      params._response_column = "CAPSULE";
//      params._ignored_columns = new String[]{"ID"};
//      params._train = fr._key;
//      params._lambda = new double[]{0};
//      model = new GLM(params,Key.make("prostate_model")).trainModel().get();
//      HashMap<String, Double> coefs = model.coefficients();
//      for(int i = 0; i < cfs1.length; ++i)
//        assertEquals(vals[i], coefs.get(cfs1[i]),1e-4);
//      GLMValidation val = model.trainVal();
////      assertEquals(512.3, val.nullDeviance(),1e-1);
////      assertEquals(378.3, val.residualDeviance(),1e-1);
////      assertEquals(396.3, val.AIC(),1e-1);
////      score = model.score(fr);
////
////      hex.ModelMetrics mm = hex.ModelMetrics.getFromDKV(model,fr);
////
////      AUCData adata = mm._aucdata;
////      assertEquals(val.auc(),adata.AUC(),1e-2);
////      GLMValidation val2 = new GLMValidationTsk(params,model._ymu,rank(model.beta())).doAll(new Vec[]{fr.vec("CAPSULE"),score.vec("1")})._val;
////      assertEquals(val.residualDeviance(),val2.residualDeviance(),1e-6);
////      assertEquals(val.nullDeviance(),val2.nullDeviance(),1e-6);
//    } finally {
//      fr.delete();
//      if(model != null)model.delete();
//      if(score != null)score.delete();
//      Scope.exit();
//    }
//  }
/**
 * Tests GLM beta constraints (coefficient bounds) on the prostate dataset, in 2 cases:
 * 1) lasso-regularized fit (alpha = 1, lambda = 0.001607) with bounds on all seven
 *    predictors; the null and residual deviance are checked against reference values
 *    (the glmnet run this was derived from is quoted in the comments below).
 * 2) fit with no regularization (alpha = 0, lambda = 0) and a reduced set of bounds;
 *    at the end the gradient ("ginfo") is checked: every coefficient that is not
 *    sitting on one of its bounds must have a gradient close to zero.
 */
@Test
public void testBounds() {
    //    glmnet's result:
    //    res2 <- glmnet(x=M,y=D$CAPSULE,lower.limits=-.5,upper.limits=.5,family='binomial')
    //    res2$beta[,58]
    //    AGE        RACE          DPROS       PSA         VOL         GLEASON
    //    -0.00616326 -0.50000000  0.50000000  0.03628192 -0.01249324  0.50000000 //    res2$a0[100]
    //    res2$a0[58]
    //    s57
    //    -4.155864
    //    lambda = 0.001108, null dev =  512.2888, res dev = 379.7597
    GLMModel model = null;
    Key parsed = Key.make("prostate_parsed");
    Key modelKey = Key.make("prostate_model");
    Frame fr = parse_test_file(parsed, "smalldata/logreg/prostate.csv");
    Key betaConsKey = Key.make("beta_constraints");
    // NOTE(review): cfs1 and vals are only referenced by the commented-out
    // coefficient comparison further down; they are otherwise unused.
    String[] cfs1 = new String[] { "AGE", "RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON", "Intercept" };
    double[] vals = new double[] { -0.006502588, -0.500000000, 0.500000000, 0.400000000, 0.034826559, -0.011661747, 0.500000000, -4.564024 };
    //    [AGE, RACE, DPROS, DCAPS, PSA, VOL, GLEASON, Intercept]
    // Case 1 constraints: CSV text (names, lower_bounds, upper_bounds) written as a
    // byte vec and then parsed into a frame; DCAPS is bounded by +/-0.4, the rest by +/-0.5.
    FVecTest.makeByteVec(betaConsKey, "names, lower_bounds, upper_bounds\n AGE, -.5, .5\n RACE, -.5, .5\n DCAPS, -.4, .4\n DPROS, -.5, .5 \nPSA, -.5, .5\n VOL, -.5, .5\nGLEASON, -.5, .5");
    Frame betaConstraints = ParseDataset.parse(Key.make("beta_constraints.hex"), betaConsKey);
    try {
        // H2O differs on intercept and race, same residual deviance though
        GLMParameters params = new GLMParameters();
        params._standardize = true;
        params._family = Family.binomial;
        params._beta_constraints = betaConstraints._key;
        params._response_column = "CAPSULE";
        params._ignored_columns = new String[] { "ID" };
        params._train = fr._key;
        params._objective_epsilon = 0;
        params._alpha = new double[] { 1 };
        params._lambda = new double[] { 0.001607 };
        params._obj_reg = 1.0 / 380;
        GLM glm = new GLM(params, modelKey);
        model = glm.trainModel().get();
        assertTrue(glm.isStopped());
        // Exact coefficient comparison is disabled; only the deviances are checked.
        //      Map<String, Double> coefs =  model.coefficients();
        //      for (int i = 0; i < cfs1.length; ++i)
        //        assertEquals(vals[i], coefs.get(cfs1[i]), 1e-1);
        ModelMetricsBinomialGLM val = (ModelMetricsBinomialGLM) model._output._training_metrics;
        assertEquals(512.2888, val._nullDev, 1e-1);
        // 388.4952716196743 (presumably the residual deviance observed when the bound below was chosen)
        assertTrue(val._resDev <= 388.5);
        // Case 2: drop the first model, switch off regularization (lambda = 0, alpha = 0)
        // and retrain under the same model key.
        model.delete();
        params._lambda = new double[] { 0 };
        params._alpha = new double[] { 0 };
        // New constraints text without the AGE and GLEASON rows, parsed again under the
        // same "beta_constraints.hex" key name that params._beta_constraints was set from.
        FVecTest.makeByteVec(betaConsKey, "names, lower_bounds, upper_bounds\n RACE, -.5, .5\n DCAPS, -.4, .4\n DPROS, -.5, .5 \nPSA, -.5, .5\n VOL, -.5, .5");
        betaConstraints = ParseDataset.parse(Key.make("beta_constraints.hex"), betaConsKey);
        glm = new GLM(params, modelKey);
        model = glm.trainModel().get();
        assertTrue(glm.isStopped());
        double[] beta = model.beta();
        System.out.println("beta = " + Arrays.toString(beta));
        // Reshape the training frame before building the DataInfo: CAPSULE is removed and
        // re-added (presumably moving the response to the last column - confirm against
        // Frame.add), ID is removed, and the modified frame is published back to DKV.
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        fr.remove("ID").remove();
        DKV.put(fr._key, fr);
        // now check the ginfo (gradient computed at the fitted beta)
        DataInfo dinfo = new DataInfo(fr, null, 1, true, TransformType.NONE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
        GLMGradientTask lt = new GLMBinomialGradientTask(null, dinfo, params, 0, beta).doAll(dinfo._adaptedFrame);
        double[] grad = lt._gradient;
        String[] names = model.dinfo().coefNames();
        // Scratch buffer reused for every string read from the constraints frame.
        BufferedString tmpStr = new BufferedString();
        // For each coefficient: if a constraint row with the same name exists and beta lies
        // within 1e-4 of its lower or upper bound, skip it (continue outer); otherwise the
        // corresponding gradient entry must be zero within 1e-2.
        outer: for (int i = 0; i < names.length; ++i) {
            for (int j = 0; j < betaConstraints.numRows(); ++j) {
                if (betaConstraints.vec("names").atStr(tmpStr, j).toString().equals(names[i])) {
                    if (Math.abs(beta[i] - betaConstraints.vec("lower_bounds").at(j)) < 1e-4 || Math.abs(beta[i] - betaConstraints.vec("upper_bounds").at(j)) < 1e-4) {
                        continue outer;
                    }
                }
            }
            assertEquals(0, grad[i], 1e-2);
        }
    } finally {
        // Clean up the training frame, the constraints frame and, if one was built, the model.
        fr.delete();
        betaConstraints.delete();
        if (model != null)
            model.delete();
    }
}
Also used : BufferedString(water.parser.BufferedString) GLMParameters(hex.glm.GLMModel.GLMParameters) BufferedString(water.parser.BufferedString)

Example 13 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class ShuffleSplitFrameTest method testShuffleSplitWithMultipleColumns.

/**
 * This test makes sure that the rows of the split frames are preserved
 * (including UUID).
 *
 * A single string column is expanded into four columns (the string itself,
 * its parsed integer value, the in-chunk row index, and a UUID built from
 * the index and the value); the verification task handed to
 * {@code testScenario} then checks that columns 1-3 still agree with
 * columns 0 and 2 on every row.
 */
@Test
public void testShuffleSplitWithMultipleColumns() {
    final long[] chunkLayout = ar(2L, 2L, 3L);
    final String[][] data = ar(ar("1", "2"), ar(null, "3"), ar("4", "5", "6"));

    Frame f = null;
    final Frame tmpFrm = createFrame("test1.hex", chunkLayout, data);
    try {
        f = new MRTask() {

            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                final Chunk strings = cs[0];
                for (int row = 0; row < strings._len; row++) {
                    final BufferedString str = strings.atStr(new BufferedString(), row);
                    // A null string is mapped to the value 0.
                    int parsed = 0;
                    if (str != null) {
                        parsed = Integer.parseInt(str.toString());
                    }
                    ncs[0].addStr(str);
                    ncs[1].addNum(parsed);
                    ncs[2].addNum(row);
                    ncs[3].addUUID(row, parsed);
                }
            }
        }.doAll(new byte[] { Vec.T_STR, Vec.T_NUM, Vec.T_NUM, Vec.T_UUID }, tmpFrm).outputFrame();
    } finally {
        // The single-column input frame is no longer needed once expanded.
        tmpFrm.delete();
    }

    testScenario(f, flat(data), new MRTask() {

        @Override
        public void map(Chunk[] cs) {
            final Chunk strings = cs[0];
            for (int row = 0; row < strings._len; row++) {
                final BufferedString str = strings.atStr(new BufferedString(), row);
                int wantValue = 0;
                if (str != null) {
                    wantValue = Integer.parseInt(str.toString());
                }
                final int wantIndex = (int) cs[2].atd(row);
                // Numeric column and both UUID halves must match the string/index columns.
                Assert.assertEquals((double) wantValue, cs[1].atd(row), 0.00001);
                Assert.assertEquals(wantIndex, (int) cs[3].at16l(row));
                Assert.assertEquals(wantValue, (int) cs[3].at16h(row));
            }
        }
    });
}
Also used : FrameTestUtil.createFrame(water.fvec.FrameTestUtil.createFrame) Frame(water.fvec.Frame) MRTask(water.MRTask) BufferedString(water.parser.BufferedString) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) NewChunk(water.fvec.NewChunk) Test(org.junit.Test)

Example 14 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class AstRectangleAssign method assign_frame_frame.

/**
 * Rectangular array copy from src into dst.
 *
 * @param dst  frame being assigned into
 * @param cols indices of the dst columns to overwrite; one per src column
 * @param rows selector of the dst rows to overwrite; its count must equal
 *             the number of rows in src
 * @param src  frame supplying the values
 * @param ses  session used to obtain writable copies of the dst columns
 * @throws IllegalArgumentException if the column counts or row counts do not
 *         match, if a column pair differs in type, or if a categorical
 *         column pair differs in domain
 */
private void assign_frame_frame(Frame dst, int[] cols, AstNumList rows, Frame src, Session ses) {
    // Sanity check
    if (cols.length != src.numCols()) {
        throw new IllegalArgumentException("Source and destination frames must have the same count of columns");
    }
    final long nrows = rows.cnt();
    if (src.numRows() != nrows) {
        throw new IllegalArgumentException("Requires same count of rows in the number-list (" + nrows + ") as in the source (" + src.numRows() + ")");
    }

    // Whole-column assignment (every dst row selected, dense selector): put the
    // source vecs directly into dst instead of copying values.
    // (Original note: optimization happens here on the apply() exit.)
    if (dst.numRows() == nrows && rows.isDense()) {
        for (int i = 0; i < cols.length; i++) {
            dst.replace(cols[i], src.vecs()[i]);
        }
        if (dst._key != null) {
            DKV.put(dst);
        }
        return;
    }

    // Partial update; needs to preserve type, and may need to copy to support
    // copy-on-write
    Vec[] dvecs = dst.vecs();
    final Vec[] svecs = src.vecs();
    for (int c = 0; c < cols.length; c++) {
        final int dtype = dvecs[cols[c]].get_type();
        if (dtype != svecs[c].get_type()) {
            throw new IllegalArgumentException("Columns must be the same type; " + "column " + c + ", \'" + dst._names[cols[c]] + "\', is of type " + dvecs[cols[c]].get_type_str() + " and the source is " + svecs[c].get_type_str());
        }
        final boolean categorical = dtype == Vec.T_CAT;
        if (categorical && !Arrays.equals(dvecs[cols[c]].domain(), svecs[c].domain())) {
            throw new IllegalArgumentException("Cannot assign to a categorical column with a different domain; " + "source column " + src._names[c] + ", target column " + dst._names[cols[c]]);
        }
    }

    // Handle fast small case: at most one row, or at most 1000 cell updates,
    // is done serially right here.
    final boolean smallUpdate = nrows <= 1 || (cols.length * nrows) <= 1000;
    if (smallUpdate) {
        // Copy dst columns as-needed to allow update-in-place
        dvecs = ses.copyOnWrite(dst, cols);
        // Just these rows
        final long[] targetRows = rows.expand8();
        for (int c = 0; c < svecs.length; c++) {
            if (svecs[c].get_type() == Vec.T_STR) {
                // String column: read through one reusable buffer, write as String (or null).
                final BufferedString scratch = new BufferedString();
                for (int r = 0; r < targetRows.length; r++) {
                    final BufferedString cell = svecs[c].atStr(scratch, r);
                    final String text = (cell == null) ? null : cell.toString();
                    dvecs[cols[c]].set(targetRows[r], text);
                }
            } else {
                // Any other column type is transferred as a double.
                for (int r = 0; r < targetRows.length; r++) {
                    dvecs[cols[c]].set(targetRows[r], svecs[c].at(r));
                }
            }
        }
        return;
    }

    // Handle large case: go parallel for more than 1000 random updates
    final Vec[] writable = ses.copyOnWrite(dst, cols);
    // Just the selected columns get updated
    final Vec[] selected = new Vec[cols.length];
    for (int i = 0; i < cols.length; i++) {
        selected[i] = writable[cols[i]];
    }
    // Side-effect internal sort; needed for fast row lookup
    rows.sort();
    new AssignFrameFrameTask(rows, svecs).doAll(selected);
}
Also used : Vec(water.fvec.Vec) BufferedString(water.parser.BufferedString)

Example 15 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class CStrChunkTest method test_sparse.

/**
 * Builds a chunk of 100 NAs followed by "foo", one NA and "bar", compresses
 * it, and checks that the NAs and both strings can be read back at the
 * expected row indices.
 */
@Test
public void test_sparse() {
    final NewChunk builder = new NewChunk(null, 0);
    for (int added = 0; added != 100; added++) {
        builder.addNA();
    }
    builder.addStr(new BufferedString("foo"));
    builder.addNA();
    builder.addStr(new BufferedString("bar"));

    final Chunk compressed = builder.compress();

    // Only the two ends of the leading NA run (rows 0 and 99) are probed.
    final boolean leadingRunIsNA = compressed.isNA(0) && compressed.isNA(99);
    Assert.assertTrue("first 100 entries are NA", leadingRunIsNA);

    final BufferedString atRow100 = compressed.atStr(new BufferedString(), 100);
    Assert.assertTrue("Sparse string has values", atRow100.sameString("foo"));

    Assert.assertTrue("NA", compressed.isNA(101));

    final BufferedString atRow102 = compressed.atStr(new BufferedString(), 102);
    Assert.assertTrue("Sparse string has values: expected `bar`, got " + atRow102, atRow102.sameString("bar"));
}
Also used : BufferedString(water.parser.BufferedString)

Aggregations

BufferedString (water.parser.BufferedString)43 Frame (water.fvec.Frame)12 Test (org.junit.Test)9 MRTask (water.MRTask)8 Vec (water.fvec.Vec)8 Chunk (water.fvec.Chunk)7 NewChunk (water.fvec.NewChunk)6 ValFrame (water.rapids.vals.ValFrame)5 IcedLong (water.util.IcedLong)5 IOException (java.io.IOException)2 ByteBuffer (java.nio.ByteBuffer)2 Random (java.util.Random)2 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)2 TestFrameBuilder (water.fvec.TestFrameBuilder)2 BackendModel (deepwater.backends.BackendModel)1 BackendParams (deepwater.backends.BackendParams)1 RuntimeOptions (deepwater.backends.RuntimeOptions)1 ImageDataSet (deepwater.datasets.ImageDataSet)1 GenModel (hex.genmodel.GenModel)1 EasyPredictModelWrapper (hex.genmodel.easy.EasyPredictModelWrapper)1