Search in sources :

Example 86 with Key

use of water.Key in project h2o-3 by h2oai.

the class DataInfoTestAdapt method testInteractionTrainTestSplitAdapt.

/**
 * Splits the iris frame and its pre-expanded ("golden") counterpart with the same seed,
 * verifies the splits agree, then adapts the test split to a DataInfo built on the train
 * split and compares the resulting scoring frame against the golden test split.
 */
@Test
public void testInteractionTrainTestSplitAdapt() {
    final String[] interactions = { "class", "sepal_len" };
    final boolean useAll = false;
    // The golden frame would be standardized before splitting, whereas the frame under
    // test would be standardized after the split, so standardization is switched off.
    final boolean standardize = false;
    final boolean skipMissing = true;

    Frame fr = null;
    Frame expanded = null;
    Frame[] frSplits = null;
    Frame[] expandSplits = null;
    DataInfo dinfo = null;
    DataInfo scoreInfo = null;
    try {
        fr = parse_test_file(Key.make("a.hex"), "smalldata/iris/iris_wheader.csv");
        fr.swap(3, 4);

        // The "golden" frame: interactions expanded up front.
        expanded = GLMModel.GLMOutput.expand(fr, interactions, useAll, standardize, skipMissing);

        // Split both frames 80/20 using one shared seed.
        final long seed = new Random().nextLong();
        frSplits = ShuffleSplitFrame.shuffleSplitFrame(
                fr, new Key[] { Key.make(), Key.make() }, new double[] { 0.8, 0.2 }, seed);
        expandSplits = ShuffleSplitFrame.shuffleSplitFrame(
                expanded, new Key[] { Key.make(), Key.make() }, new double[] { 0.8, 0.2 }, seed);

        // Check 1: expand frSplits through DataInfo and compare against expandSplits.
        checkSplits(frSplits, expandSplits, interactions, useAll, standardize);

        // Check 2: adapt the test split to a DataInfo built on the train split.
        dinfo = makeInfo(frSplits[0], interactions, useAll, standardize);
        GLMModel.GLMParameters parms = new GLMModel.GLMParameters();
        parms._response_column = "petal_wid";
        Model.adaptTestForTrain(
                frSplits[1], null, null,
                dinfo._adaptedFrame.names(), dinfo._adaptedFrame.domains(),
                parms, true, false, interactions, null, null, false);
        scoreInfo = dinfo.scoringInfo(dinfo._adaptedFrame._names, frSplits[1]);
        checkFrame(scoreInfo, expandSplits[1]);
    } finally {
        cleanup(fr, expanded);
        cleanup(frSplits);
        cleanup(expandSplits);
        cleanup(dinfo, scoreInfo);
    }
}
Also used : ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) Frame(water.fvec.Frame) GLMModel(hex.glm.GLMModel) Random(java.util.Random) Key(water.Key) Test(org.junit.Test)

Example 87 with Key

use of water.Key in project h2o-3 by h2oai.

the class AstLevels method apply.

/**
 * Builds a frame with one numeric column per input column. Each output column lists the
 * level indices 0..k-1 of the corresponding input column's domain, carries that domain,
 * and is padded with NAs up to the length of the longest categorical domain in the input.
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    final Frame src = stk.track(asts[1].exec(env)).getFrame();
    final int ncols = src.numCols();
    final Futures fs = new Futures();
    final Key[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncols);
    final Vec[] levelVecs = new Vec[keys.length];

    // The output row count is the largest number of domain levels among categorical columns.
    int longest = 0;
    for (int col = 0; col < ncols; col++) {
        if (!src.vec(col).isCategorical()) {
            continue;
        }
        longest = Math.max(longest, src.vec(col).domain().length);
    }

    final int rowLayout = Vec.ESPC.rowLayout(keys[0], new long[] { 0, longest });
    for (int col = 0; col < ncols; col++) {
        AppendableVec appendable = new AppendableVec(keys[col], Vec.T_NUM);
        NewChunk chunk = new NewChunk(appendable, 0);
        String[] domain = src.vec(col).domain();
        int levels = (domain == null) ? 0 : domain.length;
        // One row per level index, then NAs for the remainder up to the longest domain.
        for (int level = 0; level < levels; level++) {
            chunk.addNum(level);
        }
        for (int pad = levels; pad < longest; pad++) {
            chunk.addNA();
        }
        chunk.close(0, fs);
        levelVecs[col] = appendable.close(rowLayout, fs);
        levelVecs[col].setDomain(domain);
    }
    fs.blockForPending();
    return new ValFrame(new Frame(levelVecs));
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Futures(water.Futures) Vec(water.fvec.Vec) AppendableVec(water.fvec.AppendableVec) Key(water.Key) NewChunk(water.fvec.NewChunk)

Example 88 with Key

use of water.Key in project h2o-3 by h2oai.

the class AstMad method mad.

/**
 * Computes the median absolute deviation of {@code f}, scaled by {@code constant}:
 * {@code constant * median(|x - median(x)|)}.
 *
 * <p>Unkeyed frames are wrapped in a temporary keyed Frame and put into the DKV before
 * being handed to {@code AstMedian.median}; those temporary mappings are removed again
 * before returning, including when an exception is thrown. A frame that arrives with
 * its own key is left registered in the DKV.
 *
 * @param f        frame to compute the MAD of
 * @param cm       combine method forwarded to {@code AstMedian.median}
 * @param constant scale factor applied to the median of the absolute deviations
 * @return {@code constant} times the median absolute deviation
 */
public static double mad(Frame f, QuantileModel.CombineMethod cm, double constant) {
    // need Frames everywhere because of QuantileModel demanding a Frame...
    final boolean wrappedInput = f._key == null;
    if (wrappedInput) {
        Key inputKey = Key.make();
        f = new Frame(inputKey, f.names(), f.vecs());
        DKV.put(inputKey, f);
    }
    Frame absDev = null;
    try {
        final double median = AstMedian.median(f, cm);
        absDev = new MRTask() {

            @Override
            public void map(Chunk c, NewChunk nc) {
                // Read as double: an integer read (at8) would truncate fractional values
                // before the deviation is taken.
                for (int i = 0; i < c._len; ++i) nc.addNum(Math.abs(c.atd(i) - median));
            }
        }.doAll(1, Vec.T_NUM, f).outputFrame();
        if (absDev._key == null) {
            Key devKey = Key.make();
            absDev = new Frame(devKey, absDev.names(), absDev.vecs());
            DKV.put(devKey, absDev);
        }
        return constant * AstMedian.median(absDev, cm);
    } finally {
        // Drop only the mappings created here and keep the caller's vecs. A frame that
        // came in with its own key belongs to the caller and must stay in the DKV.
        if (wrappedInput) {
            DKV.remove(f._key);
        }
        // NOTE(review): only the Frame mapping is removed; the absolute-deviation vecs
        // themselves are not deleted here (same as before) — confirm whether they leak.
        if (absDev != null && absDev._key != null) {
            DKV.remove(absDev._key);
        }
    }
}
Also used : Frame(water.fvec.Frame) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) Key(water.Key)

Example 89 with Key

use of water.Key in project h2o-2 by h2oai.

the class FillNAsWithMeanDemo02 method frame_001.

/**
 * Demo: parses the iris-with-NA file, computes each column's mean, appends one zero-filled
 * holder column per original column, and runs the FillNasWithMean task over the widened frame.
 */
@Test
public void frame_001() {
    final String fileName = "./cookbookData/iris_withNA.csv";
    final Key fkey = NFSFileVec.make(new File(fileName));
    final Key okey = Key.make("iris.hex");
    ParseDataset2.parse(okey, new Key[] { fkey });
    Frame f = DKV.get(okey).get();
    Log.info("frame              : " + f);

    final int len = f.numCols();
    final Vec[] vv = f.vecs();

    // Per-column means, passed as parameters to the map/reduce task.
    final double[] arrayofMeans = new double[len];
    for (int col = 0; col < len; col++) {
        arrayofMeans[col] = vv[col].mean();
    }

    // Zero-filled holder columns; the fifth one reuses the fifth source column's domain.
    final Vec[] newVecs = vv[0].makeZeros(len);
    newVecs[4]._domain = vv[4]._domain;
    final String[] newcolnames = { "1", "2", "3", "4", "5" };
    final Frame output = frame(newcolnames, newVecs);

    // Append the holder frame to the original frame.
    f.add(output, newcolnames);

    // Map/reduce call.
    new FillNasWithMean(arrayofMeans).doAll(f);
    Log.info("frame              : " + f);
    // For interactive inspection: logThisH2OInstanceWebBrowserAddress(); sleepForever();
    Frame.delete(okey);
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 90 with Key

use of water.Key in project h2o-3 by h2oai.

the class GLMBasicTestRegression method setup.

/**
 * Parses the data sets shared by the tests in this class into static fields and writes
 * each (possibly modified) frame back to the DKV under its own key.
 */
@BeforeClass
public static void setup() throws IOException {
    stall_till_cloudsize(1);

    // cancar: "Merit" and "Class" are replaced by categorical versions; the removed
    // original vecs are kept in _merit / _class.
    File canCarFile = getFile("smalldata/glm_test/cancar_logIn.csv");
    assert canCarFile.exists();
    NFSFileVec canCarNfs = NFSFileVec.make(canCarFile);
    Key canCarKey = Key.make("prostate_cat_train.hex");
    _canCarTrain = ParseDataset.parse(canCarKey, canCarNfs._key);
    _merit = _canCarTrain.remove("Merit");
    _canCarTrain.add("Merit", _merit.toCategoricalVec());
    _class = _canCarTrain.remove("Class");
    _canCarTrain.add("Class", _class.toCategoricalVec());
    DKV.put(_canCarTrain._key, _canCarTrain);

    // earinf
    File earinfFile = getFile("smalldata/glm_test/earinf.txt");
    NFSFileVec earinfNfs = NFSFileVec.make(earinfFile);
    Key earinfKey = Key.make("earinf.hex");
    _earinf = ParseDataset.parse(earinfKey, earinfNfs._key);
    DKV.put(_earinf._key, _earinf);

    // weighted
    File weightedFile = getFile("smalldata/glm_test/weighted.csv");
    NFSFileVec weightedNfs = NFSFileVec.make(weightedFile);
    Key weightedKey = Key.make("weighted.hex");
    _weighted = ParseDataset.parse(weightedKey, weightedNfs._key);
    DKV.put(_weighted._key, _weighted);

    // upsampled
    File upsampledFile = getFile("smalldata/glm_test/upsampled.csv");
    NFSFileVec upsampledNfs = NFSFileVec.make(upsampledFile);
    Key upsampledKey = Key.make("upsampled.hex");
    _upsampled = ParseDataset.parse(upsampledKey, upsampledNfs._key);
    DKV.put(_upsampled._key, _upsampled);

    _prostateTrain = parse_test_file("smalldata/glm_test/prostate_cat_train.csv");

    // airlines: swap "IsDepDelayed" for a copy of itself and delete the original vec.
    _airlines = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip");
    Vec delayed = _airlines.remove("IsDepDelayed");
    Vec delayedCopy = delayed.makeCopy(null);
    _airlines.add("IsDepDelayed", delayedCopy);
    delayed.remove();
    DKV.put(_airlines._key, _airlines);

    // airlinesMM: same treatment under an explicit key.
    _airlinesMM = parse_test_file(Key.make("AirlinesMM"), "smalldata/airlines/AirlinesTrainMM.csv.zip");
    Vec delayedMM = _airlinesMM.remove("IsDepDelayed");
    _airlinesMM.add("IsDepDelayed", delayedMM.makeCopy(null));
    delayedMM.remove();
    DKV.put(_airlinesMM._key, _airlinesMM);
}
Also used : NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key) BeforeClass(org.junit.BeforeClass)

Aggregations

Key (water.Key)94 Frame (water.fvec.Frame)56 Test (org.junit.Test)42 Vec (water.fvec.Vec)21 File (java.io.File)18 NFSFileVec (water.fvec.NFSFileVec)17 Futures (water.Futures)10 Random (java.util.Random)7 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)6 ValFrame (water.rapids.vals.ValFrame)6 DateTimeZone (org.joda.time.DateTimeZone)5 Model (hex.Model)4 SplitFrame (hex.SplitFrame)4 DeepLearning (hex.deeplearning.DeepLearning)4 DeepLearningModel (hex.deeplearning.DeepLearningModel)4 AppendableVec (water.fvec.AppendableVec)4 NewChunk (water.fvec.NewChunk)4 Grid (hex.grid.Grid)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3