Search in sources :

Example 66 with Key

use of water.Key in project h2o-2 by h2oai.

the class DeepLearningAutoEncoderCategoricalTest method run.

/**
 * Trains a deep-learning auto-encoder on the test file at {@code PATH} and checks that the
 * model's self-reported MSE matches the mean of the per-row reconstruction error
 * produced by {@code scoreAutoEncoder} on the same training frame.
 *
 * All K/V-store objects created by the test are deleted in a {@code finally} block so that a
 * failing assertion (or an exception while training/scoring) does not leak keys.
 */
@Test
public void run() {
    long seed = 0xDECAF;
    Key file_train = NFSFileVec.make(find_test_file(PATH));
    Frame train = ParseDataset2.parse(Key.make(), new Key[] { file_train });
    DeepLearning p = new DeepLearning();
    // Declared outside the try block so the finally block can release whatever was created.
    DeepLearningModel mymodel = null;
    Frame l2 = null;
    Frame recon_train = null;
    try {
        p.source = train;
        p.autoencoder = true;
        p.response = train.lastVec();
        p.seed = seed;
        p.hidden = new int[] { 100, 50, 20 };
        //    p.ignored_cols = new int[]{0,1,2,3,6,7,8,10}; //Optional: ignore all categoricals
        //    p.ignored_cols = new int[]{4,5,9}; //Optional: ignore all numericals
        p.adaptive_rate = true;
        p.l1 = 1e-4;
        p.activation = DeepLearning.Activation.Tanh;
        p.train_samples_per_iteration = -1;
        p.loss = DeepLearning.Loss.MeanSquare;
        p.epochs = 2;
        //    p.shuffle_training_data = true;
        p.force_load_balance = true;
        p.score_training_samples = 0;
        p.score_validation_samples = 0;
        //    p.reproducible = true;
        p.invoke();

        // Verification of results
        StringBuilder sb = new StringBuilder();
        sb.append("Verifying results.\n");
        mymodel = UKV.get(p.dest());
        sb.append("Reported mean reconstruction error: " + mymodel.mse() + "\n");

        // Training data
        // Reconstruct data using the same helper functions and verify that self-reported MSE agrees
        l2 = mymodel.scoreAutoEncoder(train);
        final Vec l2vec = l2.anyVec();
        sb.append("Actual   mean reconstruction error: " + l2vec.mean() + "\n");

        // print stats and potential outliers: the quantile is chosen so that roughly
        // five rows lie above the threshold
        double quantile = 1 - 5. / train.numRows();
        sb.append("The following training points are reconstructed with an error above the " + quantile * 100 + "-th percentile - potential \"outliers\" in testing data.\n");
        double thresh = mymodel.calcOutlierThreshold(l2vec, quantile);
        for (long i = 0; i < l2vec.length(); i++) {
            // Read each element once instead of once for the test and once for the message.
            double err = l2vec.at(i);
            if (err > thresh) {
                sb.append(String.format("row %d : l2vec error = %5f\n", i, err));
            }
        }
        Log.info(sb.toString());
        Assert.assertEquals(mymodel.mse(), l2vec.mean(), 1e-8);

        // Create reconstruction
        Log.info("Creating full reconstruction.");
        recon_train = mymodel.score(train);
    } finally {
        // cleanup - same order as the success path always used; anything that was never
        // created (because an earlier step failed) is skipped
        if (recon_train != null)
            recon_train.delete();
        train.delete();
        p.delete();
        if (mymodel != null)
            mymodel.delete();
        if (l2 != null)
            l2.delete();
    }
}
Also used : Frame(water.fvec.Frame) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec) DeepLearning(hex.deeplearning.DeepLearning) Key(water.Key) DeepLearningModel(hex.deeplearning.DeepLearningModel) Test(org.junit.Test)

Example 67 with Key

use of water.Key in project h2o-3 by h2oai.

the class MakeGLMModelHandler method computeGram.

/**
 * Computes the Gram matrix for the frame referenced by {@code input.X} (optionally using the
 * column named in {@code input.W} as row weights) and stores it in DKV as a new frame.
 *
 * The result frame is keyed as {@code <frame name without ".hex">_gram[_<weight column>]},
 * with a numeric suffix {@code _0 .. _999} appended if that key is already taken. Its columns
 * are named after the {@code DataInfo} coefficient names plus a trailing "Intercept".
 *
 * @param v     API version; not used by this method
 * @param input request schema; {@code destination_frame} is filled in with the result key
 * @return the same {@code input} instance with {@code destination_frame} populated
 * @throws IllegalArgumentException if the frame does not exist, the weight column is not found,
 *                                  or no unused destination key could be generated
 */
public GramV3 computeGram(int v, GramV3 input) {
    if (DKV.get(input.X.key()) == null)
        throw new IllegalArgumentException("Frame " + input.X.key() + " does not exist.");
    Frame fr = input.X.key().get();
    // Work on a shallow copy so removing the weight column does not modify the caller's frame.
    Frame frcpy = new Frame(fr._names.clone(), fr.vecs().clone());
    String wname = null;
    Vec weight = null;
    if (input.W != null && !input.W.column_name.isEmpty()) {
        wname = input.W.column_name;
        if (fr.find(wname) == -1)
            throw new IllegalArgumentException("Did not find weight vector " + wname);
        weight = frcpy.remove(wname);
    }
    DataInfo dinfo = new DataInfo(frcpy, null, 0, input.use_all_factor_levels,
        input.standardize ? TransformType.STANDARDIZE : TransformType.NONE, TransformType.NONE,
        input.skip_missing, false, !input.skip_missing,
        /* weight */ false,
        /* offset */ false,
        /* fold */ false,
        /* intercept */ true);
    double[][] gram;
    String[] names;
    DKV.put(dinfo);
    try {
        if (weight != null)
            dinfo.setWeights(wname, weight);
        Gram.GramTask gt = new Gram.GramTask(null, dinfo, false, true).doAll(dinfo._adaptedFrame);
        gram = gt._gram.getXX();
        names = water.util.ArrayUtils.append(dinfo.coefNames(), "Intercept");
    } finally {
        // Always take the temporary DataInfo back out of DKV, even if the task above failed.
        dinfo.remove();
    }

    // Resolve the destination key before creating any Vecs so that the
    // "unable to make unique key" failure below does not leave orphaned Vecs behind.
    String keyname = input.X.key().toString();
    if (keyname.endsWith(".hex"))
        keyname = keyname.substring(0, keyname.lastIndexOf("."));
    keyname = keyname + "_gram";
    if (weight != null)
        keyname = keyname + "_" + wname;
    Key k = Key.make(keyname);
    if (DKV.get(k) != null) {
        // Base name is taken: probe keyname_0 .. keyname_999 for the first free key.
        int cnt = 0;
        while (cnt < 1000 && DKV.get(k = Key.make(keyname + "_" + cnt)) != null) cnt++;
        if (cnt == 1000)
            throw new IllegalArgumentException("unable to make unique key");
    }

    // One Vec per gram[i] array, all allocated from a single new vector group.
    Vec[] vecs = new Vec[gram.length];
    Key[] keys = new VectorGroup().addVecs(vecs.length);
    for (int i = 0; i < vecs.length; ++i) vecs[i] = Vec.makeVec(gram[i], keys[i]);

    input.destination_frame = new KeyV3.FrameKeyV3();
    input.destination_frame.fillFromImpl(k);
    DKV.put(new Frame(k, names, vecs));
    return input;
}
Also used : DataInfo(hex.DataInfo) ValFrame(water.rapids.vals.ValFrame) KeyV3(water.api.schemas3.KeyV3) Gram(hex.gram.Gram) VectorGroup(water.fvec.Vec.VectorGroup) Key(water.Key)

Example 68 with Key

use of water.Key in project h2o-3 by h2oai.

the class FrameSplitterTest method test.

/**
 * Splits the 150-row iris data set 50/50 via {@code SplitFrame} and checks that the job
 * finishes in the stopped state and yields two frames of 75 rows each.
 *
 * The source frame and any split frames that were materialised are deleted in the
 * {@code finally} block, so a failing assertion does not leak them.
 */
@Test
public void test() {
    // Load data
    Frame f = parse_test_file(Key.make("iris.csv"), "smalldata/iris/iris.csv");
    // Filled in once the job has produced its outputs; null entries are skipped during cleanup.
    Frame[] fsplits = null;
    try {
        long numRows = f.numRows();
        Assert.assertEquals(150, numRows);
        // Perform frame split via API
        SplitFrame sf = new SplitFrame(f, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("test.hex") });
        // Invoke the job
        sf.exec().get();
        Assert.assertTrue("The job is not in STOPPED state", sf._job.isStopped());
        Key[] ksplits = sf._destination_frames;
        // Check the count first so the indexed accesses below cannot go out of bounds.
        Assert.assertEquals("Number of splits", 2, ksplits.length);
        fsplits = new Frame[ksplits.length];
        for (int i = 0; i < ksplits.length; i++) fsplits[i] = DKV.get(ksplits[i]).get();
        Assert.assertEquals("1. split 75rows", 75, fsplits[0].numRows());
        Assert.assertEquals("2. split 75rows", 75, fsplits[1].numRows());
    } finally {
        if (fsplits != null) {
            for (Frame split : fsplits) {
                if (split != null)
                    split.delete();
            }
        }
        f.delete();
    }
}
Also used : FrameTestUtil.createFrame(water.fvec.FrameTestUtil.createFrame) Frame(water.fvec.Frame) Key(water.Key) Test(org.junit.Test)

Example 69 with Key

use of water.Key in project h2o-2 by h2oai.

the class Frames method create.

/**
 * Builds a {@link Frame} in code from column headers and row-major numeric data.
 *
 * The column count is taken from the first row, so {@code rows} must contain at least one
 * row. Each column is written through a single {@code NewChunk} created with index 0.
 *
 * @param headers column names handed to the resulting frame
 * @param rows    values indexed as {@code rows[row][column]}
 * @return a frame whose vecs hold the given values column by column
 */
public static Frame create(String[] headers, double[][] rows) {
    Futures pending = new Futures();
    final int numCols = rows[0].length;
    Vec[] columns = new Vec[numCols];
    Key[] columnKeys = new Vec.VectorGroup().addVecs(numCols);
    for (int col = 0; col < numCols; col++) {
        AppendableVec appendable = new AppendableVec(columnKeys[col]);
        NewChunk values = new NewChunk(appendable, 0);
        // Copy this column's value out of every row.
        for (double[] row : rows) {
            values.addNum(row[col]);
        }
        values.close(0, pending);
        columns[col] = appendable.close(pending);
    }
    // Wait for all outstanding chunk/vec writes before handing the vecs to a frame.
    pending.blockForPending();
    return new Frame(headers, columns);
}
Also used : Futures(water.Futures) Key(water.Key)

Example 70 with Key

use of water.Key in project h2o-2 by h2oai.

the class FramDemo method Frame_1.

/**
 * Cookbook-style walkthrough of common {@code Frame} operations: parsing a CSV into a frame,
 * reading vecs and column names, adding and removing vecs, taking subframes, creating new
 * vecs through a {@code Vec.Writer}, and finally deleting the frames that were created.
 *
 * Results are printed to standard output; the method makes no assertions. The steps mutate
 * {@code fr} in sequence, so their order matters.
 */
@Test
public void Frame_1() {
    String fileName = "./cookbookData/cars_nice_header.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("cars.hex");
    Frame fr;
    fr = ParseDataset2.parse(okey, new Key[] { fkey });
    // ACCESSING A VEC FROM A FRAME
    // by index
    Vec vv = fr.vec(0);
    // prints the vec's string representation (presumably a summary - depends on Vec.toString)
    System.out.println(vv);
    // by column name
    Vec vvc = fr.vec("name");
    System.out.println(vvc);
    // all vectors
    Vec[] allVec = fr.vecs();
    System.out.println("Number of vectors: " + allVec.length);
    // PRINTING ALL COLUMN (AKA VEC) NAMES IN A FRAME
    String[] colNames = fr.names();
    for (int i = 0; i < colNames.length; i++) System.out.println("Name of vector " + i + ": " + colNames[i]);
    // ADDING AN EXISTING VEC TO A FRAME
    // check the number of columns in the frame first
    System.out.println("Number of vectors in original frame: " + fr.numCols());
    // add an existing vector: column 0 is now referenced a second time under a new name
    fr.add("Added_vector", vv);
    System.out.println("Added a vector: " + fr);
    // CREATING A NEW FRAME WITH A SUBSET OF VECS FROM AN EXISTING FRAME
    String[] colTosubset = { "name", "economy" };
    // by specifying column names
    Frame Sfr = fr.subframe(colTosubset);
    System.out.println("Subframe made by specifying colnames:" + Sfr);
    // here, the subframe is not added to the KV store
    // by specifying start and end indices (excludes end index)
    Frame Sfr2 = fr.subframe(0, 3);
    System.out.println("Subframe made by specifying indices:" + Sfr2);
    String[] colTosubsetOn = { "name", "economy", "newvec1", "newvec2" };
    // Creates two frames. The first contains the requested columns from the existing frame
    // plus new vectors, filled with the specified double value, for the names that are missing.
    Frame[] Sfr3 = fr.subframe(colTosubsetOn, 0);
    // The second contains only the columns missing from the original frame, filled with the
    // specified constant value.
    System.out.println("Subframe from original frame with new constant vecs: " + Sfr3[0]);
    System.out.println("Newframe with constant vecs: " + Sfr3[1]);
    // REMOVING A VEC FROM A FRAME
    int colToRemove = 8;
    String name = fr.names()[colToRemove];
    // by specifying index
    fr.remove(colToRemove);
    /* Warning: this command only removes the vector reference, leaving the data behind in the
     * KV store. Useful when a vector is owned by many frames and should be dropped from only
     * one of them. Works here because vector 8 is a copy of the 1st vector in the original
     * frame (presumably the "Added_vector" column added above - depends on the CSV's column
     * count; TODO confirm).
     */
    System.out.println("Frame after column " + name + " removed : " + fr);
    /* Other ways to remove columns (left disabled):
     *
     * fr.remove("weight");                          // by specifying name
     * System.out.println("Frame after  column weight is removed: "+ fr);
     *
     * int idxsToremove[] ={6,7};
     * fr.remove(idxsToremove);                      // removes multiple columns by specifying indices
     * System.out.println("Frame after "+idxsToremove.length +" columns are removed: "+ fr);
     *
     * fr.remove(0, 2);                              // by specifying start and end indices (excludes end index)
     * System.out.println("Frame after  specified range of columns are removed: "+ fr);
     */
    // REMOVING ALL FRAME REFERENCES TO A VEC AND RECLAIMING ITS MEMORY
    // fr.remove(...) drops the column from the frame and returns the Vec; UKV.remove then
    // removes that Vec's key from the store.
    UKV.remove(fr.remove("cylinders")._key);
    System.out.println("Frame after  column 'cylinders' is removed: " + fr);
    // CREATING A NEW DOUBLE VEC FROM NOTHING AND ADDING IT TO A FRAME
    /* This is efficient only when a small number of vectors needs to be generated.
     * Otherwise use mapreduce.
     */
    Vec dv = fr.anyVec().makeZero();
    Vec.Writer vw = dv.open();
    // element i gets the value i + 0.1
    for (long i = 0; i < dv.length(); ++i) vw.set(i, (double) i + 0.1);
    vw.close();
    fr.add("New_Double_Vec", dv);
    // CREATING A NEW LONG VEC FROM NOTHING AND ADDING IT TO A FRAME
    Vec lv = fr.anyVec().makeZero();
    Vec.Writer lvw = lv.open();
    // element i gets the value i
    for (long i = 0; i < lv.length(); ++i) lvw.set(i, i);
    lvw.close();
    fr.add("New_Long_Vec", lv);
    // CREATING A NEW ENUM VEC FROM NOTHING AND ADDING IT TO A FRAME
    final String[] Domain = { "a", "b", "c", "d" };
    Vec ev = fr.anyVec().makeCon(lv.length(), Domain);
    Vec.Writer evw = ev.open();
    // element i gets i % 4, cycling through the four domain indices
    for (long i = 0; i < ev.length(); ++i) evw.set(i, i % 4);
    evw.close();
    fr.add("NewEnumvec", ev);
    // Uncomment to keep the instance alive and inspect the result in the browser:
    // logThisH2OInstanceWebBrowserAddress();
    // sleepForever();
    // CLEANING THE KV STORE OF ALL DATA
    // NOTE(review): presumably deleting the parsed frame also frees the vecs added to fr
    // above (dv, lv, ev) - confirm against Frame.delete.
    Frame.delete(okey);
    Sfr3[1].delete();
    Sfr3[0].delete();
    Sfr2.delete();
    Sfr.delete();
    // DKV.remove(okey);
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Aggregations

Key (water.Key)94 Frame (water.fvec.Frame)56 Test (org.junit.Test)42 Vec (water.fvec.Vec)21 File (java.io.File)18 NFSFileVec (water.fvec.NFSFileVec)17 Futures (water.Futures)10 Random (java.util.Random)7 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)6 ValFrame (water.rapids.vals.ValFrame)6 DateTimeZone (org.joda.time.DateTimeZone)5 Model (hex.Model)4 SplitFrame (hex.SplitFrame)4 DeepLearning (hex.deeplearning.DeepLearning)4 DeepLearningModel (hex.deeplearning.DeepLearningModel)4 AppendableVec (water.fvec.AppendableVec)4 NewChunk (water.fvec.NewChunk)4 Grid (hex.grid.Grid)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3