Search in sources :

Example 1 with Key

use of water.Key in project h2o-2 by h2oai.

the class ChunkDemo method Chunk.

/**
 * Walks the first chunk of the first column of a parsed frame, reading and overwriting
 * each element.
 *
 * <p>The method parses {@code ./cookbookData/iris_withNA.csv} into a frame stored under the
 * key {@code iris.hex}. For every row of chunk 0 it prints the value as a double, prints it
 * as a long when the double is not NaN, and then overwrites it first with a double and then
 * with a long. The frame is deleted in a {@code finally} block so a failure while walking
 * the chunk does not leave the parsed data behind.
 */
@Test
public void Chunk() {
    String fileName = "./cookbookData/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
    try {
        //accessing the first vector from the frame
        Vec vv = fr.vec(0);
        int chunk_num = vv.nChunks();
        System.out.println("Number of chunks in column 1:  " + chunk_num);
        //Reading in the first chunk. This loads the data locally.
        Chunk cc = vv.chunkForChunkIdx(0);
        for (int i = 0; i < cc._len; i++) {
            //READING A DOUBLE ELEMENT FROM A CHUNK
            // at0 takes the chunk-local index
            double d_at = cc.at0(i);
            System.out.println("double Value at chunk index " + i + ":  " + d_at);
            //READING A LONG ELEMENT FROM A CHUNK
            // Only attempted when the double read is not NaN.
            if (!Double.isNaN(d_at)) {
                long l_at = cc.at80(i);
                System.out.println("long Value at chunk index " + i + ":  " + l_at);
            }
            //UPDATING A DOUBLE ELEMENT TO A CHUNK
            double d = 1.23;
            double set_dval = cc.set0(i, d);
            System.out.println("Setting a double value at index " + i + " : " + set_dval);
            //UPDATING A LONG ELEMENT TO A CHUNK
            long l = 123L;
            long set_lval = cc.set0(i, l);
            // This message previously said "double" although the long overload is used here.
            System.out.println("Setting a long value at index " + i + " : " + set_lval);
        }
    } finally {
        //CLEANING THE KV STORE OF ALL DATA
        Frame.delete(okey);
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) Chunk(water.fvec.Chunk) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 2 with Key

use of water.Key in project h2o-2 by h2oai.

the class FillNAsWithMeanDemo01 method frame_001.

/**
 * Appends, for every column of the parsed iris frame, a companion column produced by the
 * {@code FillNasWithMean} task.
 *
 * <p>For each original column a zero-filled vector is created with {@code makeZero()}, the
 * task is run over the (input, output) pair with the column's mean as its argument, and the
 * output is added to the frame as {@code FilledNa<i>}. The frame is logged and then deleted
 * in a {@code finally} block so a failing task does not leave the parsed frame behind.
 */
@Test
public void frame_001() {
    String fileName = "./cookbookData/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
    try {
        // Captured up front: the loop adds columns, and only the original ones are processed.
        int len = fr.numCols();
        for (int i = 0; i < len; i++) {
            Vec vv = fr.vec(i);
            // creating a new vector same as original vector filled with zeros
            Vec output = vv.makeZero();
            // map reduce call; the returned task object is not needed here
            new FillNasWithMean(vv.mean()).doAll(vv, output);
            // adding the vector to the original frame
            fr.add("FilledNa" + i, output);
        }
        Log.info("frame              : " + fr);
    } finally {
        Frame.delete(okey);
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 3 with Key

use of water.Key in project h2o-2 by h2oai.

the class FillNAsWithMeanDemo03 method frame_001.

/**
 * Runs {@code FillNasWithMean} once over the whole parsed iris frame and stores the result
 * as a new frame.
 *
 * <p>The per-column means are collected into an array and handed to the task, which is run
 * with {@code doAll(numCols, frame)}. The task's output frame is created under the key
 * {@code <source key>_nas_replaced_with_mean}, put into the KV store, and logged. Both the
 * parsed frame and the output frame are deleted at the end.
 */
@Test
public void frame_001() {
    final Key rawKey = NFSFileVec.make(new File("./cookbookData/iris_withNA.csv"));
    final Key parsedKey = Key.make("iris.hex");
    ParseDataset2.parse(parsedKey, new Key[] { rawKey });

    // Fetch the parsed frame back out of the KV store by its key.
    Frame parsed = DKV.get(parsedKey).get();
    final int columnCount = parsed.numCols();
    final Vec[] columns = parsed.vecs();

    // One mean per column, in column order.
    final double[] columnMeans = new double[columnCount];
    int col = 0;
    while (col < columnCount) {
        columnMeans[col] = columns[col].mean();
        col++;
    }

    // map reduce call
    FillNasWithMean task = new FillNasWithMean(columnMeans).doAll(columnCount, parsed);

    // Build the new frame from the task's output.
    final Key resultKey = Key.make(parsed._key.toString() + "_nas_replaced_with_mean");
    final Futures pending = new Futures();
    Frame result = task.outputFrame(resultKey, parsed.names(), parsed.domains(), pending);
    pending.blockForPending();

    // Publish the new frame in the KV store and wait for the put to finish.
    DKV.put(resultKey, result, pending);
    pending.blockForPending();
    Log.info(" new output frame        : " + result);

    // Clean up both the parsed input and the derived output.
    Frame.delete(parsedKey);
    result.delete();
}
Also used : Frame(water.fvec.Frame) Futures(water.Futures) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 4 with Key

use of water.Key in project h2o-2 by h2oai.

the class KeyDemo method frame_001.

@Test
public void frame_001() {
    int initial_keycnt0 = H2O.store_size();
    Log.info("initial key count: should be One builtin jobkey and probably a log key:             " + initial_keycnt0);
    //Log.info(H2O.STORE.toString());	
    //String fileName = "/Users/nidhimehta/Desktop/data/covtype/covtrain_tit";
    String fileName = "../smalldata/iris/iris.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    //Expected: fkey holds the info before parse and is null after parse, as it passes the data to 
    // okey which is null before parse. 
    // int initial_keycnt = H2O.store_size();
    Log.info("NFSFile key added to the key store, so should be plus one:  " + H2O.store_size());
    //Log.info(H2O.STORE.toString());
    Log.info("UKV fkey before parse:" + UKV.get(fkey));
    Log.info("DKV fkey before parse:" + DKV.get(fkey));
    Log.info("UKV okey before parse:" + UKV.get(okey));
    Log.info("DKV okey before parse:" + DKV.get(okey));
    Frame fr;
    fr = ParseDataset2.parse(okey, new Key[] { fkey });
    Log.info("-------After parse of file ---------");
    Log.info("key count after frame parse: (5)vectors, (5)chunks, (1)vector group keys  + 2 keys (-1 NFSkey):  " + H2O.store_size());
    //Log.info(H2O.STORE.toString());
    Log.info("UKV fkey after parse:" + UKV.get(fkey));
    Log.info("DKV fkey after parse:" + DKV.get(fkey));
    Log.info("UKV okey after parse:" + UKV.get(okey));
    Log.info("DKV okey after parse:" + DKV.get(okey));
    Log.info("DKV okey get        :" + DKV.get(okey).get());
    H2O.KeySnapshot ks = H2O.KeySnapshot.globalSnapshot();
    long keyCount = ks.keys().length;
    Log.info("Global Keyset count :" + keyCount);
    Log.info("Sanity check:key count after a few prints should not change anything;But not so straightforward if more than 1 nodes: " + H2O.store_size());
    //Log.info(H2O.STORE.toString());
    //UKV.remove(okey); //this does not work use frame.delete cascade deletes the stuff
    //DKV.remove(okey);// this will remove just the header and that's all
    Frame.delete(okey);
    //Log.info(H2O.STORE.toString());
    try {
        Thread.sleep(3000);
    } catch (InterruptedException _) {
    }
    ;
    Log.info("After frame delete, just the job key, builtin job key, (and probably a log key and the vector group key) should remain: " + H2O.store_size());
//Log.info(H2O.STORE.toString());
}
Also used : Frame(water.fvec.Frame) H2O(water.H2O) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 5 with Key

use of water.Key in project h2o-2 by h2oai.

the class VecDemo method Vec.

/**
 * Tours the element-level and column-level operations of a {@code Vec} on the parsed iris
 * frame.
 *
 * <p>In order, the method reads the first few elements of column 0 as double and as long,
 * reports for every column whether it is an integer column and, if it has a cardinality
 * other than -1, prints its domain, overwrites the first few elements of column 0 with a
 * double, a float, a long and NA, replaces the domain of column 4, and prints the min, max,
 * mean, sigma and NA count of column 0. The frame is deleted at the end.
 */
@Test
public void Vec() {
    final Key sourceKey = NFSFileVec.make(new File("./cookbookData/iris_withNA.csv"));
    final Key frameKey = Key.make("iris.hex");
    final Frame frame = ParseDataset2.parse(frameKey, new Key[] { sourceKey });

    // First column of the frame; most of the demo works on this one.
    final Vec first = frame.vec(0);
    // Touch three rows when the column has more than four, otherwise none.
    final int sampleRows = first.length() > 4 ? 3 : 0;

    //READING AN ELEMENT FROM A VEC
    for (long row = 0; row < sampleRows; row++) {
        // read as double
        double asDouble = first.at(row);
        System.out.println("element at index " + row + " as double: " + asDouble);
        if (Double.isNaN(asDouble)) {
            // the long read is skipped for NaN values
            continue;
        }
        long asLong = first.at8(row);
        System.out.println("element at index " + row + " as integer: " + asLong);
    }

    //TESTING WHETHER A VEC OF INTEGERS IS AN ENUM (AKA CATEGORICAL) OR NOT
    for (int col = 0; col < frame.numCols(); col++) {
        Vec column = frame.vec(col);
        boolean integerColumn = column.isInt();
        System.out.println("Is " + col + " an integer column ?" + "  " + integerColumn);
        if (!integerColumn) {
            continue;
        }
        // a cardinality of -1 is treated as "not an enum"
        int levels = column.cardinality();
        if (levels == -1) {
            continue;
        }
        System.out.println("The vector " + col + " is an enum with cardinality " + levels + " and domain names: ");
        //PRINTING THE LIST OF DOMAINS OF AN ENUM VEC (AKA LEVELS OF A CATEGORICAL VEC)
        for (int level = 0; level < levels; level++) {
            System.out.println(column.domain(level));
        }
    }

    // Per the original author's note, setting values through the Vec is very slow: it goes
    // to the chunk holding the row, decompresses it, updates the value and compresses again.
    for (long row = 0; row < sampleRows; row++) {
        // set element as double
        first.set(row, 1.23);
        System.out.println("setting element at index " + row + " as double: " + first.at(row));
        // set element as float
        first.set(row, 1.23f);
        System.out.println("setting element at index " + row + " as float: " + first.at(row));
        // set element as long
        first.set(row, 12345678910L);
        System.out.println("setting element at index " + row + " as long: " + first.at(row));
        // set element as na
        first.setNA(row);
        System.out.println("setting element at index " + row + " as NAN: " + first.at(row));
    }

    //UPDATING A VEC ELEMENT WITH AN ENUM VALUE THAT HAS NEVER BEEN USED BEFORE
    final Vec enumColumn = frame.vec(4);
    final String[] replacementDomain = { "x", "y", "z" };
    enumColumn.changeDomain(replacementDomain);
    System.out.println("The changed domain names are: ");
    for (int level = 0; level < enumColumn.cardinality(); level++) {
        System.out.println(enumColumn.domain(level));
    }

    //ACCESSING VEC STATS THAT ARE COMPUTED AUTOMATICALLY (LIKE MIN, MAX)
    System.out.println("Min for vector 0: " + first.min());
    System.out.println("Max for vector 0: " + first.max());
    System.out.println("Mean for vector 0: " + first.mean());
    System.out.println("Standard deviation for vector 0: " + first.sigma());
    System.out.println("NA count for vector 0: " + first.naCnt());

    //CLEANING THE KV STORE OF ALL DATA
    Frame.delete(frameKey);
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Aggregations

Key (water.Key): 94, Frame (water.fvec.Frame): 56, Test (org.junit.Test): 42, Vec (water.fvec.Vec): 21, File (java.io.File): 18, NFSFileVec (water.fvec.NFSFileVec): 17, Futures (water.Futures): 10, Random (java.util.Random): 7, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 6, ValFrame (water.rapids.vals.ValFrame): 6, DateTimeZone (org.joda.time.DateTimeZone): 5, Model (hex.Model): 4, SplitFrame (hex.SplitFrame): 4, DeepLearning (hex.deeplearning.DeepLearning): 4, DeepLearningModel (hex.deeplearning.DeepLearningModel): 4, AppendableVec (water.fvec.AppendableVec): 4, NewChunk (water.fvec.NewChunk): 4, Grid (hex.grid.Grid): 3, IOException (java.io.IOException): 3, ArrayList (java.util.ArrayList): 3