Search in sources :

Example 1 with Frame

use of water.fvec.Frame in project h2o-2 by h2oai.

the class ChunkDemo method Chunk.

/**
 * Demonstrates element-level reads and writes on a single {@code Chunk}.
 *
 * <p>Parses {@code ./cookbookData/iris_withNA.csv} into a frame stored under the key
 * {@code iris.hex}, takes the first chunk of the first column, and for every row of that
 * chunk prints the value as a double, prints it as a long when it is not NaN, then
 * overwrites it first with a double and then with a long. The frame is always deleted
 * from the K/V store at the end, even when the body throws.
 */
@Test
public void Chunk() {
    String fileName = "./cookbookData/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
    try {
        // Accessing the first vector (column) of the frame.
        Vec vv = fr.vec(0);
        int chunk_num = vv.nChunks();
        System.out.println("Number of chunks in column 1:  " + chunk_num);
        // Reading in the first chunk. This loads the data locally.
        Chunk cc = vv.chunkForChunkIdx(0);
        for (int i = 0; i < cc._len; i++) {
            // READING A DOUBLE ELEMENT FROM A CHUNK
            // at0 is addressed with an index relative to this chunk (0 .. _len-1).
            double d_at = cc.at0(i);
            System.out.println("double Value at chunk index " + i + ":  " + d_at);
            // READING A LONG ELEMENT FROM A CHUNK
            // Only attempted for non-NaN values; the input file contains NAs.
            if (!Double.isNaN(d_at)) {
                long l_at = cc.at80(i);
                System.out.println("long Value at chunk index " + i + ":  " + l_at);
            }
            // UPDATING A DOUBLE ELEMENT TO A CHUNK
            double d = 1.23;
            double set_dval = cc.set0(i, d);
            System.out.println("Setting a double value at index " + i + " : " + set_dval);
            // UPDATING A LONG ELEMENT TO A CHUNK
            long l = 123L;
            long set_lval = cc.set0(i, l);
            // Message previously said "double" here although a long is being written.
            System.out.println("Setting a long value at index " + i + " : " + set_lval);
        }
    } finally {
        // CLEANING THE KV STORE OF ALL DATA, also when the demo body fails part-way.
        Frame.delete(okey);
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) Chunk(water.fvec.Chunk) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 2 with Frame

use of water.fvec.Frame in project h2o-2 by h2oai.

the class FillNAsWithMeanDemo01 method frame_001.

/**
 * Demonstrates filling NAs column by column using one map/reduce call per column.
 *
 * <p>Parses {@code ./cookbookData/iris_withNA.csv} into a frame stored under the key
 * {@code iris.hex}. For each original column it creates a zero-filled vector, runs
 * {@code FillNasWithMean} over the column and that output vector with the column mean,
 * and appends the output vector to the frame as {@code FilledNa<i>}. The frame is always
 * deleted from the K/V store at the end, even when the body throws.
 */
@Test
public void frame_001() {
    String fileName = "./cookbookData/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
    try {
        // Column count is captured once: the loop appends columns to the same frame,
        // so re-reading numCols() in the loop condition would never terminate.
        int len = fr.numCols();
        for (int i = 0; i < len; i++) {
            Vec vv = fr.vec(i);
            // Creating a new vector from the original one, filled with zeros.
            Vec output = vv.makeZero();
            // Map/reduce call; the returned task is not needed afterwards.
            new FillNasWithMean(vv.mean()).doAll(vv, output);
            // Adding the output vector to the original frame.
            fr.add("FilledNa" + i, output);
        }
        Log.info("frame              : " + fr);
    } finally {
        // Remove the parsed frame from the K/V store, also when the demo body fails.
        Frame.delete(okey);
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 3 with Frame

use of water.fvec.Frame in project h2o-2 by h2oai.

the class FillNAsWithMeanDemo03 method frame_001.

/**
 * Demonstrates filling NAs for all columns with a single map/reduce call that
 * produces a new output frame.
 *
 * <p>Parses {@code ./cookbookData/iris_withNA.csv} into a frame stored under the key
 * {@code iris.hex}, fetches that frame from the DKV, collects the mean of every column,
 * runs {@code FillNasWithMean} over the frame, and publishes the task's output frame
 * under {@code <frame key>_nas_replaced_with_mean}. Both the parsed frame and the output
 * frame are deleted at the end, even when the body throws.
 */
@Test
public void frame_001() {
    String fileName = "./cookbookData/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    // The returned frame is not used; the frame is re-read from the DKV below.
    ParseDataset2.parse(okey, new Key[] { fkey });
    Frame outputFrame = null;
    try {
        Frame f = DKV.get(okey).get();
        int len = f.numCols();
        Vec[] vv = f.vecs();
        double[] arrayofMeans = new double[len];
        for (int i = 0; i < len; i++) {
            arrayofMeans[i] = vv[i].mean();
        }
        // Map/reduce call over the whole frame, with len output columns requested.
        FillNasWithMean lr1 = new FillNasWithMean(arrayofMeans).doAll(len, f);
        Key fk = Key.make(f._key.toString() + "_nas_replaced_with_mean");
        Futures fs = new Futures();
        // New frame built from the task's output, reusing the input names and domains.
        outputFrame = lr1.outputFrame(fk, f.names(), f.domains(), fs);
        fs.blockForPending();
        // Puts the new frame in the K/V store.
        DKV.put(fk, outputFrame, fs);
        fs.blockForPending();
        Log.info(" new output frame        : " + outputFrame);
    } finally {
        // Clean up both frames; the nested finally keeps the second delete from being
        // skipped if the first one throws.
        try {
            Frame.delete(okey);
        } finally {
            if (outputFrame != null) {
                outputFrame.delete();
            }
        }
    }
}
Also used : Frame(water.fvec.Frame) Futures(water.Futures) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 4 with Frame

use of water.fvec.Frame in project h2o-2 by h2oai.

the class KeyDemo method frame_001.

@Test
public void frame_001() {
    int initial_keycnt0 = H2O.store_size();
    Log.info("initial key count: should be One builtin jobkey and probably a log key:             " + initial_keycnt0);
    //Log.info(H2O.STORE.toString());	
    //String fileName = "/Users/nidhimehta/Desktop/data/covtype/covtrain_tit";
    String fileName = "../smalldata/iris/iris.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    //Expected: fkey holds the info before parse and is null after parse, as it passes the data to 
    // okey which is null before parse. 
    // int initial_keycnt = H2O.store_size();
    Log.info("NFSFile key added to the key store, so should be plus one:  " + H2O.store_size());
    //Log.info(H2O.STORE.toString());
    Log.info("UKV fkey before parse:" + UKV.get(fkey));
    Log.info("DKV fkey before parse:" + DKV.get(fkey));
    Log.info("UKV okey before parse:" + UKV.get(okey));
    Log.info("DKV okey before parse:" + DKV.get(okey));
    Frame fr;
    fr = ParseDataset2.parse(okey, new Key[] { fkey });
    Log.info("-------After parse of file ---------");
    Log.info("key count after frame parse: (5)vectors, (5)chunks, (1)vector group keys  + 2 keys (-1 NFSkey):  " + H2O.store_size());
    //Log.info(H2O.STORE.toString());
    Log.info("UKV fkey after parse:" + UKV.get(fkey));
    Log.info("DKV fkey after parse:" + DKV.get(fkey));
    Log.info("UKV okey after parse:" + UKV.get(okey));
    Log.info("DKV okey after parse:" + DKV.get(okey));
    Log.info("DKV okey get        :" + DKV.get(okey).get());
    H2O.KeySnapshot ks = H2O.KeySnapshot.globalSnapshot();
    long keyCount = ks.keys().length;
    Log.info("Global Keyset count :" + keyCount);
    Log.info("Sanity check:key count after a few prints should not change anything;But not so straightforward if more than 1 nodes: " + H2O.store_size());
    //Log.info(H2O.STORE.toString());
    //UKV.remove(okey); //this does not work use frame.delete cascade deletes the stuff
    //DKV.remove(okey);// this will remove just the header and that's all
    Frame.delete(okey);
    //Log.info(H2O.STORE.toString());
    try {
        Thread.sleep(3000);
    } catch (InterruptedException _) {
    }
    ;
    Log.info("After frame delete, just the job key, builtin job key, (and probably a log key and the vector group key) should remain: " + H2O.store_size());
//Log.info(H2O.STORE.toString());
}
Also used : Frame(water.fvec.Frame) H2O(water.H2O) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Example 5 with Frame

use of water.fvec.Frame in project h2o-2 by h2oai.

the class VecDemo method Vec.

/**
 * Walks through the basic {@code Vec} operations on the parsed iris sample:
 * element reads, integer/enum inspection, element writes, domain replacement
 * and the summary statistics, then removes the frame from the K/V store.
 */
@Test
public void Vec() {
    String fileName = "./cookbookData/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });

    // First column of the frame.
    Vec vv = fr.vec(0);
    // Touch three rows when the column has more than four, otherwise none.
    int loop_indx = vv.length() > 4 ? 3 : 0;

    // --- Reading an element from a Vec ---
    for (long row = 0; row < loop_indx; row++) {
        // Read as double.
        double asDouble = vv.at(row);
        System.out.println("element at index " + row + " as double: " + asDouble);
        if (Double.isNaN(asDouble)) {
            continue;
        }
        // Read as long; only attempted when the double read was not NaN.
        long asLong = vv.at8(row);
        System.out.println("element at index " + row + " as integer: " + asLong);
    }

    // --- Testing whether a Vec of integers is an enum (categorical) ---
    for (int col = 0; col < fr.numCols(); col++) {
        Vec column = fr.vec(col);
        boolean isInteger = column.isInt();
        System.out.println("Is " + col + " an integer column ?" + "  " + isInteger);
        if (!isInteger) {
            continue;
        }
        // A cardinality of -1 is treated as "not an enum".
        int levels = column.cardinality();
        if (levels == -1) {
            continue;
        }
        System.out.println("The vector " + col + " is an enum with cardinality " + levels + " and domain names: ");
        // Print the domain (levels) of the enum Vec.
        for (int level = 0; level < levels; level++) {
            System.out.println(column.domain(level));
        }
    }

    // --- Writing elements through the Vec ---
    // Original author's note: this is a very slow way to set values, because each
    // call goes to the chunk holding the row, decompresses it, updates the value
    // and compresses it again.
    for (long row = 0; row < loop_indx; row++) {
        // Set as double.
        vv.set(row, 1.23);
        System.out.println("setting element at index " + row + " as double: " + vv.at(row));
        // Set as float.
        vv.set(row, 1.23f);
        System.out.println("setting element at index " + row + " as float: " + vv.at(row));
        // Set as long.
        vv.set(row, 12345678910L);
        System.out.println("setting element at index " + row + " as long: " + vv.at(row));
        // Set as NA.
        vv.setNA(row);
        System.out.println("setting element at index " + row + " as NAN: " + vv.at(row));
    }

    // --- Replacing the domain of the fifth column ---
    Vec vvenum = fr.vec(4);
    final String[] newDomain = { "x", "y", "z" };
    vvenum.changeDomain(newDomain);
    System.out.println("The changed domain names are: ");
    for (int level = 0; level < vvenum.cardinality(); level++) {
        System.out.println(vvenum.domain(level));
    }

    // --- Vec statistics exposed as accessors (min, max, ...) ---
    System.out.println("Min for vector 0: " + vv.min());
    System.out.println("Max for vector 0: " + vv.max());
    System.out.println("Mean for vector 0: " + vv.mean());
    System.out.println("Standard deviation for vector 0: " + vv.sigma());
    System.out.println("NA count for vector 0: " + vv.naCnt());

    // Cleaning the K/V store of all data.
    Frame.delete(okey);
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Aggregations

Frame (water.fvec.Frame)782 Test (org.junit.Test)435 Vec (water.fvec.Vec)215 ValFrame (water.rapids.vals.ValFrame)132 NFSFileVec (water.fvec.NFSFileVec)66 Val (water.rapids.Val)65 SplitFrame (hex.SplitFrame)59 Key (water.Key)56 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)54 Chunk (water.fvec.Chunk)50 NewChunk (water.fvec.NewChunk)37 MRTask (water.MRTask)33 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)31 Ignore (org.junit.Ignore)28 Random (java.util.Random)26 File (java.io.File)25 BufferedString (water.parser.BufferedString)21 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)19 HashMap (java.util.HashMap)17 hex (hex)16