Use of water.fvec.Frame in project h2o-2 by h2oai.
Class ChunkDemo, method Chunk.
/**
 * Demonstrates element-level access to a {@code Chunk}: parses
 * {@code ./cookbookData/iris_withNA.csv} into a frame, takes the first chunk of the first
 * column, and for every row in that chunk reads the value as a double and (when it is not
 * NaN) as a long, then overwrites it first with a double and then with a long.
 *
 * <p>The parsed frame is removed from the K/V store when the demo ends, including when it
 * fails part-way through.
 */
@Test
public void Chunk() {
  String fileName = "./cookbookData/iris_withNA.csv";
  File file = new File(fileName);
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("iris.hex");
  Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
  try {
    // Accessing the first vector from the frame.
    Vec vv = fr.vec(0);
    int chunkCount = vv.nChunks();
    System.out.println("Number of chunks in column 1: " + chunkCount);

    // Reading in the first chunk. This loads the data locally.
    Chunk cc = vv.chunkForChunkIdx(0);
    for (int i = 0; i < cc._len; i++) {
      // READING A DOUBLE ELEMENT FROM A CHUNK
      // at0 is addressed with the chunk-local index.
      double asDouble = cc.at0(i);
      System.out.println("double Value at chunk index " + i + ": " + asDouble);

      // READING A LONG ELEMENT FROM A CHUNK
      // Only attempted when the double read is not NaN (i.e. the value is not missing).
      if (!Double.isNaN(asDouble)) {
        long asLong = cc.at80(i);
        System.out.println("long Value at chunk index " + i + ": " + asLong);
      }

      // UPDATING A DOUBLE ELEMENT TO A CHUNK
      double d = 1.23;
      double setDouble = cc.set0(i, d);
      System.out.println("Setting a double value at index " + i + " : " + setDouble);

      // UPDATING A LONG ELEMENT TO A CHUNK
      long l = 123L;
      long setLong = cc.set0(i, l);
      // The original message said "double" here; this update writes a long.
      System.out.println("Setting a long value at index " + i + " : " + setLong);
    }
    // Debugging aids: uncomment to keep the instance alive for inspection.
    //logThisH2OInstanceWebBrowserAddress();
    //sleepForever();
  } finally {
    // CLEANING THE KV STORE OF ALL DATA, even if the demo above threw.
    Frame.delete(okey);
  }
}
Use of water.fvec.Frame in project h2o-2 by h2oai.
Class FillNAsWithMeanDemo01, method frame_001.
/**
 * Parses {@code ./cookbookData/iris_withNA.csv} and, for each original column, runs a
 * {@code FillNasWithMean} task (defined elsewhere) over that column together with a
 * zero-filled vector of the same layout, then appends that vector to the frame under the
 * name {@code "FilledNa" + columnIndex}.
 *
 * <p>NOTE(review): the task is constructed with the column mean, so presumably the output
 * vector holds the column with NAs replaced by that mean -- confirm against the task.
 *
 * <p>The parsed frame is removed from the K/V store when the demo ends, including when it
 * fails part-way through.
 */
@Test
public void frame_001() {
  String fileName = "./cookbookData/iris_withNA.csv";
  File file = new File(fileName);
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("iris.hex");
  Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
  try {
    // Captured before the loop: the loop appends columns, and only the original
    // columns must be visited.
    int originalCols = fr.numCols();
    for (int i = 0; i < originalCols; i++) {
      Vec vv = fr.vec(i);
      // Creating a new vector, same as the original vector, filled with zeros.
      Vec output = vv.makeZero();
      // Map/reduce call; the returned task object is not needed afterwards.
      new FillNasWithMean(vv.mean()).doAll(vv, output);
      // Adding the vector to the original frame.
      fr.add("FilledNa" + i, output);
    }
    Log.info("frame : " + fr);
    // Debugging aids: uncomment to keep the instance alive for inspection.
    //logThisH2OInstanceWebBrowserAddress();
    //sleepForever();
  } finally {
    // Clean the K/V store even if the demo above threw.
    Frame.delete(okey);
  }
}
Use of water.fvec.Frame in project h2o-2 by h2oai.
Class FillNAsWithMeanDemo03, method frame_001.
/**
 * Parses {@code ./cookbookData/iris_withNA.csv}, computes the mean of every column, runs a
 * single {@code FillNasWithMean} task (defined elsewhere) over the whole frame requesting
 * one output column per input column, and publishes the task's output frame in the K/V
 * store under {@code <original key> + "_nas_replaced_with_mean"}.
 *
 * <p>Both the parsed frame and the output frame (if it was created) are removed when the
 * demo ends, including when it fails part-way through.
 */
@Test
public void frame_001() {
  String fileName = "./cookbookData/iris_withNA.csv";
  File file = new File(fileName);
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("iris.hex");
  // The returned frame is not used; the frame is fetched from the K/V store below.
  ParseDataset2.parse(okey, new Key[] { fkey });

  Frame outputFrame = null;
  try {
    Frame f = DKV.get(okey).get();
    int len = f.numCols();
    Vec[] vv = f.vecs();
    double[] arrayofMeans = new double[len];
    for (int i = 0; i < len; i++) {
      arrayofMeans[i] = vv[i].mean();
    }
    // Map/reduce call over the frame, with len output columns.
    FillNasWithMean lr1 = new FillNasWithMean(arrayofMeans).doAll(len, f);

    Key fk = Key.make(f._key.toString() + "_nas_replaced_with_mean");
    Futures fs = new Futures();
    // New frame assembled from the task's outputs, reusing the input names and domains.
    outputFrame = lr1.outputFrame(fk, f.names(), f.domains(), fs);
    fs.blockForPending();
    // Puts the new frame in the K/V store.
    DKV.put(fk, outputFrame, fs);
    fs.blockForPending();
    Log.info(" new output frame : " + outputFrame);
    // Debugging aids: uncomment to keep the instance alive for inspection.
    //logThisH2OInstanceWebBrowserAddress();
    //sleepForever();
  } finally {
    // Nested so that a failure deleting the input frame cannot leak the output frame.
    try {
      Frame.delete(okey);
    } finally {
      if (outputFrame != null) {
        outputFrame.delete();
      }
    }
  }
}
Use of water.fvec.Frame in project h2o-2 by h2oai.
Class KeyDemo, method frame_001.
@Test
public void frame_001() {
int initial_keycnt0 = H2O.store_size();
Log.info("initial key count: should be One builtin jobkey and probably a log key: " + initial_keycnt0);
//Log.info(H2O.STORE.toString());
//String fileName = "/Users/nidhimehta/Desktop/data/covtype/covtrain_tit";
String fileName = "../smalldata/iris/iris.csv";
File file = new File(fileName);
Key fkey = NFSFileVec.make(file);
Key okey = Key.make("iris.hex");
//Expected: fkey holds the info before parse and is null after parse, as it passes the data to
// okey which is null before parse.
// int initial_keycnt = H2O.store_size();
Log.info("NFSFile key added to the key store, so should be plus one: " + H2O.store_size());
//Log.info(H2O.STORE.toString());
Log.info("UKV fkey before parse:" + UKV.get(fkey));
Log.info("DKV fkey before parse:" + DKV.get(fkey));
Log.info("UKV okey before parse:" + UKV.get(okey));
Log.info("DKV okey before parse:" + DKV.get(okey));
Frame fr;
fr = ParseDataset2.parse(okey, new Key[] { fkey });
Log.info("-------After parse of file ---------");
Log.info("key count after frame parse: (5)vectors, (5)chunks, (1)vector group keys + 2 keys (-1 NFSkey): " + H2O.store_size());
//Log.info(H2O.STORE.toString());
Log.info("UKV fkey after parse:" + UKV.get(fkey));
Log.info("DKV fkey after parse:" + DKV.get(fkey));
Log.info("UKV okey after parse:" + UKV.get(okey));
Log.info("DKV okey after parse:" + DKV.get(okey));
Log.info("DKV okey get :" + DKV.get(okey).get());
H2O.KeySnapshot ks = H2O.KeySnapshot.globalSnapshot();
long keyCount = ks.keys().length;
Log.info("Global Keyset count :" + keyCount);
Log.info("Sanity check:key count after a few prints should not change anything;But not so straightforward if more than 1 nodes: " + H2O.store_size());
//Log.info(H2O.STORE.toString());
//UKV.remove(okey); //this does not work use frame.delete cascade deletes the stuff
//DKV.remove(okey);// this will remove just the header and that's all
Frame.delete(okey);
//Log.info(H2O.STORE.toString());
try {
Thread.sleep(3000);
} catch (InterruptedException _) {
}
;
Log.info("After frame delete, just the job key, builtin job key, (and probably a log key and the vector group key) should remain: " + H2O.store_size());
//Log.info(H2O.STORE.toString());
}
Use of water.fvec.Frame in project h2o-2 by h2oai.
Class VecDemo, method Vec.
/**
 * Demonstrates element-level and metadata access on a {@code Vec} using the frame parsed
 * from {@code ./cookbookData/iris_withNA.csv}: reads the first few elements of column 0 as
 * double and long, reports which columns are integer and which of those are enums (with
 * their domains), overwrites the first few elements with double/float/long/NA values,
 * replaces the domain of column 4, and prints the rollup statistics of column 0.
 *
 * <p>The parsed frame is removed from the K/V store when the demo ends, including when it
 * fails part-way through.
 */
@Test
public void Vec() {
  String fileName = "./cookbookData/iris_withNA.csv";
  File file = new File(fileName);
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("iris.hex");
  Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
  try {
    // Accessing the first vector from the frame.
    Vec vv = fr.vec(0);
    // Touch the first three rows only when the vector has more than four; otherwise none.
    int loop_indx = 0;
    if (vv.length() > 4) {
      loop_indx = 3;
    }

    // READING AN ELEMENT FROM A VEC
    for (int i = 0; i < loop_indx; i++) {
      long k = i;
      // Element at index k, returned as a double.
      double elemnt_D = vv.at(k);
      System.out.println("element at index " + k + " as double: " + elemnt_D);
      if (!Double.isNaN(elemnt_D)) {
        // Element at index k as an (rounded) int; throws if a value is missing,
        // hence the NaN guard above.
        long elemnt_L = vv.at8(k);
        System.out.println("element at index " + k + " as integer: " + elemnt_L);
      }
    }

    // TESTING WHETHER A VEC OF INTEGERS IS AN ENUM (AKA CATEGORICAL) OR NOT
    for (int i = 0; i < fr.numCols(); i++) {
      Vec vvec = fr.vec(i);
      boolean isInt = vvec.isInt();
      System.out.println("Is " + i + " an integer column ?" + " " + isInt);
      if (isInt) {
        // A cardinality of -1 is treated as "not an enum".
        int cardinality = vvec.cardinality();
        if (cardinality != -1) {
          System.out.println("The vector " + i + " is an enum with cardinality " + cardinality + " and domain names: ");
          // PRINTING THE LIST OF DOMAINS OF AN ENUM VEC (AKA LEVELS OF A CATEGORICAL VEC)
          for (int j = 0; j < cardinality; j++) {
            System.out.println(vvec.domain(j));
          }
        }
      }
    }

    /* This sets the value in a very slow way, because it takes the vector, goes to the chunk
     * that has the row index, decompresses it, updates the value and then compresses it again.
     */
    for (int i = 0; i < loop_indx; i++) {
      long k = i;
      double d = 1.23;
      // Set element as double.
      vv.set(k, d);
      System.out.println("setting element at index " + k + " as double: " + vv.at(k));
      float f = 1.23f;
      // Set element as float.
      vv.set(k, f);
      System.out.println("setting element at index " + k + " as float: " + vv.at(k));
      long l = 12345678910L;
      // Set element as long.
      vv.set(k, l);
      System.out.println("setting element at index " + k + " as long: " + vv.at(k));
      // Set element as NA.
      vv.setNA(k);
      System.out.println("setting element at index " + k + " as NAN: " + vv.at(k));
    }

    // REPLACING THE DOMAIN (LEVEL NAMES) OF AN ENUM VEC
    // NOTE(review): assumes the frame has at least five columns and that column 4 is the
    // categorical one -- true for the iris data, confirm for other inputs.
    Vec vvenum = fr.vec(4);
    final String[] newDomain = new String[] { "x", "y", "z" };
    vvenum.changeDomain(newDomain);
    System.out.println("The changed domain names are: ");
    for (int i = 0; i < vvenum.cardinality(); i++) {
      System.out.println(vvenum.domain(i));
    }

    // ACCESSING VEC STATS THAT ARE COMPUTED AUTOMATICALLY (LIKE MIN, MAX)
    System.out.println("Min for vector 0: " + vv.min());
    System.out.println("Max for vector 0: " + vv.max());
    System.out.println("Mean for vector 0: " + vv.mean());
    System.out.println("Standard deviation for vector 0: " + vv.sigma());
    System.out.println("NA count for vector 0: " + vv.naCnt());
    // Debugging aids: uncomment to keep the instance alive for inspection.
    //logThisH2OInstanceWebBrowserAddress();
    //sleepForever();
  } finally {
    // CLEANING THE KV STORE OF ALL DATA, even if the demo above threw.
    Frame.delete(okey);
  }
}
Aggregations