use of water.Key in project h2o-2 by h2oai.
the class DeepLearningAutoEncoderCategoricalTest method run.
/**
 * Trains a deep-learning auto-encoder on the parsed test file and verifies that the
 * model's self-reported MSE agrees (to 1e-8) with the mean of the per-row
 * reconstruction error produced by {@code scoreAutoEncoder}.
 *
 * <p>All cleanup runs in a {@code finally} block so that a failed assertion (or any
 * exception during training/scoring) does not leave the frame, job, model or scored
 * frames behind.
 */
@Test
public void run() {
  long seed = 0xDECAF;
  Key file_train = NFSFileVec.make(find_test_file(PATH));
  Frame train = ParseDataset2.parse(Key.make(), new Key[] { file_train });
  DeepLearning p = new DeepLearning();
  // Declared outside the try so the finally block can release whatever was created.
  DeepLearningModel mymodel = null;
  Frame l2 = null;
  Frame recon_train = null;
  try {
    p.source = train;
    p.autoencoder = true;
    p.response = train.lastVec();
    p.seed = seed;
    p.hidden = new int[] { 100, 50, 20 };
    // p.ignored_cols = new int[]{0,1,2,3,6,7,8,10}; //Optional: ignore all categoricals
    // p.ignored_cols = new int[]{4,5,9}; //Optional: ignore all numericals
    p.adaptive_rate = true;
    p.l1 = 1e-4;
    p.activation = DeepLearning.Activation.Tanh;
    p.train_samples_per_iteration = -1;
    p.loss = DeepLearning.Loss.MeanSquare;
    p.epochs = 2;
    // p.shuffle_training_data = true;
    p.force_load_balance = true;
    p.score_training_samples = 0;
    p.score_validation_samples = 0;
    // p.reproducible = true;
    p.invoke();

    // Verification of results
    StringBuilder sb = new StringBuilder();
    sb.append("Verifying results.\n");
    mymodel = UKV.get(p.dest());
    sb.append("Reported mean reconstruction error: " + mymodel.mse() + "\n");

    // Training data: reconstruct using the model's own helper and compare with the
    // self-reported MSE.
    l2 = mymodel.scoreAutoEncoder(train);
    final Vec l2vec = l2.anyVec();
    sb.append("Actual mean reconstruction error: " + l2vec.mean() + "\n");

    // Print stats and potential outliers: rows whose error exceeds the threshold
    // computed for the (1 - 5/numRows) quantile.
    double quantile = 1 - 5. / train.numRows();
    sb.append("The following training points are reconstructed with an error above the " + quantile * 100 + "-th percentile - potential \"outliers\" in testing data.\n");
    double thresh = mymodel.calcOutlierThreshold(l2vec, quantile);
    final long rows = l2vec.length();
    for (long i = 0; i < rows; i++) {
      // Read each row's error once instead of twice.
      double err = l2vec.at(i);
      if (err > thresh) {
        sb.append(String.format("row %d : l2vec error = %5f\n", i, err));
      }
    }
    Log.info(sb.toString());
    Assert.assertEquals(mymodel.mse(), l2vec.mean(), 1e-8);

    // Create reconstruction
    Log.info("Creating full reconstruction.");
    recon_train = mymodel.score(train);
  } finally {
    // cleanup (same order as before; objects never created are skipped)
    if (recon_train != null) recon_train.delete();
    train.delete();
    p.delete();
    if (mymodel != null) mymodel.delete();
    if (l2 != null) l2.delete();
  }
}
use of water.Key in project h2o-3 by h2oai.
the class MakeGLMModelHandler method computeGram.
/**
 * Computes the Gram matrix of the frame referenced by {@code input.X} (optionally
 * weighted by the column named in {@code input.W}) and stores it in DKV as a new frame.
 *
 * <p>The result frame has one vec per row of the Gram matrix and is named after the
 * coefficient names plus a trailing {@code "Intercept"} column. Its key is derived from
 * the source frame key ({@code ".hex"} suffix stripped, {@code "_gram"} appended, then
 * {@code "_" + weightColumn} when weights are used); if that key is taken, a numeric
 * suffix {@code _0 .. _999} is tried.
 *
 * @param v     schema version (unused here)
 * @param input request schema; {@code destination_frame} is filled in on return
 * @return the same {@code input} instance with {@code destination_frame} set
 * @throws IllegalArgumentException if the source frame does not exist, the weight column
 *         is not found, or no free destination key could be found
 */
public GramV3 computeGram(int v, GramV3 input) {
  if (DKV.get(input.X.key()) == null)
    throw new IllegalArgumentException("Frame " + input.X.key() + " does not exist.");
  Frame fr = input.X.key().get();
  // Shallow copy so removing the weight column does not touch the user's frame.
  Frame frcpy = new Frame(fr._names.clone(), fr.vecs().clone());
  String wname = null;
  Vec weight = null;
  // A missing (null) column name is treated the same as an empty one: no weights.
  if (input.W != null && input.W.column_name != null && !input.W.column_name.isEmpty()) {
    wname = input.W.column_name;
    if (fr.find(wname) == -1)
      throw new IllegalArgumentException("Did not find weight vector " + wname);
    weight = frcpy.remove(wname);
  }

  // Resolve the destination key before building anything, so that failing to find a
  // free key cannot leave freshly created vecs behind.
  String keyname = input.X.key().toString();
  if (keyname.endsWith(".hex"))
    keyname = keyname.substring(0, keyname.lastIndexOf("."));
  keyname = keyname + "_gram";
  if (weight != null)
    keyname = keyname + "_" + wname;
  Key k = Key.make(keyname);
  if (DKV.get(k) != null) {
    int cnt = 0;
    while (cnt < 1000 && DKV.get(k = Key.make(keyname + "_" + cnt)) != null) cnt++;
    if (cnt == 1000)
      throw new IllegalArgumentException("unable to make unique key");
  }

  DataInfo dinfo = new DataInfo(frcpy, null, 0, input.use_all_factor_levels,
      input.standardize ? TransformType.STANDARDIZE : TransformType.NONE, TransformType.NONE,
      input.skip_missing, false, !input.skip_missing,
      /* weight */ false,
      /* offset */ false,
      /* fold */ false,
      /* intercept */ true);
  DKV.put(dinfo);
  double[][] gram;
  try {
    if (weight != null)
      dinfo.setWeights(wname, weight);
    Gram.GramTask gt = new Gram.GramTask(null, dinfo, false, true).doAll(dinfo._adaptedFrame);
    gram = gt._gram.getXX();
  } finally {
    // Always take the DataInfo back out of DKV, including when the task fails.
    dinfo.remove();
  }

  String[] names = water.util.ArrayUtils.append(dinfo.coefNames(), "Intercept");
  Vec[] vecs = new Vec[gram.length];
  Key[] keys = new VectorGroup().addVecs(vecs.length);
  for (int i = 0; i < vecs.length; ++i) vecs[i] = Vec.makeVec(gram[i], keys[i]);

  input.destination_frame = new KeyV3.FrameKeyV3();
  input.destination_frame.fillFromImpl(k);
  DKV.put(new Frame(k, names, vecs));
  return input;
}
use of water.Key in project h2o-3 by h2oai.
the class FrameSplitterTest method test.
/**
 * Splits the 150-row iris frame 50/50 through {@code SplitFrame} and checks that the
 * job stopped and produced exactly two 75-row frames.
 *
 * <p>The source frame and any split frames that were fetched are deleted in
 * {@code finally}, so a failing assertion does not leave them in the store.
 */
@Test
public void test() {
  // Load data
  Frame f = parse_test_file(Key.make("iris.csv"), "smalldata/iris/iris.csv");
  Frame[] fsplits = null;
  try {
    long numRows = f.numRows();
    Assert.assertEquals(150, numRows);
    // Perform frame split via API
    SplitFrame sf = new SplitFrame(f, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("test.hex") });
    // Invoke the job
    sf.exec().get();
    Assert.assertTrue("The job is not in STOPPED state", sf._job.isStopped());
    Key[] ksplits = sf._destination_frames;
    // Check the count before indexing into the splits below.
    Assert.assertEquals("Number of splits", 2, ksplits.length);
    fsplits = new Frame[ksplits.length];
    for (int i = 0; i < ksplits.length; i++) fsplits[i] = DKV.get(ksplits[i]).get();
    Assert.assertEquals("1. split 75rows", 75, fsplits[0].numRows());
    Assert.assertEquals("2. split 75rows", 75, fsplits[1].numRows());
  } finally {
    f.delete();
    if (fsplits != null) {
      for (Frame split : fsplits) {
        if (split != null) split.delete();
      }
    }
  }
}
use of water.Key in project h2o-2 by h2oai.
the class Frames method create.
/**
 * Builds a {@link Frame} in code from column headers and row-major numeric data.
 *
 * <p>The column count is taken from the first row. Each column is written as a single
 * chunk (index 0) of its own vec; all vec keys come from one newly created vector group.
 * The method blocks until every pending write has completed. The returned frame is
 * constructed from names and vecs only (no key is passed).
 *
 * @param headers column names handed to the {@link Frame} constructor
 * @param rows    values indexed as {@code rows[row][column]}
 * @return a frame wrapping the newly built vecs
 */
public static Frame create(String[] headers, double[][] rows) {
  final Futures pending = new Futures();
  final int numCols = rows[0].length;
  final Vec[] columns = new Vec[numCols];
  final Key[] vecKeys = new Vec.VectorGroup().addVecs(numCols);
  int col = 0;
  while (col < numCols) {
    AppendableVec builder = new AppendableVec(vecKeys[col]);
    NewChunk values = new NewChunk(builder, 0);
    // Walk the rows and pick out this column's value from each one.
    for (double[] row : rows) {
      values.addNum(row[col]);
    }
    values.close(0, pending);
    columns[col] = builder.close(pending);
    col++;
  }
  pending.blockForPending();
  return new Frame(headers, columns);
}
use of water.Key in project h2o-2 by h2oai.
the class FramDemo method Frame_1.
/**
 * Cookbook-style walkthrough of basic Frame/Vec operations: parsing a CSV into a frame,
 * looking up vecs by index and by name, listing column names, adding an existing vec,
 * taking sub frames, removing columns, building new vecs through a Vec.Writer, and
 * finally deleting what was created.
 *
 * The steps depend on each other (e.g. the removal by index relies on the vec added
 * earlier), so their order matters. Nothing is asserted; results are printed to stdout.
 */
@Test
public void Frame_1() {
// Parse the CSV file into a frame stored under the key "cars.hex".
String fileName = "./cookbookData/cars_nice_header.csv";
File file = new File(fileName);
Key fkey = NFSFileVec.make(file);
Key okey = Key.make("cars.hex");
Frame fr;
fr = ParseDataset2.parse(okey, new Key[] { fkey });
// ACCESSING A VEC FROM A FRAME
// by index
Vec vv = fr.vec(0);
// prints the vec's string form
System.out.println(vv);
// by column name
Vec vvc = fr.vec("name");
System.out.println(vvc);
// all vectors
Vec[] allVec = fr.vecs();
System.out.println("Number of vectors: " + allVec.length);
// PRINTING ALL COLUMN (AKA VEC) NAMES IN A FRAME
String[] colNames = fr.names();
for (int i = 0; i < colNames.length; i++) System.out.println("Name of vector " + i + ": " + colNames[i]);
// ADDING AN EXISTING VEC TO A FRAME
// print the number of columns in the frame before adding
System.out.println("Number of vectors in original frame: " + fr.numCols());
// add the vec fetched above (column 0) a second time under a new name
fr.add("Added_vector", vv);
System.out.println("Added a vector: " + fr);
// CREATING A NEW FRAME WITH A SUBSET OF VECS FROM AN EXISTING FRAME
String[] colTosubset = { "name", "economy" };
// by specifying column names
Frame Sfr = fr.subframe(colTosubset);
System.out.println("Subframe made by specifying colnames:" + Sfr);
// NOTE(review): per the original author's note, the sub frame is not added to the KV store - confirm.
// by specifying start and end indices (per the original note, the end index is excluded)
Frame Sfr2 = fr.subframe(0, 3);
System.out.println("Subframe made by specifying indices:" + Sfr2);
String[] colTosubsetOn = { "name", "economy", "newvec1", "newvec2" };
// Returns two frames. Per the original author's note: the first holds the requested columns
// (existing ones plus new vecs filled with the given double value) ...
Frame[] Sfr3 = fr.subframe(colTosubsetOn, 0);
// ... and the second holds only the columns that were missing from the original frame,
// filled with that constant. NOTE(review): confirm against Frame.subframe(String[], double).
System.out.println("Subframe from original frame with new constant vecs: " + Sfr3[0]);
System.out.println("Newframe with constant vecs: " + Sfr3[1]);
// REMOVING A VEC FROM A FRAME
int colToRemove = 8;
String name = fr.names()[colToRemove];
// by specifying index
fr.remove(colToRemove);
/* Warning: per the original author's note, this only removes the vec reference from the
 * frame and leaves the data in the KV store. That is useful when a vec is shared by
 * several frames and should be dropped from just one of them.
 * It is safe here because column 8 is the copy of column 0 added above
 * (assumes the CSV has 8 columns, so the added vec landed at index 8 - TODO confirm).
 */
System.out.println("Frame after column " + name + " removed : " + fr);
/* Alternative ways to remove columns (left disabled):
 *
 * fr.remove("weight"); // by specifying name
 * System.out.println("Frame after column weight is removed: "+ fr);
 *
 * int idxsToremove[] ={6,7};
 * fr.remove(idxsToremove); //removes multiple columns by specifying indices
 * System.out.println("Frame after "+idxsToremove.length +" columns are removed: "+ fr);
 *
 * fr.remove(0, 2); // by specifying start and end indices(excludes end index)
 * System.out.println("Frame after specified range of columns are removed: "+ fr);
 */
// REMOVING ALL FRAME REFERENCES TO A VEC AND RECLAIMING ITS MEMORY
// Drop the column from the frame, then remove the returned vec's key from the store.
UKV.remove(fr.remove("cylinders")._key);
System.out.println("Frame after column 'cylinders' is removed: " + fr);
// CREATING A NEW DOUBLE VEC FROM NOTHING AND ADDING IT TO A FRAME
/* Per the original author's note: writing element by element is efficient only when a
 * small number of vecs needs to be generated; otherwise use map/reduce.
 */
Vec dv = fr.anyVec().makeZero();
Vec.Writer vw = dv.open();
// row i gets the value i + 0.1
for (long i = 0; i < dv.length(); ++i) vw.set(i, (double) i + 0.1);
vw.close();
fr.add("New_Double_Vec", dv);
// CREATING A NEW LONG VEC FROM NOTHING AND ADDING IT TO A FRAME
Vec lv = fr.anyVec().makeZero();
Vec.Writer lvw = lv.open();
// row i gets the value i
for (long i = 0; i < lv.length(); ++i) lvw.set(i, i);
lvw.close();
fr.add("New_Long_Vec", lv);
// CREATING A NEW ENUM VEC FROM NOTHING AND ADDING IT TO A FRAME
final String[] Domain = { "a", "b", "c", "d" };
Vec ev = fr.anyVec().makeCon(lv.length(), Domain);
Vec.Writer evw = ev.open();
// row i gets i % 4, i.e. an index into the four-entry Domain array
for (long i = 0; i < ev.length(); ++i) evw.set(i, i % 4);
evw.close();
fr.add("NewEnumvec", ev);
// Disabled debugging helpers:
//logThisH2OInstanceWebBrowserAddress();
//sleepForever();
// CLEANUP: delete the parsed frame by its key, then the sub frames created above
Frame.delete(okey);
Sfr3[1].delete();
Sfr3[0].delete();
Sfr2.delete();
Sfr.delete();
//DKV.remove(okey);
}
Aggregations