Use of water.fvec.Vec in project h2o-3 by h2oai.
The class AstRectangleConditionalAssignTest, method testConditionalAssignString.
@Test
public void testConditionalAssignString() {
  Frame fr = makeTestFrame();
  Vec expected = svec("row1", "tst", "row3", "tst", "row5");
  try {
    Val val = Rapids.exec("(tmp= py_1 (:= data \"tst\" 3 (== (cols_py data 4) \"a\")))");
    if (val instanceof ValFrame) {
      Frame fr2 = val.getFrame();
      assertStringVecEquals(expected, fr2.vec(3));
      fr2.remove();
    }
  } finally {
    fr.remove();
    expected.remove();
  }
}
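A minimal plain-Java sketch of the semantics the Rapids expression above encodes (the column values below are hypothetical, chosen only to reproduce the expected vector): wherever column 4 equals "a", column 3 is overwritten with "tst".
public class ConditionalAssignSketch {
  public static void main(String[] args) {
    // Hypothetical stand-ins for the frame's string columns 3 and 4.
    String[] col3 = { "row1", "row2", "row3", "row4", "row5" };
    String[] col4 = { "x", "a", "x", "a", "x" };
    // (:= data "tst" 3 (== (cols_py data 4) "a")) assigns "tst" into
    // column 3 for every row where column 4 equals "a".
    for (int r = 0; r < col3.length; r++)
      if ("a".equals(col4[r]))
        col3[r] = "tst";
    // col3 is now {"row1", "tst", "row3", "tst", "row5"}, matching expected.
  }
}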
Use of water.fvec.Vec in project h2o-3 by h2oai.
The class AstGetrowTest, method TestGetrow3.
/** Test columns of various types. */
@Test
public void TestGetrow3() {
  Frame f = null;
  Vec[] vv = null;
  try {
    f = ArrayUtils.frame(ar("D1", "D2"), ard(0, 1));
    // Make five constant companion vecs (value 0) of various types:
    // two categoricals, a time, a string and a UUID column.
    vv = f.vec(0).makeCons(5, 0, ar(ar("N", "Y"), ar("a", "b", "c"), null, null, null), ar(Vec.T_CAT, Vec.T_CAT, Vec.T_TIME, Vec.T_STR, Vec.T_UUID));
    f.add(ar("C1", "C2", "T1", "S1", "U1"), vv);
    Val v = Rapids.exec("(getrow " + f._key + ")");
    assertTrue(v instanceof ValRow);
    double[] row = v.getRow();
    assertEquals(7, row.length);
    // Columns are [D1, D2, C1, C2, T1, S1, U1]; per the expected array, the
    // numeric and time cells come back as doubles, while the categorical,
    // string and UUID cells map to NaN.
    assertArrayEquals(ard(0, 1, Double.NaN, Double.NaN, 0, Double.NaN, Double.NaN), row, 1e-8);
  } finally {
    if (f != null)
      f.delete();
    if (vv != null)
      for (Vec v : vv) v.remove();
  }
}
Use of water.fvec.Vec in project h2o-2 by h2oai.
The class DeepLearningVsNeuralNet, method compare.
@Ignore
@Test
public void compare() throws Exception {
  final long seed = 0xc0ffee;
  Random rng = new Random(seed);
  DeepLearning.Activation[] activations = { DeepLearning.Activation.Maxout, DeepLearning.Activation.MaxoutWithDropout, DeepLearning.Activation.Tanh, DeepLearning.Activation.TanhWithDropout, DeepLearning.Activation.Rectifier, DeepLearning.Activation.RectifierWithDropout };
  DeepLearning.Loss[] losses = { DeepLearning.Loss.MeanSquare, DeepLearning.Loss.CrossEntropy };
  DeepLearning.InitialWeightDistribution[] dists = { DeepLearning.InitialWeightDistribution.Normal, DeepLearning.InitialWeightDistribution.Uniform, DeepLearning.InitialWeightDistribution.UniformAdaptive };
  double[] initial_weight_scales = { 1e-3 + 1e-2 * rng.nextFloat() };
  double[] holdout_ratios = { 0.7 + 0.2 * rng.nextFloat() };
  int[][] hiddens = { { 1 }, { 1 + rng.nextInt(50) }, { 17, 13 }, { 20, 10, 5 } };
  double[] rates = { 0.005 + 1e-2 * rng.nextFloat() };
  int[] epochs = { 5 + rng.nextInt(5) };
  double[] input_dropouts = { 0, rng.nextFloat() * 0.5 };
  double p0 = 0.5 * rng.nextFloat(); // momentum_start
  long pR = 1000 + rng.nextInt(1000); // momentum_ramp
  double p1 = 0.5 + 0.49 * rng.nextFloat(); // momentum_stable
  double l1 = 1e-5 * rng.nextFloat(); // L1 regularization
  double l2 = 1e-5 * rng.nextFloat(); // L2 regularization
  // rng.nextInt(50);
  float max_w2 = Float.POSITIVE_INFINITY;
  double rate_annealing = 1e-7 + rng.nextFloat() * 1e-6;
  boolean threaded = false;
  int num_repeats = 1;
  // TODO: test that Deep Learning and NeuralNet agree for the MNIST dataset
  // String[] files = { "smalldata/mnist/train.csv" };
  // hiddens = new int[][]{ {50,50} };
  // threaded = true;
  // num_repeats = 5;
  // TODO: test that Deep Learning and NeuralNet agree for the covtype dataset
  // String[] files = { "smalldata/covtype/covtype.20k.data.my" };
  // hiddens = new int[][]{ {100,100} };
  // epochs = new int[]{ 50 };
  // threaded = true;
  // num_repeats = 2;
  String[] files = { "smalldata/iris/iris.csv", "smalldata/neural/two_spiral.data" };
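  // The 11 nested loops below sweep the full cross-product of the
  // hyper-parameter arrays declared above; with the mostly single-element
  // arrays used here, the sweep stays small enough to run as a test.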
  for (DeepLearning.Activation activation : activations) {
    for (DeepLearning.Loss loss : losses) {
      for (DeepLearning.InitialWeightDistribution dist : dists) {
        for (double scale : initial_weight_scales) {
          for (double holdout_ratio : holdout_ratios) {
            for (double input_dropout : input_dropouts) {
              for (int[] hidden : hiddens) {
                for (int epoch : epochs) {
                  for (double rate : rates) {
                    for (String file : files) {
                      for (boolean fast_mode : new boolean[] { true, false }) {
                        float reftrainerr = 0, trainerr = 0;
                        float reftesterr = 0, testerr = 0;
                        float[] a = new float[hidden.length + 2];
                        float[] b = new float[hidden.length + 2];
                        float[] ba = new float[hidden.length + 2];
                        float[] bb = new float[hidden.length + 2];
                        long numweights = 0, numbiases = 0;
                        for (int repeat = 0; repeat < num_repeats; ++repeat) {
                          long myseed = seed + repeat;
                          Log.info("");
                          Log.info("STARTING.");
                          Log.info("Running with " + activation.name() + " activation function and " + loss.name() + " loss function.");
                          Log.info("Initialization with " + dist.name() + " distribution and " + scale + " scale, holdout ratio " + holdout_ratio);
                          Log.info("Using seed " + myseed);
                          Key kfile = NFSFileVec.make(find_test_file(file));
                          Frame frame = ParseDataset2.parse(Key.make(), new Key[] { kfile });
                          _train = sampleFrame(frame, (long) (frame.numRows() * holdout_ratio), seed);
                          _test = sampleFrame(frame, (long) (frame.numRows() * (1 - holdout_ratio)), seed + 1);
                          // Train new Deep Learning model
                          Neurons[] neurons;
                          DeepLearningModel mymodel;
                          {
                            DeepLearning p = new DeepLearning();
                            p.source = (Frame) _train.clone();
                            p.response = _train.lastVec();
                            p.ignored_cols = null;
                            p.seed = myseed;
                            p.hidden = hidden;
                            p.adaptive_rate = false;
                            p.rho = 0;
                            p.epsilon = 0;
                            p.rate = rate;
                            p.activation = activation;
                            p.max_w2 = max_w2;
                            p.epochs = epoch;
                            p.input_dropout_ratio = input_dropout;
                            p.rate_annealing = rate_annealing;
                            p.loss = loss;
                            p.l1 = l1;
                            p.l2 = l2;
                            p.momentum_start = p0;
                            p.momentum_ramp = pR;
                            p.momentum_stable = p1;
                            p.initial_weight_distribution = dist;
                            p.initial_weight_scale = scale;
                            p.classification = true;
                            p.diagnostics = true;
                            p.validation = null;
                            p.quiet_mode = true;
                            p.fast_mode = fast_mode;
                            p.train_samples_per_iteration = 0; // sync once per period
                            p.ignore_const_cols = false; // same as old NeuralNet code
                            p.shuffle_training_data = false; // same as old NeuralNet code
                            p.nesterov_accelerated_gradient = true; // same as old NeuralNet code
                            p.classification_stop = -1; // don't stop early -> compare against old NeuralNet code, which doesn't stop either
                            p.force_load_balance = false; // keep 1 chunk for reproducibility
                            p.replicate_training_data = false;
                            p.single_node_mode = true;
                            p.invoke();
                            mymodel = UKV.get(p.dest());
                            neurons = DeepLearningTask.makeNeuronsForTesting(mymodel.model_info());
                          }
                          // Reference: NeuralNet
                          Layer[] ls;
                          NeuralNetModel refmodel;
                          NeuralNet p = new NeuralNet();
                          {
                            Vec[] data = Utils.remove(_train.vecs(), _train.vecs().length - 1);
                            Vec labels = _train.lastVec();
                            p.seed = myseed;
                            p.hidden = hidden;
                            p.rate = rate;
                            p.max_w2 = max_w2;
                            p.epochs = epoch;
                            p.input_dropout_ratio = input_dropout;
                            p.rate_annealing = rate_annealing;
                            p.l1 = l1;
                            p.l2 = l2;
                            p.momentum_start = p0;
                            p.momentum_ramp = pR;
                            p.momentum_stable = p1;
                            if (dist == DeepLearning.InitialWeightDistribution.Normal)
                              p.initial_weight_distribution = InitialWeightDistribution.Normal;
                            else if (dist == DeepLearning.InitialWeightDistribution.Uniform)
                              p.initial_weight_distribution = InitialWeightDistribution.Uniform;
                            else if (dist == DeepLearning.InitialWeightDistribution.UniformAdaptive)
                              p.initial_weight_distribution = InitialWeightDistribution.UniformAdaptive;
                            p.initial_weight_scale = scale;
                            p.diagnostics = true;
                            p.fast_mode = fast_mode;
                            p.classification = true;
                            if (loss == DeepLearning.Loss.MeanSquare)
                              p.loss = Loss.MeanSquare;
                            else if (loss == DeepLearning.Loss.CrossEntropy)
                              p.loss = Loss.CrossEntropy;
                            ls = new Layer[hidden.length + 2];
                            ls[0] = new Layer.VecsInput(data, null);
                            for (int i = 0; i < hidden.length; ++i) {
                              if (activation == DeepLearning.Activation.Tanh) {
                                p.activation = NeuralNet.Activation.Tanh;
                                ls[1 + i] = new Layer.Tanh(hidden[i]);
                              } else if (activation == DeepLearning.Activation.TanhWithDropout) {
                                p.activation = Activation.TanhWithDropout;
                                ls[1 + i] = new Layer.TanhDropout(hidden[i]);
                              } else if (activation == DeepLearning.Activation.Rectifier) {
                                p.activation = Activation.Rectifier;
                                ls[1 + i] = new Layer.Rectifier(hidden[i]);
                              } else if (activation == DeepLearning.Activation.RectifierWithDropout) {
                                p.activation = Activation.RectifierWithDropout;
                                ls[1 + i] = new Layer.RectifierDropout(hidden[i]);
                              } else if (activation == DeepLearning.Activation.Maxout) {
                                p.activation = Activation.Maxout;
                                ls[1 + i] = new Layer.Maxout(hidden[i]);
                              } else if (activation == DeepLearning.Activation.MaxoutWithDropout) {
                                p.activation = Activation.MaxoutWithDropout;
                                ls[1 + i] = new Layer.MaxoutDropout(hidden[i]);
                              }
                            }
                            ls[ls.length - 1] = new Layer.VecSoftmax(labels, null);
                            for (int i = 0; i < ls.length; i++) {
                              ls[i].init(ls, i, p);
                            }
                            Trainer trainer;
                            if (threaded)
                              trainer = new Trainer.Threaded(ls, p.epochs, null, -1);
                            else
                              trainer = new Trainer.Direct(ls, p.epochs, null);
                            trainer.start();
                            trainer.join();
                            refmodel = new NeuralNetModel(null, null, _train, ls, p);
                          }
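                          // Both networks are now configured identically and trained on the
                          // same data, so their weights, biases and predictions should agree
                          // within the tolerances checked below.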
                          // Compare MEAN weights and biases in the hidden and output layers:
                          // sums are accumulated here and averaged over numweights/numbiases
                          // after the repeat loop.
                          for (int n = 1; n < ls.length; ++n) {
                            Neurons l = neurons[n];
                            Layer ref = ls[n];
                            for (int o = 0; o < l._a.size(); o++) {
                              for (int i = 0; i < l._previous._a.size(); i++) {
                                a[n] += ref._w[o * l._previous._a.size() + i];
                                b[n] += l._w.raw()[o * l._previous._a.size() + i];
                                numweights++;
                              }
                              ba[n] += ref._b[o];
                              bb[n] += l._b.get(o);
                              numbiases++;
                            }
                          }
                          // Compare predictions. Note: the reference and H2O each do their
                          // own internal data normalization, so each must score its "own"
                          // test data, which is assumed to be created correctly.
                          water.api.ConfusionMatrix CM = new water.api.ConfusionMatrix();
                          // Deep Learning scoring
                          {
                            // [0] is the label, [1]...[4] are the class probabilities
                            Frame fpreds = mymodel.score(_train);
                            CM = new water.api.ConfusionMatrix();
                            CM.actual = _train;
                            CM.vactual = _train.lastVec();
                            CM.predict = fpreds;
                            CM.vpredict = fpreds.vecs()[0];
                            CM.invoke();
                            StringBuilder sb = new StringBuilder();
                            CM.toASCII(sb);
                            trainerr += new ConfusionMatrix(CM.cm).err();
                            for (String s : sb.toString().split("\n")) Log.info(s);
                            fpreds.delete();
                            // [0] is the label, [1]...[4] are the class probabilities
                            Frame fpreds2 = mymodel.score(_test);
                            CM = new water.api.ConfusionMatrix();
                            CM.actual = _test;
                            CM.vactual = _test.lastVec();
                            CM.predict = fpreds2;
                            CM.vpredict = fpreds2.vecs()[0];
                            CM.invoke();
                            sb = new StringBuilder();
                            CM.toASCII(sb);
                            testerr += new ConfusionMatrix(CM.cm).err();
                            for (String s : sb.toString().split("\n")) Log.info(s);
                            fpreds2.delete();
                          }
                          // NeuralNet scoring
                          long[][] cm;
                          {
                            Log.info("\nNeuralNet Scoring:");
                            // training set
                            NeuralNet.Errors train = NeuralNet.eval(ls, 0, null);
                            reftrainerr += train.classification;
                            // test set
                            final Frame[] adapted = refmodel.adapt(_test, false);
                            Vec[] data = Utils.remove(_test.vecs(), _test.vecs().length - 1);
                            Vec labels = _test.vecs()[_test.vecs().length - 1];
                            Layer.VecsInput input = (Layer.VecsInput) ls[0];
                            input.vecs = data;
                            input._len = data[0].length();
                            ((Layer.VecSoftmax) ls[ls.length - 1]).vec = labels;
                            // WARNING: only works if the training set is large enough to contain all classes
                            int classes = ls[ls.length - 1].units;
                            cm = new long[classes][classes];
                            NeuralNet.Errors test = NeuralNet.eval(ls, 0, cm);
                            Log.info("\nNeuralNet Confusion Matrix:");
                            Log.info(new ConfusionMatrix(cm).toString());
                            reftesterr += test.classification;
                            adapted[1].delete();
                          }
                          Assert.assertEquals(cm[0][0], CM.cm[0][0]);
                          Assert.assertEquals(cm[1][0], CM.cm[1][0]);
                          Assert.assertEquals(cm[0][1], CM.cm[0][1]);
                          Assert.assertEquals(cm[1][1], CM.cm[1][1]);
                          // cleanup
                          mymodel.delete();
                          refmodel.delete();
                          _train.delete();
                          _test.delete();
                          frame.delete();
                        }
                        trainerr /= (float) num_repeats;
                        reftrainerr /= (float) num_repeats;
                        testerr /= (float) num_repeats;
                        reftesterr /= (float) num_repeats;
                        // Tolerances
                        final float abseps = threaded ? 1e-2f : 1e-7f;
                        final float releps = threaded ? 1e-2f : 1e-5f;
                        // training set scoring
                        Log.info("NeuralNet train error " + reftrainerr);
                        Log.info("Deep Learning train error " + trainerr);
                        compareVal(reftrainerr, trainerr, abseps, releps);
                        // test set scoring
                        Log.info("NeuralNet test error " + reftesterr);
                        Log.info("Deep Learning test error " + testerr);
                        compareVal(reftesterr, testerr, abseps, releps);
                        // mean weights/biases
                        for (int n = 1; n < hidden.length + 2; ++n) {
                          Log.info("NeuralNet mean weight for layer " + n + ": " + a[n] / numweights);
                          Log.info("Deep Learning mean weight for layer " + n + ": " + b[n] / numweights);
                          Log.info("NeuralNet mean bias for layer " + n + ": " + ba[n] / numbiases);
                          Log.info("Deep Learning mean bias for layer " + n + ": " + bb[n] / numbiases);
                          compareVal(a[n] / numweights, b[n] / numweights, abseps, releps);
                          compareVal(ba[n] / numbiases, bb[n] / numbiases, abseps, releps);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
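The compareVal helper is not shown in this snippet; a minimal sketch consistent with how it is called above (the name, placement, and exact semantics beyond an absolute-or-relative tolerance check are assumptions) could look like:
static void compareVal(double a, double b, double abseps, double releps) {
  // Hypothetical helper: values must agree within an absolute tolerance or,
  // failing that, within a relative one.
  double diff = Math.abs(a - b);
  double rel = diff / Math.max(Math.abs(a), Math.abs(b));
  if (diff > abseps && rel > releps)
    throw new AssertionError("Values differ: " + a + " vs " + b);
}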
Use of water.fvec.Vec in project h2o-2 by h2oai.
The class CrossValUtils, method crossValidate.
/**
 * Cross-validate a ValidatedJob.
 * @param job the job; must contain valid entries for n_folds, validation, destination_key, source and response
 */
public static void crossValidate(Job.ValidatedJob job) {
  // Don't do cross-validation if the full model builder failed
  if (job.state != Job.JobState.RUNNING)
    return;
  if (job.validation != null)
    throw new IllegalArgumentException("Cannot provide validation dataset and n_folds > 0 at the same time.");
  if (job.n_folds <= 1)
    throw new IllegalArgumentException("n_folds must be >= 2 for cross-validation.");
  final String basename = job.destination_key.toString();
  long[] offsets = new long[job.n_folds + 1];
  Frame[] cv_preds = new Frame[job.n_folds];
  try {
    for (int i = 0; i < job.n_folds; ++i) {
      if (job.state != Job.JobState.RUNNING)
        break;
      Key[] destkeys = new Key[] { Key.make(basename + "_xval" + i + "_train"), Key.make(basename + "_xval" + i + "_holdout") };
      NFoldFrameExtractor nffe = new NFoldFrameExtractor(job.source, job.n_folds, i, destkeys, Key.make());
      H2O.submitTask(nffe);
      Frame[] splits = nffe.getResult();
      // Cross-validate the individual split
      try {
        // This removes the enum-ified response!
        job.crossValidate(splits, cv_preds, offsets, i);
        job._cv_count++;
      } finally {
        // Clean up the split frames
        if (!job.keep_cross_validation_splits)
          for (Frame f : splits) f.delete();
      }
    }
    if (job.state != Job.JobState.RUNNING)
      return;
    final int resp_idx = job.source.find(job._responseName);
    Vec response = job.source.vecs()[resp_idx];
    // If the frame was rebalanced, the rebalanced response vec has already been deleted
    boolean put_back = UKV.get(job.response._key) == null;
    if (put_back) {
      job.response = response;
      if (job.classification)
        job.response = job.response.toEnum();
      // Put the enum-ified response back into the K-V store
      DKV.put(job.response._key, job.response);
    }
    ((Model) UKV.get(job.destination_key)).scoreCrossValidation(job, job.source, response, cv_preds, offsets);
    if (put_back)
      UKV.remove(job.response._key);
  } finally {
    // Clean up the prediction frames for the splits
    for (Frame f : cv_preds)
      if (f != null)
        f.delete();
  }
}
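For intuition, a minimal plain-Java sketch of an n-fold row split like the one NFoldFrameExtractor performs (the contiguous-slice layout is an assumption; only the fold/holdout arithmetic is illustrated):
public class NFoldSplitSketch {
  // Hypothetical helper: fold i of nfolds holds out rows [lo, hi), assuming
  // contiguous slices; the remaining rows form the training split.
  static long[] holdoutBounds(long nrows, int nfolds, int fold) {
    long lo = fold * nrows / nfolds;
    long hi = (fold + 1) * nrows / nfolds;
    return new long[] { lo, hi };
  }

  public static void main(String[] args) {
    for (int i = 0; i < 5; i++) {
      long[] b = holdoutBounds(103, 5, i);
      System.out.println("fold " + i + " holds out rows [" + b[0] + ", " + b[1] + ")");
    }
  }
}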
Use of water.fvec.Vec in project h2o-2 by h2oai.
The class Env, method remove_and_unlock.
// Remove everything
public void remove_and_unlock() {
  // Remove all shallow scopes
  while (_tod > 0) popScope();
  // Push changes at the outer scope into the K/V store
  while (_sp > 0) {
    if (isAry() && _key[_sp - 1] != null) { // Has a K/V mapping?
      Frame fr = popAry(); // Pop w/o lowering refcnt
      String skey = key();
      Frame fr2 = new Frame(Key.make(skey), fr._names.clone(), fr.vecs().clone());
      for (int i = 0; i < fr.numCols(); i++) {
        Vec v = fr.vecs()[i];
        int refcnt = _refcnt.get(v)._val;
        assert refcnt > 0;
        if (refcnt > 1) {
          // Need a deep copy now
          Vec v2 = new Frame(v).deepSlice(null, null).vecs()[0];
          // Replace with the private deep copy
          fr2.replace(i, v2);
          // Now lower the refcnt for good assertions, but not down to zero
          // (do not delete items in the global scope)
          subRef(v);
          addRef(v2);
        }
      }
      if (_locked.contains(fr2._key)) {
        // Upgrade to write-lock
        fr2.write_lock(null);
      } else {
        // Clear prior & set new data
        fr2.delete_and_lock(null);
        _locked.add(fr2._key);
      }
      fr2.unlock(null);
      _locked.remove(fr2._key); // Unlocked already
    } else {
      popUncheck();
    }
  }
  // Unlock all things that do not survive, plus also delete them
  for (Key k : _locked) {
    Frame fr = UKV.get(k);
    // Should be atomic, really
    fr.unlock(null);
    fr.delete();
  }
}
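The deep-copy rule above (refcnt > 1 forces a private copy before publishing) can be illustrated with a small standalone sketch; the map and counts here are hypothetical and stand in for Env's _refcnt bookkeeping:
import java.util.HashMap;
import java.util.Map;

public class RefcntSketch {
  public static void main(String[] args) {
    // Hypothetical refcounts: a Vec shared by k frames has refcnt == k.
    Map<String, Integer> refcnt = new HashMap<>();
    refcnt.put("vecA", 1); // owned by this frame alone: safe to publish as-is
    refcnt.put("vecB", 3); // shared by three frames: deep-copy before publishing
    for (Map.Entry<String, Integer> e : refcnt.entrySet()) {
      boolean needCopy = e.getValue() > 1;
      System.out.println(e.getKey() + (needCopy ? " -> replace with a private deep copy" : " -> publish directly"));
    }
  }
}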