Search in sources :

Example 46 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstRectangleConditionalAssignTest method testConditionalAssignString.

/**
 * Runs the Rapids expression
 * {@code (tmp= py_1 (:= data "tst" 3 (== (cols_py data 4) "a")))} and checks that column 3 of the
 * resulting frame equals {@code ["row1", "tst", "row3", "tst", "row5"]}.
 *
 * <p>The test fails if the expression does not evaluate to a {@code ValFrame}; it must not pass
 * silently without comparing anything.
 */
@Test
public void testConditionalAssignString() {
    Frame fr = makeTestFrame();
    Vec expected = svec("row1", "tst", "row3", "tst", "row5");
    // Declared outside the try so it can be released even when an assertion fails.
    Frame fr2 = null;
    try {
        Val val = Rapids.exec("(tmp= py_1 (:= data \"tst\" 3 (== (cols_py data 4) \"a\")))");
        if (!(val instanceof ValFrame)) {
            // AssertionError is what JUnit reports as a test failure.
            throw new AssertionError("Expected a ValFrame result but got: " + val);
        }
        fr2 = val.getFrame();
        assertStringVecEquals(expected, fr2.vec(3));
    } finally {
        if (fr2 != null) {
            fr2.remove();
        }
        fr.remove();
        expected.remove();
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 47 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstGetrowTest method TestGetrow3.

/**
 * Exercises {@code getrow} on a single-row frame whose columns have mixed types: two numeric
 * columns plus categorical, categorical, time, string and UUID constant columns. The returned row
 * must have seven entries matching the expected numeric/NaN pattern.
 */
@Test
public void TestGetrow3() {
    Frame frame = null;
    Vec[] extraCols = null;
    try {
        frame = ArrayUtils.frame(ar("D1", "D2"), ard(0, 1));
        // Five constant columns built from the first column's layout, one per additional type.
        extraCols = frame.vec(0).makeCons(5, 0, ar(ar("N", "Y"), ar("a", "b", "c"), null, null, null), ar(Vec.T_CAT, Vec.T_CAT, Vec.T_TIME, Vec.T_STR, Vec.T_UUID));
        frame.add(ar("C1", "C2", "T1", "S1", "U1"), extraCols);

        String expr = "(getrow " + frame._key + ")";
        Val result = Rapids.exec(expr);
        assertTrue(result instanceof ValRow);

        double[] values = result.getRow();
        assertEquals(7, values.length);
        assertArrayEquals(ard(0, 1, Double.NaN, Double.NaN, 0, Double.NaN, Double.NaN), values, 1e-8);
    } finally {
        if (frame != null) {
            frame.delete();
        }
        if (extraCols != null) {
            for (int i = 0; i < extraCols.length; i++) {
                extraCols[i].remove();
            }
        }
    }
}
Also used : Val(water.rapids.Val) Frame(water.fvec.Frame) ValRow(water.rapids.vals.ValRow) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 48 with Vec

use of water.fvec.Vec in project h2o-2 by h2oai.

the class DeepLearningVsNeuralNet method compare.

/**
 * Trains the same network with {@code DeepLearning} and with the reference {@code NeuralNet}
 * implementation over a grid of activations, losses, weight initializations, dropout ratios,
 * hidden layouts and data files, and checks that both produce matching confusion matrices,
 * train/test classification errors and mean weights/biases per layer.
 *
 * <p>Hyper-parameters are drawn from a {@link Random} seeded with a fixed constant, so the grid is
 * the same on every run. The test is currently disabled via {@code @Ignore}.
 *
 * @throws Exception if training or scoring fails
 */
@Ignore
@Test
public void compare() throws Exception {
    final long seed = 0xc0ffee;
    Random rng = new Random(seed);
    DeepLearning.Activation[] activations = { DeepLearning.Activation.Maxout, DeepLearning.Activation.MaxoutWithDropout, DeepLearning.Activation.Tanh, DeepLearning.Activation.TanhWithDropout, DeepLearning.Activation.Rectifier, DeepLearning.Activation.RectifierWithDropout };
    DeepLearning.Loss[] losses = { DeepLearning.Loss.MeanSquare, DeepLearning.Loss.CrossEntropy };
    DeepLearning.InitialWeightDistribution[] dists = { DeepLearning.InitialWeightDistribution.Normal, DeepLearning.InitialWeightDistribution.Uniform, DeepLearning.InitialWeightDistribution.UniformAdaptive };
    double[] initial_weight_scales = { 1e-3 + 1e-2 * rng.nextFloat() };
    double[] holdout_ratios = { 0.7 + 0.2 * rng.nextFloat() };
    int[][] hiddens = { { 1 }, { 1 + rng.nextInt(50) }, { 17, 13 }, { 20, 10, 5 } };
    double[] rates = { 0.005 + 1e-2 * rng.nextFloat() };
    int[] epochs = { 5 + rng.nextInt(5) };
    double[] input_dropouts = { 0, rng.nextFloat() * 0.5 };
    double p0 = 0.5 * rng.nextFloat();
    long pR = 1000 + rng.nextInt(1000);
    double p1 = 0.5 + 0.49 * rng.nextFloat();
    double l1 = 1e-5 * rng.nextFloat();
    double l2 = 1e-5 * rng.nextFloat();
    // rng.nextInt(50);
    float max_w2 = Float.POSITIVE_INFINITY;
    double rate_annealing = 1e-7 + rng.nextFloat() * 1e-6;
    boolean threaded = false;
    int num_repeats = 1;
    // TODO: test that Deep Learning and NeuralNet agree for Mnist dataset
    //    String[] files = { "smalldata/mnist/train.csv" };
    //    hiddens = new int[][]{ {50,50} };
    //    threaded = true;
    //    num_repeats = 5;
    // TODO: test that Deep Learning and NeuralNet agree for covtype dataset
    //    String[] files = { "smalldata/covtype/covtype.20k.data.my" };
    //    hiddens = new int[][]{ {100,100} };
    //    epochs = new int[]{ 50 };
    //    threaded = true;
    //    num_repeats = 2;
    String[] files = { "smalldata/iris/iris.csv", "smalldata/neural/two_spiral.data" };
    for (DeepLearning.Activation activation : activations) {
        for (DeepLearning.Loss loss : losses) {
            for (DeepLearning.InitialWeightDistribution dist : dists) {
                for (double scale : initial_weight_scales) {
                    for (double holdout_ratio : holdout_ratios) {
                        for (double input_dropout : input_dropouts) {
                            for (int[] hidden : hiddens) {
                                for (int epoch : epochs) {
                                    for (double rate : rates) {
                                        for (String file : files) {
                                            for (boolean fast_mode : new boolean[] { true, false }) {
                                                // Errors and per-layer weight/bias sums, accumulated over all repeats.
                                                float reftrainerr = 0, trainerr = 0;
                                                float reftesterr = 0, testerr = 0;
                                                float[] a = new float[hidden.length + 2];
                                                float[] b = new float[hidden.length + 2];
                                                float[] ba = new float[hidden.length + 2];
                                                float[] bb = new float[hidden.length + 2];
                                                long numweights = 0, numbiases = 0;
                                                for (int repeat = 0; repeat < num_repeats; ++repeat) {
                                                    long myseed = seed + repeat;
                                                    Log.info("");
                                                    Log.info("STARTING.");
                                                    Log.info("Running with " + activation.name() + " activation function and " + loss.name() + " loss function.");
                                                    Log.info("Initialization with " + dist.name() + " distribution and " + scale + " scale, holdout ratio " + holdout_ratio);
                                                    Log.info("Using seed " + seed);
                                                    Key kfile = NFSFileVec.make(find_test_file(file));
                                                    Frame frame = ParseDataset2.parse(Key.make(), new Key[] { kfile });
                                                    _train = sampleFrame(frame, (long) (frame.numRows() * holdout_ratio), seed);
                                                    _test = sampleFrame(frame, (long) (frame.numRows() * (1 - holdout_ratio)), seed + 1);
                                                    // Train new Deep Learning
                                                    Neurons[] neurons;
                                                    DeepLearningModel mymodel;
                                                    {
                                                        DeepLearning p = new DeepLearning();
                                                        p.source = (Frame) _train.clone();
                                                        p.response = _train.lastVec();
                                                        p.ignored_cols = null;
                                                        p.seed = myseed;
                                                        p.hidden = hidden;
                                                        p.adaptive_rate = false;
                                                        p.rho = 0;
                                                        p.epsilon = 0;
                                                        p.rate = rate;
                                                        p.activation = activation;
                                                        p.max_w2 = max_w2;
                                                        p.epochs = epoch;
                                                        p.input_dropout_ratio = input_dropout;
                                                        p.rate_annealing = rate_annealing;
                                                        p.loss = loss;
                                                        p.l1 = l1;
                                                        p.l2 = l2;
                                                        p.momentum_start = p0;
                                                        p.momentum_ramp = pR;
                                                        p.momentum_stable = p1;
                                                        p.initial_weight_distribution = dist;
                                                        p.initial_weight_scale = scale;
                                                        p.classification = true;
                                                        p.diagnostics = true;
                                                        p.validation = null;
                                                        p.quiet_mode = true;
                                                        p.fast_mode = fast_mode;
                                                        //sync once per period
                                                        p.train_samples_per_iteration = 0;
                                                        //same as old NeuralNet code
                                                        p.ignore_const_cols = false;
                                                        //same as old NeuralNet code
                                                        p.shuffle_training_data = false;
                                                        //same as old NeuralNet code
                                                        p.nesterov_accelerated_gradient = true;
                                                        //don't stop early -> need to compare against old NeuralNet code, which doesn't stop either
                                                        p.classification_stop = -1;
                                                        //keep 1 chunk for reproducibility
                                                        p.force_load_balance = false;
                                                        p.replicate_training_data = false;
                                                        p.single_node_mode = true;
                                                        p.invoke();
                                                        mymodel = UKV.get(p.dest());
                                                        neurons = DeepLearningTask.makeNeuronsForTesting(mymodel.model_info());
                                                    }
                                                    // Reference: NeuralNet
                                                    Layer[] ls;
                                                    NeuralNetModel refmodel;
                                                    NeuralNet p = new NeuralNet();
                                                    {
                                                        Vec[] data = Utils.remove(_train.vecs(), _train.vecs().length - 1);
                                                        Vec labels = _train.lastVec();
                                                        p.seed = myseed;
                                                        p.hidden = hidden;
                                                        p.rate = rate;
                                                        p.max_w2 = max_w2;
                                                        p.epochs = epoch;
                                                        p.input_dropout_ratio = input_dropout;
                                                        p.rate_annealing = rate_annealing;
                                                        p.l1 = l1;
                                                        p.l2 = l2;
                                                        p.momentum_start = p0;
                                                        p.momentum_ramp = pR;
                                                        p.momentum_stable = p1;
                                                        // Map the DeepLearning enums onto the NeuralNet equivalents.
                                                        if (dist == DeepLearning.InitialWeightDistribution.Normal)
                                                            p.initial_weight_distribution = InitialWeightDistribution.Normal;
                                                        else if (dist == DeepLearning.InitialWeightDistribution.Uniform)
                                                            p.initial_weight_distribution = InitialWeightDistribution.Uniform;
                                                        else if (dist == DeepLearning.InitialWeightDistribution.UniformAdaptive)
                                                            p.initial_weight_distribution = InitialWeightDistribution.UniformAdaptive;
                                                        p.initial_weight_scale = scale;
                                                        p.diagnostics = true;
                                                        p.fast_mode = fast_mode;
                                                        p.classification = true;
                                                        if (loss == DeepLearning.Loss.MeanSquare)
                                                            p.loss = Loss.MeanSquare;
                                                        else if (loss == DeepLearning.Loss.CrossEntropy)
                                                            p.loss = Loss.CrossEntropy;
                                                        // Layout: input layer, one layer per hidden entry, softmax output.
                                                        ls = new Layer[hidden.length + 2];
                                                        ls[0] = new Layer.VecsInput(data, null);
                                                        for (int i = 0; i < hidden.length; ++i) {
                                                            if (activation == DeepLearning.Activation.Tanh) {
                                                                p.activation = NeuralNet.Activation.Tanh;
                                                                ls[1 + i] = new Layer.Tanh(hidden[i]);
                                                            } else if (activation == DeepLearning.Activation.TanhWithDropout) {
                                                                p.activation = Activation.TanhWithDropout;
                                                                ls[1 + i] = new Layer.TanhDropout(hidden[i]);
                                                            } else if (activation == DeepLearning.Activation.Rectifier) {
                                                                p.activation = Activation.Rectifier;
                                                                ls[1 + i] = new Layer.Rectifier(hidden[i]);
                                                            } else if (activation == DeepLearning.Activation.RectifierWithDropout) {
                                                                p.activation = Activation.RectifierWithDropout;
                                                                ls[1 + i] = new Layer.RectifierDropout(hidden[i]);
                                                            } else if (activation == DeepLearning.Activation.Maxout) {
                                                                p.activation = Activation.Maxout;
                                                                ls[1 + i] = new Layer.Maxout(hidden[i]);
                                                            } else if (activation == DeepLearning.Activation.MaxoutWithDropout) {
                                                                p.activation = Activation.MaxoutWithDropout;
                                                                ls[1 + i] = new Layer.MaxoutDropout(hidden[i]);
                                                            }
                                                        }
                                                        ls[ls.length - 1] = new Layer.VecSoftmax(labels, null);
                                                        for (int i = 0; i < ls.length; i++) {
                                                            ls[i].init(ls, i, p);
                                                        }
                                                        Trainer trainer;
                                                        if (threaded)
                                                            trainer = new Trainer.Threaded(ls, p.epochs, null, -1);
                                                        else
                                                            trainer = new Trainer.Direct(ls, p.epochs, null);
                                                        trainer.start();
                                                        trainer.join();
                                                        refmodel = new NeuralNetModel(null, null, _train, ls, p);
                                                    }
                                                    // Accumulate weights and biases of the hidden and output layers
                                                    // (reference into a/ba, Deep Learning into b/bb) for the mean comparison below.
                                                    for (int n = 1; n < ls.length; ++n) {
                                                        Neurons l = neurons[n];
                                                        Layer ref = ls[n];
                                                        for (int o = 0; o < l._a.size(); o++) {
                                                            for (int i = 0; i < l._previous._a.size(); i++) {
                                                                a[n] += ref._w[o * l._previous._a.size() + i];
                                                                b[n] += l._w.raw()[o * l._previous._a.size() + i];
                                                                numweights++;
                                                            }
                                                            ba[n] += ref._b[o];
                                                            bb[n] += l._b.get(o);
                                                            numbiases++;
                                                        }
                                                    }
                                                    // Compare predictions.
                                                    // Note: Reference and H2O each do their internal data normalization,
                                                    // so we must use their "own" test data, which is assumed to be created correctly.
                                                    water.api.ConfusionMatrix CM = new water.api.ConfusionMatrix();
                                                    // Deep Learning scoring
                                                    {
                                                        //[0] is label, [1]...[4] are the probabilities
                                                        Frame fpreds = mymodel.score(_train);
                                                        CM = new water.api.ConfusionMatrix();
                                                        CM.actual = _train;
                                                        CM.vactual = _train.lastVec();
                                                        CM.predict = fpreds;
                                                        CM.vpredict = fpreds.vecs()[0];
                                                        CM.invoke();
                                                        StringBuilder sb = new StringBuilder();
                                                        // Render the training confusion matrix so the log loop below has content,
                                                        // mirroring the test-set branch.
                                                        CM.toASCII(sb);
                                                        trainerr += new ConfusionMatrix(CM.cm).err();
                                                        for (String s : sb.toString().split("\n")) Log.info(s);
                                                        fpreds.delete();
                                                        //[0] is label, [1]...[4] are the probabilities
                                                        Frame fpreds2 = mymodel.score(_test);
                                                        CM = new water.api.ConfusionMatrix();
                                                        CM.actual = _test;
                                                        CM.vactual = _test.lastVec();
                                                        CM.predict = fpreds2;
                                                        CM.vpredict = fpreds2.vecs()[0];
                                                        CM.invoke();
                                                        sb = new StringBuilder();
                                                        CM.toASCII(sb);
                                                        testerr += new ConfusionMatrix(CM.cm).err();
                                                        for (String s : sb.toString().split("\n")) Log.info(s);
                                                        fpreds2.delete();
                                                    }
                                                    // NeuralNet scoring
                                                    long[][] cm;
                                                    {
                                                        Log.info("\nNeuralNet Scoring:");
                                                        //training set
                                                        NeuralNet.Errors train = NeuralNet.eval(ls, 0, null);
                                                        reftrainerr += train.classification;
                                                        //test set
                                                        final Frame[] adapted = refmodel.adapt(_test, false);
                                                        Vec[] data = Utils.remove(_test.vecs(), _test.vecs().length - 1);
                                                        Vec labels = _test.vecs()[_test.vecs().length - 1];
                                                        // Re-point the reference network's input and output layers at the test data.
                                                        Layer.VecsInput input = (Layer.VecsInput) ls[0];
                                                        input.vecs = data;
                                                        input._len = data[0].length();
                                                        ((Layer.VecSoftmax) ls[ls.length - 1]).vec = labels;
                                                        //WARNING: only works if training set is large enough to have all classes
                                                        int classes = ls[ls.length - 1].units;
                                                        cm = new long[classes][classes];
                                                        NeuralNet.Errors test = NeuralNet.eval(ls, 0, cm);
                                                        Log.info("\nNeuralNet Confusion Matrix:");
                                                        Log.info(new ConfusionMatrix(cm).toString());
                                                        reftesterr += test.classification;
                                                        adapted[1].delete();
                                                    }
                                                    // At this point CM holds the Deep Learning test-set confusion matrix.
                                                    Assert.assertEquals(cm[0][0], CM.cm[0][0]);
                                                    Assert.assertEquals(cm[1][0], CM.cm[1][0]);
                                                    Assert.assertEquals(cm[0][1], CM.cm[0][1]);
                                                    Assert.assertEquals(cm[1][1], CM.cm[1][1]);
                                                    // cleanup
                                                    mymodel.delete();
                                                    refmodel.delete();
                                                    _train.delete();
                                                    _test.delete();
                                                    frame.delete();
                                                }
                                                trainerr /= (float) num_repeats;
                                                reftrainerr /= (float) num_repeats;
                                                testerr /= (float) num_repeats;
                                                reftesterr /= (float) num_repeats;
                                                // Tolerances (looser when the threaded trainer is used)
                                                final float abseps = threaded ? 1e-2f : 1e-7f;
                                                final float releps = threaded ? 1e-2f : 1e-5f;
                                                // training set scoring
                                                Log.info("NeuralNet     train error " + reftrainerr);
                                                Log.info("Deep Learning train error " + trainerr);
                                                compareVal(reftrainerr, trainerr, abseps, releps);
                                                // test set scoring
                                                Log.info("NeuralNet     test error " + reftesterr);
                                                Log.info("Deep Learning test error " + testerr);
                                                compareVal(reftesterr, testerr, abseps, releps);
                                                // mean weights/biases
                                                for (int n = 1; n < hidden.length + 2; ++n) {
                                                    Log.info("NeuralNet     mean weight for layer " + n + ": " + a[n] / numweights);
                                                    Log.info("Deep Learning mean weight for layer " + n + ": " + b[n] / numweights);
                                                    Log.info("NeuralNet     mean bias for layer " + n + ": " + ba[n] / numbiases);
                                                    Log.info("Deep Learning mean bias for layer " + n + ": " + bb[n] / numbiases);
                                                    compareVal(a[n] / numweights, b[n] / numweights, abseps, releps);
                                                    compareVal(ba[n] / numbiases, bb[n] / numbiases, abseps, releps);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
Also used : Random(java.util.Random) Key(water.Key) MRUtils.sampleFrame(water.util.MRUtils.sampleFrame) Frame(water.fvec.Frame) NeuralNet(hex.NeuralNet) DeepLearning(hex.deeplearning.DeepLearning) Neurons(hex.deeplearning.Neurons) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec) DeepLearningModel(hex.deeplearning.DeepLearningModel) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 49 with Vec

use of water.fvec.Vec in project h2o-2 by h2oai.

the class CrossValUtils method crossValidate.

/**
 * Cross-Validate a ValidatedJob.
 *
 * <p>For each fold, extracts a train/holdout split of {@code job.source}, delegates to
 * {@code job.crossValidate(...)}, and finally scores the combined cross-validation predictions on
 * the model stored under {@code job.destination_key}. Returns without doing anything if the job is
 * not in the {@code RUNNING} state, and stops early if the job leaves that state between folds.
 *
 * @param job (must contain valid entries for n_folds, validation, destination_key, source, response)
 * @throws IllegalArgumentException if a validation dataset is set, or if {@code n_folds} is less than 2
 */
public static void crossValidate(Job.ValidatedJob job) {
    //don't do cross-validation if the full model builder failed
    if (job.state != Job.JobState.RUNNING)
        return;
    if (job.validation != null)
        throw new IllegalArgumentException("Cannot provide validation dataset and n_folds > 0 at the same time.");
    if (job.n_folds <= 1)
        throw new IllegalArgumentException("n_folds must be >= 2 for cross-validation.");
    final String basename = job.destination_key.toString();
    long[] offsets = new long[job.n_folds + 1];
    Frame[] cv_preds = new Frame[job.n_folds];
    try {
        for (int i = 0; i < job.n_folds; ++i) {
            if (job.state != Job.JobState.RUNNING)
                break;
            Key[] destkeys = new Key[] { Key.make(basename + "_xval" + i + "_train"), Key.make(basename + "_xval" + i + "_holdout") };
            NFoldFrameExtractor nffe = new NFoldFrameExtractor(job.source, job.n_folds, i, destkeys, Key.make());
            H2O.submitTask(nffe);
            Frame[] splits = nffe.getResult();
            // Cross-validate individual splits
            try {
                //this removes the enum-ified response!
                job.crossValidate(splits, cv_preds, offsets, i);
                job._cv_count++;
            } finally {
                // clean-up the results
                if (!job.keep_cross_validation_splits)
                    for (Frame f : splits) f.delete();
            }
        }
        if (job.state != Job.JobState.RUNNING)
            return;
        final int resp_idx = job.source.find(job._responseName);
        Vec response = job.source.vecs()[resp_idx];
        // In the case of rebalance, rebalance response will be deleted
        boolean put_back = UKV.get(job.response._key) == null;
        if (put_back) {
            job.response = response;
            if (job.classification)
                job.response = job.response.toEnum();
            //put enum-ified response back to K-V store
            DKV.put(job.response._key, job.response);
        }
        try {
            ((Model) UKV.get(job.destination_key)).scoreCrossValidation(job, job.source, response, cv_preds, offsets);
        } finally {
            // Remove the temporarily restored response even if scoring throws,
            // so a failed run does not leave the key behind.
            if (put_back)
                UKV.remove(job.response._key);
        }
    } finally {
        // clean-up prediction frames for splits
        for (Frame f : cv_preds) if (f != null)
            f.delete();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFoldFrameExtractor(hex.NFoldFrameExtractor)

Example 50 with Vec

use of water.fvec.Vec in project h2o-2 by h2oai.

the class Env method remove_and_unlock.

// Remove everything: unwind all scopes, publish keyed frames from the outer scope into the
// K/V store, then unlock and delete every frame that is still locked.
public void remove_and_unlock() {
    // Remove all shallow scopes
    while (_tod > 0) {
        popScope();
    }
    // Push changes at the outer scope into the K/V store
    while (_sp > 0) {
        if (!isAry() || _key[_sp - 1] == null) {
            // No K/V mapping for this slot: just drop it
            popUncheck();
            continue;
        }
        // Has a K/V mapping: pop w/o lowering refcnt
        Frame popped = popAry();
        String name = key();
        Frame published = new Frame(Key.make(name), popped._names.clone(), popped.vecs().clone());
        for (int col = 0; col < popped.numCols(); col++) {
            Vec shared = popped.vecs()[col];
            int refs = _refcnt.get(shared)._val;
            assert refs > 0;
            if (refs <= 1) {
                // Sole owner: keep the vec as is
                continue;
            }
            // Need a deep-copy now
            Vec copy = new Frame(shared).deepSlice(null, null).vecs()[0];
            // Replace with private deep-copy
            published.replace(col, copy);
            // Now lower refcnt for good assertions,
            // but not down to zero (do not delete items in global scope)
            subRef(shared);
            addRef(copy);
        }
        if (!_locked.contains(published._key)) {
            // Clear prior & set new data
            published.delete_and_lock(null);
            _locked.add(published._key);
        } else {
            // Upgrade to write-lock
            published.write_lock(null);
        }
        published.unlock(null);
        // Unlocked already
        _locked.remove(published._key);
    }
    // Unlock all things that do not survive, plus also delete them
    for (Key lockedKey : _locked) {
        Frame doomed = UKV.get(lockedKey);
        // Should be atomic really
        doomed.unlock(null);
        // Should be atomic really
        doomed.delete();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Key(water.Key)

Aggregations

Vec (water.fvec.Vec)280 Frame (water.fvec.Frame)213 Test (org.junit.Test)82 NFSFileVec (water.fvec.NFSFileVec)48 ValFrame (water.rapids.vals.ValFrame)47 Chunk (water.fvec.Chunk)30 Random (java.util.Random)25 NewChunk (water.fvec.NewChunk)23 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)22 Key (water.Key)21 MRTask (water.MRTask)17 Val (water.rapids.Val)14 File (java.io.File)11 ArrayList (java.util.ArrayList)11 Futures (water.Futures)11 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)11 ValNum (water.rapids.vals.ValNum)11 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)10 BufferedString (water.parser.BufferedString)10 AppendableVec (water.fvec.AppendableVec)9