
Example 11 with NFSFileVec

use of water.fvec.NFSFileVec in project h2o-3 by h2oai.

the class DeepLearningSpiralsTest method run.

@Test
public void run() {
    Scope.enter();
    NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/junit/two_spiral.csv");
    Frame frame = ParseDataset.parse(Key.make(), nfs._key);
    Log.info(frame);
    int resp = frame.names().length - 1;
    for (boolean sparse : new boolean[] { true, false }) {
        for (boolean col_major : new boolean[] { false }) {
            if (!sparse && col_major)
                continue;
            Key model_id = Key.make();
            // build the model
            {
                DeepLearningParameters p = new DeepLearningParameters();
                p._epochs = 5000;
                p._hidden = new int[] { 100 };
                p._sparse = sparse;
                p._col_major = col_major;
                p._activation = DeepLearningParameters.Activation.Tanh;
                p._initial_weight_distribution = DeepLearningParameters.InitialWeightDistribution.Normal;
                p._initial_weight_scale = 2.5;
                p._loss = DeepLearningParameters.Loss.CrossEntropy;
                p._train = frame._key;
                p._response_column = frame.names()[resp];
                // Convert response to categorical
                Scope.track(frame.replace(resp, frame.vecs()[resp].toCategoricalVec()));
                DKV.put(frame);
                p._rho = 0.99;
                p._epsilon = 5e-3;
                //stop when reaching 0 classification error on training data
                p._classification_stop = 0;
                p._train_samples_per_iteration = 10000;
                p._stopping_rounds = 5;
                p._stopping_metric = ScoreKeeper.StoppingMetric.misclassification;
                p._score_each_iteration = true;
                p._reproducible = true;
                p._seed = 1234;
                new DeepLearning(p, model_id).trainModel().get();
            }
            // score and check result
            {
                DeepLearningModel mymodel = DKV.getGet(model_id);
                Frame pred = mymodel.score(frame);
                ModelMetricsBinomial mm = ModelMetricsBinomial.getFromDKV(mymodel, frame);
                double error = mm._auc.defaultErr();
                Log.info("Error: " + error);
                if (error > 0.1) {
                    Assert.fail("Test classification error is not <= 0.1, but " + error + ".");
                }
                Assert.assertTrue(mymodel.testJavaScoring(frame, pred, 1e-6));
                pred.delete();
                mymodel.delete();
            }
        }
    }
    frame.delete();
    Scope.exit();
}
Also used: Frame(water.fvec.Frame) NFSFileVec(water.fvec.NFSFileVec) DeepLearningParameters(hex.deeplearning.DeepLearningModel.DeepLearningParameters) ModelMetricsBinomial(hex.ModelMetricsBinomial) Key(water.Key) Test(org.junit.Test)

Example 12 with NFSFileVec

use of water.fvec.NFSFileVec in project h2o-3 by h2oai.

the class DeepLearningCheckpointReporting method run.

@Test
public void run() {
    Scope.enter();
    Frame frame = null;
    try {
        NFSFileVec trainfv = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
        frame = ParseDataset.parse(Key.make(), trainfv._key);
        DeepLearningParameters p = new DeepLearningParameters();
        // populate model parameters
        p._train = frame._key;
        // CAPSULE is the response column
        p._response_column = "CAPSULE";
        p._activation = DeepLearningParameters.Activation.Rectifier;
        p._epochs = 4;
        p._train_samples_per_iteration = -1;
        p._score_duty_cycle = 1;
        p._score_interval = 0;
        p._overwrite_with_best_model = false;
        p._classification_stop = -1;
        p._seed = 1234;
        p._reproducible = true;
        // Convert the 'CAPSULE' response to categorical (binary 0/1)
        int ci = frame.find("CAPSULE");
        Scope.track(frame.replace(ci, frame.vecs()[ci].toCategoricalVec()));
        DKV.put(frame);
        long start = System.currentTimeMillis();
        //to avoid rounding issues with printed time stamp (1 second resolution)
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            // ignore; the pause only pads the 1-second timestamp resolution
        }
        DeepLearningModel model = new DeepLearning(p).trainModel().get();
        //seconds
        long sleepTime = 5;
        try {
            Thread.sleep(sleepTime * 1000);
        } catch (InterruptedException ex) {
            // ignore; continue after the deliberate pause
        }
        // checkpoint restart after sleep
        DeepLearningParameters p2 = (DeepLearningParameters) p.clone();
        p2._checkpoint = model._key;
        p2._epochs *= 2;
        DeepLearningModel model2 = null;
        try {
            model2 = new DeepLearning(p2).trainModel().get();
            long end = System.currentTimeMillis();
            TwoDimTable table = model2._output._scoring_history;
            double priorDurationDouble = 0;
            long priorTimeStampLong = 0;
            DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
            for (int i = 0; i < table.getRowDim(); ++i) {
                // Check that timestamp is correct, and growing monotonically
                String timestamp = (String) table.get(i, 0);
                long timeStampLong = fmt.parseMillis(timestamp);
                Assert.assertTrue("Timestamp must be later than outside timer start", timeStampLong >= start);
                Assert.assertTrue("Timestamp must be earlier than outside timer end", timeStampLong <= end);
                Assert.assertTrue("Timestamp must increase", timeStampLong >= priorTimeStampLong);
                priorTimeStampLong = timeStampLong;
                // Check that duration is growing monotonically
                String duration = (String) table.get(i, 1);
                //"x.xxxx sec"
                duration = duration.substring(0, duration.length() - 4);
                try {
                    double durationDouble = Double.parseDouble(duration);
                    Assert.assertTrue("Duration must be >0: " + durationDouble, durationDouble >= 0);
                    Assert.assertTrue("Duration must increase: " + priorDurationDouble + " -> " + durationDouble, durationDouble >= priorDurationDouble);
                    Assert.assertTrue("Duration cannot be more than outside timer delta", durationDouble <= (end - start) / 1e3);
                    priorDurationDouble = durationDouble;
                } catch (NumberFormatException ex) {
                //skip
                }
                // Check that epoch counting is good
                //1 epoch per step
                Assert.assertTrue("Epoch counter must be contiguous", (Double) table.get(i, 3) == i);
                //1 iteration per step
                Assert.assertTrue("Iteration counter must match epochs", (Integer) table.get(i, 4) == i);
            }
            try {
                // Check that duration doesn't see the sleep
                String durationBefore = (String) table.get((int) (p._epochs), 1);
                durationBefore = durationBefore.substring(0, durationBefore.length() - 4);
                String durationAfter = (String) table.get((int) (p._epochs + 1), 1);
                durationAfter = durationAfter.substring(0, durationAfter.length() - 4);
                double diff = Double.parseDouble(durationAfter) - Double.parseDouble(durationBefore);
                Assert.assertTrue("Duration must be smooth; actual " + diff + ", expected at most " + sleepTime + " (before=" + durationBefore + ", after=" + durationAfter + ")", diff < sleepTime + 1);
                // Check that time stamp does see the sleep
                String timeStampBefore = (String) table.get((int) (p._epochs), 0);
                long timeStampBeforeLong = fmt.parseMillis(timeStampBefore);
                String timeStampAfter = (String) table.get((int) (p._epochs + 1), 0);
                long timeStampAfterLong = fmt.parseMillis(timeStampAfter);
                Assert.assertTrue("Time stamp must experience a delay", timeStampAfterLong - timeStampBeforeLong >= (sleepTime - 1) * 1000);
                // Check that the training speed is similar before and after checkpoint restart
                String speedBefore = (String) table.get((int) (p._epochs), 2);
                speedBefore = speedBefore.substring(0, speedBefore.length() - 9);
                double speedBeforeDouble = Double.parseDouble(speedBefore);
                String speedAfter = (String) table.get((int) (p._epochs + 1), 2);
                speedAfter = speedAfter.substring(0, speedAfter.length() - 9);
                double speedAfterDouble = Double.parseDouble(speedAfter);
                //expect less than 50% change in speed
                Assert.assertTrue("Speed shouldn't change more than 50%", Math.abs(speedAfterDouble - speedBeforeDouble) / speedBeforeDouble < 0.5);
            } catch (NumberFormatException ex) {
            //skip runtimes > 1 minute (too hard to parse into seconds here...).
            }
        } finally {
            if (model != null)
                model.delete();
            if (model2 != null)
                model2.delete();
        }
    } finally {
        if (frame != null)
            frame.remove();
        Scope.exit();
    }
}
Also used: Frame(water.fvec.Frame) NFSFileVec(water.fvec.NFSFileVec) DeepLearningParameters(hex.deeplearning.DeepLearningModel.DeepLearningParameters) TwoDimTable(water.util.TwoDimTable) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Test(org.junit.Test)

Example 13 with NFSFileVec

use of water.fvec.NFSFileVec in project h2o-3 by h2oai.

the class DeepWaterAbstractIntegrationTest method MNISTHinton.

@Test
public void MNISTHinton() {
    Frame tr = null;
    Frame va = null;
    DeepWaterModel m = null;
    try {
        DeepWaterParameters p = new DeepWaterParameters();
        File file = FileUtils.locateFile("bigdata/laptop/mnist/train.csv.gz");
        File valid = FileUtils.locateFile("bigdata/laptop/mnist/test.csv.gz");
        if (file != null) {
            p._response_column = "C785";
            NFSFileVec trainfv = NFSFileVec.make(file);
            tr = ParseDataset.parse(Key.make(), trainfv._key);
            NFSFileVec validfv = NFSFileVec.make(valid);
            va = ParseDataset.parse(Key.make(), validfv._key);
            for (String col : new String[] { p._response_column }) {
                Vec v = tr.remove(col);
                tr.add(col, v.toCategoricalVec());
                v.remove();
                v = va.remove(col);
                va.add(col, v.toCategoricalVec());
                v.remove();
            }
            DKV.put(tr);
            DKV.put(va);
            p._backend = getBackend();
            p._hidden = new int[] { 1024, 1024, 2048 };
            p._input_dropout_ratio = 0.1;
            p._hidden_dropout_ratios = new double[] { 0.5, 0.5, 0.5 };
            p._stopping_rounds = 0;
            p._learning_rate = 1e-3;
            p._mini_batch_size = 32;
            p._epochs = 20;
            p._train = tr._key;
            p._valid = va._key;
            DeepWater j = new DeepWater(p);
            m = j.trainModel().get();
            Assert.assertTrue(((ModelMetricsMultinomial) (m._output._validation_metrics)).mean_per_class_error() < 0.05);
        }
    } finally {
        if (tr != null)
            tr.remove();
        if (va != null)
            va.remove();
        if (m != null)
            m.remove();
    }
}
Also used: Frame(water.fvec.Frame) ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec) ModelMetricsMultinomial(hex.ModelMetricsMultinomial)

Example 14 with NFSFileVec

use of water.fvec.NFSFileVec in project h2o-3 by h2oai.

the class DeepWaterAbstractIntegrationTest method MNISTLenet.

@Test
public void MNISTLenet() {
    Frame tr = null;
    Frame va = null;
    DeepWaterModel m = null;
    try {
        DeepWaterParameters p = new DeepWaterParameters();
        File file = FileUtils.locateFile("bigdata/laptop/mnist/train.csv.gz");
        File valid = FileUtils.locateFile("bigdata/laptop/mnist/test.csv.gz");
        if (file != null) {
            p._response_column = "C785";
            NFSFileVec trainfv = NFSFileVec.make(file);
            tr = ParseDataset.parse(Key.make(), trainfv._key);
            NFSFileVec validfv = NFSFileVec.make(valid);
            va = ParseDataset.parse(Key.make(), validfv._key);
            for (String col : new String[] { p._response_column }) {
                Vec v = tr.remove(col);
                tr.add(col, v.toCategoricalVec());
                v.remove();
                v = va.remove(col);
                va.add(col, v.toCategoricalVec());
                v.remove();
            }
            DKV.put(tr);
            DKV.put(va);
            p._backend = getBackend();
            p._train = tr._key;
            p._valid = va._key;
            p._image_shape = new int[] { 28, 28 };
            // keep constant pixel columns so the image stays 28x28
            p._ignore_const_cols = false;
            p._channels = 1;
            p._network = lenet;
            DeepWater j = new DeepWater(p);
            m = j.trainModel().get();
            Assert.assertTrue(((ModelMetricsMultinomial) (m._output._validation_metrics)).mean_per_class_error() < 0.05);
        }
    } finally {
        if (tr != null)
            tr.remove();
        if (va != null)
            va.remove();
        if (m != null)
            m.remove();
    }
}
Also used: Frame(water.fvec.Frame) ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec) ModelMetricsMultinomial(hex.ModelMetricsMultinomial)

Example 15 with NFSFileVec

use of water.fvec.NFSFileVec in project h2o-3 by h2oai.

the class DeepWaterAbstractIntegrationTest method checkpointReporting.

@Test
public void checkpointReporting() {
    Scope.enter();
    Frame frame = null;
    try {
        File file = FileUtils.locateFile("smalldata/logreg/prostate.csv");
        NFSFileVec trainfv = NFSFileVec.make(file);
        frame = ParseDataset.parse(Key.make(), trainfv._key);
        DeepWaterParameters p = new DeepWaterParameters();
        // populate model parameters
        p._backend = getBackend();
        p._train = frame._key;
        // CAPSULE is the response column
        p._response_column = "CAPSULE";
        p._activation = DeepWaterParameters.Activation.Rectifier;
        p._epochs = 4;
        p._train_samples_per_iteration = -1;
        p._mini_batch_size = 1;
        p._score_duty_cycle = 1;
        p._score_interval = 0;
        p._overwrite_with_best_model = false;
        p._seed = 1234;
        // Convert the 'CAPSULE' response to categorical (binary 0/1)
        int ci = frame.find("CAPSULE");
        Scope.track(frame.replace(ci, frame.vecs()[ci].toCategoricalVec()));
        DKV.put(frame);
        long start = System.currentTimeMillis();
        //to avoid rounding issues with printed time stamp (1 second resolution)
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            // ignore; the pause only pads the 1-second timestamp resolution
        }
        DeepWaterModel model = new DeepWater(p).trainModel().get();
        //seconds
        long sleepTime = 5;
        try {
            Thread.sleep(sleepTime * 1000);
        } catch (InterruptedException ex) {
            // ignore; continue after the deliberate pause
        }
        // checkpoint restart after sleep
        DeepWaterParameters p2 = (DeepWaterParameters) p.clone();
        p2._checkpoint = model._key;
        p2._epochs *= 2;
        DeepWaterModel model2 = null;
        try {
            model2 = new DeepWater(p2).trainModel().get();
            long end = System.currentTimeMillis();
            TwoDimTable table = model2._output._scoring_history;
            double priorDurationDouble = 0;
            long priorTimeStampLong = 0;
            DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
            for (int i = 0; i < table.getRowDim(); ++i) {
                // Check that timestamp is correct, and growing monotonically
                String timestamp = (String) table.get(i, 0);
                long timeStampLong = fmt.parseMillis(timestamp);
                Assert.assertTrue("Timestamp must be later than outside timer start", timeStampLong >= start);
                Assert.assertTrue("Timestamp must be earlier than outside timer end", timeStampLong <= end);
                Assert.assertTrue("Timestamp must increase", timeStampLong >= priorTimeStampLong);
                priorTimeStampLong = timeStampLong;
                // Check that duration is growing monotonically
                String duration = (String) table.get(i, 1);
                //"x.xxxx sec"
                duration = duration.substring(0, duration.length() - 4);
                try {
                    double durationDouble = Double.parseDouble(duration);
                    Assert.assertTrue("Duration must be >0: " + durationDouble, durationDouble >= 0);
                    Assert.assertTrue("Duration must increase: " + priorDurationDouble + " -> " + durationDouble, durationDouble >= priorDurationDouble);
                    Assert.assertTrue("Duration cannot be more than outside timer delta", durationDouble <= (end - start) / 1e3);
                    priorDurationDouble = durationDouble;
                } catch (NumberFormatException ex) {
                //skip
                }
                // Check that epoch counting is good
                //1 epoch per step
                Assert.assertTrue("Epoch counter must be contiguous", (Double) table.get(i, 3) == i);
                //1 iteration per step
                Assert.assertTrue("Iteration counter must match epochs", (Integer) table.get(i, 4) == i);
            }
            try {
                // Check that duration doesn't see the sleep
                String durationBefore = (String) table.get((int) (p._epochs), 1);
                durationBefore = durationBefore.substring(0, durationBefore.length() - 4);
                String durationAfter = (String) table.get((int) (p._epochs + 1), 1);
                durationAfter = durationAfter.substring(0, durationAfter.length() - 4);
                Assert.assertTrue("Duration must be smooth", Double.parseDouble(durationAfter) - Double.parseDouble(durationBefore) < sleepTime + 1);
                // Check that time stamp does see the sleep
                String timeStampBefore = (String) table.get((int) (p._epochs), 0);
                long timeStampBeforeLong = fmt.parseMillis(timeStampBefore);
                String timeStampAfter = (String) table.get((int) (p._epochs + 1), 0);
                long timeStampAfterLong = fmt.parseMillis(timeStampAfter);
                Assert.assertTrue("Time stamp must experience a delay", timeStampAfterLong - timeStampBeforeLong >= (sleepTime - 1) * 1000);
                // Check that the training speed is similar before and after checkpoint restart
                String speedBefore = (String) table.get((int) (p._epochs), 2);
                speedBefore = speedBefore.substring(0, speedBefore.length() - 9);
                double speedBeforeDouble = Double.parseDouble(speedBefore);
                String speedAfter = (String) table.get((int) (p._epochs + 1), 2);
                speedAfter = speedAfter.substring(0, speedAfter.length() - 9);
                double speedAfterDouble = Double.parseDouble(speedAfter);
                //expect less than 50% change in speed
                Assert.assertTrue("Speed shouldn't change more than 50%", Math.abs(speedAfterDouble - speedBeforeDouble) / speedBeforeDouble < 0.5);
            } catch (NumberFormatException ex) {
            //skip runtimes > 1 minute (too hard to parse into seconds here...).
            }
        } finally {
            if (model != null)
                model.delete();
            if (model2 != null)
                model2.delete();
        }
    } finally {
        if (frame != null)
            frame.remove();
        Scope.exit();
    }
}
Also used: Frame(water.fvec.Frame) ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) NFSFileVec(water.fvec.NFSFileVec) TwoDimTable(water.util.TwoDimTable) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Aggregations

NFSFileVec (water.fvec.NFSFileVec): 27
Frame (water.fvec.Frame): 21
Test (org.junit.Test): 13
File (java.io.File): 12
DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 9
Vec (water.fvec.Vec): 8
ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 5
Key (water.Key): 5
ModelMetricsMultinomial (hex.ModelMetricsMultinomial): 3
FrameSplitter (hex.FrameSplitter): 2
ModelMetricsBinomial (hex.ModelMetricsBinomial): 2
Random (java.util.Random): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 2
Ignore (org.junit.Ignore): 2
FrameUtils (water.util.FrameUtils): 2
TwoDimTable (water.util.TwoDimTable): 2
ConfusionMatrix (hex.ConfusionMatrix): 1
SplitFrame (hex.SplitFrame): 1
ClassSamplingMethod (hex.deeplearning.DeepLearningModel.DeepLearningParameters.ClassSamplingMethod): 1
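
The examples above share a common setup pattern: locate a file, wrap it in an NFSFileVec, parse it into a Frame, convert the response column to categorical, and publish the Frame to the DKV before training. Below is a minimal sketch of that pattern, assembled from the snippets above rather than taken from any single test; the prostate.csv path and the "CAPSULE" response name are reused from the examples, and the package names for ParseDataset and FileUtils are assumed to follow h2o-3's layout.

import java.io.File;
import water.DKV;
import water.Key;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.Vec;
import water.parser.ParseDataset;
import water.util.FileUtils;

public class NFSFileVecUsageSketch {
    // Load a CSV into a Frame and make the response column categorical,
    // mirroring the setup used by the tests indexed above.
    public static Frame loadAndPrepare() {
        File file = FileUtils.locateFile("smalldata/logreg/prostate.csv");
        // File-backed vector over the raw bytes of the CSV.
        NFSFileVec nfs = NFSFileVec.make(file);
        // Parse the raw bytes into a columnar Frame stored under a fresh key.
        Frame frame = ParseDataset.parse(Key.make(), nfs._key);
        // Swap the response column for its categorical version.
        Vec v = frame.remove("CAPSULE");
        frame.add("CAPSULE", v.toCategoricalVec());
        // Free the original numeric vector.
        v.remove();
        // Publish the updated Frame so model builders can see the new column.
        DKV.put(frame);
        return frame;
    }
}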