Search in sources :

Example 11 with TwoDimTable

use of water.util.TwoDimTable in project h2o-3 by h2oai.

the class TwoDimTableTest method run6.

/**
 * Builds a sparsely filled 4x4 table whose columns are all declared as strings,
 * then checks its text rendering, a few cell reads, and that it can be turned into JSON.
 */
@Test
public void run6() {
    // Every column is declared as a "string" column rendered with "%s".
    final String[] rowNames = { "R0", "R1", "R2", "R3" };
    final String[] colNames = { "C0", "C1", "C2", "C3" };
    final String[] colTypes = { "string", "string", "string", "string" };
    final String[] colFormats = { "%s", "%s", "%s", "%s" };
    TwoDimTable mixed = new TwoDimTable("Mixed", "stuff", rowNames, colNames, colTypes, colFormats, "");

    // Populate only a handful of cells; cell (1, 2) receives a double rather than a String.
    mixed.set(3, 3, "a33");
    mixed.set(0, 1, "a01");
    mixed.set(1, 2, 1.2);
    mixed.set(0, 2, "a02");
    mixed.set(3, 0, "a30");

    // The text rendering of the sparse table must not be empty.
    final String rendered = mixed.toString();
    assertTrue(!rendered.isEmpty());
    Log.info(rendered);

    // A String cell reads back unchanged.
    Object stringCell = mixed.get(3, 0);
    assertTrue(stringCell.equals("a30"));
    // The double written to the string column is expected to read back as the text "1.2".
    Object numericCell = mixed.get(1, 2);
    assertTrue(numericCell.equals("1.2"));
    // A cell that was never written reads back as null.
    Object untouchedCell = mixed.get(1, 3);
    assertTrue(untouchedCell == null);

    // Converting to the REST schema and serializing must not throw; the JSON is only logged.
    final String asJson = new TwoDimTableV3().fillFromImpl(mixed).toJsonString();
    Log.info(asJson);
}
Also used : TwoDimTable(water.util.TwoDimTable) TwoDimTableV3(water.api.schemas3.TwoDimTableV3) Test(org.junit.Test) ParserTest(water.parser.ParserTest)

Example 12 with TwoDimTable

use of water.util.TwoDimTable in project h2o-3 by h2oai.

the class DeepLearningCheckpointReporting method run.

/**
 * Verifies the scoring history of a Deep Learning model that is restarted from a checkpoint.
 *
 * <p>A model is trained for 4 epochs, the test sleeps for 5 seconds, and training is then
 * continued from the checkpoint with twice as many epochs. The scoring history of the
 * continued model is checked row by row:
 * <ul>
 *   <li>column 0 (timestamp) lies between the outside timer start and end and never decreases;</li>
 *   <li>column 1 (duration) is non-negative, never decreases and does not exceed the outside timer;</li>
 *   <li>column 3 (epochs) and column 4 (iterations) equal the row index;</li>
 *   <li>across the checkpoint boundary the duration does not include the sleep, the timestamp
 *       does, and the training speed (column 2) changes by less than 50%.</li>
 * </ul>
 */
@Test
public void run() {
    Scope.enter();
    Frame frame = null;
    try {
        NFSFileVec trainfv = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
        frame = ParseDataset.parse(Key.make(), trainfv._key);
        DeepLearningParameters p = new DeepLearningParameters();
        // populate model parameters
        p._train = frame._key;
        // response column to predict
        p._response_column = "CAPSULE";
        p._activation = DeepLearningParameters.Activation.Rectifier;
        p._epochs = 4;
        p._train_samples_per_iteration = -1;
        p._score_duty_cycle = 1;
        p._score_interval = 0;
        p._overwrite_with_best_model = false;
        p._classification_stop = -1;
        p._seed = 1234;
        p._reproducible = true;
        // Convert the response column 'CAPSULE' to categorical
        int ci = frame.find("CAPSULE");
        Scope.track(frame.replace(ci, frame.vecs()[ci].toCategoricalVec()));
        DKV.put(frame);
        long start = System.currentTimeMillis();
        // to avoid rounding issues with printed time stamp (1 second resolution)
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            // Do not swallow the interruption: restore the flag so callers can observe it.
            Thread.currentThread().interrupt();
        }
        DeepLearningModel model = new DeepLearning(p).trainModel().get();
        // seconds
        long sleepTime = 5;
        try {
            Thread.sleep(sleepTime * 1000);
        } catch (InterruptedException ex) {
            // Do not swallow the interruption: restore the flag so callers can observe it.
            Thread.currentThread().interrupt();
        }
        // checkpoint restart after sleep
        DeepLearningParameters p2 = (DeepLearningParameters) p.clone();
        p2._checkpoint = model._key;
        p2._epochs *= 2;
        DeepLearningModel model2 = null;
        try {
            model2 = new DeepLearning(p2).trainModel().get();
            long end = System.currentTimeMillis();
            TwoDimTable table = model2._output._scoring_history;
            double priorDurationDouble = 0;
            long priorTimeStampLong = 0;
            DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
            for (int i = 0; i < table.getRowDim(); ++i) {
                // Check that timestamp is correct, and growing monotonically
                String timestamp = (String) table.get(i, 0);
                long timeStampLong = fmt.parseMillis(timestamp);
                Assert.assertTrue("Timestamp must be later than outside timer start", timeStampLong >= start);
                Assert.assertTrue("Timestamp must be earlier than outside timer end", timeStampLong <= end);
                Assert.assertTrue("Timestamp must increase", timeStampLong >= priorTimeStampLong);
                priorTimeStampLong = timeStampLong;
                // Check that duration is growing monotonically
                String duration = (String) table.get(i, 1);
                // "x.xxxx sec": strip the 4-character unit suffix
                duration = duration.substring(0, duration.length() - 4);
                try {
                    double durationDouble = Double.parseDouble(duration);
                    Assert.assertTrue("Duration must be >= 0: " + durationDouble, durationDouble >= 0);
                    Assert.assertTrue("Duration must increase: " + priorDurationDouble + " -> " + durationDouble, durationDouble >= priorDurationDouble);
                    Assert.assertTrue("Duration cannot be more than outside timer delta", durationDouble <= (end - start) / 1e3);
                    priorDurationDouble = durationDouble;
                } catch (NumberFormatException ignored) {
                    // Deliberately skipped: a duration that is not plain "<number> sec" is not checked.
                }
                // Check that epoch counting is good
                // 1 epoch per step
                Assert.assertTrue("Epoch counter must be contiguous", (Double) table.get(i, 3) == i);
                // 1 iteration per step
                Assert.assertTrue("Iteration counter must match epochs", (Integer) table.get(i, 4) == i);
            }
            try {
                // Check that duration doesn't see the sleep
                String durationBefore = (String) table.get((int) (p._epochs), 1);
                durationBefore = durationBefore.substring(0, durationBefore.length() - 4);
                String durationAfter = (String) table.get((int) (p._epochs + 1), 1);
                durationAfter = durationAfter.substring(0, durationAfter.length() - 4);
                double diff = Double.parseDouble(durationAfter) - Double.parseDouble(durationBefore);
                Assert.assertTrue("Duration must be smooth; actual " + diff + ", expected at most " + sleepTime + " (before=" + durationBefore + ", after=" + durationAfter + ")", diff < sleepTime + 1);
                // Check that time stamp does see the sleep
                String timeStampBefore = (String) table.get((int) (p._epochs), 0);
                long timeStampBeforeLong = fmt.parseMillis(timeStampBefore);
                String timeStampAfter = (String) table.get((int) (p._epochs + 1), 0);
                long timeStampAfterLong = fmt.parseMillis(timeStampAfter);
                Assert.assertTrue("Time stamp must experience a delay", timeStampAfterLong - timeStampBeforeLong >= (sleepTime - 1) * 1000);
                // Check that the training speed is similar before and after checkpoint restart
                String speedBefore = (String) table.get((int) (p._epochs), 2);
                // strip the 9-character unit suffix of the speed column
                speedBefore = speedBefore.substring(0, speedBefore.length() - 9);
                double speedBeforeDouble = Double.parseDouble(speedBefore);
                String speedAfter = (String) table.get((int) (p._epochs + 1), 2);
                speedAfter = speedAfter.substring(0, speedAfter.length() - 9);
                double speedAfterDouble = Double.parseDouble(speedAfter);
                // expect less than 50% change in speed
                Assert.assertTrue("Speed shouldn't change more than 50%", Math.abs(speedAfterDouble - speedBeforeDouble) / speedBeforeDouble < 0.5);
            } catch (NumberFormatException ignored) {
                // Deliberately skipped: runtimes > 1 minute (too hard to parse into seconds here...).
            }
        } finally {
            if (model != null)
                model.delete();
            if (model2 != null)
                model2.delete();
        }
    } finally {
        if (frame != null)
            frame.remove();
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) NFSFileVec(water.fvec.NFSFileVec) DeepLearningParameters(hex.deeplearning.DeepLearningModel.DeepLearningParameters) TwoDimTable(water.util.TwoDimTable) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Test(org.junit.Test)

Example 13 with TwoDimTable

use of water.util.TwoDimTable in project h2o-3 by h2oai.

the class DeepWaterAbstractIntegrationTest method checkpointReporting.

/**
 * Verifies the scoring history of a Deep Water model that is restarted from a checkpoint.
 *
 * <p>A model is trained for 4 epochs on the backend returned by {@code getBackend()}, the test
 * sleeps for 5 seconds, and training is then continued from the checkpoint with twice as many
 * epochs. The scoring history of the continued model is checked row by row:
 * <ul>
 *   <li>column 0 (timestamp) lies between the outside timer start and end and never decreases;</li>
 *   <li>column 1 (duration) is non-negative, never decreases and does not exceed the outside timer;</li>
 *   <li>column 3 (epochs) and column 4 (iterations) equal the row index;</li>
 *   <li>across the checkpoint boundary the duration does not include the sleep, the timestamp
 *       does, and the training speed (column 2) changes by less than 50%.</li>
 * </ul>
 */
@Test
public void checkpointReporting() {
    Scope.enter();
    Frame frame = null;
    try {
        File file = FileUtils.locateFile("smalldata/logreg/prostate.csv");
        NFSFileVec trainfv = NFSFileVec.make(file);
        frame = ParseDataset.parse(Key.make(), trainfv._key);
        DeepWaterParameters p = new DeepWaterParameters();
        // populate model parameters
        p._backend = getBackend();
        p._train = frame._key;
        // response column to predict
        p._response_column = "CAPSULE";
        p._activation = DeepWaterParameters.Activation.Rectifier;
        p._epochs = 4;
        p._train_samples_per_iteration = -1;
        p._mini_batch_size = 1;
        p._score_duty_cycle = 1;
        p._score_interval = 0;
        p._overwrite_with_best_model = false;
        p._seed = 1234;
        // Convert the response column 'CAPSULE' to categorical
        int ci = frame.find("CAPSULE");
        Scope.track(frame.replace(ci, frame.vecs()[ci].toCategoricalVec()));
        DKV.put(frame);
        long start = System.currentTimeMillis();
        // to avoid rounding issues with printed time stamp (1 second resolution)
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            // Do not swallow the interruption: restore the flag so callers can observe it.
            Thread.currentThread().interrupt();
        }
        DeepWaterModel model = new DeepWater(p).trainModel().get();
        // seconds
        long sleepTime = 5;
        try {
            Thread.sleep(sleepTime * 1000);
        } catch (InterruptedException ex) {
            // Do not swallow the interruption: restore the flag so callers can observe it.
            Thread.currentThread().interrupt();
        }
        // checkpoint restart after sleep
        DeepWaterParameters p2 = (DeepWaterParameters) p.clone();
        p2._checkpoint = model._key;
        p2._epochs *= 2;
        DeepWaterModel model2 = null;
        try {
            model2 = new DeepWater(p2).trainModel().get();
            long end = System.currentTimeMillis();
            TwoDimTable table = model2._output._scoring_history;
            double priorDurationDouble = 0;
            long priorTimeStampLong = 0;
            DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
            for (int i = 0; i < table.getRowDim(); ++i) {
                // Check that timestamp is correct, and growing monotonically
                String timestamp = (String) table.get(i, 0);
                long timeStampLong = fmt.parseMillis(timestamp);
                Assert.assertTrue("Timestamp must be later than outside timer start", timeStampLong >= start);
                Assert.assertTrue("Timestamp must be earlier than outside timer end", timeStampLong <= end);
                Assert.assertTrue("Timestamp must increase", timeStampLong >= priorTimeStampLong);
                priorTimeStampLong = timeStampLong;
                // Check that duration is growing monotonically
                String duration = (String) table.get(i, 1);
                // "x.xxxx sec": strip the 4-character unit suffix
                duration = duration.substring(0, duration.length() - 4);
                try {
                    double durationDouble = Double.parseDouble(duration);
                    Assert.assertTrue("Duration must be >= 0: " + durationDouble, durationDouble >= 0);
                    Assert.assertTrue("Duration must increase: " + priorDurationDouble + " -> " + durationDouble, durationDouble >= priorDurationDouble);
                    Assert.assertTrue("Duration cannot be more than outside timer delta", durationDouble <= (end - start) / 1e3);
                    priorDurationDouble = durationDouble;
                } catch (NumberFormatException ignored) {
                    // Deliberately skipped: a duration that is not plain "<number> sec" is not checked.
                }
                // Check that epoch counting is good
                // 1 epoch per step
                Assert.assertTrue("Epoch counter must be contiguous", (Double) table.get(i, 3) == i);
                // 1 iteration per step
                Assert.assertTrue("Iteration counter must match epochs", (Integer) table.get(i, 4) == i);
            }
            try {
                // Check that duration doesn't see the sleep
                String durationBefore = (String) table.get((int) (p._epochs), 1);
                durationBefore = durationBefore.substring(0, durationBefore.length() - 4);
                String durationAfter = (String) table.get((int) (p._epochs + 1), 1);
                durationAfter = durationAfter.substring(0, durationAfter.length() - 4);
                Assert.assertTrue("Duration must be smooth", Double.parseDouble(durationAfter) - Double.parseDouble(durationBefore) < sleepTime + 1);
                // Check that time stamp does see the sleep
                String timeStampBefore = (String) table.get((int) (p._epochs), 0);
                long timeStampBeforeLong = fmt.parseMillis(timeStampBefore);
                String timeStampAfter = (String) table.get((int) (p._epochs + 1), 0);
                long timeStampAfterLong = fmt.parseMillis(timeStampAfter);
                Assert.assertTrue("Time stamp must experience a delay", timeStampAfterLong - timeStampBeforeLong >= (sleepTime - 1) * 1000);
                // Check that the training speed is similar before and after checkpoint restart
                String speedBefore = (String) table.get((int) (p._epochs), 2);
                // strip the 9-character unit suffix of the speed column
                speedBefore = speedBefore.substring(0, speedBefore.length() - 9);
                double speedBeforeDouble = Double.parseDouble(speedBefore);
                String speedAfter = (String) table.get((int) (p._epochs + 1), 2);
                speedAfter = speedAfter.substring(0, speedAfter.length() - 9);
                double speedAfterDouble = Double.parseDouble(speedAfter);
                // expect less than 50% change in speed
                Assert.assertTrue("Speed shouldn't change more than 50%", Math.abs(speedAfterDouble - speedBeforeDouble) / speedBeforeDouble < 0.5);
            } catch (NumberFormatException ignored) {
                // Deliberately skipped: runtimes > 1 minute (too hard to parse into seconds here...).
            }
        } finally {
            if (model != null)
                model.delete();
            if (model2 != null)
                model2.delete();
        }
    } finally {
        if (frame != null)
            frame.remove();
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) NFSFileVec(water.fvec.NFSFileVec) TwoDimTable(water.util.TwoDimTable) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Example 14 with TwoDimTable

use of water.util.TwoDimTable in project h2o-3 by h2oai.

the class ClusteringUtils method createCenterTable.

/**
 * Builds a table of cluster centers, one row per cluster and one column per entry of
 * {@code output._names}.
 *
 * <p>If any of the required output fields is {@code null}, a 1x1 placeholder table holding
 * {@code NaN} is returned instead. Columns with a non-null domain are filled with the domain
 * level indexed by the raw center value; all other columns are filled with the standardized
 * or the raw center value, depending on {@code standardized}.
 *
 * @param output       clustering output providing sizes, names, domains and centers
 * @param standardized whether numeric columns show standardized centers; also selects the table title
 * @return the populated center table, or the NaN placeholder when data is missing
 */
public static TwoDimTable createCenterTable(ClusteringModel.ClusteringOutput output, boolean standardized) {
    final String title = standardized ? "Standardized Cluster Means" : "Cluster Means";

    // Without the full set of inputs there is nothing meaningful to show: return a NaN placeholder.
    final boolean missingData = output._size == null
            || output._names == null
            || output._domains == null
            || output._centers_raw == null
            || (standardized && output._centers_std_raw == null);
    if (missingData) {
        TwoDimTable placeholder = new TwoDimTable(title, null, new String[] { "1" }, new String[] { "C1" }, new String[] { "double" }, new String[] { "%f" }, "Centroid");
        placeholder.set(0, 0, Double.NaN);
        return placeholder;
    }

    // Row headers are the 1-based cluster numbers.
    final int clusterCount = output._size.length;
    String[] rowHeaders = new String[clusterCount];
    for (int row = 0; row < clusterCount; row++) {
        rowHeaders[row] = String.valueOf(row + 1);
    }

    // A column is textual when it has a domain, numeric otherwise.
    final int columnCount = output._names.length;
    String[] colTypes = new String[columnCount];
    String[] colFormats = new String[columnCount];
    for (int col = 0; col < output._domains.length; ++col) {
        if (output._domains[col] == null) {
            colTypes[col] = "double";
            colFormats[col] = "%f";
        } else {
            colTypes[col] = "String";
            colFormats[col] = "%s";
        }
    }

    TwoDimTable centers = new TwoDimTable(title, null, rowHeaders, output._names, colTypes, colFormats, "Centroid");

    // The internal weights/folds column is counted in the domain length, so leave it out when present.
    final int filledColumns = output.hasWeights() ? output._domains.length - 1 : output._domains.length;
    for (int col = 0; col < filledColumns; ++col) {
        final boolean numeric = output._domains[col] == null;
        if (numeric) {
            for (int row = 0; row < output._centers_raw.length; ++row) {
                centers.set(row, col, standardized ? output._centers_std_raw[row][col] : output._centers_raw[row][col]);
            }
            continue;
        }
        // Textual column: the raw center value is the index of the level to display.
        for (int row = 0; row < output._centers_raw.length; ++row) {
            centers.set(row, col, output._domains[col][(int) output._centers_raw[row][col]]);
        }
    }
    return centers;
}
Also used : TwoDimTable(water.util.TwoDimTable)

Example 15 with TwoDimTable

use of water.util.TwoDimTable in project h2o-3 by h2oai.

the class TwoDimTableTest method run9.

/**
 * Parses a two-row, single-column dataset ("1" and "NA") and checks how the values surface in
 * the frame's {@code TwoDimTable}: the column type is "long", the first value reads back as a
 * long equal to 1, and the missing value cannot be cast to long but reads back as a NaN double.
 */
@Test
public void run9() {
    Frame fr = null;
    try {
        // Row offset of the first data row in the table.
        // NOTE(review): presumably toTwoDimTable emits 5 leading rows before the data — confirm.
        int OFFSET = 5;
        int firstVal = 1;
        String data = "1\nNA\n";
        Key k1 = ParserTest.makeByteVec(data);
        Key r1 = Key.make();
        fr = ParseDataset.parse(r1, k1);
        assertTrue(fr.numRows() == 2);
        assertTrue(fr.hasNAs());
        System.out.println(fr);
        TwoDimTable table = fr.toTwoDimTable(0, 2);
        // Compare string contents, not references: '==' only passes when both are the same interned instance.
        assertTrue("long".equals(table.getColTypes()[0]));
        assertTrue((long) table.get(0 + OFFSET, 0) == firstVal);
        try {
            // NaN can't be cast to a long; the cast itself is the operation under test.
            long invalid = (long) table.get(1 + OFFSET, 0);
            // Reaching this line means the cast unexpectedly succeeded.
            assertFalse(true);
        } catch (ClassCastException expected) {
            // Expected: the missing value is not stored as a long.
        }
        assertTrue(Double.isNaN((double) table.get(1 + OFFSET, 0)));
    } finally {
        if (fr != null)
            fr.delete();
    }
}
Also used : Frame(water.fvec.Frame) TwoDimTable(water.util.TwoDimTable) Test(org.junit.Test) ParserTest(water.parser.ParserTest)

Aggregations

TwoDimTable (water.util.TwoDimTable): 30, Test (org.junit.Test): 15, ParserTest (water.parser.ParserTest): 10, TwoDimTableV3 (water.api.schemas3.TwoDimTableV3): 8, Frame (water.fvec.Frame): 8, ArrayList (java.util.ArrayList): 5, PrettyPrint (water.util.PrettyPrint): 5, PartialDependence (hex.PartialDependence): 4, GBM (hex.tree.gbm.GBM): 4, GBMModel (hex.tree.gbm.GBMModel): 4, DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 4, Vec (water.fvec.Vec): 3, NFSFileVec (water.fvec.NFSFileVec): 2, AUC2 (hex.AUC2): 1, Model (hex.Model): 1, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 1, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 1, Key (water.Key): 1, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 1