Search in sources :

Example 11 with GLRMParameters

Use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

In the class GLRMCategoricalTest, method testCategoricalIris.

/**
 * Smoke test: fits a rank-4 GLRM to the iris data set (absolute loss, SVD
 * initialization, no transform, SVD recovery enabled, at most 1000 iterations),
 * scores the training frame and logs the resulting metrics.
 *
 * <p>The test makes no assertions; it passes as long as training and scoring
 * complete without throwing. The parsed frame and the model are deleted in the
 * {@code finally} block whether or not the run succeeds.
 */
@Test
public void testCategoricalIris() throws InterruptedException, ExecutionException {
    Frame irisFrame = null;
    GLRMModel fitted = null;
    try {
        irisFrame = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");

        // Model configuration
        final GLRMParameters cfg = new GLRMParameters();
        cfg._train = irisFrame._key;
        cfg._k = 4;
        cfg._max_iterations = 1000;
        cfg._loss = GlrmLoss.Absolute;
        cfg._transform = DataInfo.TransformType.NONE;
        cfg._init = GlrmInitialization.SVD;
        cfg._recover_svd = true;

        // Train and report the final objective
        fitted = new GLRM(cfg).trainModel().get();
        String progress = "Iteration " + fitted._output._iterations + ": Objective value = " + fitted._output._objective;
        Log.info(progress);

        // Score on the training data; the prediction frame itself is not needed
        Frame scored = fitted.score(irisFrame);
        scored.delete();

        ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(fitted, irisFrame);
        String errors = "Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr;
        Log.info(errors);
    } finally {
        if (irisFrame != null) {
            irisFrame.delete();
        }
        if (fitted != null) {
            fitted.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) Test(org.junit.Test)

Example 12 with GLRMParameters

Use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

In the class GLRMTest, method testArrests.

/**
 * Smoke test: fits a rank-3 GLRM to the USArrests data set using a
 * user-supplied initial Y matrix, quadratic regularization on both X and Y
 * (gamma 0.5 each), standardized inputs and a fixed seed, then scores the
 * training frame and logs the resulting metrics.
 *
 * <p>The test makes no assertions; it passes as long as training and scoring
 * complete without throwing. The initial-Y frame, the parsed frame and the
 * model are deleted in the {@code finally} block.
 */
@Test
public void testArrests() throws InterruptedException, ExecutionException {
    final long rngSeed = 1234;
    // Starting Y: the first k rows of the standardized training frame
    Frame initialY = ArrayUtils.frame(ard(ard(1.24256408, 0.7828393, -0.5209066, -0.003416473), ard(0.50786248, 1.1068225, -1.2117642, 2.484202941), ard(0.07163341, 1.4788032, 0.9989801, 1.042878388)));
    Frame arrests = null;
    GLRMModel fitted = null;
    try {
        arrests = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

        // Model configuration
        final GLRMParameters cfg = new GLRMParameters();
        cfg._train = arrests._key;
        cfg._gamma_y = 0.5;
        cfg._gamma_x = cfg._gamma_y;
        cfg._regularization_x = GlrmRegularizer.Quadratic;
        cfg._regularization_y = GlrmRegularizer.Quadratic;
        cfg._k = 3;
        cfg._transform = DataInfo.TransformType.STANDARDIZE;
        cfg._init = GlrmInitialization.User;
        cfg._user_y = initialY._key;
        cfg._recover_svd = false;
        cfg._seed = rngSeed;

        // Train and report the final objective
        GLRM builder = new GLRM(cfg);
        fitted = builder.trainModel().get();
        String progress = "Iteration " + fitted._output._iterations + ": Objective value = " + fitted._output._objective;
        Log.info(progress);

        // Score on the training data; the prediction frame itself is not needed
        Frame scored = fitted.score(arrests);
        scored.delete();

        ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(fitted, arrests);
        String errors = "Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr;
        Log.info(errors);
    } finally {
        initialY.delete();
        if (arrests != null) {
            arrests.delete();
        }
        if (fitted != null) {
            fitted.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) Test(org.junit.Test)

Example 13 with GLRMParameters

Use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

In the class GLRMTest, method testBenignSVD.

/**
 * Smoke test: fits a rank-10 GLRM to the benign data set with SVD
 * initialization, quadratic regularization on both X and Y (gamma 0.25 each),
 * standardized inputs, a minimum step size of 1e-5 and at most 2000
 * iterations, then scores the training frame and logs the resulting metrics.
 *
 * <p>The test makes no assertions; it passes as long as training and scoring
 * complete without throwing. The parsed frame and the model are deleted in the
 * {@code finally} block.
 */
@Test
public void testBenignSVD() throws InterruptedException, ExecutionException {
    Frame benign = null;
    GLRMModel fitted = null;
    try {
        benign = parse_test_file(Key.make("benign.hex"), "smalldata/logreg/benign.csv");

        // Model configuration
        final GLRMParameters cfg = new GLRMParameters();
        cfg._train = benign._key;
        cfg._k = 10;
        cfg._gamma_y = 0.25;
        cfg._gamma_x = cfg._gamma_y;
        cfg._regularization_x = GlrmRegularizer.Quadratic;
        cfg._regularization_y = GlrmRegularizer.Quadratic;
        cfg._transform = DataInfo.TransformType.STANDARDIZE;
        cfg._init = GlrmInitialization.SVD;
        cfg._recover_svd = false;
        cfg._min_step_size = 1e-5;
        cfg._max_iterations = 2000;

        // Train and report the final objective
        GLRM builder = new GLRM(cfg);
        fitted = builder.trainModel().get();
        String progress = "Iteration " + fitted._output._iterations + ": Objective value = " + fitted._output._objective;
        Log.info(progress);

        // Score on the training data; the prediction frame itself is not needed
        Frame scored = fitted.score(benign);
        scored.delete();

        ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(fitted, benign);
        String errors = "Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr;
        Log.info(errors);
    } finally {
        if (benign != null) {
            benign.delete();
        }
        if (fitted != null) {
            fitted.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) Test(org.junit.Test)

Example 14 with GLRMParameters

Use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

In the class GLRMTest, method testArrestsMissing.

/**
 * Checks GLRM on the USArrests data set as an increasing fraction of the
 * entries is replaced by missing values.
 *
 * <p>For each fraction in {0, 0.1, 0.25, 0.5, 0.75, 0.9} the data is re-parsed,
 * missing values are inserted (when the fraction is positive), and a full-rank
 * GLRM is trained with quadratic loss, no regularization, standardized inputs,
 * PlusPlus initialization and SVD recovery. The recovered singular values and
 * eigenvectors are compared against the expected ones via {@code errStddev}
 * and {@code errEigvec} (averaged over k) and recorded per fraction. The test
 * asserts that the model's final objective matches the numeric error reported
 * by the scoring metrics within {@code TOLERANCE}. A summary of the recorded
 * errors is logged at the end.
 *
 * <p>Every iteration closes its scope and deletes its own frame and model in
 * {@code finally}, so a failing iteration does not leave an open scope behind.
 */
@Test
public void testArrestsMissing() throws InterruptedException, ExecutionException {
    // Expected eigenvectors and their corresponding singular values with standardized data
    double[] sval = new double[] { 11.024148, 6.964086, 4.179904, 2.915146 };
    double[][] eigvec = ard(ard(-0.5358995, 0.4181809, -0.3412327, 0.64922780), ard(-0.5831836, 0.1879856, -0.2681484, -0.74340748), ard(-0.2781909, -0.8728062, -0.3780158, 0.13387773), ard(-0.5434321, -0.1673186, 0.8177779, 0.08902432));
    long seed = 1234;
    // TreeMap keeps the per-fraction results ordered by missing fraction for the summary
    Map<Double, Double> sd_map = new TreeMap<>();
    Map<Double, Double> ev_map = new TreeMap<>();
    for (double missing_fraction : new double[] { 0, 0.1, 0.25, 0.5, 0.75, 0.9 }) {
        // Fresh references per iteration: cleanup below must never see objects
        // left over from (and already deleted by) an earlier iteration.
        Frame train = null;
        GLRMModel model = null;
        // Enter before the try so the exit in finally always pairs with this enter
        Scope.enter();
        try {
            train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
            // Add missing values to the training data
            if (missing_fraction > 0) {
                Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
                // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
                DKV.put(frtmp._key, frtmp);
                try {
                    FrameUtils.MissingInserter j = new FrameUtils.MissingInserter(frtmp._key, seed, missing_fraction);
                    // MissingInserter is non-blocking, must block here explicitly
                    j.execImpl().get();
                } finally {
                    // Delete the frame header (not the data), even if the inserter failed
                    DKV.remove(frtmp._key);
                }
            }
            GLRMParameters parms = new GLRMParameters();
            parms._train = train._key;
            parms._k = train.numCols();
            parms._loss = GlrmLoss.Quadratic;
            parms._regularization_x = GlrmRegularizer.None;
            parms._regularization_y = GlrmRegularizer.None;
            parms._transform = DataInfo.TransformType.STANDARDIZE;
            parms._init = GlrmInitialization.PlusPlus;
            parms._max_iterations = 1000;
            parms._seed = seed;
            parms._recover_svd = true;
            GLRM job = new GLRM(parms);
            model = job.trainModel().get();
            Log.info(100 * missing_fraction + "% missing values: Objective = " + model._output._objective);
            double sd_err = errStddev(sval, model._output._singular_vals) / parms._k;
            double ev_err = errEigvec(eigvec, model._output._eigenvectors_raw) / parms._k;
            Log.info("Avg SSE in Std Dev = " + sd_err + "\tAvg SSE in Eigenvectors = " + ev_err);
            sd_map.put(missing_fraction, sd_err);
            ev_map.put(missing_fraction, ev_err);
            model.score(train).delete();
            ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
            Log.info("Numeric Sum of Squared Error = " + mm._numerr + "\tCategorical Misclassification Error = " + mm._caterr);
            Assert.assertEquals(model._output._objective, mm._numerr, TOLERANCE);
        } finally {
            // Runs on failure too (training error or failed assertion); same order
            // as the successful path: close the scope first, then delete frame and model.
            Scope.exit();
            if (train != null) {
                train.delete();
            }
            if (model != null) {
                model.delete();
            }
        }
    }
    // Summary of the recorded errors, one "fraction --> error" entry per line
    StringBuilder sb = new StringBuilder();
    sb.append("\nMissing Fraction --> Avg SSE in Std Dev\n");
    for (String s : Arrays.toString(sd_map.entrySet().toArray()).split(",")) {
        sb.append(s.replace("=", " --> ")).append("\n");
    }
    sb.append("\n");
    sb.append("Missing Fraction --> Avg SSE in Eigenvectors\n");
    for (String s : Arrays.toString(ev_map.entrySet().toArray()).split(",")) {
        sb.append(s.replace("=", " --> ")).append("\n");
    }
    Log.info(sb.toString());
}
Also used : FrameUtils(water.util.FrameUtils) Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Aggregations

GLRMParameters (hex.glrm.GLRMModel.GLRMParameters): 14, Test (org.junit.Test): 14, Frame (water.fvec.Frame): 14, DataInfo (hex.DataInfo): 1, GlrmLoss (hex.genmodel.algos.glrm.GlrmLoss): 1, GlrmRegularizer (hex.genmodel.algos.glrm.GlrmRegularizer): 1, PCA (hex.pca.PCA): 1, PCAModel (hex.pca.PCAModel): 1, FileInputStream (java.io.FileInputStream): 1, InputStream (java.io.InputStream): 1, Random (java.util.Random): 1, TreeMap (java.util.TreeMap): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, Ignore (org.junit.Ignore): 1, UploadFileVec (water.fvec.UploadFileVec): 1, Vec (water.fvec.Vec): 1, Val (water.rapids.Val): 1, FrameUtils (water.util.FrameUtils): 1