Search in sources :

Example 1 with GLRMParameters

use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

the class GLRMTest method testArrestsSVD.

/**
 * Fits a rank-4 GLRM to the standardized USArrests data, starting from a user-supplied Y
 * and with SVD recovery switched on, then scores the training frame and asserts that the
 * model's final objective matches the numeric error reported by the scoring metrics.
 */
@Test
public void testArrestsSVD() throws InterruptedException, ExecutionException {
    // User-supplied starting point: the first k rows of the standardized training frame
    Frame yinit = ArrayUtils.frame(ard(
        ard(1.24256408, 0.7828393, -0.5209066, -0.003416473),
        ard(0.50786248, 1.1068225, -1.2117642, 2.484202941),
        ard(0.07163341, 1.4788032, 0.9989801, 1.042878388),
        ard(0.23234938, 0.2308680, -1.0735927, -0.184916602)));
    // Reference values; currently only the commented-out checks below refer to them
    double[] sval = new double[] { 11.024148, 6.964086, 4.179904, 2.915146 };
    double[][] eigvec = ard(
        ard(-0.5358995, 0.4181809, -0.3412327, 0.64922780),
        ard(-0.5831836, 0.1879856, -0.2681484, -0.74340748),
        ard(-0.2781909, -0.8728062, -0.3780158, 0.13387773),
        ard(-0.5434321, -0.1673186, 0.8177779, 0.08902432));
    Frame train = null;
    GLRMModel model = null;
    try {
        train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

        GLRMParameters params = new GLRMParameters();
        params._train = train._key;
        params._k = 4;
        params._transform = DataInfo.TransformType.STANDARDIZE;
        // params._init = GLRM.Initialization.PlusPlus;
        params._init = GlrmInitialization.User;
        params._user_y = yinit._key;
        params._max_iterations = 1000;
        params._min_step_size = 1e-8;
        params._recover_svd = true;

        model = new GLRM(params).trainModel().get();
        Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
        // checkStddev(sval, model._output._singular_vals, 1e-4);
        // checkEigvec(eigvec, model._output._eigenvectors_raw, 1e-4);

        // Scoring publishes the metrics looked up next; the scored frame itself is not needed
        model.score(train).delete();
        ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info("Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr);
        Assert.assertEquals(model._output._objective, metrics._numerr, TOLERANCE);
    } finally {
        // yinit is created before the try block, so it is always non-null here
        yinit.delete();
        if (train != null) {
            train.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) Test(org.junit.Test)

Example 2 with GLRMParameters

use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

the class GLRMTest method testSubset.

/**
 * Analogous to pyunit_subset_glrm.py: uploads and parses the zipped census data set, moves
 * its first column into a separate one-column categorical frame, trains a single-iteration
 * GLRM on the remaining columns, and evaluates a Rapids expression that references both the
 * model's representation frame and the categorical frame.
 *
 * <p>Currently disabled via {@code @Ignore}.
 */
@Ignore
@Test
public void testSubset() throws InterruptedException, ExecutionException {
    //Analogous to pyunit_subset_glrm.py
    GLRMModel model = null;
    Frame train;
    // try-with-resources guarantees the file stream is closed once the upload finishes or fails
    try (InputStream is = new FileInputStream(FileUtils.getFile("bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip"))) {
        UploadFileVec.ReadPutStats stats = new UploadFileVec.ReadPutStats();
        UploadFileVec.readPut("train", is, stats);
    } catch (Exception e) {
        // Everything below depends on the uploaded key, so fail here with the real cause
        // instead of printing it and failing later at the parse step
        throw new RuntimeException("Failed to upload bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip", e);
    }
    ParseDataset.parse(Key.make("train_parsed"), Key.make("train"));
    train = DKV.getGet("train_parsed");
    try {
        Log.info("num chunks: ", train.anyVec().nChunks());
        // Keep the first column as its own categorical frame, then drop it from the training data
        Vec[] acs_zcta_vec = { train.vec(0).toCategoricalVec() };
        Frame acs_zcta_fr = new Frame(Key.<Frame>make("acs_zcta_fr"), new String[] { "name" }, acs_zcta_vec);
        DKV.put(acs_zcta_fr);
        train.remove(0).remove();
        DKV.put(train);
        GLRMParameters parms = new GLRMParameters();
        parms._train = train._key;
        parms._gamma_x = 0.25;
        parms._gamma_y = 0.5;
        parms._regularization_x = GlrmRegularizer.Quadratic;
        parms._regularization_y = GlrmRegularizer.L1;
        parms._k = 10;
        parms._transform = DataInfo.TransformType.STANDARDIZE;
        parms._max_iterations = 1;
        parms._loss = GlrmLoss.Quadratic;
        // No catch here: the enclosing catch already prints and wraps any failure, so a
        // second handler would only print the trace twice and double-wrap the exception
        try {
            Scope.enter();
            model = new GLRM(parms).trainModel().get();
            String s = "(tmp= py_4 (rows (cols_py " + model._output._representation_key + " [0 1]) (tmp= py_3 (| (| (| (| (| (== (tmp= py_2 " + acs_zcta_fr._key + ") \"10065\") (== py_2 \"11219\")) (== py_2 \"66753\")) (== py_2 \"84104\")) (== py_2 \"94086\")) (== py_2 \"95014\")))))";
            // Only successful evaluation matters; the result value is not inspected
            Rapids.exec(s);
        } finally {
            acs_zcta_fr.delete();
            Scope.exit();
        }
    } catch (Throwable t) {
        t.printStackTrace();
        throw new RuntimeException(t);
    } finally {
        if (train != null)
            train.delete();
        if (model != null)
            model.delete();
    }
}
Also used : Val(water.rapids.Val) Frame(water.fvec.Frame) UploadFileVec(water.fvec.UploadFileVec) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) ExecutionException(java.util.concurrent.ExecutionException) Vec(water.fvec.Vec) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with GLRMParameters

use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

the class GLRMTest method testArrestsPlusPlus.

/**
 * Smoke test: trains a rank-4 GLRM on the standardized USArrests data with Huber loss,
 * non-negative regularization on both X and Y, and PlusPlus initialization, then logs the
 * iteration count and final objective. Nothing is asserted beyond training completing.
 */
@Test
public void testArrestsPlusPlus() throws InterruptedException, ExecutionException {
    Frame train = null;
    GLRMModel model = null;
    try {
        train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

        GLRMParameters params = new GLRMParameters();
        params._train = train._key;
        params._k = 4;
        params._loss = GlrmLoss.Huber;
        params._regularization_x = GlrmRegularizer.NonNegative;
        params._regularization_y = GlrmRegularizer.NonNegative;
        // Same regularization weight on both factors
        params._gamma_y = 1;
        params._gamma_x = 1;
        params._transform = DataInfo.TransformType.STANDARDIZE;
        params._init = GlrmInitialization.PlusPlus;
        params._max_iterations = 100;
        params._min_step_size = 1e-8;
        params._recover_svd = true;

        model = new GLRM(params).trainModel().get();
        Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
    } finally {
        if (train != null) {
            train.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) Test(org.junit.Test)

Example 4 with GLRMParameters

use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

the class GLRMTest method testRegularizers.

/**
 * Trains GLRM on the untransformed USArrests data under several X/Y regularizer
 * combinations, all starting from the same user-supplied Y. Each configuration is trained,
 * logged, scored on the training frame and then deleted; nothing is asserted beyond each
 * run completing.
 */
@Test
public void testRegularizers() throws InterruptedException, ExecutionException {
    // Initialize using first 4 rows of USArrests
    Frame init = ArrayUtils.frame(ard(ard(13.2, 236, 58, 21.2), ard(10.0, 263, 48, 44.5), ard(8.1, 294, 80, 31.0), ard(8.8, 190, 50, 19.5)));
    Frame train = null;
    long seed = 1234;
    try {
        Scope.enter();
        train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
        // One parameter object is reused; only the regularization settings change per run
        GLRMParameters parms = new GLRMParameters();
        parms._train = train._key;
        parms._k = 4;
        parms._init = GlrmInitialization.User;
        parms._user_y = init._key;
        parms._transform = DataInfo.TransformType.NONE;
        parms._recover_svd = false;
        parms._max_iterations = 1000;
        parms._seed = seed;

        Log.info("\nNon-negative matrix factorization");
        parms._gamma_x = parms._gamma_y = 1;
        parms._regularization_x = GlrmRegularizer.NonNegative;
        parms._regularization_y = GlrmRegularizer.NonNegative;
        trainScoreAndLogRegularized(parms, train);

        Log.info("\nOrthogonal non-negative matrix factorization");
        parms._gamma_x = parms._gamma_y = 1;
        parms._regularization_x = GlrmRegularizer.OneSparse;
        parms._regularization_y = GlrmRegularizer.NonNegative;
        trainScoreAndLogRegularized(parms, train);

        Log.info("\nQuadratic clustering (k-means)");
        parms._gamma_x = 1;
        parms._gamma_y = 0;
        parms._regularization_x = GlrmRegularizer.UnitOneSparse;
        parms._regularization_y = GlrmRegularizer.None;
        trainScoreAndLogRegularized(parms, train);

        Log.info("\nQuadratic mixture (soft k-means)");
        // NOTE(review): these settings are identical to the k-means run above, so this run
        // repeats it. A soft k-means presumably needs a different X regularizer (possibly a
        // simplex constraint) - confirm the intent before changing.
        parms._gamma_x = 1;
        parms._gamma_y = 0;
        parms._regularization_x = GlrmRegularizer.UnitOneSparse;
        parms._regularization_y = GlrmRegularizer.None;
        trainScoreAndLogRegularized(parms, train);
    } finally {
        init.delete();
        if (train != null)
            train.delete();
        Scope.exit();
    }
}

/**
 * Trains one GLRM with the given parameters, logs its iteration count, objective and
 * archetypes, scores it on {@code train}, logs the resulting error metrics, and deletes
 * the model.
 *
 * <p>The model reference is local to this call, so a failure while building one
 * configuration can never trigger a second delete of a model from an earlier one.
 *
 * @param parms fully configured GLRM parameters for this run
 * @param train frame the model was configured to train on; also used for scoring
 */
private static void trainScoreAndLogRegularized(GLRMParameters parms, Frame train) throws InterruptedException, ExecutionException {
    GLRMModel model = null;
    try {
        model = new GLRM(parms).trainModel().get();
        Log.info("Iteration " + model._output._iterations + ": Objective value = " + model._output._objective);
        Log.info("Archetypes:\n" + model._output._archetypes.toString());
        // Score first so the metrics are available for the lookup; the scored frame is discarded
        model.score(train).delete();
        ModelMetricsGLRM mm = (ModelMetricsGLRM) ModelMetrics.getFromDKV(model, train);
        Log.info("Numeric Sum of Squared Error = " + mm._numerr + "\tCategorical Misclassification Error = " + mm._caterr);
    } finally {
        if (model != null)
            model.delete();
    }
}
Also used : Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) Test(org.junit.Test)

Example 5 with GLRMParameters

use of hex.glrm.GLRMModel.GLRMParameters in project h2o-3 by h2oai.

the class GLRMTest method testArrestsVarianceMetrics.

// PUBDEV-3501: Variance metrics for GLRM.  I compared the variance metrics calculated by PCA
// and by GLRM to make sure they agree.
/**
 * For both the DEMEAN and STANDARDIZE transforms, builds a rank-4 PCA model and a rank-4
 * GLRM model on USArrests, checks the PCA standard deviations and eigenvectors against
 * reference values, and then checks that the importance tables of the two models agree
 * within {@code TOLERANCE}.
 */
@Test
public void testArrestsVarianceMetrics() throws InterruptedException, ExecutionException {
    // Results with de-meaned training frame
    double[] stddev = new double[] { 83.732400, 14.212402, 6.489426, 2.482790 };
    double[][] eigvec = ard(ard(0.04170432, -0.04482166, 0.07989066, -0.99492173), ard(0.99522128, -0.05876003, -0.06756974, 0.03893830), ard(0.04633575, 0.97685748, -0.20054629, -0.05816914), ard(0.07515550, 0.20071807, 0.97408059, 0.07232502));
    // Results with standardized training frame
    double[] stddev_std = new double[] { 1.5748783, 0.9948694, 0.5971291, 0.4164494 };
    double[][] eigvec_std = ard(ard(-0.5358995, 0.4181809, -0.3412327, 0.64922780), ard(-0.5831836, 0.1879856, -0.2681484, -0.74340748), ard(-0.2781909, -0.8728062, -0.3780158, 0.13387773), ard(-0.5434321, -0.1673186, 0.8177779, 0.08902432));
    Frame train = null;
    try {
        train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
        for (DataInfo.TransformType std : new DataInfo.TransformType[] { DataInfo.TransformType.DEMEAN, DataInfo.TransformType.STANDARDIZE }) {
            // Declared per iteration: if a later pass fails before its models are built, the
            // cleanup below must not delete the previous pass's already-deleted models again
            PCAModel model = null;
            GLRMModel gmodel = null;
            try {
                // build PCA
                PCAModel.PCAParameters parms = new PCAModel.PCAParameters();
                parms._train = train._key;
                parms._k = 4;
                parms._transform = std;
                parms._max_iterations = 1000;
                parms._pca_method = PCAModel.PCAParameters.Method.Power;
                model = new PCA(parms).trainModel().get();
                // build GLRM
                GLRMParameters gparms = new GLRMParameters();
                gparms._train = train._key;
                gparms._k = 4;
                gparms._transform = std;
                gparms._loss = GlrmLoss.Quadratic;
                gparms._init = GlrmInitialization.SVD;
                gparms._max_iterations = 2000;
                gparms._gamma_x = 0;
                gparms._gamma_y = 0;
                gparms._recover_svd = true;
                gmodel = new GLRM(gparms).trainModel().get();
                assert gmodel != null;
                if (std == DataInfo.TransformType.DEMEAN) {
                    // check to make sure PCA generated correct results first
                    TestUtil.checkStddev(stddev, model._output._std_deviation, TOLERANCE);
                    TestUtil.checkEigvec(eigvec, model._output._eigenvectors, TOLERANCE);
                } else if (std == DataInfo.TransformType.STANDARDIZE) {
                    TestUtil.checkStddev(stddev_std, model._output._std_deviation, TOLERANCE);
                    TestUtil.checkEigvec(eigvec_std, model._output._eigenvectors, TOLERANCE);
                }
                // compare PCA and GLRM variance metrics here after we know PCA has worked correctly
                IcedWrapper[][] pcaInfo = model._output._importance.getCellValues();
                IcedWrapper[][] glrmInfo = gmodel._output._importance.getCellValues();
                TestUtil.checkIcedArrays(pcaInfo, glrmInfo, TOLERANCE);
            } finally {
                if (model != null)
                    model.delete();
                if (gmodel != null)
                    gmodel.delete();
            }
        }
    } finally {
        if (train != null)
            train.delete();
    }
}
Also used : PCAModel(hex.pca.PCAModel) DataInfo(hex.DataInfo) Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) PCA(hex.pca.PCA) Test(org.junit.Test)

Aggregations

GLRMParameters (hex.glrm.GLRMModel.GLRMParameters): 14, Test (org.junit.Test): 14, Frame (water.fvec.Frame): 14, DataInfo (hex.DataInfo): 1, GlrmLoss (hex.genmodel.algos.glrm.GlrmLoss): 1, GlrmRegularizer (hex.genmodel.algos.glrm.GlrmRegularizer): 1, PCA (hex.pca.PCA): 1, PCAModel (hex.pca.PCAModel): 1, FileInputStream (java.io.FileInputStream): 1, InputStream (java.io.InputStream): 1, Random (java.util.Random): 1, TreeMap (java.util.TreeMap): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, Ignore (org.junit.Ignore): 1, UploadFileVec (water.fvec.UploadFileVec): 1, Vec (water.fvec.Vec): 1, Val (water.rapids.Val): 1, FrameUtils (water.util.FrameUtils): 1