Search in sources :

Example 1 with PCAModel

use of hex.pca.PCAModel in project h2o-2 by h2oai.

the class PCATest method testBasic.

/**
 * Smoke test: parses a tiny in-memory three-column data set, runs PCA on it
 * (tolerance 0.0, {@code standardize = true}) and verifies that the job state
 * recorded in the model parameters is {@code DONE} (see HEX-1817).
 */
@Test
public void testBasic() throws InterruptedException, ExecutionException {
    boolean standardize = true;
    PCAModel model = null;
    Frame fr = null;
    try {
        Key kraw = Key.make("basicdata.raw");
        FVecTest.makeByteVec(kraw, "x1,x2,x3\n0,1.0,-120.4\n1,0.5,89.3\n2,0.3333333,291.0\n3,0.25,-2.5\n4,0.20,-2.5\n5,0.1666667,-123.4\n6,0.1428571,-0.1\n7,0.1250000,18.3");
        fr = ParseDataset2.parse(Key.make("basicdata.hex"), new Key[] { kraw });
        Key kpca = Key.make("basicdata.pca");
        new PCA("PCA on basic small dataset", kpca, fr, 0.0, standardize).invoke();
        model = DKV.get(kpca).get();
        // HEX-1817: assertEquals (rather than assertTrue on ==) so that a failure
        // reports the state the job actually ended in.
        Assert.assertEquals(Job.JobState.DONE, model.get_params().state);
        testHTML(model);
    } finally {
        // Always remove the parsed frame and the model, even when an assertion failed.
        if (fr != null) {
            fr.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : PCAModel(hex.pca.PCAModel) Frame(water.fvec.Frame) PCA(hex.pca.PCA) Test(org.junit.Test) FVecTest(water.fvec.FVecTest)

Example 2 with PCAModel

use of hex.pca.PCAModel in project h2o-2 by h2oai.

the class PCATest method testArrests.

/**
 * Builds a PCA model on all columns of the USArrests data set and compares the
 * resulting standard deviations and eigenvectors with reference values
 * (the original comments attribute them to R).
 */
@Test
public void testArrests() throws InterruptedException, ExecutionException {
    final double tolerance = 0.25;
    final boolean standardize = true;
    // Reference results the model output is compared against.
    final double[] expectedSdev = { 1.5748783, 0.9948694, 0.5971291, 0.4164494 };
    final double[][] expectedEigVec = {
        { -0.5358995, 0.4181809, -0.3412327, 0.64922780 },
        { -0.5831836, 0.1879856, -0.2681484, -0.74340748 },
        { -0.2781909, -0.8728062, -0.3780158, 0.13387773 },
        { -0.5434321, -0.1673186, 0.8177779, 0.08902432 }
    };
    Frame fr = null;
    PCAModel model = null;
    try {
        Key srcKey = Key.make("arrests.hex");
        fr = getFrameForFile(srcKey, "smalldata/pca_test/USArrests.csv", null);

        // Train on every column of the frame.
        Key modelKey = Key.make("arrests.pca");
        PCA job = new PCA("PCA test on USArrests", modelKey, fr, tolerance, standardize);
        job.invoke();
        model = DKV.get(modelKey).get();
        testHTML(model);

        // Check standard deviations first, then eigenvectors, against the references.
        checkSdev(expectedSdev, model.sdev());
        checkEigvec(expectedEigVec, model.eigVec());

        // Scoring the original data set with the model is currently disabled:
        // Key kscore = Key.make("arrests.score");
        // Frame score = PCAScoreTask.score(df, model._eigVec, kscore);
    } finally {
        // Clean up the frame and the model regardless of the test outcome.
        if (fr != null) {
            fr.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : PCAModel(hex.pca.PCAModel) Frame(water.fvec.Frame) PCA(hex.pca.PCA) Test(org.junit.Test) FVecTest(water.fvec.FVecTest)

Example 3 with PCAModel

use of hex.pca.PCAModel in project h2o-3 by h2oai.

the class GLRMTest method testArrestsVarianceMetrics.

/**
 * PUBDEV-3501: variance metrics for GLRM.
 *
 * For each of the DEMEAN and STANDARDIZE transforms this trains a PCA model and a
 * GLRM model (k = 4) on the USArrests frame, first checks the PCA standard
 * deviations and eigenvectors against the expected values below, and then checks
 * that the importance tables of the PCA and GLRM models agree within TOLERANCE.
 */
@Test
public void testArrestsVarianceMetrics() throws InterruptedException, ExecutionException {
    // Results with de-meaned training frame
    double[] stddev = new double[] { 83.732400, 14.212402, 6.489426, 2.482790 };
    double[][] eigvec = ard(ard(0.04170432, -0.04482166, 0.07989066, -0.99492173), ard(0.99522128, -0.05876003, -0.06756974, 0.03893830), ard(0.04633575, 0.97685748, -0.20054629, -0.05816914), ard(0.07515550, 0.20071807, 0.97408059, 0.07232502));
    // Results with standardized training frame
    double[] stddev_std = new double[] { 1.5748783, 0.9948694, 0.5971291, 0.4164494 };
    double[][] eigvec_std = ard(ard(-0.5358995, 0.4181809, -0.3412327, 0.64922780), ard(-0.5831836, 0.1879856, -0.2681484, -0.74340748), ard(-0.2781909, -0.8728062, -0.3780158, 0.13387773), ard(-0.5434321, -0.1673186, 0.8177779, 0.08902432));
    Frame train = null;
    try {
        train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");
        for (DataInfo.TransformType std : new DataInfo.TransformType[] { DataInfo.TransformType.DEMEAN, DataInfo.TransformType.STANDARDIZE }) {
            // Scoped per iteration: if training fails in a later iteration, the cleanup
            // below must not see (and delete again) models from an earlier iteration.
            PCAModel model = null;
            GLRMModel gmodel = null;
            try {
                // build PCA
                PCAModel.PCAParameters parms = new PCAModel.PCAParameters();
                parms._train = train._key;
                parms._k = 4;
                parms._transform = std;
                parms._max_iterations = 1000;
                parms._pca_method = PCAModel.PCAParameters.Method.Power;
                model = new PCA(parms).trainModel().get();
                // build GLRM
                GLRMParameters gparms = new GLRMParameters();
                gparms._train = train._key;
                gparms._k = 4;
                gparms._transform = std;
                gparms._loss = GlrmLoss.Quadratic;
                gparms._init = GlrmInitialization.SVD;
                gparms._max_iterations = 2000;
                gparms._gamma_x = 0;
                gparms._gamma_y = 0;
                gparms._recover_svd = true;
                gmodel = new GLRM(gparms).trainModel().get();
                assert gmodel != null;
                // Importance tables of both models, compared at the end of the iteration.
                IcedWrapper[][] pcaInfo = model._output._importance.getCellValues();
                IcedWrapper[][] glrmInfo = gmodel._output._importance.getCellValues();
                if (std == DataInfo.TransformType.DEMEAN) {
                    // check to make sure PCA generated correct results first
                    TestUtil.checkStddev(stddev, model._output._std_deviation, TOLERANCE);
                    TestUtil.checkEigvec(eigvec, model._output._eigenvectors, TOLERANCE);
                } else if (std == DataInfo.TransformType.STANDARDIZE) {
                    TestUtil.checkStddev(stddev_std, model._output._std_deviation, TOLERANCE);
                    TestUtil.checkEigvec(eigvec_std, model._output._eigenvectors, TOLERANCE);
                }
                // compare PCA and GLRM variance metrics here after we know PCA has worked correctly
                TestUtil.checkIcedArrays(pcaInfo, glrmInfo, TOLERANCE);
            } finally {
                if (model != null) {
                    model.delete();
                }
                if (gmodel != null) {
                    gmodel.delete();
                }
            }
        }
    } finally {
        if (train != null) {
            train.delete();
        }
    }
}
Also used : PCAModel(hex.pca.PCAModel) DataInfo(hex.DataInfo) Frame(water.fvec.Frame) GLRMParameters(hex.glrm.GLRMModel.GLRMParameters) PCA(hex.pca.PCA) Test(org.junit.Test)

Example 4 with PCAModel

use of hex.pca.PCAModel in project h2o-2 by h2oai.

the class PCATest method testLinDep.

/**
 * Runs PCA on a two-column data set in which x2 is exactly 2 * x1 and compares
 * each component standard deviation reported by the model with the expected
 * values in {@code sdev_R}, using the class-level {@code threshold} as tolerance.
 */
@Test
public void testLinDep() throws InterruptedException, ExecutionException {
    Key kdata = Key.make("depdata.hex");
    PCAModel model = null;
    Frame fr = null;
    double[] sdev_R = { 1.414214, 0 };
    try {
        Key kraw = Key.make("depdata.raw");
        FVecTest.makeByteVec(kraw, "x1,x2\n0,0\n1,2\n2,4\n3,6\n4,8\n5,10");
        fr = ParseDataset2.parse(kdata, new Key[] { kraw });
        Key kpca = Key.make("depdata.pca");
        new PCA("PCA on data with dependent cols", kpca, fr, 0.0, true).invoke();
        model = DKV.get(kpca).get();
        testHTML(model);
        double[] sdev = model.sdev();
        // Report an unexpected number of components as a test failure with a message
        // instead of an ArrayIndexOutOfBoundsException when indexing sdev_R below.
        // NOTE(review): fewer components than expected still pass, as before; whether
        // the model may legitimately drop components is not visible here - confirm.
        Assert.assertTrue("PCA returned " + sdev.length + " components, expected at most " + sdev_R.length, sdev.length <= sdev_R.length);
        for (int i = 0; i < sdev.length; i++) {
            Assert.assertEquals(sdev_R[i], sdev[i], threshold);
        }
    } finally {
        // Always remove the parsed frame and the model, even when an assertion failed.
        if (fr != null) {
            fr.delete();
        }
        if (model != null) {
            model.delete();
        }
    }
}
Also used : PCAModel(hex.pca.PCAModel) Frame(water.fvec.Frame) PCA(hex.pca.PCA) Test(org.junit.Test) FVecTest(water.fvec.FVecTest)

Aggregations

PCA (hex.pca.PCA) 4, PCAModel (hex.pca.PCAModel) 4, Test (org.junit.Test) 4, Frame (water.fvec.Frame) 4, FVecTest (water.fvec.FVecTest) 3, DataInfo (hex.DataInfo) 1, GLRMParameters (hex.glrm.GLRMModel.GLRMParameters) 1