Search in sources :

Example 1 with DenseLocalOnHeapMatrix

use of org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix in project ignite by apache.

the class OLSMultipleLinearRegressionTest method testHat.

/**
     * Test hat matrix computation
     */
@Test
public void testHat() {
    /*
         * This example is from "The Hat Matrix in Regression and ANOVA",
         * David C. Hoaglin and Roy E. Welsch,
         * The American Statistician, Vol. 32, No. 1 (Feb., 1978), pp. 17-22.
         *
         */
    double[] design = new double[] { 11.14, .499, 11.1, 12.74, .558, 8.9, 13.13, .604, 8.8, 11.51, .441, 8.9, 12.38, .550, 8.8, 12.60, .528, 9.9, 11.13, .418, 10.7, 11.7, .480, 10.5, 11.02, .406, 10.5, 11.41, .467, 10.7 };
    int nobs = 10;
    int nvars = 2;
    // Estimate the model
    OLSMultipleLinearRegression mdl = new OLSMultipleLinearRegression();
    mdl.newSampleData(design, nobs, nvars, new DenseLocalOnHeapMatrix());
    Matrix hat = mdl.calculateHat();
    // Reference data is upper half of symmetric hat matrix
    double[] refData = new double[] { .418, -.002, .079, -.274, -.046, .181, .128, .222, .050, .242, .242, .292, .136, .243, .128, -.041, .033, -.035, .004, .417, -.019, .273, .187, -.126, .044, -.153, .004, .604, .197, -.038, .168, -.022, .275, -.028, .252, .111, -.030, .019, -.010, -.010, .148, .042, .117, .012, .111, .262, .145, .277, .174, .154, .120, .168, .315, .148, .187 };
    // Check against reference data and verify symmetry
    int k = 0;
    for (int i = 0; i < 10; i++) {
        for (int j = i; j < 10; j++) {
            Assert.assertEquals(refData[k], hat.getX(i, j), 10e-3);
            Assert.assertEquals(hat.getX(i, j), hat.getX(j, i), 10e-12);
            k++;
        }
    }
    /*
         * Verify that residuals computed using the hat matrix are close to
         * what we get from direct computation, i.e. r = (I - H) y
         */
    double[] residuals = mdl.estimateResiduals();
    Matrix id = MatrixUtil.identityLike(hat, 10);
    double[] hatResiduals = id.minus(hat).times(mdl.getY()).getStorage().data();
    TestUtils.assertEquals(residuals, hatResiduals, 10e-12);
}
Also used : Matrix(org.apache.ignite.ml.math.Matrix) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Test(org.junit.Test)

Example 2 with DenseLocalOnHeapMatrix

use of org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix in project ignite by apache.

the class OLSMultipleLinearRegressionTest method testSwissFertility.

/**
     * Test R Swiss fertility dataset against R.
     * Data Source: R datasets package
     */
@Test
public void testSwissFertility() {
    double[] design = new double[] { 80.2, 17.0, 15, 12, 9.96, 83.1, 45.1, 6, 9, 84.84, 92.5, 39.7, 5, 5, 93.40, 85.8, 36.5, 12, 7, 33.77, 76.9, 43.5, 17, 15, 5.16, 76.1, 35.3, 9, 7, 90.57, 83.8, 70.2, 16, 7, 92.85, 92.4, 67.8, 14, 8, 97.16, 82.4, 53.3, 12, 7, 97.67, 82.9, 45.2, 16, 13, 91.38, 87.1, 64.5, 14, 6, 98.61, 64.1, 62.0, 21, 12, 8.52, 66.9, 67.5, 14, 7, 2.27, 68.9, 60.7, 19, 12, 4.43, 61.7, 69.3, 22, 5, 2.82, 68.3, 72.6, 18, 2, 24.20, 71.7, 34.0, 17, 8, 3.30, 55.7, 19.4, 26, 28, 12.11, 54.3, 15.2, 31, 20, 2.15, 65.1, 73.0, 19, 9, 2.84, 65.5, 59.8, 22, 10, 5.23, 65.0, 55.1, 14, 3, 4.52, 56.6, 50.9, 22, 12, 15.14, 57.4, 54.1, 20, 6, 4.20, 72.5, 71.2, 12, 1, 2.40, 74.2, 58.1, 14, 8, 5.23, 72.0, 63.5, 6, 3, 2.56, 60.5, 60.8, 16, 10, 7.72, 58.3, 26.8, 25, 19, 18.46, 65.4, 49.5, 15, 8, 6.10, 75.5, 85.9, 3, 2, 99.71, 69.3, 84.9, 7, 6, 99.68, 77.3, 89.7, 5, 2, 100.00, 70.5, 78.2, 12, 6, 98.96, 79.4, 64.9, 7, 3, 98.22, 65.0, 75.9, 9, 9, 99.06, 92.2, 84.6, 3, 3, 99.46, 79.3, 63.1, 13, 13, 96.83, 70.4, 38.4, 26, 12, 5.62, 65.7, 7.7, 29, 11, 13.79, 72.7, 16.7, 22, 13, 11.22, 64.4, 17.6, 35, 32, 16.92, 77.6, 37.6, 15, 7, 4.97, 67.6, 18.7, 25, 7, 8.65, 35.0, 1.2, 37, 53, 42.34, 44.7, 46.6, 16, 29, 50.43, 42.8, 27.7, 22, 29, 58.33 };
    final int nobs = 47;
    final int nvars = 4;
    // Estimate the model
    OLSMultipleLinearRegression mdl = new OLSMultipleLinearRegression();
    mdl.newSampleData(design, nobs, nvars, new DenseLocalOnHeapMatrix());
    // Check expected beta values from R
    double[] betaHat = mdl.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat, new double[] { 91.05542390271397, -0.22064551045715, -0.26058239824328, -0.96161238456030, 0.12441843147162 }, 1E-12);
    // Check expected residuals from R
    double[] residuals = mdl.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[] { 7.1044267859730512, 1.6580347433531366, 4.6944952770029644, 8.4548022690166160, 13.6547432343186212, -9.3586864458500774, 7.5822446330520386, 15.5568995563859289, 0.8113090736598980, 7.1186762732484308, 7.4251378771228724, 2.6761316873234109, 0.8351584810309354, 7.1769991119615177, -3.8746753206299553, -3.1337779476387251, -0.1412575244091504, 1.1186809170469780, -6.3588097346816594, 3.4039270429434074, 2.3374058329820175, -7.9272368576900503, -7.8361010968497959, -11.2597369269357070, 0.9445333697827101, 6.6544245101380328, -0.9146136301118665, -4.3152449403848570, -4.3536932047009183, -3.8907885169304661, -6.3027643926302188, -7.8308982189289091, -3.1792280015332750, -6.7167298771158226, -4.8469946718041754, -10.6335664353633685, 11.1031134362036958, 6.0084032641811733, 5.4326230830188482, -7.2375578629692230, 2.1671550814448222, 15.0147574652763112, 4.8625103516321015, -7.1597256413907706, -0.4515205619767598, -10.2916870903837587, -15.7812984571900063 }, 1E-12);
    // Check standard errors from R
    double[] errors = mdl.estimateRegressionParametersStandardErrors();
    TestUtils.assertEquals(new double[] { 6.94881329475087, 0.07360008972340, 0.27410957467466, 0.19454551679325, 0.03726654773803 }, errors, 1E-10);
    // Check regression standard error against R
    Assert.assertEquals(7.73642194433223, mdl.estimateRegressionStandardError(), 1E-12);
    // Check R-Square statistics against R
    Assert.assertEquals(0.649789742860228, mdl.calculateRSquared(), 1E-12);
    Assert.assertEquals(0.6164363850373927, mdl.calculateAdjustedRSquared(), 1E-12);
    // TODO: uncomment
    // checkVarianceConsistency(model);
    // Estimate the model with no intercept
    mdl = new OLSMultipleLinearRegression();
    mdl.setNoIntercept(true);
    mdl.newSampleData(design, nobs, nvars, new DenseLocalOnHeapMatrix());
    // Check expected beta values from R
    betaHat = mdl.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat, new double[] { 0.52191832900513, 2.36588087917963, -0.94770353802795, 0.30851985863609 }, 1E-12);
    // Check expected residuals from R
    residuals = mdl.estimateResiduals();
    TestUtils.assertEquals(residuals, new double[] { 44.138759883538249, 27.720705122356215, 35.873200836126799, 34.574619581211977, 26.600168342080213, 15.074636243026923, -12.704904871199814, 1.497443824078134, 2.691972687079431, 5.582798774291231, -4.422986561283165, -9.198581600334345, 4.481765170730647, 2.273520207553216, -22.649827853221336, -17.747900013943308, 20.298314638496436, 6.861405135329779, -8.684712790954924, -10.298639278062371, -9.896618896845819, 4.568568616351242, -15.313570491727944, -13.762961360873966, 7.156100301980509, 16.722282219843990, 26.716200609071898, -1.991466398777079, -2.523342564719335, 9.776486693095093, -5.297535127628603, -16.639070567471094, -10.302057295211819, -23.549487860816846, 1.506624392156384, -17.939174438345930, 13.105792202765040, -1.943329906928462, -1.516005841666695, -0.759066561832886, 20.793137744128977, -2.485236153005426, 27.588238710486976, 2.658333257106881, -15.998337823623046, -5.550742066720694, -14.219077806826615 }, 1E-12);
    // Check standard errors from R
    errors = mdl.estimateRegressionParametersStandardErrors();
    TestUtils.assertEquals(new double[] { 0.10470063765677, 0.41684100584290, 0.43370143099691, 0.07694953606522 }, errors, 1E-10);
    // Check regression standard error against R
    Assert.assertEquals(17.24710630547, mdl.estimateRegressionStandardError(), 1E-10);
    // Check R-Square statistics against R
    Assert.assertEquals(0.946350722085, mdl.calculateRSquared(), 1E-12);
    Assert.assertEquals(0.9413600915813, mdl.calculateAdjustedRSquared(), 1E-12);
}
Also used : DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Test(org.junit.Test)

Example 3 with DenseLocalOnHeapMatrix

use of org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix in project ignite by apache.

the class OLSMultipleLinearRegressionTest method testWampler3.

/**
     * This is a test based on the Wampler3 data set
     * http://www.itl.nist.gov/div898/strd/lls/data/Wampler3.shtml
     */
@Test
public void testWampler3() {
    double[] data = new double[] { 760, 0, -2042, 1, 2111, 2, -1684, 3, 3888, 4, 1858, 5, 11379, 6, 17560, 7, 39287, 8, 64382, 9, 113159, 10, 175108, 11, 273291, 12, 400186, 13, 581243, 14, 811568, 15, 1121004, 16, 1506550, 17, 2002767, 18, 2611612, 19, 3369180, 20 };
    OLSMultipleLinearRegression mdl = new OLSMultipleLinearRegression();
    final int nvars = 5;
    final int nobs = 21;
    double[] tmp = new double[(nvars + 1) * nobs];
    int off = 0;
    int off2 = 0;
    for (int i = 0; i < nobs; i++) {
        tmp[off2] = data[off];
        tmp[off2 + 1] = data[off + 1];
        tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
        tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
        tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
        tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
        off2 += (nvars + 1);
        off += 2;
    }
    mdl.newSampleData(tmp, nobs, nvars, new DenseLocalOnHeapMatrix());
    double[] betaHat = mdl.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat, new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }, 1E-8);
    double[] se = mdl.estimateRegressionParametersStandardErrors();
    TestUtils.assertEquals(se, new double[] { 2152.32624678170, 2363.55173469681, 779.343524331583, 101.475507550350, 5.64566512170752, 0.112324854679312 }, //
    1E-8);
    TestUtils.assertEquals(.999995559025820, mdl.calculateRSquared(), 1.0e-10);
    TestUtils.assertEquals(5570284.53333333, mdl.estimateErrorVariance(), 1.0e-6);
    TestUtils.assertEquals(83554268.0000000, mdl.calculateResidualSumOfSquares(), 1.0e-5);
}
Also used : DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Test(org.junit.Test)

Example 4 with DenseLocalOnHeapMatrix

use of org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix in project ignite by apache.

the class OLSMultipleLinearRegressionTest method testWampler4.

/**
     * This is a test based on the Wampler4 data set
     * http://www.itl.nist.gov/div898/strd/lls/data/Wampler4.shtml
     */
@Test
public void testWampler4() {
    double[] data = new double[] { 75901, 0, -204794, 1, 204863, 2, -204436, 3, 253665, 4, -200894, 5, 214131, 6, -185192, 7, 221249, 8, -138370, 9, 315911, 10, -27644, 11, 455253, 12, 197434, 13, 783995, 14, 608816, 15, 1370781, 16, 1303798, 17, 2205519, 18, 2408860, 19, 3444321, 20 };
    OLSMultipleLinearRegression mdl = new OLSMultipleLinearRegression();
    final int nvars = 5;
    final int nobs = 21;
    double[] tmp = new double[(nvars + 1) * nobs];
    int off = 0;
    int off2 = 0;
    for (int i = 0; i < nobs; i++) {
        tmp[off2] = data[off];
        tmp[off2 + 1] = data[off + 1];
        tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
        tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
        tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
        tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
        off2 += (nvars + 1);
        off += 2;
    }
    mdl.newSampleData(tmp, nobs, nvars, new DenseLocalOnHeapMatrix());
    double[] betaHat = mdl.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat, new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }, 1E-6);
    double[] se = mdl.estimateRegressionParametersStandardErrors();
    TestUtils.assertEquals(se, new double[] { 215232.624678170, 236355.173469681, 77934.3524331583, 10147.5507550350, 564.566512170752, 11.2324854679312 }, 1E-8);
    TestUtils.assertEquals(.957478440825662, mdl.calculateRSquared(), 1.0e-10);
    TestUtils.assertEquals(55702845333.3333, mdl.estimateErrorVariance(), 1.0e-4);
    TestUtils.assertEquals(835542680000.000, mdl.calculateResidualSumOfSquares(), 1.0e-3);
}
Also used : DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Test(org.junit.Test)

Example 5 with DenseLocalOnHeapMatrix

use of org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix in project ignite by apache.

the class OLSMultipleLinearRegressionTest method testWampler1.

/**
     * This is a test based on the Wampler1 data set
     * http://www.itl.nist.gov/div898/strd/lls/data/Wampler1.shtml
     */
@Test
public void testWampler1() {
    double[] data = new double[] { 1, 0, 6, 1, 63, 2, 364, 3, 1365, 4, 3906, 5, 9331, 6, 19608, 7, 37449, 8, 66430, 9, 111111, 10, 177156, 11, 271453, 12, 402234, 13, 579195, 14, 813616, 15, 1118481, 16, 1508598, 17, 2000719, 18, 2613660, 19, 3368421, 20 };
    OLSMultipleLinearRegression mdl = new OLSMultipleLinearRegression();
    final int nvars = 5;
    final int nobs = 21;
    double[] tmp = new double[(nvars + 1) * nobs];
    int off = 0;
    int off2 = 0;
    for (int i = 0; i < nobs; i++) {
        tmp[off2] = data[off];
        tmp[off2 + 1] = data[off + 1];
        tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
        tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
        tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
        tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
        off2 += (nvars + 1);
        off += 2;
    }
    mdl.newSampleData(tmp, nobs, nvars, new DenseLocalOnHeapMatrix());
    double[] betaHat = mdl.estimateRegressionParameters();
    TestUtils.assertEquals(betaHat, new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }, 1E-8);
    double[] se = mdl.estimateRegressionParametersStandardErrors();
    TestUtils.assertEquals(se, new double[] { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, 1E-8);
    TestUtils.assertEquals(1.0, mdl.calculateRSquared(), 1.0e-10);
    TestUtils.assertEquals(0, mdl.estimateErrorVariance(), 1.0e-7);
    TestUtils.assertEquals(0.00, mdl.calculateResidualSumOfSquares(), 1.0e-6);
}
Also used : DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Test(org.junit.Test)

Aggregations

DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix)67 Test (org.junit.Test)33 Matrix (org.apache.ignite.ml.math.Matrix)28 DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)19 Vector (org.apache.ignite.ml.math.Vector)17 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)14 ArrayList (java.util.ArrayList)8 DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure)6 Random (java.util.Random)5 MathIllegalArgumentException (org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException)5 Arrays (java.util.Arrays)4 Collections (java.util.Collections)4 Comparator (java.util.Comparator)4 MLPArchitecture (org.apache.ignite.ml.nn.architecture.MLPArchitecture)4 Assert.assertEquals (org.junit.Assert.assertEquals)4 Assert.assertTrue (org.junit.Assert.assertTrue)4 Ignite (org.apache.ignite.Ignite)3 KMeansLocalClusterer (org.apache.ignite.ml.clustering.KMeansLocalClusterer)3 BaseFuzzyCMeansClusterer (org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer)2 FuzzyCMeansLocalClusterer (org.apache.ignite.ml.clustering.FuzzyCMeansLocalClusterer)2