Use of smile.clustering.KMeans in project smile by haifengl.
Class GaussianProcessRegressionTest, method testKin8nm.
/**
 * Test of learn method, of class GaussianProcessRegression.
 */
@Test
public void testKin8nm() {
    System.out.println("kin8nm");
    ArffParser parser = new ArffParser();
    parser.setResponseIndex(8);
    try {
        AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/regression/kin8nm.arff"));
        double[] y = data.toArray(new double[data.size()]);
        double[][] x = data.toArray(new double[data.size()][]);

        // Take a random subsample of 4000 instances.
        int[] perm = Math.permutate(x.length);
        double[][] datax = new double[4000][];
        double[] datay = new double[datax.length];
        for (int i = 0; i < datax.length; i++) {
            datax[i] = x[perm[i]];
            datay[i] = y[perm[i]];
        }

        int n = datax.length;
        int k = 10;
        CrossValidation cv = new CrossValidation(n, k);
        double rss = 0.0;
        double sparseRSS30 = 0.0;
        for (int i = 0; i < k; i++) {
            double[][] trainx = Math.slice(datax, cv.train[i]);
            double[] trainy = Math.slice(datay, cv.train[i]);
            double[][] testx = Math.slice(datax, cv.test[i]);
            double[] testy = Math.slice(datay, cv.test[i]);

            // Full Gaussian process regression with a fixed kernel width.
            GaussianProcessRegression<double[]> rkhs = new GaussianProcessRegression<>(trainx, trainy, new GaussianKernel(34.97), 0.1);

            // Pick 30 inducing points as k-means centroids (at most 10 iterations).
            KMeans kmeans = new KMeans(trainx, 30, 10);
            double[][] centers = kmeans.centroids();

            // Heuristic kernel width from the pairwise distances between centroids.
            double r0 = 0.0;
            for (int l = 0; l < centers.length; l++) {
                for (int j = 0; j < l; j++) {
                    r0 += Math.distance(centers[l], centers[j]);
                }
            }
            r0 /= (2 * centers.length);
            System.out.println("Kernel width = " + r0);

            // Sparse approximation built on the 30 centroids.
            GaussianProcessRegression<double[]> sparse30 = new GaussianProcessRegression<>(trainx, trainy, centers, new GaussianKernel(r0), 0.1);

            for (int j = 0; j < testx.length; j++) {
                double r = testy[j] - rkhs.predict(testx[j]);
                rss += r * r;
                r = testy[j] - sparse30.predict(testx[j]);
                sparseRSS30 += r * r;
            }
        }

        System.out.println("Regular 10-CV MSE = " + rss / n);
        System.out.println("Sparse (30) 10-CV MSE = " + sparseRSS30 / n);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
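A note on the reported numbers: across the 10 folds, each of the n = 4000 subsampled instances lands in exactly one test set, so the accumulated squared residuals divided by n (rss / n and sparseRSS30 / n) are the 10-fold cross-validated mean squared errors printed at the end. The CPU and 2dplanes tests below follow the same pattern.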
Use of smile.clustering.KMeans in project smile by haifengl.
Class GaussianProcessRegressionTest, method testCPU.
/**
 * Test of learn method, of class GaussianProcessRegression.
 */
@Test
public void testCPU() {
    System.out.println("CPU");
    ArffParser parser = new ArffParser();
    parser.setResponseIndex(6);
    try {
        AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/cpu.arff"));
        double[] datay = data.toArray(new double[data.size()]);
        double[][] datax = data.toArray(new double[data.size()][]);
        Math.standardize(datax);

        int n = datax.length;
        int k = 10;
        CrossValidation cv = new CrossValidation(n, k);
        double rss = 0.0;
        double sparseRSS30 = 0.0;
        double nystromRSS30 = 0.0;
        for (int i = 0; i < k; i++) {
            double[][] trainx = Math.slice(datax, cv.train[i]);
            double[] trainy = Math.slice(datay, cv.train[i]);
            double[][] testx = Math.slice(datax, cv.test[i]);
            double[] testy = Math.slice(datay, cv.test[i]);

            // Full Gaussian process regression with a fixed kernel width.
            GaussianProcessRegression<double[]> rkhs = new GaussianProcessRegression<>(trainx, trainy, new GaussianKernel(47.02), 0.1);

            // Pick 30 inducing points as k-means centroids (at most 10 iterations).
            KMeans kmeans = new KMeans(trainx, 30, 10);
            double[][] centers = kmeans.centroids();

            // Heuristic kernel width from the pairwise distances between centroids.
            double r0 = 0.0;
            for (int l = 0; l < centers.length; l++) {
                for (int j = 0; j < l; j++) {
                    r0 += Math.distance(centers[l], centers[j]);
                }
            }
            r0 /= (2 * centers.length);
            System.out.println("Kernel width = " + r0);

            // Sparse and Nystrom approximations, both built on the same 30 centroids.
            GaussianProcessRegression<double[]> sparse30 = new GaussianProcessRegression<>(trainx, trainy, centers, new GaussianKernel(r0), 0.1);
            GaussianProcessRegression<double[]> nystrom30 = new GaussianProcessRegression<>(trainx, trainy, centers, new GaussianKernel(r0), 0.1, true);

            for (int j = 0; j < testx.length; j++) {
                double r = testy[j] - rkhs.predict(testx[j]);
                rss += r * r;
                r = testy[j] - sparse30.predict(testx[j]);
                sparseRSS30 += r * r;
                r = testy[j] - nystrom30.predict(testx[j]);
                nystromRSS30 += r * r;
            }
        }

        System.out.println("Regular 10-CV MSE = " + rss / n);
        System.out.println("Sparse (30) 10-CV MSE = " + sparseRSS30 / n);
        System.out.println("Nystrom (30) 10-CV MSE = " + nystromRSS30 / n);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
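This test fits three models on the same folds: the full GP, the sparse approximation on 30 k-means centroids, and a third model constructed with an extra boolean argument set to true, which, judging by the variable name nystrom30 and the printed label, selects the Nystrom approximation of the kernel matrix over those same 30 centroids.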
Use of smile.clustering.KMeans in project smile by haifengl.
Class GaussianProcessRegressionTest, method test2DPlanes.
/**
 * Test of learn method, of class GaussianProcessRegression.
 */
@Test
public void test2DPlanes() {
    System.out.println("2dplanes");
    ArffParser parser = new ArffParser();
    parser.setResponseIndex(10);
    try {
        AttributeDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/regression/2dplanes.arff"));
        double[][] x = data.toArray(new double[data.size()][]);
        double[] y = data.toArray(new double[data.size()]);

        // Take a random subsample of 4000 instances.
        int[] perm = Math.permutate(x.length);
        double[][] datax = new double[4000][];
        double[] datay = new double[datax.length];
        for (int i = 0; i < datax.length; i++) {
            datax[i] = x[perm[i]];
            datay[i] = y[perm[i]];
        }

        int n = datax.length;
        int k = 10;
        CrossValidation cv = new CrossValidation(n, k);
        double rss = 0.0;
        double sparseRSS30 = 0.0;
        for (int i = 0; i < k; i++) {
            double[][] trainx = Math.slice(datax, cv.train[i]);
            double[] trainy = Math.slice(datay, cv.train[i]);
            double[][] testx = Math.slice(datax, cv.test[i]);
            double[] testy = Math.slice(datay, cv.test[i]);

            // Full Gaussian process regression with a fixed kernel width.
            GaussianProcessRegression<double[]> rkhs = new GaussianProcessRegression<>(trainx, trainy, new GaussianKernel(34.866), 0.1);

            // Pick 30 inducing points as k-means centroids (at most 10 iterations).
            KMeans kmeans = new KMeans(trainx, 30, 10);
            double[][] centers = kmeans.centroids();

            // Heuristic kernel width from the pairwise distances between centroids.
            double r0 = 0.0;
            for (int l = 0; l < centers.length; l++) {
                for (int j = 0; j < l; j++) {
                    r0 += Math.distance(centers[l], centers[j]);
                }
            }
            r0 /= (2 * centers.length);
            System.out.println("Kernel width = " + r0);

            // Sparse approximation built on the 30 centroids.
            GaussianProcessRegression<double[]> sparse30 = new GaussianProcessRegression<>(trainx, trainy, centers, new GaussianKernel(r0), 0.1);

            for (int j = 0; j < testx.length; j++) {
                double r = testy[j] - rkhs.predict(testx[j]);
                rss += r * r;
                r = testy[j] - sparse30.predict(testx[j]);
                sparseRSS30 += r * r;
            }
        }

        System.out.println("Regular 10-CV MSE = " + rss / n);
        System.out.println("Sparse (30) 10-CV MSE = " + sparseRSS30 / n);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Use of smile.clustering.KMeans in project smile by haifengl.
Class SmileUtils, method learnGaussianRadialBasis (per-cluster widths, scaled by r).
/**
 * Learns Gaussian RBF functions and centers from data. The centers are
 * chosen as the centroids of k-means clustering. The standard deviation
 * (i.e. width) of each Gaussian radial basis function is estimated as the
 * width of the corresponding cluster, multiplied by the given scaling
 * parameter r.
 * @param x the training dataset.
 * @param centers an array to store the centers on output; its length is used as the k of k-means.
 * @param r the scaling parameter.
 * @return Gaussian RBF functions with parameters learned from the data.
 */
public static GaussianRadialBasis[] learnGaussianRadialBasis(double[][] x, double[][] centers, double r) {
    if (r <= 0.0) {
        throw new IllegalArgumentException("Invalid scaling parameter: " + r);
    }

    // Cluster the data; the centroids become the RBF centers.
    int k = centers.length;
    KMeans kmeans = new KMeans(x, k, 10);
    System.arraycopy(kmeans.centroids(), 0, centers, 0, k);

    // Accumulate each cluster's squared distances to its centroid.
    int n = x.length;
    int[] y = kmeans.getClusterLabel();
    double[] sigma = new double[k];
    for (int i = 0; i < n; i++) {
        sigma[y[i]] += Math.squaredDistance(x[i], centers[y[i]]);
    }

    int[] ni = kmeans.getClusterSize();
    GaussianRadialBasis[] rbf = new GaussianRadialBasis[k];
    for (int i = 0; i < k; i++) {
        if (ni[i] >= 5 || sigma[i] != 0.0) {
            sigma[i] = Math.sqrt(sigma[i] / ni[i]);
        } else {
            // Degenerate cluster: fall back to half the distance to the nearest other center.
            sigma[i] = Double.POSITIVE_INFINITY;
            for (int j = 0; j < k; j++) {
                if (i != j) {
                    double d = Math.distance(centers[i], centers[j]);
                    if (d < sigma[i]) {
                        sigma[i] = d;
                    }
                }
            }
            sigma[i] /= 2.0;
        }

        rbf[i] = new GaussianRadialBasis(r * sigma[i]);
    }

    return rbf;
}
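A brief usage sketch may help clarify the calling convention: the caller allocates the centers array, its length fixes k, and the method fills it in place while returning one RBF per cluster. The training set x and the scaling factor 2.0 below are illustrative placeholders, not values from the project; imports of SmileUtils and GaussianRadialBasis are assumed.

// x: the training data (double[][]), assumed available in scope.
double[][] centers = new double[10][];  // the array length fixes k = 10 for the k-means step
GaussianRadialBasis[] rbf = SmileUtils.learnGaussianRadialBasis(x, centers, 2.0);

// After the call, centers[i] holds the i-th centroid and rbf[i] the Gaussian
// basis function whose width was estimated from that cluster (times 2.0).
for (int i = 0; i < rbf.length; i++) {
    System.out.println("RBF " + i + " response at radius 1.0: " + rbf[i].f(1.0));
}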
Use of smile.clustering.KMeans in project smile by haifengl.
Class SmileUtils, method learnGaussianRadialBasis (single shared width).
/**
 * Learns a Gaussian RBF function and centers from data. The centers are
 * chosen as the centroids of k-means clustering. Let d<sub>max</sub> be the
 * maximum distance between the chosen centers; the standard deviation
 * (i.e. width) of the Gaussian radial basis function is then
 * d<sub>max</sub> / sqrt(2*k), where k is the number of centers. This choice
 * would be close to optimal if the data were uniformly distributed in the
 * input space, leading to a uniform distribution of centroids.
 * @param x the training dataset.
 * @param centers an array to store the centers on output; its length is used as the k of k-means.
 * @return a Gaussian RBF function with its parameter learned from the data.
 */
public static GaussianRadialBasis learnGaussianRadialBasis(double[][] x, double[][] centers) {
    // Cluster the data; the centroids become the RBF centers.
    int k = centers.length;
    KMeans kmeans = new KMeans(x, k, 10);
    System.arraycopy(kmeans.centroids(), 0, centers, 0, k);

    // Find the maximum pairwise distance between the centers.
    double r0 = 0.0;
    for (int i = 0; i < k; i++) {
        for (int j = 0; j < i; j++) {
            double d = Math.distance(centers[i], centers[j]);
            if (r0 < d) {
                r0 = d;
            }
        }
    }

    // Shared width = d_max / sqrt(2 * k).
    r0 /= Math.sqrt(2 * k);
    return new GaussianRadialBasis(r0);
}
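In contrast with the previous overload, this one returns a single GaussianRadialBasis whose width is shared by all k centers. As a quick numeric check of the formula: with k = 30 centers and a maximum inter-center distance d_max of 9.0, the learned width is 9.0 / sqrt(2 * 30) = 9.0 / sqrt(60) ≈ 1.16.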