Search in sources :

Example 1 with KMeansLloyd

use of de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd in project elki by elki-project.

the class WithinClusterMeanDistanceQualityMeasureTest method testOverallDistance.

/**
 * Test cluster average overall distance.
 *
 * <p>Clusters the unit test data set with {@link KMeansLloyd} (k = 2, initialized via
 * {@link FirstKInitialMeans}) and checks the value that
 * {@link WithinClusterMeanDistanceQualityMeasure} reports for the resulting clustering.
 */
@Test
public void testOverallDistance() {
    Database db = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7);
    Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    // Setup algorithm
    KMeansLloyd<DoubleVector> kmeans = new ELKIBuilder<KMeansLloyd<DoubleVector>>(KMeansLloyd.class) //
        .with(KMeans.K_ID, 2) //
        .with(KMeans.INIT_ID, FirstKInitialMeans.class) //
        .build();
    // run KMeans on database
    Clustering<KMeansModel> result = kmeans.run(db);
    // Evaluate with the same distance function the algorithm was configured with.
    final NumberVectorDistanceFunction<? super DoubleVector> dist = kmeans.getDistanceFunction();
    // Test Cluster Average Overall Distance
    KMeansQualityMeasure<? super DoubleVector> overall = new WithinClusterMeanDistanceQualityMeasure();
    final double quality = overall.quality(result, dist, rel);
    assertEquals("Average overall distance not as expected.", 0.8888888888888888, quality, 1e-10);
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) FirstKInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.FirstKInitialMeans) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Example 2 with KMeansLloyd

use of de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd in project elki by elki-project.

the class PassingDataToELKI method main.

/**
 * Entry point: clusters a randomly generated in-memory array with k-means
 * and prints every resulting cluster to standard output.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
    // Switch logging to the statistics level.
    LoggingConfiguration.setStatistics();

    // Build a 1000 x 2 array of random values.
    // Note: ELKI has a nice data generator class, use that instead.
    final double[][] points = new double[1000][2];
    for (double[] row : points) {
        for (int d = 0; d < row.length; d++) {
            row[d] = Math.random();
        }
    }

    // Wrap the existing array so that it can be loaded into a database.
    DatabaseConnection connection = new ArrayAdapterDatabaseConnection(points);
    // The database may hold multiple relations.
    Database database = new StaticArrayDatabase(connection, null);
    // The data is only loaded on initialization (do NOT forget this step).
    database.initialize();

    // Fetch the relation holding the number vectors.
    Relation<NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // The ids are known to form a continuous range here.
    DBIDRange idRange = (DBIDRange) vectors.getDBIDs();

    // K-means should be used with squared Euclidean (least squares).
    SquaredEuclideanDistanceFunction distance = SquaredEuclideanDistanceFunction.STATIC;
    // Default initialization, using global random.
    // To fix the random seed, use: new RandomFactory(seed);
    RandomlyGeneratedInitialMeans initializer = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);

    // Textbook k-means clustering:
    // k = 3 partitions, maximum number of iterations 0 = no limit.
    KMeansLloyd<NumberVector> lloyd = new KMeansLloyd<>(distance, 3, 0, initializer);

    // K-means will automatically choose a numerical relation from the data set.
    // With more than one numeric relation it could be passed explicitly:
    // lloyd.run(database, vectors);
    Clustering<KMeansModel> clustering = lloyd.run(database);

    // Print all clusters, numbered in iteration order.
    int clusterNo = 0;
    for (Cluster<KMeansModel> cluster : clustering.getAllClusters()) {
        // K-means will name all clusters "Cluster" in lack of noise support:
        System.out.println("#" + clusterNo + ": " + cluster.getNameAutomatic());
        System.out.println("Size: " + cluster.size());
        System.out.println("Center: " + cluster.getModel().getPrototype().toString());

        // List the members of this cluster.
        System.out.print("Objects: ");
        DBIDIter member = cluster.getIDs().iter();
        while (member.valid()) {
            // To get the vector use:
            // NumberVector v = vectors.get(member);
            // Offset within our DBID range: "line number".
            // Do NOT rely on using "internalGetIndex()" directly!
            System.out.print(" " + idRange.getOffset(member));
            member.advance();
        }
        System.out.println();
        clusterNo++;
    }
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) RandomlyGeneratedInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection)

Example 3 with KMeansLloyd

use of de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd in project elki by elki-project.

the class WithinClusterVarianceQualityMeasureTest method testVariance.

/**
 * Test cluster variance.
 *
 * <p>Clusters the unit test data set with {@link KMeansLloyd} (k = 2, initialized via
 * {@link FirstKInitialMeans}) and checks the value that
 * {@link WithinClusterVarianceQualityMeasure} reports for the resulting clustering.
 */
@Test
public void testVariance() {
    Database database = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7);
    Relation<DoubleVector> vectors = database.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);

    // Configure the clustering algorithm.
    KMeansLloyd<DoubleVector> lloyd = new ELKIBuilder<KMeansLloyd<DoubleVector>>(KMeansLloyd.class) //
        .with(KMeans.K_ID, 2) //
        .with(KMeans.INIT_ID, FirstKInitialMeans.class) //
        .build();

    // Cluster the database contents.
    Clustering<KMeansModel> clustering = lloyd.run(database);

    // Evaluate with the distance function the algorithm itself uses.
    final NumberVectorDistanceFunction<? super DoubleVector> distance = lloyd.getDistanceFunction();
    KMeansQualityMeasure<? super DoubleVector> measure = new WithinClusterVarianceQualityMeasure();
    final double observed = measure.quality(clustering, distance, vectors);

    assertEquals("Within cluster variance incorrect", 3.16666666666, observed, 1e-10);
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) FirstKInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.FirstKInitialMeans) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Aggregations

KMeansLloyd (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd): 3, KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel): 3, Database (de.lmu.ifi.dbs.elki.database.Database): 3, AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest): 2, FirstKInitialMeans (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.FirstKInitialMeans): 2, DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 2, Test (org.junit.Test): 2, RandomlyGeneratedInitialMeans (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans): 1, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 1, StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase): 1, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 1, DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange): 1, ArrayAdapterDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection): 1, DatabaseConnection (de.lmu.ifi.dbs.elki.datasource.DatabaseConnection): 1, SquaredEuclideanDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction): 1