Search in sources :

Example 1 with KMeansModel

use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.

the class KMeansClusterizationExportImportExample method main.

/**
 * Run example: trains a KMeans model on a cached dataset, writes it to a temporary JSON file,
 * reads it back and prints both the exported and the imported model.
 *
 * @param args Command line arguments (not used).
 * @throws IOException If creating or deleting the temporary model file fails.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> KMeans clustering algorithm over cached dataset usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Both resources are released in the finally block below, hence declared up front.
        IgniteCache<Integer, Vector> irisCache = null;
        Path exportedMdlFile = null;

        try {
            irisCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            // The first coordinate of every cached vector is used as the label.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            // Alternative kept from the original example: .withDistance(new MinkowskiDistance(2));
            double[] weights = new double[] { 5.9360, 2.7700, 4.2600, 1.3260 };
            WeightedMinkowskiDistance distance = new WeightedMinkowskiDistance(2, weights);
            KMeansTrainer trainer = new KMeansTrainer().withDistance(distance);

            KMeansModel trainedMdl = trainer.fit(ignite, irisCache, vectorizer);
            System.out.println("\n>>> Exported KMeans model: " + trainedMdl);

            // Round-trip the model through a temporary JSON file.
            exportedMdlFile = Files.createTempFile(null, null);
            trainedMdl.toJSON(exportedMdlFile);

            KMeansModel restoredMdl = KMeansModel.fromJSON(exportedMdlFile);
            System.out.println("\n>>> Imported KMeans model: " + restoredMdl);

            System.out.println("\n>>> KMeans clustering algorithm over cached dataset usage example completed.");
        } finally {
            if (irisCache != null)
                irisCache.destroy();

            if (exportedMdlFile != null)
                Files.deleteIfExists(exportedMdlFile);
        }
    } finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) WeightedMinkowskiDistance(org.apache.ignite.ml.math.distances.WeightedMinkowskiDistance)

Example 2 with KMeansModel

use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.

the class KeepBinaryTest method test.

/**
 * Populates a cache of binary objects, trains a KMeans model on it through a keep-binary
 * dataset builder and checks the center that the model assigns to the zero vector.
 */
@Test
public void test() {
    IgniteCache<Integer, BinaryObject> binaryCache = populateCache(ignite);

    // The dataset builder is switched to keep-binary mode before training.
    CacheBasedDatasetBuilder<Integer, BinaryObject> keepBinaryBuilder =
        new CacheBasedDatasetBuilder<>(ignite, binaryCache).withKeepBinary(true);

    KMeansTrainer kMeansTrainer = new KMeansTrainer();

    KMeansModel trainedMdl = kMeansTrainer.fit(
        keepBinaryBuilder,
        new BinaryObjectVectorizer<Integer>("feature1").labeled("label")
    );

    // Index of the center predicted for the point 0.0.
    Integer zeroCentreIdx = trainedMdl.predict(VectorUtils.num2Vec(0.0));

    // First coordinate of that center is expected to be exactly zero.
    double firstCoordinate = trainedMdl.centers()[zeroCentreIdx].get(0);

    assertTrue(firstCoordinate == 0);
}
Also used : KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) BinaryObjectVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.BinaryObjectVectorizer) BinaryObject(org.apache.ignite.binary.BinaryObject) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 3 with KMeansModel

use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.

the class CollectionsTest method test.

/**
 * Passes pairs of differently configured ML objects (matrices, distances, metadata, dataset
 * structures and models) to the {@code test} and {@code specialTest} helpers, which are
 * defined outside this snippet.
 */
@Test
@SuppressWarnings("unchecked")
public void test() {
    // Matrix views over dense matrices of different shapes.
    VectorizedViewMatrix firstView = new VectorizedViewMatrix(new DenseMatrix(2, 2), 1, 1, 1, 1);
    VectorizedViewMatrix secondView = new VectorizedViewMatrix(new DenseMatrix(3, 2), 2, 1, 1, 1);
    test(firstView, secondView);

    // Distance measures: two fresh instances of the same class per call.
    specialTest(new ManhattanDistance(), new ManhattanDistance());
    specialTest(new HammingDistance(), new HammingDistance());
    specialTest(new EuclideanDistance(), new EuclideanDistance());

    // Feature metadata renamed after construction versus one keeping the original name.
    FeatureMetadata renamedMeta = new FeatureMetadata("name2");
    renamedMeta.setName("name1");
    FeatureMetadata untouchedMeta = new FeatureMetadata("name2");
    test(renamedMeta, untouchedMeta);

    // Dataset structures.
    test(
        new DatasetRow<>(new DenseVector()),
        new DatasetRow<>(new DenseVector(1))
    );
    test(
        new LabeledVector<>(new DenseVector(), null),
        new LabeledVector<>(new DenseVector(1), null)
    );
    test(
        new Dataset<DatasetRow<Vector>>(new DatasetRow[] {}, new FeatureMetadata[] {}),
        new Dataset<DatasetRow<Vector>>(new DatasetRow[] { new DatasetRow() }, new FeatureMetadata[] { new FeatureMetadata() })
    );

    // Models and model formats.
    LogisticRegressionModel firstLogReg = new LogisticRegressionModel(new DenseVector(), 1.0);
    LogisticRegressionModel secondLogReg = new LogisticRegressionModel(new DenseVector(), 0.5);
    test(firstLogReg, secondLogReg);

    KMeansModelFormat manhattanFormat = new KMeansModelFormat(new Vector[] {}, new ManhattanDistance());
    KMeansModelFormat hammingFormat = new KMeansModelFormat(new Vector[] {}, new HammingDistance());
    test(manhattanFormat, hammingFormat);

    KMeansModel manhattanMdl = new KMeansModel(new Vector[] {}, new ManhattanDistance());
    KMeansModel hammingMdl = new KMeansModel(new Vector[] {}, new HammingDistance());
    test(manhattanMdl, hammingMdl);

    SVMLinearClassificationModel firstSvm = new SVMLinearClassificationModel(null, 1.0);
    SVMLinearClassificationModel secondSvm = new SVMLinearClassificationModel(null, 0.5);
    test(firstSvm, secondSvm);

    // Generic arguments are left inline so that diamond inference stays as it was.
    test(
        new ANNClassificationModel(new LabeledVectorSet<>(), new ANNClassificationTrainer.CentroidStat()),
        new ANNClassificationModel(new LabeledVectorSet<>(1, 1), new ANNClassificationTrainer.CentroidStat())
    );
    test(
        new ANNModelFormat(1, new ManhattanDistance(), false, new LabeledVectorSet<>(), new ANNClassificationTrainer.CentroidStat()),
        new ANNModelFormat(2, new ManhattanDistance(), false, new LabeledVectorSet<>(), new ANNClassificationTrainer.CentroidStat())
    );
}
Also used : FeatureMetadata(org.apache.ignite.ml.structures.FeatureMetadata) HammingDistance(org.apache.ignite.ml.math.distances.HammingDistance) KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) LogisticRegressionModel(org.apache.ignite.ml.regressions.logistic.LogisticRegressionModel) ANNModelFormat(org.apache.ignite.ml.knn.ann.ANNModelFormat) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) KMeansModelFormat(org.apache.ignite.ml.clustering.kmeans.KMeansModelFormat) DenseMatrix(org.apache.ignite.ml.math.primitives.matrix.impl.DenseMatrix) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) DatasetRow(org.apache.ignite.ml.structures.DatasetRow) VectorizedViewMatrix(org.apache.ignite.ml.math.primitives.vector.impl.VectorizedViewMatrix) ANNClassificationModel(org.apache.ignite.ml.knn.ann.ANNClassificationModel) SVMLinearClassificationModel(org.apache.ignite.ml.svm.SVMLinearClassificationModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Test(org.junit.Test)

Example 4 with KMeansModel

use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.

the class KMeansModelTest method predictClusters.

/**
 * Builds a {@link KMeansModel} from four fixed two-dimensional centers and a Euclidean distance,
 * then checks its string form, the cluster index predicted for one point next to each center,
 * and the values returned by its accessors.
 */
@Test
public void predictClusters() {
    DistanceMeasure distanceMeasure = new EuclideanDistance();

    // One center per quadrant; the array index is the expected cluster id.
    Vector[] centers = new DenseVector[4];
    centers[0] = new DenseVector(new double[] { 1.0, 1.0 });
    centers[1] = new DenseVector(new double[] { -1.0, 1.0 });
    centers[2] = new DenseVector(new double[] { 1.0, -1.0 });
    centers[3] = new DenseVector(new double[] { -1.0, -1.0 });

    KMeansModel mdl = new KMeansModel(centers, distanceMeasure);

    Assert.assertTrue(mdl.toString().contains("KMeansModel"));

    // JUnit expects (expected, actual[, delta]); keeping that order makes failure messages
    // report the right value as "expected".
    Assert.assertEquals(0.0, mdl.predict(new DenseVector(new double[] { 1.1, 1.1 })), PRECISION);
    Assert.assertEquals(1.0, mdl.predict(new DenseVector(new double[] { -1.1, 1.1 })), PRECISION);
    Assert.assertEquals(2.0, mdl.predict(new DenseVector(new double[] { 1.1, -1.1 })), PRECISION);
    Assert.assertEquals(3.0, mdl.predict(new DenseVector(new double[] { -1.1, -1.1 })), PRECISION);

    Assert.assertEquals(distanceMeasure, mdl.distanceMeasure());
    Assert.assertEquals(4, mdl.amountOfClusters());
    Assert.assertArrayEquals(centers, mdl.centers());
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Test(org.junit.Test)

Example 5 with KMeansModel

use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.

the class KMeansFromSparkExample method main.

/**
 * Run example: parses a K-means model with {@code SparkModelParser}, then prints the predicted
 * cluster next to the label for every entry of the passengers cache.
 *
 * @param args Command line arguments (not used).
 * @throws FileNotFoundException Declared by the original example; presumably raised when the
 *      passengers data cannot be found (confirm against {@code TitanicUtils.readPassengers}).
 */
public static void main(String[] args) throws FileNotFoundException {
    System.out.println();
    System.out.println(">>> K-means model loaded from Spark through serialization over partitioned dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = TitanicUtils.readPassengers(ignite);
            // Coordinates 0, 5, 6 and 4 are taken as features, coordinate 1 as the label.
            final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>(0, 5, 6, 4).labeled(1);
            KMeansModel mdl = (KMeansModel) SparkModelParser.parse(SPARK_MDL_PATH, SupportedSparkModels.KMEANS, env);
            System.out.println(">>> K-Means model: " + mdl);
            System.out.println(">>> ------------------------------------");
            System.out.println(">>> | Predicted cluster\t| Is survived\t|");
            System.out.println(">>> ------------------------------------");
            // The cursor is closed by try-with-resources once the scan is finished.
            try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> observation : observations) {
                    LabeledVector<Double> lv = vectorizer.apply(observation.getKey(), observation.getValue());
                    Vector inputs = lv.features();
                    double isSurvived = lv.label();
                    double clusterId = mdl.predict(inputs);
                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", clusterId, isSurvived);
                }
            }
            System.out.println(">>> ---------------------------------");
        } finally {
            // The cache is still null if readPassengers failed; without this guard a
            // NullPointerException thrown here would mask the original failure.
            if (dataCache != null)
                dataCache.destroy();
        }
    }
}
Also used : KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) IgniteCache(org.apache.ignite.IgniteCache) Cache(javax.cache.Cache)

Aggregations

KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel)14 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)10 KMeansTrainer (org.apache.ignite.ml.clustering.kmeans.KMeansTrainer)9 Ignite (org.apache.ignite.Ignite)6 Test (org.junit.Test)6 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)5 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)5 SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)4 LabeledVector (org.apache.ignite.ml.structures.LabeledVector)4 Cache (javax.cache.Cache)3 IgniteCache (org.apache.ignite.IgniteCache)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 BinaryObject (org.apache.ignite.binary.BinaryObject)2 KMeansModelFormat (org.apache.ignite.ml.clustering.kmeans.KMeansModelFormat)2 TrainerTest (org.apache.ignite.ml.common.TrainerTest)2 Path (java.nio.file.Path)1 Map (java.util.Map)1 Configuration (org.apache.hadoop.conf.Configuration)1