
Example 1 with FeatureMeta

use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

the class RandomForestClassificationExportImportExample method main.

/**
 * Run example.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Random Forest multi-class classification algorithm over cached dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println("\n>>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        Path jsonMdlPath = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.WINE_RECOGNITION);
            AtomicInteger idx = new AtomicInteger(0);
            RandomForestClassifierTrainer classifier = new RandomForestClassifierTrainer(
                IntStream.range(0, dataCache.get(1).size() - 1)
                    .mapToObj(x -> new FeatureMeta("", idx.getAndIncrement(), false))
                    .collect(Collectors.toList()))
                .withAmountOfTrees(101)
                .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
                .withMaxDepth(4)
                .withMinImpurityDelta(0.)
                .withSubSampleSize(0.3)
                .withSeed(0);
            System.out.println(">>> Configured trainer: " + classifier.getClass().getSimpleName());
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            RandomForestModel mdl = classifier.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Exported Random Forest classification model: " + mdl.toString(true));
            double accuracy = evaluateModel(dataCache, mdl);
            System.out.println("\n>>> Accuracy for exported Random Forest classification model " + accuracy);
            jsonMdlPath = Files.createTempFile(null, null);
            mdl.toJSON(jsonMdlPath);
            RandomForestModel modelImportedFromJSON = RandomForestModel.fromJSON(jsonMdlPath);
            System.out.println("\n>>> Imported Random Forest classification model: " + modelImportedFromJSON);
            accuracy = evaluateModel(dataCache, modelImportedFromJSON);
            System.out.println("\n>>> Accuracy for imported Random Forest classification model " + accuracy);
            System.out.println("\n>>> Random Forest multi-class classification algorithm over cached dataset usage example completed.");
        } finally {
            if (dataCache != null)
                dataCache.destroy();
            if (jsonMdlPath != null)
                Files.deleteIfExists(jsonMdlPath);
        }
    } finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) RandomForestModel(org.apache.ignite.ml.tree.randomforest.RandomForestModel) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) RandomForestClassifierTrainer(org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)
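
The example calls an evaluateModel helper that is not part of this snippet. The sketch below shows what such an accuracy check could look like, assuming the label sits in the first coordinate of each cached vector (consistent with the Vectorizer.LabelCoordinate.FIRST setting above); it is an illustrative guess, not the exact helper from the Ignite example.

import javax.cache.Cache;

import org.apache.ignite.IgniteCache;
import org.apache.ignite.cache.query.QueryCursor;
import org.apache.ignite.cache.query.ScanQuery;
import org.apache.ignite.ml.math.primitives.vector.Vector;
import org.apache.ignite.ml.tree.randomforest.RandomForestModel;

/** Illustrative accuracy check: coordinate 0 holds the label, the rest are features. */
private static double evaluateModel(IgniteCache<Integer, Vector> dataCache, RandomForestModel mdl) {
    int amountOfErrors = 0;
    int totalAmount = 0;

    try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
        for (Cache.Entry<Integer, Vector> observation : observations) {
            Vector val = observation.getValue();

            // Features are everything after the label coordinate.
            Vector inputs = val.copyOfRange(1, val.size());
            double groundTruth = val.get(0);

            double prediction = mdl.predict(inputs);

            totalAmount++;
            if (Math.abs(prediction - groundTruth) > 1e-9)
                amountOfErrors++;
        }
    }

    return 1.0 - (double)amountOfErrors / totalAmount;
}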

Example 2 with FeatureMeta

use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

the class RandomForestRegressionExportImportExample method main.

/**
 * Run example.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Random Forest regression algorithm over cached dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println("\n>>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        Path jsonMdlPath = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.BOSTON_HOUSE_PRICES);
            AtomicInteger idx = new AtomicInteger(0);
            RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(
                IntStream.range(0, dataCache.get(1).size() - 1)
                    .mapToObj(x -> new FeatureMeta("", idx.getAndIncrement(), false))
                    .collect(Collectors.toList()))
                .withAmountOfTrees(101)
                .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
                .withMaxDepth(4)
                .withMinImpurityDelta(0.)
                .withSubSampleSize(0.3)
                .withSeed(0);
            trainer.withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder()
                .withParallelismStrategyTypeDependency(ParallelismStrategy.ON_DEFAULT_POOL)
                .withLoggingFactoryDependency(ConsoleLogger.Factory.LOW));
            System.out.println("\n>>> Configured trainer: " + trainer.getClass().getSimpleName());
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            RandomForestModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println("\n>>> Exported Random Forest regression model: " + mdl.toString(true));
            double mae = evaluateModel(dataCache, mdl);
            System.out.println("\n>>> Mean absolute error (MAE) for exported Random Forest regression model " + mae);
            jsonMdlPath = Files.createTempFile(null, null);
            mdl.toJSON(jsonMdlPath);
            RandomForestModel modelImportedFromJSON = RandomForestModel.fromJSON(jsonMdlPath);
            System.out.println("\n>>> Exported Random Forest regression model: " + modelImportedFromJSON.toString(true));
            mae = evaluateModel(dataCache, modelImportedFromJSON);
            System.out.println("\n>>> Mean absolute error (MAE) for exported Random Forest regression model " + mae);
            System.out.println("\n>>> Random Forest regression algorithm over cached dataset usage example completed.");
        } finally {
            if (dataCache != null)
                dataCache.destroy();
            if (jsonMdlPath != null)
                Files.deleteIfExists(jsonMdlPath);
        }
    } finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) RandomForestModel(org.apache.ignite.ml.tree.randomforest.RandomForestModel) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) RandomForestRegressionTrainer(org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)
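
Both export/import examples build the per-feature metadata with an AtomicInteger counter captured inside the stream. The same list can be produced directly from the stream index; the sketch below is a minimal alternative that relies only on the FeatureMeta(name, featureId, isCategorical) constructor already used above, with made-up feature names.

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.ignite.ml.dataset.feature.FeatureMeta;
import org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer;

// The label occupies the first coordinate, so the feature count is the vector size minus one.
int featureCnt = dataCache.get(1).size() - 1;

List<FeatureMeta> meta = IntStream.range(0, featureCnt)
    .mapToObj(i -> new FeatureMeta("feature-" + i, i, false)) // false = continuous (non-categorical) feature
    .collect(Collectors.toList());

RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(meta)
    .withAmountOfTrees(101)
    .withMaxDepth(4);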

Example 3 with FeatureMeta

use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

the class LearningEnvironmentTest method testBasic.

/**
 */
@Test
public void testBasic() {
    RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(
        IntStream.range(0, 0)
            .mapToObj(x -> new FeatureMeta("", 0, false))
            .collect(Collectors.toList()))
        .withAmountOfTrees(101)
        .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
        .withMaxDepth(4)
        .withMinImpurityDelta(0.)
        .withSubSampleSize(0.3)
        .withSeed(0);
    LearningEnvironmentBuilder envBuilder = LearningEnvironmentBuilder.defaultBuilder()
        .withParallelismStrategyType(ParallelismStrategy.Type.ON_DEFAULT_POOL)
        .withLoggingFactoryDependency(part -> ConsoleLogger.factory(MLLogger.VerboseLevel.LOW));
    trainer.withEnvironmentBuilder(envBuilder);
    assertEquals(DefaultParallelismStrategy.class, trainer.learningEnvironment().parallelismStrategy().getClass());
    assertEquals(ConsoleLogger.class, trainer.learningEnvironment().logger().getClass());
}
Also used : FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) RandomForestRegressionTrainer(org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer) Test(org.junit.Test)

Example 4 with FeatureMeta

use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

the class RandomForestClassifierTrainerTest method testUpdate.

/**
 */
@Test
public void testUpdate() {
    int sampleSize = 1000;
    Map<Integer, LabeledVector<Double>> sample = new HashMap<>();
    for (int i = 0; i < sampleSize; i++) {
        double x1 = i;
        double x2 = x1 / 10.0;
        double x3 = x2 / 10.0;
        double x4 = x3 / 10.0;
        sample.put(i, VectorUtils.of(x1, x2, x3, x4).labeled((double) i % 2));
    }
    ArrayList<FeatureMeta> meta = new ArrayList<>();
    for (int i = 0; i < 4; i++) meta.add(new FeatureMeta("", i, false));
    DatasetTrainer<RandomForestModel, Double> trainer = new RandomForestClassifierTrainer(meta)
        .withAmountOfTrees(100)
        .withFeaturesCountSelectionStrgy(x -> 2)
        .withEnvironmentBuilder(TestUtils.testEnvBuilder());
    RandomForestModel originalMdl = trainer.fit(sample, parts, new LabeledDummyVectorizer<>());
    RandomForestModel updatedOnSameDS = trainer.update(originalMdl, sample, parts, new LabeledDummyVectorizer<>());
    RandomForestModel updatedOnEmptyDS = trainer.update(originalMdl, new HashMap<Integer, LabeledVector<Double>>(), parts, new LabeledDummyVectorizer<>());
    Vector v = VectorUtils.of(5, 0.5, 0.05, 0.005);
    assertEquals(originalMdl.predict(v), updatedOnSameDS.predict(v), 0.01);
    assertEquals(originalMdl.predict(v), updatedOnEmptyDS.predict(v), 0.01);
}
Also used : TrainerTest(org.apache.ignite.ml.common.TrainerTest) OnMajorityPredictionsAggregator(org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregator) TestUtils(org.apache.ignite.ml.TestUtils) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Assert.assertTrue(org.junit.Assert.assertTrue) HashMap(java.util.HashMap) Test(org.junit.Test) DatasetTrainer(org.apache.ignite.ml.trainers.DatasetTrainer) LabeledDummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.LabeledDummyVectorizer) ArrayList(java.util.ArrayList) FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) VectorUtils(org.apache.ignite.ml.math.primitives.vector.VectorUtils) Map(java.util.Map) Assert.assertEquals(org.junit.Assert.assertEquals)
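
The parts value used above appears to come from the TrainerTest base class (a parameterized partition count for the local map-based dataset) and is not shown in this snippet. The sketch below repeats the fit-then-update flow with an explicit, hypothetical partition count, using only the overloads that appear in the test.

import java.util.HashMap;
import java.util.Map;

import org.apache.ignite.ml.dataset.feature.extractor.impl.LabeledDummyVectorizer;
import org.apache.ignite.ml.structures.LabeledVector;
import org.apache.ignite.ml.tree.randomforest.RandomForestModel;

// Hypothetical partition count; in the test it is supplied by the TrainerTest base class.
int partitions = 2;

Map<Integer, LabeledVector<Double>> firstBatch = new HashMap<>();
Map<Integer, LabeledVector<Double>> secondBatch = new HashMap<>();
// ... fill both maps with labeled vectors, as in the loop at the top of testUpdate ...

// Initial training on the first batch.
RandomForestModel mdl = trainer.fit(firstBatch, partitions, new LabeledDummyVectorizer<>());

// Update the previously trained model with a second batch instead of retraining from scratch.
mdl = trainer.update(mdl, secondBatch, partitions, new LabeledDummyVectorizer<>());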

Example 5 with FeatureMeta

use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

the class GiniFeatureHistogramTest method testOfSums.

/**
 */
@Test
public void testOfSums() {
    int sampleId = 0;
    BucketMeta bucketMeta1 = new BucketMeta(new FeatureMeta("", 0, false));
    bucketMeta1.setMinVal(0.);
    bucketMeta1.setBucketSize(0.1);
    BucketMeta bucketMeta2 = new BucketMeta(new FeatureMeta("", 1, true));
    GiniHistogram forAllHist1 = new GiniHistogram(sampleId, lblMapping, bucketMeta1);
    GiniHistogram forAllHist2 = new GiniHistogram(sampleId, lblMapping, bucketMeta2);
    List<GiniHistogram> partitions1 = new ArrayList<>();
    List<GiniHistogram> partitions2 = new ArrayList<>();
    int cntOfPartitions = rnd.nextInt(1000);
    for (int i = 0; i < cntOfPartitions; i++) {
        partitions1.add(new GiniHistogram(sampleId, lblMapping, bucketMeta1));
        partitions2.add(new GiniHistogram(sampleId, lblMapping, bucketMeta2));
    }
    int datasetSize = rnd.nextInt(10000);
    for (int i = 0; i < datasetSize; i++) {
        BootstrappedVector vec = randomVector(true);
        vec.features().set(1, (vec.features().get(1) * 100) % 100);
        forAllHist1.addElement(vec);
        forAllHist2.addElement(vec);
        int partId = rnd.nextInt(cntOfPartitions);
        partitions1.get(partId).addElement(vec);
        partitions2.get(partId).addElement(vec);
    }
    checkSums(forAllHist1, partitions1);
    checkSums(forAllHist2, partitions2);
    GiniHistogram emptyHist1 = new GiniHistogram(sampleId, lblMapping, bucketMeta1);
    GiniHistogram emptyHist2 = new GiniHistogram(sampleId, lblMapping, bucketMeta2);
    assertTrue(forAllHist1.isEqualTo(forAllHist1.plus(emptyHist1)));
    assertTrue(forAllHist2.isEqualTo(forAllHist2.plus(emptyHist2)));
    assertTrue(forAllHist1.isEqualTo(emptyHist1.plus(forAllHist1)));
    assertTrue(forAllHist2.isEqualTo(emptyHist2.plus(forAllHist2)));
}
Also used : FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) ArrayList(java.util.ArrayList) BucketMeta(org.apache.ignite.ml.dataset.feature.BucketMeta) BootstrappedVector(org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedVector) Test(org.junit.Test)
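
For context on the two BucketMeta configurations above: the first treats feature 0 as continuous, grouping values into fixed-width buckets of 0.1 starting at 0.0, while the second marks feature 1 as categorical, where presumably each distinct value maps to its own bucket and no width is needed. A construction-only sketch using just the constructor and setters shown in the test:

import org.apache.ignite.ml.dataset.feature.BucketMeta;
import org.apache.ignite.ml.dataset.feature.FeatureMeta;

// Continuous feature: conceptually, a value v falls into bucket roughly (v - minVal) / bucketSize.
BucketMeta continuousMeta = new BucketMeta(new FeatureMeta("f0", 0, false));
continuousMeta.setMinVal(0.0);
continuousMeta.setBucketSize(0.1);

// Categorical feature: values are used as-is, so no bucket width is configured.
BucketMeta categoricalMeta = new BucketMeta(new FeatureMeta("f1", 1, true));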

Aggregations

FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta) 10
Vector (org.apache.ignite.ml.math.primitives.vector.Vector) 6
Test (org.junit.Test) 6
ArrayList (java.util.ArrayList) 5
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 4
Ignite (org.apache.ignite.Ignite) 4
SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache) 4
RandomForestRegressionTrainer (org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer) 3
Path (java.nio.file.Path) 2
HashMap (java.util.HashMap) 2
Cache (javax.cache.Cache) 2
IgniteCache (org.apache.ignite.IgniteCache) 2
TrainerTest (org.apache.ignite.ml.common.TrainerTest) 2
ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition) 2
BucketMeta (org.apache.ignite.ml.dataset.feature.BucketMeta) 2
BootstrappedVector (org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedVector) 2
LabeledVector (org.apache.ignite.ml.structures.LabeledVector) 2
RandomForestClassifierTrainer (org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer) 2
RandomForestModel (org.apache.ignite.ml.tree.randomforest.RandomForestModel) 2
Map (java.util.Map) 1