
Example 6 with FeatureMeta

Use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

From the class MSEHistogramTest, method testOfSums.

/**
 * Checks that summing per-partition MSE histograms reproduces the histogram built over the whole
 * sample, and that adding an empty histogram to a non-empty one changes nothing.
 */
@Test
public void testOfSums() {
    int sampleId = 0;
    BucketMeta bucketMeta1 = new BucketMeta(new FeatureMeta("", 0, false));
    bucketMeta1.setMinVal(0.);
    bucketMeta1.setBucketSize(0.1);
    BucketMeta bucketMeta2 = new BucketMeta(new FeatureMeta("", 1, true));
    MSEHistogram forAllHist1 = new MSEHistogram(sampleId, bucketMeta1);
    MSEHistogram forAllHist2 = new MSEHistogram(sampleId, bucketMeta2);
    List<MSEHistogram> partitions1 = new ArrayList<>();
    List<MSEHistogram> partitions2 = new ArrayList<>();
    int cntOfPartitions = rnd.nextInt(100) + 1;
    for (int i = 0; i < cntOfPartitions; i++) {
        partitions1.add(new MSEHistogram(sampleId, bucketMeta1));
        partitions2.add(new MSEHistogram(sampleId, bucketMeta2));
    }
    int datasetSize = rnd.nextInt(1000) + 1;
    for (int i = 0; i < datasetSize; i++) {
        BootstrappedVector vec = randomVector(false);
        vec.features().set(1, (vec.features().get(1) * 100) % 100);
        forAllHist1.addElement(vec);
        forAllHist2.addElement(vec);
        int partId = rnd.nextInt(cntOfPartitions);
        partitions1.get(partId).addElement(vec);
        partitions2.get(partId).addElement(vec);
    }
    checkSums(forAllHist1, partitions1);
    checkSums(forAllHist2, partitions2);
    MSEHistogram emptyHist1 = new MSEHistogram(sampleId, bucketMeta1);
    MSEHistogram emptyHist2 = new MSEHistogram(sampleId, bucketMeta2);
    assertTrue(forAllHist1.isEqualTo(forAllHist1.plus(emptyHist1)));
    assertTrue(forAllHist2.isEqualTo(forAllHist2.plus(emptyHist2)));
    assertTrue(forAllHist1.isEqualTo(emptyHist1.plus(forAllHist1)));
    assertTrue(forAllHist2.isEqualTo(emptyHist2.plus(forAllHist2)));
}
Also used: FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta), ArrayList (java.util.ArrayList), BucketMeta (org.apache.ignite.ml.dataset.feature.BucketMeta), BootstrappedVector (org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedVector), Test (org.junit.Test)
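
The FeatureMeta constructor used throughout these examples takes (name, featureId, isCategoricalFeature). The continuous feature's BucketMeta is given a minimum value and a bucket width, while the categorical feature's BucketMeta is left unconfigured, matching the test above. A minimal, standalone sketch of the same setup (the feature names are illustrative, not taken from the test):

import org.apache.ignite.ml.dataset.feature.BucketMeta;
import org.apache.ignite.ml.dataset.feature.FeatureMeta;

public class FeatureMetaSetupSketch {
    public static void main(String[] args) {
        // Continuous feature: FeatureMeta(name, featureId, isCategoricalFeature = false).
        FeatureMeta age = new FeatureMeta("age", 0, false);

        // Categorical feature: in the test above no min value or bucket width is configured for it.
        FeatureMeta color = new FeatureMeta("color", 1, true);

        // Binning for the continuous feature: buckets of width 0.1 starting at 0, as in the test above.
        BucketMeta ageBuckets = new BucketMeta(age);
        ageBuckets.setMinVal(0.);
        ageBuckets.setBucketSize(0.1);

        BucketMeta colorBuckets = new BucketMeta(color);

        System.out.println("Prepared bucket metadata for a continuous and a categorical feature.");
    }
}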

Example 7 with FeatureMeta

Use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

From the class RandomForestRegressionExample, method main.

/**
 * Run example.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Random Forest regression algorithm over cached dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.BOSTON_HOUSE_PRICES);
            AtomicInteger idx = new AtomicInteger(0);
            RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(
                IntStream.range(0, dataCache.get(1).size() - 1)
                    .mapToObj(x -> new FeatureMeta("", idx.getAndIncrement(), false))
                    .collect(Collectors.toList()))
                .withAmountOfTrees(101)
                .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
                .withMaxDepth(4)
                .withMinImpurityDelta(0.)
                .withSubSampleSize(0.3)
                .withSeed(0);
            trainer.withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder()
                .withParallelismStrategyTypeDependency(ParallelismStrategy.ON_DEFAULT_POOL)
                .withLoggingFactoryDependency(ConsoleLogger.Factory.LOW));
            System.out.println(">>> Configured trainer: " + trainer.getClass().getSimpleName());
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            ModelsComposition randomForestMdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Trained model: " + randomForestMdl.toString(true));
            double mse = 0.0;
            double mae = 0.0;
            int totalAmount = 0;
            try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> observation : observations) {
                    Vector val = observation.getValue();
                    Vector inputs = val.copyOfRange(1, val.size());
                    double groundTruth = val.get(0);
                    double prediction = randomForestMdl.predict(inputs);
                    mse += Math.pow(prediction - groundTruth, 2.0);
                    mae += Math.abs(prediction - groundTruth);
                    totalAmount++;
                }
                System.out.println("\n>>> Evaluated model on " + totalAmount + " data points.");
                mse /= totalAmount;
                System.out.println("\n>>> Mean squared error (MSE) " + mse);
                mae /= totalAmount;
                System.out.println("\n>>> Mean absolute error (MAE) " + mae);
                System.out.println(">>> Random Forest regression algorithm over cached dataset usage example completed.");
            }
        } finally {
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used: SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache), ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta), RandomForestRegressionTrainer (org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer), Ignite (org.apache.ignite.Ignite), Vector (org.apache.ignite.ml.math.primitives.vector.Vector), IgniteCache (org.apache.ignite.IgniteCache), Cache (javax.cache.Cache)
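
In this example the AtomicInteger only supplies consecutive feature ids, which the IntStream index already provides. A minimal sketch of an equivalent way to build the all-continuous metadata list (featureCount stands in for dataCache.get(1).size() - 1, the number of columns after the label; the count in main is a placeholder):

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.ignite.ml.dataset.feature.FeatureMeta;

public class FeatureMetaListSketch {
    /** Builds one continuous (non-categorical) FeatureMeta per column, using the stream index as the feature id. */
    public static List<FeatureMeta> continuousFeatures(int featureCount) {
        return IntStream.range(0, featureCount)
            .mapToObj(i -> new FeatureMeta("", i, false))
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        // Placeholder count; in the example it would be dataCache.get(1).size() - 1.
        List<FeatureMeta> meta = continuousFeatures(13);
        System.out.println("Created metadata for " + meta.size() + " features.");
    }
}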

Example 8 with FeatureMeta

Use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

From the class RandomForestClassificationExample, method main.

/**
 * Run example.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Random Forest multi-class classification algorithm over cached dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.WINE_RECOGNITION);
            AtomicInteger idx = new AtomicInteger(0);
            RandomForestClassifierTrainer classifier = new RandomForestClassifierTrainer(
                IntStream.range(0, dataCache.get(1).size() - 1)
                    .mapToObj(x -> new FeatureMeta("", idx.getAndIncrement(), false))
                    .collect(Collectors.toList()))
                .withAmountOfTrees(101)
                .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
                .withMaxDepth(4)
                .withMinImpurityDelta(0.)
                .withSubSampleSize(0.3)
                .withSeed(0);
            System.out.println(">>> Configured trainer: " + classifier.getClass().getSimpleName());
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            ModelsComposition randomForestMdl = classifier.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Trained model: " + randomForestMdl.toString(true));
            int amountOfErrors = 0;
            int totalAmount = 0;
            try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> observation : observations) {
                    Vector val = observation.getValue();
                    Vector inputs = val.copyOfRange(1, val.size());
                    double groundTruth = val.get(0);
                    double prediction = randomForestMdl.predict(inputs);
                    totalAmount++;
                    if (!Precision.equals(groundTruth, prediction, Precision.EPSILON))
                        amountOfErrors++;
                }
                System.out.println("\n>>> Evaluated model on " + totalAmount + " data points.");
                System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);
                System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double) totalAmount));
                System.out.println(">>> Random Forest multi-class classification algorithm over cached dataset usage example completed.");
            }
        } finally {
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used: SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache), ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta), RandomForestClassifierTrainer (org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer), Ignite (org.apache.ignite.Ignite), Vector (org.apache.ignite.ml.math.primitives.vector.Vector), IgniteCache (org.apache.ignite.IgniteCache), Cache (javax.cache.Cache)
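
Both example applications declare every feature as continuous (the third FeatureMeta argument is false). If some columns held categorical codes, only that flag would change for those entries. A minimal sketch with hypothetical column indices (the index set and feature count are illustrative, not taken from the example):

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.ignite.ml.dataset.feature.FeatureMeta;

public class MixedFeatureMetaSketch {
    public static void main(String[] args) {
        // Hypothetical: columns 2 and 5 carry categorical codes, the rest are continuous.
        Set<Integer> categoricalIdx = new HashSet<>(Arrays.asList(2, 5));
        int featureCount = 10;

        List<FeatureMeta> meta = IntStream.range(0, featureCount)
            .mapToObj(i -> new FeatureMeta("", i, categoricalIdx.contains(i)))
            .collect(Collectors.toList());

        System.out.println("Created metadata for " + meta.size() + " features ("
            + categoricalIdx.size() + " categorical).");
    }
}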

Example 9 with FeatureMeta

Use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

From the class RandomForestIntegrationTest, method testFit.

/**
 * Trains a random forest of five regression trees on a one-feature step function and checks that
 * the resulting composition has the expected size and uses a mean-value predictions aggregator.
 */
@Test
public void testFit() {
    int size = 100;
    CacheConfiguration<Integer, double[]> trainingSetCacheCfg = new CacheConfiguration<>();
    trainingSetCacheCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
    trainingSetCacheCfg.setName("TRAINING_SET");
    IgniteCache<Integer, double[]> data = ignite.createCache(trainingSetCacheCfg);
    Random rnd = new Random(0);
    for (int i = 0; i < size; i++) {
        double x = rnd.nextDouble() - 0.5;
        data.put(i, new double[] { x, x > 0 ? 1 : 0 });
    }
    ArrayList<FeatureMeta> meta = new ArrayList<>();
    meta.add(new FeatureMeta("", 0, false));
    RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(meta).withAmountOfTrees(5).withFeaturesCountSelectionStrgy(x -> 2);
    RandomForestModel mdl = trainer.fit(ignite, data, new DoubleArrayVectorizer<Integer>().labeled(1));
    assertTrue(mdl.getPredictionsAggregator() instanceof MeanValuePredictionsAggregator);
    assertEquals(5, mdl.getModels().size());
}
Also used: DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer), ArrayList (java.util.ArrayList), MeanValuePredictionsAggregator (org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregator), Random (java.util.Random), FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta), RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction), CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration), GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest), Test (org.junit.Test)
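
The final assertion checks that the regression forest aggregates its trees with MeanValuePredictionsAggregator, i.e. the model's answer is the mean of the individual tree predictions. A small sketch of that aggregation step in isolation, assuming the aggregator can be constructed with no arguments and applied Function-style to an array of per-tree predictions:

import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregator;

public class MeanAggregatorSketch {
    public static void main(String[] args) {
        MeanValuePredictionsAggregator aggregator = new MeanValuePredictionsAggregator();

        // Hypothetical per-tree answers for one input vector; the forest's prediction is their mean.
        double[] perTreePredictions = {0.0, 1.0, 1.0, 1.0, 0.0};

        // Expected output: 0.6.
        System.out.println("Aggregated prediction: " + aggregator.apply(perTreePredictions));
    }
}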

Example 10 with FeatureMeta

Use of org.apache.ignite.ml.dataset.feature.FeatureMeta in project ignite by apache.

From the class RandomForestRegressionTrainerTest, method testUpdate.

/**
 * Checks that updating a trained random forest on the same dataset, or on an empty dataset,
 * yields predictions close to those of the original model.
 */
@Test
public void testUpdate() {
    int sampleSize = 1000;
    Map<Double, LabeledVector<Double>> sample = new HashMap<>();
    for (int i = 0; i < sampleSize; i++) {
        double x1 = i;
        double x2 = x1 / 10.0;
        double x3 = x2 / 10.0;
        double x4 = x3 / 10.0;
        sample.put(x1 * x2 + x3 * x4, VectorUtils.of(x1, x2, x3, x4).labeled((double) i % 2));
    }
    ArrayList<FeatureMeta> meta = new ArrayList<>();
    for (int i = 0; i < 4; i++) meta.add(new FeatureMeta("", i, false));
    RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(meta).withAmountOfTrees(100).withFeaturesCountSelectionStrgy(x -> 2);
    RandomForestModel originalMdl = trainer.fit(sample, parts, new LabeledDummyVectorizer<>());
    RandomForestModel updatedOnSameDS = trainer.update(originalMdl, sample, parts, new LabeledDummyVectorizer<>());
    RandomForestModel updatedOnEmptyDS = trainer.update(originalMdl, new HashMap<Double, LabeledVector<Double>>(), parts, new LabeledDummyVectorizer<>());
    Vector v = VectorUtils.of(5, 0.5, 0.05, 0.005);
    assertEquals(originalMdl.predict(v), updatedOnSameDS.predict(v), 0.1);
    assertEquals(originalMdl.predict(v), updatedOnEmptyDS.predict(v), 0.1);
}
Also used: HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), LabeledVector (org.apache.ignite.ml.structures.LabeledVector), FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta), Vector (org.apache.ignite.ml.math.primitives.vector.Vector), TrainerTest (org.apache.ignite.ml.common.TrainerTest), Test (org.junit.Test)
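
Each training row in this test is created with VectorUtils.of(...).labeled(...), which attaches the label to the feature vector as a LabeledVector stored under an arbitrary map key. A self-contained sketch of just that data-preparation step, using a smaller sample size than the test:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.ignite.ml.dataset.feature.FeatureMeta;
import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
import org.apache.ignite.ml.structures.LabeledVector;

public class LabeledSampleSketch {
    public static void main(String[] args) {
        // Four continuous features, ids 0..3, with empty names as in the test.
        List<FeatureMeta> meta = new ArrayList<>();
        for (int i = 0; i < 4; i++)
            meta.add(new FeatureMeta("", i, false));

        // Map key -> LabeledVector, built the same way the test builds its sample.
        Map<Double, LabeledVector<Double>> sample = new HashMap<>();
        for (int i = 0; i < 10; i++) {
            double x1 = i, x2 = x1 / 10.0, x3 = x2 / 10.0, x4 = x3 / 10.0;
            sample.put(x1 * x2 + x3 * x4, VectorUtils.of(x1, x2, x3, x4).labeled((double) i % 2));
        }

        System.out.println("Prepared " + sample.size() + " labeled vectors over " + meta.size() + " features.");
    }
}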

Aggregations

FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta): 10 usages
Vector (org.apache.ignite.ml.math.primitives.vector.Vector): 6 usages
Test (org.junit.Test): 6 usages
ArrayList (java.util.ArrayList): 5 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 4 usages
Ignite (org.apache.ignite.Ignite): 4 usages
SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache): 4 usages
RandomForestRegressionTrainer (org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer): 3 usages
Path (java.nio.file.Path): 2 usages
HashMap (java.util.HashMap): 2 usages
Cache (javax.cache.Cache): 2 usages
IgniteCache (org.apache.ignite.IgniteCache): 2 usages
TrainerTest (org.apache.ignite.ml.common.TrainerTest): 2 usages
ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition): 2 usages
BucketMeta (org.apache.ignite.ml.dataset.feature.BucketMeta): 2 usages
BootstrappedVector (org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedVector): 2 usages
LabeledVector (org.apache.ignite.ml.structures.LabeledVector): 2 usages
RandomForestClassifierTrainer (org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer): 2 usages
RandomForestModel (org.apache.ignite.ml.tree.randomforest.RandomForestModel): 2 usages
Map (java.util.Map): 1 usage