Search in sources :

Example 6 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class StandardScalerExample method main.

/**
 * Runs the standard scaler example.
 *
 * <p>Starts an Ignite node from {@code examples/config/example-ignite.xml}, fills a cache via
 * {@code createCache}, fits a {@code StandardScalerTrainer} on that cache and prints a description
 * of the resulting dataset through {@code DatasetHelper.describe()}. The cache is destroyed and
 * standard output is flushed before the method returns.
 *
 * @param args Command line arguments (not used).
 * @throws Exception If the example fails.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Standard scaler example started.");
        IgniteCache<Integer, Vector> data = null;
        try {
            data = createCache(ignite);
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1, 2);
            // Defines a preprocessor that scales features.
            Preprocessor<Integer, Vector> preprocessor = new StandardScalerTrainer<Integer, Vector>().fit(ignite, data, vectorizer);
            // Creates a cache based simple dataset containing features and providing standard dataset API.
            try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, data, preprocessor)) {
                new DatasetHelper(dataset).describe();
            }
            System.out.println(">>> Standard scaler example completed.");
        } finally {
            // createCache(...) may throw before 'data' is assigned; without this guard the
            // resulting NullPointerException would replace the original failure.
            if (data != null) {
                data.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DatasetHelper(org.apache.ignite.examples.ml.util.DatasetHelper)

Example 7 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class OneHotEncoderPreprocessorTest method testTwoCategorialFeatureAndTwoDoubleFeatures.

/**
 * Checks one-hot encoding of rows that mix two encoded columns (indices 0 and 2) with two
 * plain numeric columns (indices 1 and 3).
 *
 * <p>Each expected row lists the two numeric values first, followed by the one-hot group for
 * column 0 (two slots) and the one-hot group for column 2 (three slots). The third input row
 * carries {@code Double.NaN} in column 2 and is expected to light up the slot registered under
 * the empty-string key.
 */
@Test
public void testTwoCategorialFeatureAndTwoDoubleFeatures() {
    Vector[] data = new Vector[] {
        new DenseVector(new Serializable[] { "42", 1.0, "M", 2.0 }),
        new DenseVector(new Serializable[] { "43", 2.0, "F", 3.0 }),
        new DenseVector(new Serializable[] { "42", 3.0, Double.NaN, 4.0 }),
        new DenseVector(new Serializable[] { "42", 4.0, "F", 5.0 })
    };
    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2, 3);

    // Category -> slot index for column 0.
    HashMap<String, Integer> firstColumnCodes = new HashMap<>();
    firstColumnCodes.put("42", 0);
    firstColumnCodes.put("43", 1);

    // Category -> slot index for column 2.
    HashMap<String, Integer> thirdColumnCodes = new HashMap<>();
    thirdColumnCodes.put("F", 0);
    thirdColumnCodes.put("M", 1);
    thirdColumnCodes.put("", 2);

    // One entry per input column; columns 1 and 3 are not encoded and stay null.
    HashMap[] encodingValues = new HashMap[4];
    encodingValues[0] = firstColumnCodes;
    encodingValues[2] = thirdColumnCodes;

    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(2);

    OneHotEncoderPreprocessor<Integer, Vector> preprocessor = new OneHotEncoderPreprocessor<Integer, Vector>(encodingValues, vectorizer, handledIndices);

    double[][] postProcessedData = new double[][] {
        { 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0 },
        { 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0 },
        { 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0 },
        { 4.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0 }
    };

    for (int row = 0; row < data.length; row++) {
        double[] actual = preprocessor.apply(row, data[row]).features().asArray();
        assertArrayEquals(postProcessedData[row], actual, 1e-8);
    }
}
Also used : HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) OneHotEncoderPreprocessor(org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 8 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class OneHotEncoderPreprocessorTest method testOneCategorialFeature.

/**
 * Checks one-hot encoding of single-column rows whose only column (index 0) is encoded.
 *
 * <p>With the mapping {@code "42" -> 0}, {@code "43" -> 1}, each row is expected to become a
 * two-slot vector with a {@code 1.0} in the slot of its category.
 */
@Test
public void testOneCategorialFeature() {
    Vector[] data = new Vector[] {
        new DenseVector(new Serializable[] { "42" }),
        new DenseVector(new Serializable[] { "43" }),
        new DenseVector(new Serializable[] { "42" })
    };
    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0);

    // Category -> slot index for the single encoded column.
    HashMap<String, Integer> columnCodes = new HashMap<>();
    columnCodes.put("42", 0);
    columnCodes.put("43", 1);

    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);

    OneHotEncoderPreprocessor<Integer, Vector> preprocessor = new OneHotEncoderPreprocessor<Integer, Vector>(new HashMap[] { columnCodes }, vectorizer, handledIndices);

    double[][] postProcessedData = new double[][] {
        { 1.0, 0.0 },
        { 0.0, 1.0 },
        { 1.0, 0.0 }
    };

    for (int row = 0; row < data.length; row++) {
        double[] actual = preprocessor.apply(row, data[row]).features().asArray();
        assertArrayEquals(postProcessedData[row], actual, 1e-8);
    }
}
Also used : HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) OneHotEncoderPreprocessor(org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 9 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class EncoderTrainerTest method testFitWithUnknownStringValueInTheGivenData.

/**
 * Tests that a string encoder fitted with {@code fit()} rejects values it has not seen.
 *
 * <p>The trainer is fitted on six two-element numeric vectors; the resulting preprocessor is then
 * applied to a vector holding the strings {@code "Monday"} and {@code "September"}. The test
 * passes only if that call throws {@link UnknownCategorialValueException}.
 */
@Test
public void testFitWithUnknownStringValueInTheGivenData() {
    Map<Integer, Vector> data = new HashMap<>();
    data.put(1, VectorUtils.of(3.0, 0.0));
    data.put(2, VectorUtils.of(3.0, 12.0));
    data.put(3, VectorUtils.of(3.0, 12.0));
    data.put(4, VectorUtils.of(2.0, 45.0));
    data.put(5, VectorUtils.of(2.0, 45.0));
    data.put(6, VectorUtils.of(14.0, 12.0));
    final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1);
    DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(data, parts);
    EncoderTrainer<Integer, Vector> strEncoderTrainer = new EncoderTrainer<Integer, Vector>().withEncoderType(EncoderType.STRING_ENCODER).withEncodedFeature(0).withEncodedFeature(1);
    EncoderPreprocessor<Integer, Vector> preprocessor = strEncoderTrainer.fit(TestUtils.testEnvBuilder(), datasetBuilder, vectorizer);
    try {
        preprocessor.apply(7, new DenseVector(new Serializable[] { "Monday", "September" })).features().asArray();
        // Reached only when no exception was thrown; the AssertionError raised by fail()
        // is not caught below, so it propagates and fails the test.
        fail("UnknownCategorialFeatureValue");
    } catch (UnknownCategorialValueException e) {
        // Expected: the values were not present in the fitted data.
    }
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) UnknownCategorialValueException(org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) LocalDatasetBuilder(org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 10 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class EncoderTrainerTest method testFitOnStringCategorialFeaturesWithReversedOrder.

/**
 * Tests {@code fit()} for a string encoder configured with
 * {@code EncoderSortingStrategy.FREQUENCY_ASC}.
 *
 * <p>In the training rows "Monday" is the most frequent value of column 0 (three of six rows) and
 * "September" the least frequent value of column 1 (one row); the test expects them to be
 * encoded as {@code 2.0} and {@code 0.0} respectively.
 */
@Test
public void testFitOnStringCategorialFeaturesWithReversedOrder() {
    String[][] rows = {
        { "Monday", "September" },
        { "Monday", "August" },
        { "Monday", "August" },
        { "Friday", "June" },
        { "Friday", "June" },
        { "Sunday", "August" }
    };

    // Keys are 1-based row numbers.
    Map<Integer, Vector> data = new HashMap<>();
    for (int i = 0; i < rows.length; i++) {
        data.put(i + 1, new DenseVector(new Serializable[] { rows[i][0], rows[i][1] }));
    }

    final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1);
    DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(data, parts);

    EncoderTrainer<Integer, Vector> strEncoderTrainer = new EncoderTrainer<Integer, Vector>()
        .withEncoderType(EncoderType.STRING_ENCODER)
        .withEncoderIndexingStrategy(EncoderSortingStrategy.FREQUENCY_ASC)
        .withEncodedFeature(0)
        .withEncodedFeature(1);

    EncoderPreprocessor<Integer, Vector> preprocessor = strEncoderTrainer.fit(TestUtils.testEnvBuilder(), datasetBuilder, vectorizer);

    Vector sample = new DenseVector(new Serializable[] { "Monday", "September" });
    double[] expected = new double[] { 2.0, 0.0 };
    double[] actual = preprocessor.apply(7, sample).features().asArray();

    assertArrayEquals(expected, actual, 1e-8);
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) LocalDatasetBuilder(org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Aggregations

DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)23 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)23 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)18 Ignite (org.apache.ignite.Ignite)13 HashMap (java.util.HashMap)10 Test (org.junit.Test)10 DatasetHelper (org.apache.ignite.examples.ml.util.DatasetHelper)7 HashSet (java.util.HashSet)6 SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)5 Serializable (java.io.Serializable)4 IgniteCache (org.apache.ignite.IgniteCache)4 OneHotEncoderPreprocessor (org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor)4 TrainerTest (org.apache.ignite.ml.common.TrainerTest)3 LocalDatasetBuilder (org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder)3 Cache (javax.cache.Cache)2 Ignition (org.apache.ignite.Ignition)2 Vectorizer (org.apache.ignite.ml.dataset.feature.extractor.Vectorizer)2 UnknownCategorialValueException (org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException)2 Preprocessor (org.apache.ignite.ml.preprocessing.Preprocessor)2 LinearRegressionLSQRTrainer (org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer)2