Search in sources :

Example 21 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class FrequencyEncoderPreprocessorTest method testApply.

/**
 * Verifies that {@code apply()} maps every categorical value of a row to the frequency
 * registered for it in the per-feature encoding maps.
 */
@Test
public void testApply() {
    Vector[] rows = new Vector[] {
        new DenseVector(new Serializable[] { "1", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" })
    };

    // Frequencies for feature 0.
    HashMap<String, Double> firstFeatureFreqs = new HashMap<>();
    firstFeatureFreqs.put("1", 0.33);
    firstFeatureFreqs.put("2", 0.66);

    // Frequencies for feature 1.
    HashMap<String, Double> secondFeatureFreqs = new HashMap<>();
    secondFeatureFreqs.put("Moscow", 1.0);

    // Frequencies for feature 2.
    HashMap<String, Double> thirdFeatureFreqs = new HashMap<>();
    thirdFeatureFreqs.put("A", 0.33);
    thirdFeatureFreqs.put("B", 0.66);

    // A raw array is used because Java does not allow creation of generic arrays.
    HashMap[] encodingFrequencies = new HashMap[] { firstFeatureFreqs, secondFeatureFreqs, thirdFeatureFreqs };

    // Indices of the features handled by the encoder: all three columns.
    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(1);
    handledIndices.add(2);

    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);

    FrequencyEncoderPreprocessor<Integer, Vector> preprocessor =
        new FrequencyEncoderPreprocessor<Integer, Vector>(encodingFrequencies, vectorizer, handledIndices);

    double[][] expectedRows = new double[][] {
        { 0.33, 1.0, 0.33 },
        { 0.66, 1.0, 0.66 },
        { 0.66, 1.0, 0.66 }
    };

    for (int rowIdx = 0; rowIdx < rows.length; rowIdx++) {
        double[] actual = preprocessor.apply(rowIdx, rows[rowIdx]).features().asArray();

        assertArrayEquals(expectedRows[rowIdx], actual, 0.1);
    }
}
Also used : HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) FrequencyEncoderPreprocessor(org.apache.ignite.ml.preprocessing.encoding.frequency.FrequencyEncoderPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 22 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class OneHotEncoderPreprocessorTest method testApplyWithUnknownCategorialValues.

/**
 * Checks that {@code apply()} fails with {@link UnknownCategorialValueException} when a row
 * contains a categorical value that has no entry in the encoding values.
 *
 * Here the encoding for feature 0 only knows the value {@code "2"}, while the first data row
 * carries {@code "1"} in that position.
 *
 * @see UnknownCategorialValueException
 */
@Test
public void testApplyWithUnknownCategorialValues() {
    Vector[] data = new Vector[] {
        new DenseVector(new Serializable[] { "1", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" })
    };

    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);

    // Encoding for feature 0 deliberately omits the value "1" present in the data.
    HashMap<String, Integer> firstFeatureEncoding = new HashMap<>();
    firstFeatureEncoding.put("2", 0);

    HashMap<String, Integer> secondFeatureEncoding = new HashMap<>();
    secondFeatureEncoding.put("Moscow", 0);

    HashMap<String, Integer> thirdFeatureEncoding = new HashMap<>();
    thirdFeatureEncoding.put("A", 0);
    thirdFeatureEncoding.put("B", 1);

    // A raw array is used because Java does not allow creation of generic arrays.
    HashMap[] encodingValues = new HashMap[] { firstFeatureEncoding, secondFeatureEncoding, thirdFeatureEncoding };

    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(1);
    handledIndices.add(2);

    OneHotEncoderPreprocessor<Integer, Vector> preprocessor =
        new OneHotEncoderPreprocessor<Integer, Vector>(encodingValues, vectorizer, handledIndices);

    double[][] postProcessedData = new double[][] {
        { 0.0, 1.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 0.0, 1.0 }
    };

    try {
        for (int i = 0; i < data.length; i++) {
            assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]).features().asArray(), 1e-8);
        }

        // Reaching this point means no exception was raised for the unknown value.
        fail("UnknownCategorialFeatureValue");
    } catch (UnknownCategorialValueException expected) {
        // Expected outcome: the unknown categorical value was rejected.
    }
}
Also used : HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) OneHotEncoderPreprocessor(org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor) UnknownCategorialValueException(org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 23 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class OneHotEncoderPreprocessorTest method testApplyWithStringValues.

/**
 * Verifies {@code apply()} on string-valued categorical features: each row is compared
 * against its expected encoded representation.
 */
@Test
public void testApplyWithStringValues() {
    Vector[] rows = new Vector[] {
        new DenseVector(new Serializable[] { "1", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" })
    };

    // Category-to-index mapping for feature 0.
    HashMap<String, Integer> firstFeatureEncoding = new HashMap<>();
    firstFeatureEncoding.put("1", 1);
    firstFeatureEncoding.put("2", 0);

    // Category-to-index mapping for feature 1.
    HashMap<String, Integer> secondFeatureEncoding = new HashMap<>();
    secondFeatureEncoding.put("Moscow", 0);

    // Category-to-index mapping for feature 2.
    HashMap<String, Integer> thirdFeatureEncoding = new HashMap<>();
    thirdFeatureEncoding.put("A", 0);
    thirdFeatureEncoding.put("B", 1);

    // A raw array is used because Java does not allow creation of generic arrays.
    HashMap[] encodingValues = new HashMap[] { firstFeatureEncoding, secondFeatureEncoding, thirdFeatureEncoding };

    // Indices of the features handled by the encoder: all three columns.
    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(1);
    handledIndices.add(2);

    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);

    OneHotEncoderPreprocessor<Integer, Vector> preprocessor =
        new OneHotEncoderPreprocessor<Integer, Vector>(encodingValues, vectorizer, handledIndices);

    double[][] expectedRows = new double[][] {
        { 0.0, 1.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 0.0, 1.0 }
    };

    for (int rowIdx = 0; rowIdx < rows.length; rowIdx++) {
        double[] actual = preprocessor.apply(rowIdx, rows[rowIdx]).features().asArray();

        assertArrayEquals(expectedRows[rowIdx], actual, 1e-8);
    }
}
Also used : HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) OneHotEncoderPreprocessor(org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) 23, Vector (org.apache.ignite.ml.math.primitives.vector.Vector) 23, DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) 18, Ignite (org.apache.ignite.Ignite) 13, HashMap (java.util.HashMap) 10, Test (org.junit.Test) 10, DatasetHelper (org.apache.ignite.examples.ml.util.DatasetHelper) 7, HashSet (java.util.HashSet) 6, SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache) 5, Serializable (java.io.Serializable) 4, IgniteCache (org.apache.ignite.IgniteCache) 4, OneHotEncoderPreprocessor (org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor) 4, TrainerTest (org.apache.ignite.ml.common.TrainerTest) 3, LocalDatasetBuilder (org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder) 3, Cache (javax.cache.Cache) 2, Ignition (org.apache.ignite.Ignition) 2, Vectorizer (org.apache.ignite.ml.dataset.feature.extractor.Vectorizer) 2, UnknownCategorialValueException (org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) 2, Preprocessor (org.apache.ignite.ml.preprocessing.Preprocessor) 2, LinearRegressionLSQRTrainer (org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) 2