Search in sources:

Example 1 with UnknownCategorialValueException

use of org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException in project ignite by apache.

the class FrequencyEncoderPreprocessor method apply.

/**
 * Applies this preprocessor: runs the base preprocessor and replaces every handled feature with the
 * value stored for it in {@code encodingFrequencies}.
 *
 * <p>For a handled index, a raw value equal to {@code Double.NaN} is looked up under
 * {@code KEY_FOR_NULL_VALUES} when that key is present in the map for this index; otherwise the raw
 * value itself is used as the lookup key. Features at non-handled indices are cast to {@code double}
 * and copied as is.
 *
 * @param k Key.
 * @param v Value.
 * @return Preprocessed row holding the encoded features and the label produced by the base preprocessor.
 * @throws UnknownCategorialValueException If a handled feature holds a value that has no entry in the
 *      encoding map for its index.
 */
@Override
public LabeledVector apply(K k, V v) {
    LabeledVector tmp = basePreprocessor.apply(k, v);
    double[] res = new double[tmp.size()];
    for (int i = 0; i < res.length; i++) {
        Object tmpObj = tmp.getRaw(i);
        if (handledIndices.contains(i)) {
            // The value must be read from the same map whose key presence was just checked;
            // reading it from a different map would yield an unrelated value or a null.
            if (tmpObj.equals(Double.NaN) && encodingFrequencies[i].containsKey(KEY_FOR_NULL_VALUES))
                res[i] = encodingFrequencies[i].get(KEY_FOR_NULL_VALUES);
            else if (encodingFrequencies[i].containsKey(tmpObj))
                res[i] = encodingFrequencies[i].get(tmpObj);
            else
                throw new UnknownCategorialValueException(tmpObj.toString());
        } else
            res[i] = (double) tmpObj;
    }
    return new LabeledVector(VectorUtils.of(res), tmp.label());
}
Also used : UnknownCategorialValueException(org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) LabeledVector(org.apache.ignite.ml.structures.LabeledVector)

Example 2 with UnknownCategorialValueException

use of org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException in project ignite by apache.

the class LabelEncoderPreprocessor method apply.

/**
 * Runs the base preprocessor and swaps the row's label for the number registered for it in
 * {@code labelFrequencies}; the features are passed through untouched.
 *
 * <p>A label equal to {@code Double.NaN} is looked up under {@code KEY_FOR_NULL_VALUES} when that key
 * is present; in every other case the label itself is the lookup key.
 *
 * @param k Key.
 * @param v Value.
 * @return Row with the original features and the encoded label.
 * @throws UnknownCategorialValueException If the label has no entry in {@code labelFrequencies}.
 */
@Override
public LabeledVector apply(K k, V v) {
    final LabeledVector row = basePreprocessor.apply(k, v);
    final Object rawLabel = row.label();

    // Decide once which key the label resolves to.
    final boolean useNullKey = rawLabel.equals(Double.NaN) && labelFrequencies.containsKey(KEY_FOR_NULL_VALUES);
    final Object lookupKey = useNullKey ? KEY_FOR_NULL_VALUES : rawLabel;

    // The null key is already known to be present; only a regular label still needs checking.
    if (!useNullKey && !labelFrequencies.containsKey(lookupKey))
        throw new UnknownCategorialValueException(rawLabel.toString());

    // Convert to a primitive double first so the resulting label keeps the same type as before.
    final double encodedLabel = labelFrequencies.get(lookupKey);

    return new LabeledVector(row.features(), encodedLabel);
}
Also used : UnknownCategorialValueException(org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DeployableObject(org.apache.ignite.ml.environment.deploy.DeployableObject)

Example 3 with UnknownCategorialValueException

use of org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException in project ignite by apache.

the class StringEncoderPreprocessor method apply.

/**
 * Applies this preprocessor: runs the base preprocessor and replaces every handled feature with the
 * number stored for it in {@code encodingValues}.
 *
 * <p>For a handled index, a raw value equal to {@code Double.NaN} is looked up under
 * {@code KEY_FOR_NULL_VALUES} when that key is present in the map for this index; otherwise the raw
 * value itself is used as the lookup key. A feature at a non-handled index must be a {@link Number}
 * and is converted with {@link Number#doubleValue()}.
 *
 * @param k Key.
 * @param v Value.
 * @return Preprocessed row holding the encoded features and the label produced by the base preprocessor.
 * @throws UnknownCategorialValueException If a handled feature holds a value that has no entry in the
 *      encoding map for its index.
 * @throws IllegalFeatureTypeException If a non-handled feature is not a {@link Number}.
 */
@Override
public LabeledVector apply(K k, V v) {
    LabeledVector tmp = basePreprocessor.apply(k, v);
    double[] res = new double[tmp.size()];
    for (int i = 0; i < res.length; i++) {
        Object tmpObj = tmp.getRaw(i);
        if (handledIndices.contains(i)) {
            if (tmpObj.equals(Double.NaN) && encodingValues[i].containsKey(KEY_FOR_NULL_VALUES))
                res[i] = encodingValues[i].get(KEY_FOR_NULL_VALUES);
            else if (encodingValues[i].containsKey(tmpObj))
                res[i] = encodingValues[i].get(tmpObj);
            else
                throw new UnknownCategorialValueException(tmpObj.toString());
        } else {
            if (tmpObj instanceof Number)
                // A direct (double) cast of an Object only succeeds for Double; any other Number
                // (Integer, Long, Float, ...) would pass the check above and then fail with a
                // ClassCastException. doubleValue() handles every Number subtype.
                res[i] = ((Number) tmpObj).doubleValue();
            else
                throw new IllegalFeatureTypeException(tmpObj.getClass(), tmpObj, Double.class);
        }
    }
    return new LabeledVector(VectorUtils.of(res), tmp.label());
}
Also used : UnknownCategorialValueException(org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DeployableObject(org.apache.ignite.ml.environment.deploy.DeployableObject) IllegalFeatureTypeException(org.apache.ignite.ml.math.exceptions.preprocessing.IllegalFeatureTypeException)

Example 4 with UnknownCategorialValueException

use of org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException in project ignite by apache.

the class OneHotEncoderPreprocessor method apply.

/**
 * Runs the base preprocessor and builds the one-hot encoded row.
 *
 * <p>Non-handled features are cast to {@code double} and written, in their original order, to the
 * leading positions of the result. For each handled feature one position after that leading part is
 * set to {@code 1.0}; its offset is obtained from {@code getIdxOffset} using the running count of
 * handled features seen so far and the index stored for the value in {@code encodingValues}.
 * A raw value equal to {@code Double.NaN} is looked up under {@code KEY_FOR_NULL_VALUES} when that key
 * is present; otherwise the lookup key is {@code String.valueOf} of the raw value.
 *
 * @param k Key.
 * @param v Value.
 * @return Preprocessed row holding the encoded features and the label produced by the base preprocessor.
 * @throws UnknownCategorialValueException If a handled feature holds a value that has no entry in the
 *      encoding map for its index.
 */
@Override
public LabeledVector apply(K k, V v) {
    final LabeledVector row = basePreprocessor.apply(k, v);
    final int rowSize = row.size();

    // Number of features that are copied through unchanged; one-hot slots start right after them.
    final int plainFeatureCnt = rowSize - handledIndices.size();
    final double[] encoded = new double[plainFeatureCnt + getAdditionalSize(encodingValues)];

    int seenCategorial = 0;
    int nextPlainIdx = 0;

    for (int featureIdx = 0; featureIdx < rowSize; featureIdx++) {
        final Object raw = row.getRaw(featureIdx);

        if (!handledIndices.contains(featureIdx)) {
            encoded[nextPlainIdx++] = (double) raw;
            continue;
        }

        seenCategorial++;

        // Decide once which key this raw value resolves to.
        final boolean useNullKey =
            raw.equals(Double.NaN) && encodingValues[featureIdx].containsKey(KEY_FOR_NULL_VALUES);
        final Object lookupKey = useNullKey ? KEY_FOR_NULL_VALUES : String.valueOf(raw);

        // The null key is already known to be present; only a regular value still needs checking.
        if (!useNullKey && !encodingValues[featureIdx].containsKey(lookupKey))
            throw new UnknownCategorialValueException(raw.toString());

        final Integer indexedVal = encodingValues[featureIdx].get(lookupKey);
        encoded[plainFeatureCnt + getIdxOffset(seenCategorial, indexedVal, encodingValues)] = 1.0;
    }

    return new LabeledVector(VectorUtils.of(encoded), row.label());
}
Also used : UnknownCategorialValueException(org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DeployableObject(org.apache.ignite.ml.environment.deploy.DeployableObject)

Example 5 with UnknownCategorialValueException

use of org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException in project ignite by apache.

the class EncoderTrainerTest method testFitWithUnknownStringValueInTheGivenData.

/**
 * Fits a string encoder on both columns of a small numeric data set and checks that applying the
 * resulting preprocessor to a row whose values did not occur during fitting throws
 * {@link UnknownCategorialValueException}.
 */
@Test
public void testFitWithUnknownStringValueInTheGivenData() {
    Map<Integer, Vector> trainingRows = new HashMap<>();
    trainingRows.put(1, VectorUtils.of(3.0, 0.0));
    trainingRows.put(2, VectorUtils.of(3.0, 12.0));
    trainingRows.put(3, VectorUtils.of(3.0, 12.0));
    trainingRows.put(4, VectorUtils.of(2.0, 45.0));
    trainingRows.put(5, VectorUtils.of(2.0, 45.0));
    trainingRows.put(6, VectorUtils.of(14.0, 12.0));

    final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1);
    DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(trainingRows, parts);

    EncoderTrainer<Integer, Vector> trainer = new EncoderTrainer<Integer, Vector>()
        .withEncoderType(EncoderType.STRING_ENCODER)
        .withEncodedFeature(0)
        .withEncodedFeature(1);

    EncoderPreprocessor<Integer, Vector> preprocessor =
        trainer.fit(TestUtils.testEnvBuilder(), datasetBuilder, vectorizer);

    try {
        // Neither "Monday" nor "September" was present in the data the encoder was fitted on.
        preprocessor.apply(7, new DenseVector(new Serializable[] { "Monday", "September" })).features().asArray();
    } catch (UnknownCategorialValueException expected) {
        // This is the outcome the test is looking for.
        return;
    }

    // Reached only when apply() completed without throwing.
    fail("UnknownCategorialFeatureValue");
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) UnknownCategorialValueException(org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException) LocalDatasetBuilder(org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Aggregations

UnknownCategorialValueException (org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException): 6, LabeledVector (org.apache.ignite.ml.structures.LabeledVector): 4, DeployableObject (org.apache.ignite.ml.environment.deploy.DeployableObject): 3, HashMap (java.util.HashMap): 2, DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer): 2, Vector (org.apache.ignite.ml.math.primitives.vector.Vector): 2, DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector): 2, Test (org.junit.Test): 2, Serializable (java.io.Serializable): 1, HashSet (java.util.HashSet): 1, TrainerTest (org.apache.ignite.ml.common.TrainerTest): 1, LocalDatasetBuilder (org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder): 1, IllegalFeatureTypeException (org.apache.ignite.ml.math.exceptions.preprocessing.IllegalFeatureTypeException): 1, OneHotEncoderPreprocessor (org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor): 1