use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.
the class FrequencyEncoderPreprocessorTest method testApply.
/**
 * Verifies {@code apply()} on vectors holding string features: for each of the three sample vectors the produced
 * feature array must match, within a tolerance of 0.1, the frequencies registered for its values in the
 * per-feature maps handed to the preprocessor.
 */
@Test
public void testApply() {
    Vector[] samples = new Vector[] {
        new DenseVector(new Serializable[] { "1", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" })
    };

    // NOTE(review): presumably selects coordinates 0, 1 and 2 of the upstream vector - confirm against DummyVectorizer.
    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);

    // One frequency table per feature position, in the same order as the features appear in the samples.
    HashMap<String, Double> firstFeatureFreqs = new HashMap<>();
    firstFeatureFreqs.put("1", 0.33);
    firstFeatureFreqs.put("2", 0.66);

    HashMap<String, Double> secondFeatureFreqs = new HashMap<>();
    secondFeatureFreqs.put("Moscow", 1.0);

    HashMap<String, Double> thirdFeatureFreqs = new HashMap<>();
    thirdFeatureFreqs.put("A", 0.33);
    thirdFeatureFreqs.put("B", 0.66);

    // All three feature positions are passed as handled indices.
    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(1);
    handledIndices.add(2);

    // A raw array is required here because Java does not allow creating arrays of a parameterized type.
    FrequencyEncoderPreprocessor<Integer, Vector> preprocessor = new FrequencyEncoderPreprocessor<Integer, Vector>(
        new HashMap[] { firstFeatureFreqs, secondFeatureFreqs, thirdFeatureFreqs },
        vectorizer,
        handledIndices
    );

    double[][] expected = new double[][] {
        { 0.33, 1.0, 0.33 },
        { 0.66, 1.0, 0.66 },
        { 0.66, 1.0, 0.66 }
    };

    for (int row = 0; row < samples.length; row++) {
        double[] actual = preprocessor.apply(row, samples[row]).features().asArray();

        assertArrayEquals(expected[row], actual, 0.1);
    }
}
use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.
the class OneHotEncoderPreprocessorTest method testApplyWithUnknownCategorialValues.
/**
 * Checks that {@code apply()} fails with {@link UnknownCategorialValueException} when a feature value is missing
 * from the encoding values.
 *
 * The encoding map for the first feature only contains {@code "2"}, while the first sample vector carries
 * {@code "1"}, so the test expects the exception to be thrown before the loop over the samples completes.
 *
 * @see UnknownCategorialValueException
 */
@Test
public void testApplyWithUnknownCategorialValues() {
    Vector[] data = new Vector[] {
        new DenseVector(new Serializable[] { "1", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" })
    };

    // NOTE(review): presumably selects coordinates 0, 1 and 2 of the upstream vector - confirm against DummyVectorizer.
    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);

    // Deliberately incomplete: the value "1" used by the first sample has no entry.
    HashMap<String, Integer> firstFeatureEncoding = new HashMap<>();
    firstFeatureEncoding.put("2", 0);

    HashMap<String, Integer> secondFeatureEncoding = new HashMap<>();
    secondFeatureEncoding.put("Moscow", 0);

    HashMap<String, Integer> thirdFeatureEncoding = new HashMap<>();
    thirdFeatureEncoding.put("A", 0);
    thirdFeatureEncoding.put("B", 1);

    // All three feature positions are passed as handled indices.
    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(1);
    handledIndices.add(2);

    // A raw array is required here because Java does not allow creating arrays of a parameterized type.
    OneHotEncoderPreprocessor<Integer, Vector> preprocessor = new OneHotEncoderPreprocessor<Integer, Vector>(
        new HashMap[] { firstFeatureEncoding, secondFeatureEncoding, thirdFeatureEncoding },
        vectorizer,
        handledIndices
    );

    double[][] postProcessedData = new double[][] {
        { 0.0, 1.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 0.0, 1.0 }
    };

    try {
        for (int i = 0; i < data.length; i++) {
            assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]).features().asArray(), 1e-8);
        }

        // Reaching this point means no exception was raised. The resulting AssertionError is not caught below,
        // so it fails the test; no second fail() after the try/catch is needed.
        fail("UnknownCategorialFeatureValue");
    }
    catch (UnknownCategorialValueException expected) {
        // Expected outcome: the unknown categorical value was rejected.
    }
}
use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.
the class OneHotEncoderPreprocessorTest method testApplyWithStringValues.
/**
 * Verifies {@code apply()} on vectors holding string features: each of the three sample vectors, made of three
 * string values, must be turned into the expected five-element array of zeros and ones (compared with a tolerance
 * of 1e-8).
 */
@Test
public void testApplyWithStringValues() {
    Vector[] samples = new Vector[] {
        new DenseVector(new Serializable[] { "1", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "A" }),
        new DenseVector(new Serializable[] { "2", "Moscow", "B" })
    };

    // NOTE(review): presumably selects coordinates 0, 1 and 2 of the upstream vector - confirm against DummyVectorizer.
    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);

    // One value-to-index table per feature position, in the same order as the features appear in the samples.
    HashMap<String, Integer> firstFeatureEncoding = new HashMap<>();
    firstFeatureEncoding.put("1", 1);
    firstFeatureEncoding.put("2", 0);

    HashMap<String, Integer> secondFeatureEncoding = new HashMap<>();
    secondFeatureEncoding.put("Moscow", 0);

    HashMap<String, Integer> thirdFeatureEncoding = new HashMap<>();
    thirdFeatureEncoding.put("A", 0);
    thirdFeatureEncoding.put("B", 1);

    // All three feature positions are passed as handled indices.
    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(1);
    handledIndices.add(2);

    // A raw array is required here because Java does not allow creating arrays of a parameterized type.
    OneHotEncoderPreprocessor<Integer, Vector> preprocessor = new OneHotEncoderPreprocessor<Integer, Vector>(
        new HashMap[] { firstFeatureEncoding, secondFeatureEncoding, thirdFeatureEncoding },
        vectorizer,
        handledIndices
    );

    double[][] expected = new double[][] {
        { 0.0, 1.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 1.0, 0.0 },
        { 1.0, 0.0, 1.0, 0.0, 1.0 }
    };

    for (int row = 0; row < samples.length; row++) {
        double[] actual = preprocessor.apply(row, samples[row]).features().asArray();

        assertArrayEquals(expected[row], actual, 1e-8);
    }
}
Aggregations