Use of org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder in the Apache Ignite project.
From the class EncoderTrainerTest, method testFitWithExceptionOnMissedEncodedFeatureIndex.
/**
 * Checks that the string encoder pipeline fails when only one of the two string features is registered
 * for encoding.
 * <p>
 * The vectorizer picks coordinates 1 and 2 as features (coordinate 0 is the label), yet the trainer is
 * configured with encoded feature index 0 only. The test passes only if
 * {@code IllegalFeatureTypeException} is thrown somewhere in the fit/apply sequence below.
 */
@Test(expected = org.apache.ignite.ml.math.exceptions.preprocessing.IllegalFeatureTypeException.class)
public void testFitWithExceptionOnMissedEncodedFeatureIndex() {
    // Day/month pairs for rows keyed 1..6; the numeric label of each row equals its key.
    String[][] dayAndMonth = {
        { "Monday", "September" },
        { "Monday", "August" },
        { "Monday", "August" },
        { "Friday", "June" },
        { "Friday", "June" },
        { "Sunday", "August" }
    };

    Map<Integer, Vector> samples = new HashMap<>();
    for (int i = 0; i < dayAndMonth.length; i++) {
        int key = i + 1;
        samples.put(key, new DenseVector(new Serializable[] { (double) key, dayAndMonth[i][0], dayAndMonth[i][1] }));
    }

    final Vectorizer<Integer, Vector, Integer, Double> featureExtractor =
        new DummyVectorizer<Integer>(1, 2).labeled(0);
    DatasetBuilder<Integer, Vector> builder = new LocalDatasetBuilder<>(samples, parts);

    // Only feature index 0 is declared as encoded; index 1 is left out on purpose.
    EncoderTrainer<Integer, Vector> trainer = new EncoderTrainer<Integer, Vector>()
        .withEncoderType(EncoderType.STRING_ENCODER)
        .withEncodedFeature(0);

    EncoderPreprocessor<Integer, Vector> encoder = trainer.fit(TestUtils.testEnvBuilder(), builder, featureExtractor);

    Vector probe = new DenseVector(new Serializable[] { 7.0, "Monday", "September" });
    double[] encoded = encoder.apply(7, probe).features().asArray();

    assertArrayEquals(new double[] { 0.0, 2.0 }, encoded, 1e-8);
}
Use of org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder in the Apache Ignite project.
From the class EncoderTrainerTest, method testFitOnStringCategorialFeatures.
/**
 * Fits a {@code STRING_ENCODER} preprocessor on two string features and verifies the encoded output.
 * <p>
 * The vectorizer picks coordinates 1 and 2 as features (coordinate 0 is the label) and both feature
 * indices are registered for encoding. The sample {@code {"Monday", "September"}} is expected to be
 * encoded as {@code {0.0, 2.0}}.
 */
@Test
public void testFitOnStringCategorialFeatures() {
    // Day/month pairs for rows keyed 1..6; the numeric label of each row equals its key.
    String[][] dayAndMonth = {
        { "Monday", "September" },
        { "Monday", "August" },
        { "Monday", "August" },
        { "Friday", "June" },
        { "Friday", "June" },
        { "Sunday", "August" }
    };

    Map<Integer, Vector> samples = new HashMap<>();
    for (int i = 0; i < dayAndMonth.length; i++) {
        int key = i + 1;
        samples.put(key, new DenseVector(new Serializable[] { (double) key, dayAndMonth[i][0], dayAndMonth[i][1] }));
    }

    final Vectorizer<Integer, Vector, Integer, Double> featureExtractor =
        new DummyVectorizer<Integer>(1, 2).labeled(0);
    DatasetBuilder<Integer, Vector> builder = new LocalDatasetBuilder<>(samples, parts);

    EncoderTrainer<Integer, Vector> trainer = new EncoderTrainer<Integer, Vector>()
        .withEncoderType(EncoderType.STRING_ENCODER)
        .withEncodedFeature(0)
        .withEncodedFeature(1);

    EncoderPreprocessor<Integer, Vector> encoder = trainer.fit(TestUtils.testEnvBuilder(), builder, featureExtractor);

    Vector probe = new DenseVector(new Serializable[] { 7.0, "Monday", "September" });
    double[] encoded = encoder.apply(7, probe).features().asArray();

    assertArrayEquals(new double[] { 0.0, 2.0 }, encoded, 1e-8);
}
Use of org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder in the Apache Ignite project.
From the class EncoderTrainerTest, method testFitOnStringCategorialFeaturesWithFrequencyEncoding.
/**
 * Fits a {@code FREQUENCY_ENCODER} preprocessor on two string features and verifies the encoded output.
 * <p>
 * The training rows carry no label; the vectorizer picks coordinates 0 and 1 as features and both are
 * registered for encoding. The expected values line up with the share of training rows that contain each
 * category (e.g. "Monday" appears in 3 of 6 rows, expected 0.5) and are compared with a tolerance of 0.1.
 */
@Test
public void testFitOnStringCategorialFeaturesWithFrequencyEncoding() {
    // Day/month pairs for rows keyed 1..6.
    String[][] dayAndMonth = {
        { "Monday", "September" },
        { "Monday", "August" },
        { "Monday", "August" },
        { "Friday", "June" },
        { "Friday", "June" },
        { "Sunday", "August" }
    };

    Map<Integer, Vector> samples = new HashMap<>();
    for (int i = 0; i < dayAndMonth.length; i++) {
        samples.put(i + 1, new DenseVector(new Serializable[] { dayAndMonth[i][0], dayAndMonth[i][1] }));
    }

    final Vectorizer<Integer, Vector, Integer, Double> featureExtractor = new DummyVectorizer<>(0, 1);
    DatasetBuilder<Integer, Vector> builder = new LocalDatasetBuilder<>(samples, parts);

    EncoderTrainer<Integer, Vector> trainer = new EncoderTrainer<Integer, Vector>()
        .withEncoderType(EncoderType.FREQUENCY_ENCODER)
        .withEncodedFeature(0)
        .withEncodedFeature(1);

    EncoderPreprocessor<Integer, Vector> encoder = trainer.fit(TestUtils.testEnvBuilder(), builder, featureExtractor);

    Vector mondaySeptember = new DenseVector(new Serializable[] { "Monday", "September" });
    double[] mondaySeptemberFreq = encoder.apply(7, mondaySeptember).features().asArray();
    assertArrayEquals(new double[] { 0.5, 0.166 }, mondaySeptemberFreq, 0.1);

    Vector fridayAugust = new DenseVector(new Serializable[] { "Friday", "August" });
    double[] fridayAugustFreq = encoder.apply(7, fridayAugust).features().asArray();
    assertArrayEquals(new double[] { 0.33, 0.5 }, fridayAugustFreq, 0.1);

    Vector sundayJune = new DenseVector(new Serializable[] { "Sunday", "June" });
    double[] sundayJuneFreq = encoder.apply(7, sundayJune).features().asArray();
    assertArrayEquals(new double[] { 0.166, 0.33 }, sundayJuneFreq, 0.1);
}
Use of org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder in the Apache Ignite project.
From the class RegressionEvaluatorTest, method testEvaluatorWithoutFilter.
/**
 * Trains a KNN regression model and evaluates it on its own training data without any filter.
 * <p>
 * The model is built with {@code k = 3} and {@link EuclideanDistance} from 15 seven-coordinate rows whose
 * label is taken via {@code Vectorizer.LabelCoordinate.FIRST}. The {@code MetricName.RSS} score returned by
 * {@code Evaluator.evaluate} must equal {@code 5581012.666666679} within {@code 1e-4}.
 */
@Test
public void testEvaluatorWithoutFilter() {
    // Training rows, keyed by their position in this table (0..14).
    double[][] rows = {
        { 60323, 83.0, 234289, 2356, 1590, 107608, 1947 },
        { 61122, 88.5, 259426, 2325, 1456, 108632, 1948 },
        { 60171, 88.2, 258054, 3682, 1616, 109773, 1949 },
        { 61187, 89.5, 284599, 3351, 1650, 110929, 1950 },
        { 63221, 96.2, 328975, 2099, 3099, 112075, 1951 },
        { 63639, 98.1, 346999, 1932, 3594, 113270, 1952 },
        { 64989, 99.0, 365385, 1870, 3547, 115094, 1953 },
        { 63761, 100.0, 363112, 3578, 3350, 116219, 1954 },
        { 66019, 101.2, 397469, 2904, 3048, 117388, 1955 },
        { 68169, 108.4, 442769, 2936, 2798, 120445, 1957 },
        { 66513, 110.8, 444546, 4681, 2637, 121950, 1958 },
        { 68655, 112.6, 482704, 3813, 2552, 123366, 1959 },
        { 69564, 114.2, 502601, 3931, 2514, 125368, 1960 },
        { 69331, 115.7, 518173, 4806, 2572, 127852, 1961 },
        { 70551, 116.9, 554894, 4007, 2827, 130081, 1962 }
    };

    Map<Integer, Vector> samples = new HashMap<>();
    for (int i = 0; i < rows.length; i++) {
        samples.put(i, VectorUtils.of(rows[i]));
    }

    KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance());

    Vectorizer<Integer, Vector, Integer, Double> featureExtractor =
        new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

    LocalDatasetBuilder<Integer, Vector> builder = new LocalDatasetBuilder<>(samples, parts);
    KNNRegressionModel model = knnTrainer.fit(builder, featureExtractor);

    double rss = Evaluator.evaluate(samples, model, featureExtractor, MetricName.RSS);

    assertEquals(5581012.666666679, rss, 1e-4);
}
Aggregations