Usage of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in the Apache Ignite project.
Class StandardScalerExample, method main.
/**
 * Runs the standard scaler example.
 * <p>
 * Starts an Ignite node from {@code examples/config/example-ignite.xml}, creates the example cache, fits a
 * {@link StandardScalerTrainer} on it, wraps the resulting preprocessor in a {@link SimpleDataset} and prints
 * its description via {@link DatasetHelper}. The cache is destroyed and standard output is flushed on exit,
 * whether or not the example succeeded.
 *
 * @param args Command line arguments (not used).
 * @throws Exception If the example fails.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Standard scaler example started.");

        IgniteCache<Integer, Vector> data = null;
        try {
            data = createCache(ignite);

            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1, 2);

            // Defines the preprocessor that scales features.
            Preprocessor<Integer, Vector> preprocessor = new StandardScalerTrainer<Integer, Vector>().fit(ignite, data, vectorizer);

            // Creates a cache based simple dataset containing features and providing standard dataset API.
            try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, data, preprocessor)) {
                new DatasetHelper(dataset).describe();
            }

            System.out.println(">>> Standard scaler example completed.");
        } finally {
            // createCache may have thrown before 'data' was assigned; destroying unconditionally would raise a
            // NullPointerException that hides the original failure.
            if (data != null) {
                data.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Usage of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in the Apache Ignite project.
Class OneHotEncoderPreprocessorTest, method testTwoCategorialFeatureAndTwoDoubleFeatures.
/**
 * Applies a one-hot encoder to four rows in which columns 0 and 2 are registered as categorical (with
 * explicit encoding tables) and columns 1 and 3 hold doubles, then compares every encoded row with the
 * expected feature array.
 */
@Test
public void testTwoCategorialFeatureAndTwoDoubleFeatures() {
    Vector[] data = new Vector[] {
        new DenseVector(new Serializable[] { "42", 1.0, "M", 2.0 }),
        new DenseVector(new Serializable[] { "43", 2.0, "F", 3.0 }),
        new DenseVector(new Serializable[] { "42", 3.0, Double.NaN, 4.0 }),
        new DenseVector(new Serializable[] { "42", 4.0, "F", 5.0 })
    };

    // Encoding table for column 0.
    HashMap<String, Integer> firstColumnCodes = new HashMap<>();
    firstColumnCodes.put("42", 0);
    firstColumnCodes.put("43", 1);

    // Encoding table for column 2; the row holding Double.NaN is expected to land in the third slot,
    // presumably through the empty-string key -- verify against the encoder implementation.
    HashMap<String, Integer> thirdColumnCodes = new HashMap<>();
    thirdColumnCodes.put("F", 0);
    thirdColumnCodes.put("M", 1);
    thirdColumnCodes.put("", 2);

    // Slots 1 and 3 intentionally stay null: those columns are not encoded.
    HashMap[] encodingValues = new HashMap[4];
    encodingValues[0] = firstColumnCodes;
    encodingValues[2] = thirdColumnCodes;

    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);
    handledIndices.add(2);

    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2, 3);

    OneHotEncoderPreprocessor<Integer, Vector> preprocessor =
        new OneHotEncoderPreprocessor<Integer, Vector>(encodingValues, vectorizer, handledIndices);

    double[][] postProcessedData = new double[][] {
        { 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0 },
        { 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0 },
        { 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0 },
        { 4.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0 }
    };

    for (int row = 0; row < data.length; row++) {
        double[] encoded = preprocessor.apply(row, data[row]).features().asArray();

        assertArrayEquals(postProcessedData[row], encoded, 1e-8);
    }
}
Usage of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in the Apache Ignite project.
Class OneHotEncoderPreprocessorTest, method testOneCategorialFeature.
/**
 * Applies a one-hot encoder to three single-column rows whose only column (index 0) is categorical and
 * compares every encoded row with the expected two-element feature array.
 */
@Test
public void testOneCategorialFeature() {
    Vector[] data = new Vector[] {
        new DenseVector(new Serializable[] { "42" }),
        new DenseVector(new Serializable[] { "43" }),
        new DenseVector(new Serializable[] { "42" })
    };

    // Encoding table for the single categorical column.
    HashMap<String, Integer> columnCodes = new HashMap<>();
    columnCodes.put("42", 0);
    columnCodes.put("43", 1);

    HashMap[] encodingValues = new HashMap[] { columnCodes };

    HashSet<Integer> handledIndices = new HashSet<>();
    handledIndices.add(0);

    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0);

    OneHotEncoderPreprocessor<Integer, Vector> preprocessor =
        new OneHotEncoderPreprocessor<Integer, Vector>(encodingValues, vectorizer, handledIndices);

    double[][] postProcessedData = new double[][] {
        { 1.0, 0.0 },
        { 0.0, 1.0 },
        { 1.0, 0.0 }
    };

    for (int row = 0; row < data.length; row++) {
        double[] encoded = preprocessor.apply(row, data[row]).features().asArray();

        assertArrayEquals(postProcessedData[row], encoded, 1e-8);
    }
}
Usage of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in the Apache Ignite project.
Class EncoderTrainerTest, method testFitWithUnknownStringValueInTheGivenData.
/**
 * Tests that a string encoder produced by {@code fit()} throws {@link UnknownCategorialValueException}
 * when it is applied to a vector holding values that were not present in the training data.
 */
@Test
public void testFitWithUnknownStringValueInTheGivenData() {
    Map<Integer, Vector> data = new HashMap<>();
    data.put(1, VectorUtils.of(3.0, 0.0));
    data.put(2, VectorUtils.of(3.0, 12.0));
    data.put(3, VectorUtils.of(3.0, 12.0));
    data.put(4, VectorUtils.of(2.0, 45.0));
    data.put(5, VectorUtils.of(2.0, 45.0));
    data.put(6, VectorUtils.of(14.0, 12.0));

    final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1);

    DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(data, parts);

    EncoderTrainer<Integer, Vector> strEncoderTrainer = new EncoderTrainer<Integer, Vector>()
        .withEncoderType(EncoderType.STRING_ENCODER)
        .withEncodedFeature(0)
        .withEncodedFeature(1);

    EncoderPreprocessor<Integer, Vector> preprocessor = strEncoderTrainer.fit(TestUtils.testEnvBuilder(), datasetBuilder, vectorizer);

    try {
        preprocessor.apply(7, new DenseVector(new Serializable[] { "Monday", "September" })).features().asArray();

        // Reached only when no exception was thrown, i.e. the unknown values were silently accepted.
        fail("UnknownCategorialFeatureValue");
    } catch (UnknownCategorialValueException ignored) {
        // Expected: the encoder was trained on numeric values only, so these strings are unknown to it.
    }
}
Usage of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in the Apache Ignite project.
Class EncoderTrainerTest, method testFitOnStringCategorialFeaturesWithReversedOrder.
/**
 * Fits a string encoder configured with {@link EncoderSortingStrategy#FREQUENCY_ASC} on six two-column
 * string rows and checks the codes it assigns to the row {@code {"Monday", "September"}}.
 */
@Test
public void testFitOnStringCategorialFeaturesWithReversedOrder() {
    Map<Integer, Vector> trainingRows = new HashMap<>();
    trainingRows.put(1, new DenseVector(new Serializable[] { "Monday", "September" }));
    trainingRows.put(2, new DenseVector(new Serializable[] { "Monday", "August" }));
    trainingRows.put(3, new DenseVector(new Serializable[] { "Monday", "August" }));
    trainingRows.put(4, new DenseVector(new Serializable[] { "Friday", "June" }));
    trainingRows.put(5, new DenseVector(new Serializable[] { "Friday", "June" }));
    trainingRows.put(6, new DenseVector(new Serializable[] { "Sunday", "August" }));

    final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1);

    DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(trainingRows, parts);

    // Both columns are encoded.
    EncoderTrainer<Integer, Vector> trainer = new EncoderTrainer<Integer, Vector>()
        .withEncoderType(EncoderType.STRING_ENCODER)
        .withEncoderIndexingStrategy(EncoderSortingStrategy.FREQUENCY_ASC)
        .withEncodedFeature(0)
        .withEncodedFeature(1);

    EncoderPreprocessor<Integer, Vector> encoder = trainer.fit(TestUtils.testEnvBuilder(), datasetBuilder, vectorizer);

    Vector probe = new DenseVector(new Serializable[] { "Monday", "September" });

    double[] expected = new double[] { 2.0, 0.0 };
    double[] actual = encoder.apply(7, probe).features().asArray();

    assertArrayEquals(expected, actual, 1e-8);
}
Aggregations