Search in sources :

Example 16 with DummyVectorizer

Use of DummyVectorizer in project ignite by apache.

the class ImputingExample method main.

/** Run example. */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Imputing example started.");
        IgniteCache<Integer, Vector> data = null;
        try {
            data = createCache(ignite);
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1, 2);
            // Defines second preprocessor that imputing features.
            Preprocessor<Integer, Vector> preprocessor = new ImputerTrainer<Integer, Vector>().fit(ignite, data, vectorizer);
            // Creates a cache based simple dataset containing features and providing standard dataset API.
            try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, data, preprocessor)) {
                new DatasetHelper(dataset).describe();
            System.out.println(">>> Imputing example completed.");
        } finally {
    } finally {
Also used: DummyVectorizer, Ignite (org.apache.ignite.Ignite), Vector, DenseVector, DatasetHelper

Example 17 with DummyVectorizer

Use of DummyVectorizer in project ignite by apache.

the class LinearRegressionExportImportExample method main.

/** Run example. */
public static void main(String[] args) throws IOException {
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        Path jsonMdlPath = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);
            System.out.println("\n>>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer();
            System.out.println("\n>>> Perform the training to get the model.");
            LinearRegressionModel mdl = trainer.fit(ignite, dataCache, new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST));
            System.out.println("\n>>> Exported LinearRegression model: " + mdl);
            double rmse = Evaluator.evaluate(dataCache, mdl, new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST), MetricName.RMSE);
            System.out.println("\n>>> RMSE for exported LinearRegression model: " + rmse);
            jsonMdlPath = Files.createTempFile(null, null);
            LinearRegressionModel modelImportedFromJSON = LinearRegressionModel.fromJSON(jsonMdlPath);
            System.out.println("\n>>> Imported LinearRegression model: " + modelImportedFromJSON);
            rmse = Evaluator.evaluate(dataCache, mdl, new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST), MetricName.RMSE);
            System.out.println("\n>>> RMSE for imported LinearRegression model: " + rmse);
            System.out.println("\n>>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            if (dataCache != null)
            if (jsonMdlPath != null)
    } finally {
Also used: Path (java.nio.file.Path), LinearRegressionLSQRTrainer, SandboxMLCache, LinearRegressionModel, DummyVectorizer, Ignite (org.apache.ignite.Ignite), Vector

Example 18 with DummyVectorizer

Use of DummyVectorizer in project ignite by apache.

the class EncoderTrainerTest method testFitOnStringCategorialFeaturesWithFrequencyEncoding.

/** Tests {@code fit()} method. */
public void testFitOnStringCategorialFeaturesWithFrequencyEncoding() {
    Map<Integer, Vector> data = new HashMap<>();
    data.put(1, new DenseVector(new Serializable[] { "Monday", "September" }));
    data.put(2, new DenseVector(new Serializable[] { "Monday", "August" }));
    data.put(3, new DenseVector(new Serializable[] { "Monday", "August" }));
    data.put(4, new DenseVector(new Serializable[] { "Friday", "June" }));
    data.put(5, new DenseVector(new Serializable[] { "Friday", "June" }));
    data.put(6, new DenseVector(new Serializable[] { "Sunday", "August" }));
    final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1);
    DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(data, parts);
    EncoderTrainer<Integer, Vector> strEncoderTrainer = new EncoderTrainer<Integer, Vector>().withEncoderType(EncoderType.FREQUENCY_ENCODER).withEncodedFeature(0).withEncodedFeature(1);
    EncoderPreprocessor<Integer, Vector> preprocessor = strEncoderTrainer.fit(TestUtils.testEnvBuilder(), datasetBuilder, vectorizer);
    assertArrayEquals(new double[] { 0.5, 0.166 }, preprocessor.apply(7, new DenseVector(new Serializable[] { "Monday", "September" })).features().asArray(), 0.1);
    assertArrayEquals(new double[] { 0.33, 0.5 }, preprocessor.apply(7, new DenseVector(new Serializable[] { "Friday", "August" })).features().asArray(), 0.1);
    assertArrayEquals(new double[] { 0.166, 0.33 }, preprocessor.apply(7, new DenseVector(new Serializable[] { "Sunday", "June" })).features().asArray(), 0.1);
Also used: Serializable, HashMap (java.util.HashMap), DummyVectorizer, LocalDatasetBuilder, Vector, DenseVector, DenseVector, TrainerTest, Test (org.junit.Test)

Example 19 with DummyVectorizer

Use of DummyVectorizer in project ignite by apache.

the class StringEncoderPreprocessorTest method testApply.

/** Tests {@code apply()} method. */
public void testApply() {
    Vector[] data = new Vector[] { new DenseVector(new Serializable[] { "1", "Moscow", "A" }), new DenseVector(new Serializable[] { "2", "Moscow", "B" }), new DenseVector(new Serializable[] { "2", "Moscow", "B" }) };
    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);
    StringEncoderPreprocessor<Integer, Vector> preprocessor = new StringEncoderPreprocessor<Integer, Vector>(new HashMap[] { new HashMap() {

            put("1", 1);
            put("2", 0);
    }, new HashMap() {

            put("Moscow", 0);
    }, new HashMap() {

            put("A", 1);
            put("B", 0);
    } }, vectorizer, new HashSet() {

    double[][] postProcessedData = new double[][] { { 1.0, 0.0, 1.0 }, { 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0 } };
    for (int i = 0; i < data.length; i++) assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]).features().asArray(), 1e-8);
Also used: HashMap (java.util.HashMap), StringEncoderPreprocessor, DummyVectorizer, Vector, DenseVector, DenseVector, HashSet (java.util.HashSet), Test (org.junit.Test)

Example 20 with DummyVectorizer

Use of DummyVectorizer in project ignite by apache.

the class TargetEncoderPreprocessorTest method testApply.

/** Tests {@code apply()} method. */
public void testApply() {
    Vector[] data = new Vector[] { new DenseVector(new Serializable[] { "1", "Moscow", "A" }), new DenseVector(new Serializable[] { "2", "Moscow", "B" }), new DenseVector(new Serializable[] { "3", "Moscow", "B" }) };
    Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(0, 1, 2);
    TargetEncoderPreprocessor<Integer, Vector> preprocessor = new TargetEncoderPreprocessor<>(new TargetEncodingMeta[] { // feature 0
    new TargetEncodingMeta().withGlobalMean(0.5).withCategoryMean(new HashMap<String, Double>() {

            // category "1" avg mean = 1.0
            put("1", 1.0);
            // category "2" avg mean = 0.0
            put("2", 0.0);
    }), // feature 1
    new TargetEncodingMeta().withGlobalMean(0.1).withCategoryMean(new HashMap<String, Double>() {
    }), // feature 2
    new TargetEncodingMeta().withGlobalMean(0.1).withCategoryMean(new HashMap<String, Double>() {

            // category "A" avg mean 1.0
            put("A", 1.0);
            // category "B" avg mean 2.0
            put("B", 2.0);
    }) }, vectorizer, new HashSet<Integer>() {

    double[][] postProcessedData = new double[][] { { // "1" contains in dict => use category mean 1.0
    1.0, // "Moscow" not contains in dict => use global 0.1
    0.1, // "A" contains in dict => use category mean 1.0
    1.0 }, { // "2" contains in dict => use category mean 0.0
    0.0, // "Moscow" not contains in dict => use global 0.1
    0.1, // "B" contains in dict => use category mean 2.0
    2.0 }, { // "3" not contains in dict => use global mean 0.5
    0.5, // "Moscow" not contains in dict => use global 0.1
    0.1, // "B" contains in dict => use category mean 2.0
    2.0 } };
    for (int i = 0; i < data.length; i++) assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]).features().asArray(), 1e-8);
Also used: HashMap (java.util.HashMap), DummyVectorizer, TargetEncoderPreprocessor, TargetEncodingMeta, Vector, DenseVector, DenseVector, Test (org.junit.Test)


DummyVectorizer, Vector, DenseVector, Ignite (org.apache.ignite.Ignite) 13, HashMap (java.util.HashMap) 10, Test (org.junit.Test) 10, DatasetHelper, HashSet (java.util.HashSet) 6, SandboxMLCache, Serializable, IgniteCache (org.apache.ignite.IgniteCache) 4, OneHotEncoderPreprocessor, TrainerTest, LocalDatasetBuilder, Cache (javax.cache.Cache) 2, Ignition (org.apache.ignite.Ignition) 2, Vectorizer, UnknownCategorialValueException, Preprocessor, LinearRegressionLSQRTrainer