Search in sources :

Example 56 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FeatureHasherMapperTest method test3.

/**
 * Maps two rows with NUM_FEATURES set to 10 and the "double" column listed in
 * CATEGORICAL_COLS, then checks the vector stored at field index 4 of each mapped row.
 *
 * @throws Exception if opening the mapper or mapping a row fails
 */
@Test
public void test3() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] { "double", "bool", "number", "str" },
        new TypeInformation<?>[] { Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING() });
    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] { "double", "bool", "number", "str" })
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.NUM_FEATURES, 10)
        .set(FeatureHasherParams.CATEGORICAL_COLS, new String[] { "double" });
    FeatureHasherMapper mapper = new FeatureHasherMapper(schema, params);
    mapper.open();
    // assertEquals takes (expected, actual); keeping that order makes failure messages label the values correctly.
    assertEquals(
        new SparseVector(10, new int[] { 0, 5, 9 }, new double[] { 1.0, 2.0, 1.0 }),
        mapper.map(Row.of(1.1, true, "2", "A")).getField(4));
    assertEquals(
        new SparseVector(10, new int[] { 1, 5, 6 }, new double[] { 2.0, 1.0, 1.0 }),
        mapper.map(Row.of(2.1, true, "1", "B")).getField(4));
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) FeatureHasherParams(com.alibaba.alink.params.feature.FeatureHasherParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 57 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FeatureHasherMapperTest method test1.

/**
 * Maps two rows with RESERVED_COLS set to an empty array and NUM_FEATURES left unset
 * (the expected vectors have size 262144), then checks the vector at field index 0 and
 * that the output schema consists of the single "output" vector column.
 *
 * @throws Exception if opening the mapper or mapping a row fails
 */
@Test
public void test1() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] { "double", "bool", "number", "str" },
        new TypeInformation<?>[] { Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING() });
    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] { "double", "bool", "number", "str" })
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.RESERVED_COLS, new String[] {});
    FeatureHasherMapper mapper = new FeatureHasherMapper(schema, params);
    mapper.open();
    // assertEquals takes (expected, actual); keeping that order makes failure messages label the values correctly.
    assertEquals(
        new SparseVector(262144, new int[] { 62393, 85133, 120275, 214318 }, new double[] { 1.0, 1.0, 1.0, 1.1 }),
        mapper.map(Row.of(1.1, true, "2", "A")).getField(0));
    assertEquals(
        new SparseVector(262144, new int[] { 76287, 85133, 120275, 214318 }, new double[] { 1.0, 1.0, 1.0, 2.1 }),
        mapper.map(Row.of(2.1, true, "1", "A")).getField(0));
    assertEquals(
        new TableSchema(new String[] { "output" }, new TypeInformation<?>[] { VectorTypes.VECTOR }),
        mapper.getOutputSchema());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) FeatureHasherParams(com.alibaba.alink.params.feature.FeatureHasherParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 58 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FeatureHasherMapperTest method test2.

/**
 * Maps two rows with NUM_FEATURES set to 10 and no reserved-column override, then checks
 * the vector at field index 4 and that the output schema is the four input columns
 * followed by the "output" vector column.
 *
 * @throws Exception if opening the mapper or mapping a row fails
 */
@Test
public void test2() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] { "double", "bool", "number", "str" },
        new TypeInformation<?>[] { Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING() });
    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] { "double", "bool", "number", "str" })
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.NUM_FEATURES, 10);
    FeatureHasherMapper mapper = new FeatureHasherMapper(schema, params);
    mapper.open();
    // assertEquals takes (expected, actual); keeping that order makes failure messages label the values correctly.
    assertEquals(
        new SparseVector(10, new int[] { 5, 8, 9 }, new double[] { 2.0, 1.1, 1.0 }),
        mapper.map(Row.of(1.1, true, "2", "A")).getField(4));
    assertEquals(
        new SparseVector(10, new int[] { 1, 5, 6, 8 }, new double[] { 1.0, 1.0, 1.0, 2.1 }),
        mapper.map(Row.of(2.1, true, "1", "B")).getField(4));
    assertEquals(
        new TableSchema(
            new String[] { "double", "bool", "number", "str", "output" },
            new TypeInformation<?>[] { Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING(), VectorTypes.VECTOR }),
        mapper.getOutputSchema());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) FeatureHasherParams(com.alibaba.alink.params.feature.FeatureHasherParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 59 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class OneHotModelMapperTest method testHandleInvalidVector.

/**
 * Maps {@code nullElseRow} with VECTOR encoding, HANDLE_INVALID set to SKIP and DROP_LAST
 * disabled, first against {@code model} and then against {@code newModel}, and checks the
 * resulting rows (which contain {@code null} entries alongside sparse vectors).
 *
 * @throws Exception if loading a model or mapping the row fails
 */
@Test
public void testHandleInvalidVector() throws Exception {
    Params params = new Params()
        .set(OneHotPredictParams.ENCODE, HasEncodeWithoutWoe.Encode.VECTOR)
        .set(OneHotPredictParams.HANDLE_INVALID, HasHandleInvalid.HandleInvalid.SKIP)
        .set(OneHotPredictParams.SELECTED_COLS, new String[] { "cnt", "word", "docid" })
        .set(OneHotPredictParams.DROP_LAST, false);
    OneHotModelMapper mapper = new OneHotModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);
    // assertEquals takes (expected, actual); keeping that order makes failure messages label the values correctly.
    assertEquals(
        Row.of(
            null,
            new SparseVector(8, new int[] { 7 }, new double[] { 1.0 }),
            new SparseVector(6, new int[] { 2 }, new double[] { 1.0 })),
        mapper.map(nullElseRow));
    mapper.loadModel(newModel);
    assertEquals(
        Row.of(
            null,
            null,
            new SparseVector(5, new int[] { 2 }, new double[] { 1.0 })),
        mapper.map(nullElseRow));
}
Also used : OneHotPredictParams(com.alibaba.alink.params.feature.OneHotPredictParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 60 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class OneHotModelMapperTest method testVector.

/**
 * Maps {@code defaultRow} and {@code nullElseRow} with VECTOR encoding and DROP_LAST
 * disabled, first against {@code model} and then against {@code newModel}, and checks
 * the three sparse vectors produced for each row.
 *
 * @throws Exception if loading a model or mapping a row fails
 */
@Test
public void testVector() throws Exception {
    Params params = new Params()
        .set(OneHotPredictParams.ENCODE, HasEncodeWithoutWoe.Encode.VECTOR)
        .set(OneHotPredictParams.SELECTED_COLS, new String[] { "cnt", "word", "docid" })
        .set(OneHotPredictParams.DROP_LAST, false);
    OneHotModelMapper mapper = new OneHotModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);
    // assertEquals takes (expected, actual); keeping that order makes failure messages label the values correctly.
    assertEquals(
        Row.of(
            new SparseVector(5, new int[] { 2 }, new double[] { 1.0 }),
            new SparseVector(9, new int[] { 5 }, new double[] { 1.0 }),
            new SparseVector(7, new int[] { 2 }, new double[] { 1.0 })),
        mapper.map(defaultRow));
    assertEquals(
        Row.of(
            new SparseVector(5, new int[] { 3 }, new double[] { 1.0 }),
            new SparseVector(9, new int[] { 8 }, new double[] { 1.0 }),
            new SparseVector(7, new int[] { 2 }, new double[] { 1.0 })),
        mapper.map(nullElseRow));
    mapper.loadModel(newModel);
    assertEquals(
        Row.of(
            new SparseVector(4, new int[] { 2 }, new double[] { 1.0 }),
            new SparseVector(8, new int[] { 5 }, new double[] { 1.0 }),
            new SparseVector(6, new int[] { 2 }, new double[] { 1.0 })),
        mapper.map(defaultRow));
    assertEquals(
        Row.of(
            new SparseVector(4, new int[] { 3 }, new double[] { 1.0 }),
            new SparseVector(8, new int[] { 7 }, new double[] { 1.0 }),
            new SparseVector(6, new int[] { 2 }, new double[] { 1.0 })),
        mapper.map(nullElseRow));
}
Also used : OneHotPredictParams(com.alibaba.alink.params.feature.OneHotPredictParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector): 125; Test (org.junit.Test): 63; DenseVector (com.alibaba.alink.common.linalg.DenseVector): 60; Params (org.apache.flink.ml.api.misc.param.Params): 45; Row (org.apache.flink.types.Row): 45; Vector (com.alibaba.alink.common.linalg.Vector): 40; TableSchema (org.apache.flink.table.api.TableSchema): 27; ArrayList (java.util.ArrayList): 21; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 15; HashMap (java.util.HashMap): 12; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 12; List (java.util.List): 11; DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix): 10; MTable (com.alibaba.alink.common.MTable): 7; BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 6; CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 6; Map (java.util.Map): 6; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 5; VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams): 5; OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams): 5