Search in sources :

Example 81 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

From the class VectorImputerMapperTest, method testValue:

/**
 * Checks that a model configured with the {@code VALUE} strategy and a fill value of -7.0
 * replaces a NaN entry of a sparse input vector with that fill value.
 *
 * <p>The input holds 1.0 at index 0 and NaN at index 2; the mapped vector is expected to hold
 * 1.0 at index 0 and -7.0 at index 2.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testValue() throws Exception {
    List<Row> model = Arrays.asList(
        Row.of(0L, "{\"selectedCol\":\"\\\"vec\\\"\",\"fillValue\":\"\\\"-7.0\\\"\",\"strategy\":\"\\\"VALUE\\\"\"}", null));
    TableSchema dataSchema = new TableSchema(new String[] { "vec" }, new TypeInformation<?>[] { Types.STRING });
    Params params = new Params();

    VectorImputerModelMapper mapper = new VectorImputerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    SparseVector input = new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, Double.NaN });
    SparseVector expected = new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, -7.0 });

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    assertEquals(expected, mapper.map(Row.of(input)).getField(0));
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 82 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

From the class VectorImputerMapperTest, method testSparseMax:

/**
 * Checks that a model configured with the {@code max} strategy replaces a NaN entry of a
 * sparse input vector.
 *
 * <p>The input holds 1.0 at index 0 and NaN at index 2; the mapped vector is expected to hold
 * 2.0 at index 2, which equals the entry at the same index of the model's second row
 * (presumably the per-dimension maximum -- confirm against the model format).
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testSparseMax() throws Exception {
    List<Row> model = Arrays.asList(
        Row.of(0L, "{\"selectedCol\":\"\\\"vec\\\"\",\"strategy\":\"\\\"max\\\"\"}", null),
        Row.of(1048576L, "[1.3333333333333333,0.0,2.0]", null));
    TableSchema dataSchema = new TableSchema(new String[] { "vec" }, new TypeInformation<?>[] { Types.STRING });
    Params params = new Params();

    VectorImputerModelMapper mapper = new VectorImputerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    SparseVector input = new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, Double.NaN });
    SparseVector expected = new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, 2.0 });

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    assertEquals(expected, mapper.map(Row.of(input)).getField(0));
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 83 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

From the class VectorMinMaxScalerMapperTest, method testSparse:

/**
 * Checks that the min-max scaler model mapper turns a sparse input vector into the expected
 * dense vector.
 *
 * <p>The model is configured with {@code min} 0.0 and {@code max} 1.0 plus two data rows
 * (presumably the per-dimension minima and maxima -- confirm against the model format).
 * The input holds 1.0 at index 0 and 2.0 at index 2; the expected result is the dense vector
 * {0.4, 0.5, 1.0}.
 *
 * @throws Exception if loading the model, opening the mapper or mapping the row fails
 */
@Test
public void testSparse() throws Exception {
    List<Row> model = Arrays.asList(
        Row.of(0L, "{\"selectedCol\":\"\\\"vec\\\"\",\"min\":\"0.0\",\"max\":\"1.0\"}", null),
        Row.of(1048576L, "[-1.0,0,-3.0]", null),
        Row.of(2097152L, "[4.0,0,2.0]", null));
    TableSchema dataSchema = new TableSchema(new String[] { "vec" }, new TypeInformation<?>[] { Types.STRING });
    Params params = new Params();

    VectorMinMaxScalerModelMapper mapper = new VectorMinMaxScalerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);
    mapper.open();
    try {
        SparseVector input = new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, 2.0 });
        DenseVector expected = new DenseVector(new double[] { 0.4, 0.5, 1.0 });

        // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
        // failure message label the two values correctly.
        assertEquals(expected, mapper.map(Row.of(input)).getField(0));
    } finally {
        // Close the mapper even when the assertion fails so an opened mapper is never left behind.
        mapper.close();
    }
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) Test(org.junit.Test)

Example 84 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

From the class VectorStandardScalerMapperTest, method testSparse:

/**
 * Checks that the standard scaler model mapper, with both {@code withMean} and
 * {@code withStd} enabled, turns a sparse input vector into the expected dense vector.
 *
 * <p>The model carries two data rows (presumably the per-dimension means and standard
 * deviations -- confirm against the model format). The input holds 1.0 at index 0 and 2.0 at
 * index 2; the expected result is the dense vector
 * {-0.13245323570650433, -1.0, 0.5773502691896257}.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testSparse() throws Exception {
    List<Row> model = Arrays.asList(
        Row.of(0L, "{\"withMean\":\"true\",\"selectedCol\":\"\\\"vec\\\"\",\"withStd\":\"true\"}", null),
        Row.of(1048576L, "[1.3333333333333333,1.0,0.3333333333333333]", null),
        Row.of(2097152L, "[2.5166114784235836,1.0,2.886751345948129]", null));
    TableSchema dataSchema = new TableSchema(new String[] { "vec" }, new TypeInformation<?>[] { Types.STRING });
    Params params = new Params();

    VectorStandardScalerModelMapper mapper = new VectorStandardScalerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    SparseVector input = new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, 2.0 });
    DenseVector expected = new DenseVector(new double[] { -0.13245323570650433, -1.0, 0.5773502691896257 });

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    assertEquals(expected, mapper.map(Row.of(input)).getField(0));
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) Test(org.junit.Test)

Example 85 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

From the class OneHotModelMapperTest, method testDropLast:

/**
 * Checks the one-hot model mapper output when {@code DROP_LAST} is enabled and the encoding
 * is {@code VECTOR}, selecting the columns {@code cnt}, {@code word} and {@code docid}.
 *
 * <p>Two rows are mapped: a fully populated row and the shared {@code nullElseRow} fixture.
 * Each is expected to yield a row of three sparse vectors of sizes 4, 8 and 6.
 *
 * @throws Exception if loading the model or mapping a row fails
 */
@Test
public void testDropLast() throws Exception {
    TableSchema dataSchema = new TableSchema(
        new String[] { "docid", "word", "cnt" },
        new TypeInformation<?>[] { Types.STRING, Types.STRING, Types.LONG });
    Params params = new Params()
        .set(OneHotPredictParams.ENCODE, HasEncodeWithoutWoe.Encode.VECTOR)
        .set(OneHotPredictParams.SELECTED_COLS, new String[] { "cnt", "word", "docid" })
        .set(OneHotPredictParams.DROP_LAST, true);

    OneHotModelMapper mapper = new OneHotModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    Row expectedForKnownRow = Row.of(
        new SparseVector(4),
        new SparseVector(8, new int[] { 5 }, new double[] { 1.0 }),
        new SparseVector(6, new int[] { 2 }, new double[] { 1.0 }));
    assertEquals(expectedForKnownRow, mapper.map(Row.of("doc0", "天", 4L)));

    Row expectedForNullElseRow = Row.of(
        new SparseVector(4, new int[] { 2 }, new double[] { 1.0 }),
        new SparseVector(8, new int[] { 7 }, new double[] { 1.0 }),
        new SparseVector(6, new int[] { 2 }, new double[] { 1.0 }));
    assertEquals(expectedForNullElseRow, mapper.map(nullElseRow));
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) OneHotPredictParams(com.alibaba.alink.params.feature.OneHotPredictParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector): 125; Test (org.junit.Test): 63; DenseVector (com.alibaba.alink.common.linalg.DenseVector): 60; Params (org.apache.flink.ml.api.misc.param.Params): 45; Row (org.apache.flink.types.Row): 45; Vector (com.alibaba.alink.common.linalg.Vector): 40; TableSchema (org.apache.flink.table.api.TableSchema): 27; ArrayList (java.util.ArrayList): 21; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 15; HashMap (java.util.HashMap): 12; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 12; List (java.util.List): 11; DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix): 10; MTable (com.alibaba.alink.common.MTable): 7; BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 6; CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 6; Map (java.util.Map): 6; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 5; VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams): 5; OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams): 5