Search in sources :

Example 76 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class DocHashCountVectorizerModelMapperTest method testTF.

/**
 * Checks the vector the mapper produces for one sentence when the model's meta row declares
 * the {@code "TF"} feature type.
 *
 * <p>The model is built from two rows: a meta row (key {@code 0L}) carrying {@code numFeatures},
 * {@code minTF} and {@code featureType}, and a data row (key {@code 1048576L}) carrying a JSON map
 * from index to a numeric value. The sentence {@code "a b c d a a "} is mapped and field 0 of the
 * result is compared with a hand-written {@link SparseVector} of size 20.
 *
 * <p>NOTE(review): the expected weights look like term frequencies over six tokens
 * (1/6, 1/6, 3/6, 1/6) - confirm against the mapper implementation.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testTF() throws Exception {
    String modelMeta = "{\"numFeatures\":\"20\",\"minTF\":\"1.0\",\"featureType\":\"\\\"TF\\\"\"}";
    String modelData = "{\"16\":0.4054651081081644,\"7\":0.0,\"13\":0.4054651081081644,\"14\":-0.5108256237659907,"
        + "\"15\":-0.2876820724517809}";
    List<Row> model = Arrays.asList(Row.of(0L, modelMeta), Row.of(1048576L, modelData));

    Params params = new Params().set(DocHashCountVectorizerPredictParams.SELECTED_COL, "sentence");
    DocHashCountVectorizerModelMapper mapper = new DocHashCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    SparseVector expected = new SparseVector(
        20,
        new int[] { 7, 13, 14, 15 },
        new double[] { 0.16666666666666666, 0.16666666666666666, 0.5, 0.16666666666666666 });

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
    // message label the two values correctly.
    assertEquals(expected, mapper.map(Row.of("a b c d a a ")).getField(0));
}
Also used : DocCountVectorizerTrainParams(com.alibaba.alink.params.nlp.DocCountVectorizerTrainParams) DocHashCountVectorizerPredictParams(com.alibaba.alink.params.nlp.DocHashCountVectorizerPredictParams) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 77 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class ToMTableMapperTest method test.

/**
 * Round-trips an {@link MTable} through its string form.
 *
 * <p>A single-row table (int, string, int, timestamp, a null entry, a sparse vector and a float
 * tensor) is rendered with {@code toString()}, passed through a {@link ToMTableMapper} that
 * selects the {@code "mTable"} column, and the string form of the mapped result is compared with
 * the string form of the original table. The table is also printed to standard output.
 *
 * @throws Exception if the mapper fails
 */
@Test
public void test() throws Exception {
    // One row covering every column declared in the schema string below; the fifth value is null.
    Row onlyRow = Row.of(
        1,
        "2",
        0,
        new Timestamp(20000031232012L),
        null,
        new SparseVector(3, new int[] { 1 }, new double[] { 2.0 }),
        new FloatTensor(new float[] { 3.0f }));
    List<Row> rows = new ArrayList<>();
    rows.add(onlyRow);

    String tableSchemaStr = "col0 int, col1 string, label int, ts timestamp" + ", d_vec DENSE_VECTOR" + ", s_vec VECTOR" + ", tensor FLOAT_TENSOR";
    MTable mTable = new MTable(rows, tableSchemaStr);

    // The mapper reads a single STRING column named "mTable".
    String[] inputColNames = new String[] { "mTable" };
    TypeInformation<?>[] inputColTypes = new TypeInformation<?>[] { Types.STRING };
    TableSchema inputSchema = new TableSchema(inputColNames, inputColTypes);
    Params mapperParams = new Params().set(ToMTableParams.SELECTED_COL, "mTable");
    final Mapper mapper = new ToMTableMapper(inputSchema, mapperParams);

    String mTableStr = mTable.toString();
    MTable result = (MTable) mapper.map(Row.of(mTableStr)).getField(0);

    Assert.assertEquals(mTable.toString(), result.toString());
    System.out.println(mTable.toString());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) ArrayList(java.util.ArrayList) ToMTableParams(com.alibaba.alink.params.dataproc.ToMTableParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Timestamp(java.sql.Timestamp) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Mapper(com.alibaba.alink.common.mapper.Mapper) MTable(com.alibaba.alink.common.MTable) FloatTensor(com.alibaba.alink.common.linalg.tensor.FloatTensor) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 78 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorAssemblerMapperTest method testSkip.

/**
 * Exercises {@link VectorAssemblerMapper} with {@code HANDLE_INVALID} set to {@code SKIP} and
 * {@code c0} as the only reserved column.
 *
 * <p>The input row holds a 2-element {@link DenseVector} and an 11-dimensional
 * {@link SparseVector}; field 1 of the mapped row is expected to be a 13-dimensional sparse
 * vector whose first two entries come from the dense vector and whose remaining entries are the
 * sparse entries shifted by two. The output schema is expected to be {@code (c0, out)}.
 *
 * <p>NOTE(review): the schema declares {@code c0} as STRING and {@code c1} as DOUBLE while the
 * row passes vector objects - presumably the mapper inspects the runtime values; confirm.
 *
 * @throws Exception if the mapper fails
 */
@Test
public void testSkip() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] { "c0", "c1" },
        new TypeInformation<?>[] { Types.STRING, Types.DOUBLE });
    TableSchema outSchema = new TableSchema(
        new String[] { "c0", "out" },
        new TypeInformation<?>[] { Types.STRING, VectorTypes.VECTOR });
    Params params = new Params()
        .set(VectorAssemblerParams.SELECTED_COLS, new String[] { "c0", "c1" })
        .set(VectorAssemblerParams.OUTPUT_COL, "out")
        .set(VectorAssemblerParams.HANDLE_INVALID, VectorAssemblerParams.HandleInvalidMethod.SKIP)
        .set(VectorAssemblerParams.RESERVED_COLS, new String[] { "c0" });
    VectorAssemblerMapper mapper = new VectorAssemblerMapper(schema, params);

    Row input = Row.of(
        new DenseVector(new double[] { 3.0, 4.0 }),
        new SparseVector(11, new int[] { 0, 10 }, new double[] { 1.0, 4.0 }));
    SparseVector expected = new SparseVector(13, new int[] { 0, 1, 2, 12 }, new double[] { 3.0, 4.0, 1.0, 4.0 });

    /* skip the invalid data. */
    // Arguments follow JUnit's (expected, actual) order so failure messages are labelled correctly.
    assertEquals(expected, mapper.map(input).getField(1));
    assertEquals(outSchema, mapper.getOutputSchema());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) VectorAssemblerParams(com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) Test(org.junit.Test)

Example 79 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorAssemblerMapperTest method testToDense.

/**
 * Exercises {@link VectorAssemblerMapper} with three selected columns and no reserved-column
 * setting, expecting a dense result.
 *
 * <p>The input row holds a 2-element {@link DenseVector}, the number {@code 3.0} and a
 * 3-dimensional {@link SparseVector}; field 3 of the mapped row is expected to be the
 * 6-element dense vector {@code [3, 4, 3, 1, 0, 4]}. The output schema is expected to be the
 * three input columns followed by {@code out}.
 *
 * @throws Exception if the mapper fails
 */
@Test
public void testToDense() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] { "c0", "c1", "c2" },
        new TypeInformation<?>[] { Types.STRING, Types.DOUBLE, Types.STRING });
    TableSchema outSchema = new TableSchema(
        new String[] { "c0", "c1", "c2", "out" },
        new TypeInformation<?>[] { Types.STRING, Types.DOUBLE, Types.STRING, VectorTypes.VECTOR });
    Params params = new Params()
        .set(VectorAssemblerParams.SELECTED_COLS, new String[] { "c0", "c1", "c2" })
        .set(VectorAssemblerParams.OUTPUT_COL, "out");
    VectorAssemblerMapper mapper = new VectorAssemblerMapper(schema, params);

    Row input = Row.of(
        new DenseVector(new double[] { 3.0, 4.0 }),
        3.0,
        new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, 4.0 }));
    DenseVector expected = new DenseVector(new double[] { 3.0, 4.0, 3.0, 1.0, 0.0, 4.0 });

    /* join the DenseVector, the number and the SparseVector together. the fourth field shows the result */
    // Arguments follow JUnit's (expected, actual) order so failure messages are labelled correctly.
    assertEquals(expected, mapper.map(input).getField(3));
    assertEquals(outSchema, mapper.getOutputSchema());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) VectorAssemblerParams(com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) Test(org.junit.Test)

Example 80 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorAssemblerMapperTest method testToSparse.

/**
 * Exercises {@link VectorAssemblerMapper} with three selected columns and {@code c0} as the only
 * reserved column, expecting a sparse result.
 *
 * <p>The input row holds a 2-element {@link DenseVector}, the number {@code 3.0} and an
 * 11-dimensional {@link SparseVector}; field 1 of the mapped row is expected to be a
 * 14-dimensional sparse vector with the dense entries at indices 0-1, the number at index 2 and
 * the sparse entries shifted by three. The output schema is expected to be {@code (c0, out)}.
 *
 * @throws Exception if the mapper fails
 */
@Test
public void testToSparse() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] { "c0", "c1", "c2" },
        new TypeInformation<?>[] { Types.STRING, Types.DOUBLE, Types.STRING });
    TableSchema outSchema = new TableSchema(
        new String[] { "c0", "out" },
        new TypeInformation<?>[] { Types.STRING, VectorTypes.VECTOR });
    Params params = new Params()
        .set(VectorAssemblerParams.SELECTED_COLS, new String[] { "c0", "c1", "c2" })
        .set(VectorAssemblerParams.OUTPUT_COL, "out")
        .set(VectorAssemblerParams.RESERVED_COLS, new String[] { "c0" });
    VectorAssemblerMapper mapper = new VectorAssemblerMapper(schema, params);

    Row input = Row.of(
        new DenseVector(new double[] { 3.0, 4.0 }),
        3.0,
        new SparseVector(11, new int[] { 0, 10 }, new double[] { 1.0, 4.0 }));
    SparseVector expected = new SparseVector(14, new int[] { 0, 1, 2, 3, 13 }, new double[] { 3.0, 4.0, 3.0, 1.0, 4.0 });

    /* only reserve one column. */
    // Arguments follow JUnit's (expected, actual) order so failure messages are labelled correctly.
    assertEquals(expected, mapper.map(input).getField(1));
    assertEquals(outSchema, mapper.getOutputSchema());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) VectorAssemblerParams(com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) Test(org.junit.Test)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector): 125, Test (org.junit.Test): 63, DenseVector (com.alibaba.alink.common.linalg.DenseVector): 60, Params (org.apache.flink.ml.api.misc.param.Params): 45, Row (org.apache.flink.types.Row): 45, Vector (com.alibaba.alink.common.linalg.Vector): 40, TableSchema (org.apache.flink.table.api.TableSchema): 27, ArrayList (java.util.ArrayList): 21, TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 15, HashMap (java.util.HashMap): 12, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 12, List (java.util.List): 11, DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix): 10, MTable (com.alibaba.alink.common.MTable): 7, BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 6, CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 6, Map (java.util.Map): 6, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 5, VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams): 5, OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams): 5