use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class VectorMaxAbsScalerTest method testPipelineI.
/**
 * Fits a {@code VectorMaxAbsScaler} on a three-row in-memory batch source and checks
 * the transformed "vec" column for both the batch and the stream transform path.
 *
 * <p>The expected vectors equal the inputs divided by 4.0, which is the largest
 * absolute value found in each of the two dimensions of the input data.
 *
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
private static void testPipelineI() throws Exception {
    Row[] rowData = new Row[] {
        Row.of("0", "1.0 2.0"),
        Row.of("1", "-1.0 -4.0"),
        Row.of("2", "-4.0 3.0")
    };
    TableSchema schema = new TableSchema(
        new String[] { "id", "vec" },
        new TypeInformation<?>[] { Types.STRING, Types.STRING });
    // Wildcard-typed operators (instead of raw types) keep the collected rows typed as List<Row>.
    BatchOperator<?> batchData = new MemSourceBatchOp(Arrays.asList(rowData), schema);
    StreamOperator<?> streamData = new MemSourceStreamOp(Arrays.asList(rowData), schema);
    String selectedColName = "vec";

    VectorMaxAbsScaler scaler = new VectorMaxAbsScaler().setSelectedCol(selectedColName);
    VectorMaxAbsScalerModel model = scaler.fit(batchData);

    // Batch path: collect, order with RowComparator(0) so positional assertions are stable.
    BatchOperator<?> batchRes = model.transform(batchData);
    List<Row> batchList = batchRes.getDataSet().collect();
    batchList.sort(new RowComparator(0));
    // JUnit convention is assertEquals(expected, actual); the order matters for failure messages.
    assertEquals(VectorUtil.getVector("0.25 0.5"), batchList.get(0).getField(1));
    assertEquals(VectorUtil.getVector("-0.25 -1.0"), batchList.get(1).getField(1));
    assertEquals(VectorUtil.getVector("-1.0 0.75"), batchList.get(2).getField(1));

    // Stream path: the sink must be linked before StreamOperator.execute() runs the job.
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(model.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));
    // The stream field is passed through VectorUtil.getVector before comparison, as in the original.
    assertEquals(VectorUtil.getVector("0.25 0.5"), VectorUtil.getVector(result.get(0).getField(1)));
    assertEquals(VectorUtil.getVector("-0.25 -1.0"), VectorUtil.getVector(result.get(1).getField(1)));
    assertEquals(VectorUtil.getVector("-1.0 0.75"), VectorUtil.getVector(result.get(2).getField(1)));
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class VectorMinMaxScalerTest method testPipeline.
/**
 * Fits a {@code VectorMinMaxScaler} configured with min = -3.0 and max = 2.0 on a
 * three-row in-memory batch source and checks the transformed "vec" column for both
 * the batch and the stream transform path.
 *
 * <p>The expected vectors are the inputs rescaled per dimension so that the smallest
 * input value maps to -3.0 and the largest maps to 2.0.
 *
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
@Test
public void testPipeline() throws Exception {
    Row[] rowData = new Row[] {
        Row.of("0", "1.0 2.0"),
        Row.of("1", "-1.0 -3.0"),
        Row.of("2", "4.0 2.0")
    };
    TableSchema schema = new TableSchema(
        new String[] { "id", "vec" },
        new TypeInformation<?>[] { Types.STRING, Types.STRING });
    // Wildcard-typed operators (instead of raw types) keep the collected rows typed as List<Row>.
    BatchOperator<?> batchData = new MemSourceBatchOp(Arrays.asList(rowData), schema);
    StreamOperator<?> streamData = new MemSourceStreamOp(Arrays.asList(rowData), schema);
    String selectedColName = "vec";

    VectorMinMaxScaler scaler = new VectorMinMaxScaler().setSelectedCol(selectedColName);
    scaler.setMax(2.0);
    scaler.setMin(-3.0);
    VectorMinMaxScalerModel model = scaler.fit(batchData);

    // Batch path: collect, order with RowComparator(0) so positional assertions are stable.
    BatchOperator<?> res = model.transform(batchData);
    List<Row> rows = res.getDataSet().collect();
    rows.sort(new RowComparator(0));
    // JUnit convention is assertEquals(expected, actual); the order matters for failure messages.
    assertEquals(VectorUtil.getVector("-1.0 2.0"), rows.get(0).getField(1));
    assertEquals(VectorUtil.getVector("-3.0 -3.0"), rows.get(1).getField(1));
    assertEquals(VectorUtil.getVector("2.0 2.0"), rows.get(2).getField(1));

    // Stream path: the sink must be linked before StreamOperator.execute() runs the job.
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(model.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));
    // The stream field is passed through VectorUtil.getVector before comparison, as in the original.
    assertEquals(VectorUtil.getVector("-1.0 2.0"), VectorUtil.getVector(result.get(0).getField(1)));
    assertEquals(VectorUtil.getVector("-3.0 -3.0"), VectorUtil.getVector(result.get(1).getField(1)));
    assertEquals(VectorUtil.getVector("2.0 2.0"), VectorUtil.getVector(result.get(2).getField(1)));
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class DocCountVectorizerTest method testDefault.
/**
 * Trains a {@code DocCountVectorizer} with feature type "TF" on the shared rows and
 * verifies that the batch and the stream transform both produce the expected rows.
 *
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
@Test
public void testDefault() throws Exception {
    // The same rows and column names back a batch source and a stream source.
    BatchOperator<?> batchSource = new MemSourceBatchOp(rows, new String[] { "id", "sentence", "label" });
    StreamOperator<?> streamSource = new MemSourceStreamOp(rows, new String[] { "id", "sentence", "label" });

    DocCountVectorizer vectorizer = new DocCountVectorizer()
        .setSelectedCol("sentence")
        .setOutputCol("features")
        .setFeatureType("TF");
    DocCountVectorizerModel fitted = vectorizer.fit(batchSource);

    // Batch path.
    BatchOperator<?> batchPrediction = fitted.transform(batchSource);
    assertListRowEqual(expected, batchPrediction.collect(), 0);

    // Stream path: link the collecting sink, run the job, then read what it gathered.
    StreamOperator<?> streamPrediction = fitted.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamPrediction);
    StreamOperator.execute();
    assertListRowEqual(expected, collector.getAndRemoveValues(), 0);
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class DocHashCountVectorizerTest method testDefault.
/**
 * Trains a {@code DocHashCountVectorizer} with 10 features on the shared rows and
 * verifies, ignoring row order, that the batch and the stream transform both produce
 * the expected rows.
 *
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
@Test
public void testDefault() throws Exception {
    // The same rows and column names back a batch source and a stream source.
    BatchOperator<?> batchSource = new MemSourceBatchOp(rows, new String[] { "id", "sentence", "label" });
    StreamOperator<?> streamSource = new MemSourceStreamOp(rows, new String[] { "id", "sentence", "label" });

    DocHashCountVectorizer vectorizer = new DocHashCountVectorizer()
        .setSelectedCol("sentence")
        .setNumFeatures(10)
        .setOutputCol("res");
    DocHashCountVectorizerModel fitted = vectorizer.fit(batchSource);

    // Batch path.
    BatchOperator<?> batchPrediction = fitted.transform(batchSource);
    assertListRowEqualWithoutOrder(expected, batchPrediction.collect());

    // Stream path: link the collecting sink, run the job, then read what it gathered.
    StreamOperator<?> streamPrediction = fitted.transform(streamSource);
    CollectSinkStreamOp collector = new CollectSinkStreamOp().linkFrom(streamPrediction);
    StreamOperator.execute();
    assertListRowEqualWithoutOrder(expected, collector.getAndRemoveValues());
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class AlsTrainBatchOpTest method testStreamPredict.
/**
 * Runs the five ALS stream recommenders against a model produced by {@code train()}.
 *
 * <p>The rating predictor's output is only printed. The outputs of the other four
 * recommenders are tagged with a {@code rec_type} column, unioned, collected through a
 * {@code CollectSinkStreamOp}, and the total number of collected rows is asserted.
 *
 * @throws Exception if training or executing the stream job fails
 */
@Test
public void testStreamPredict() throws Exception {
    Long envId = MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID;
    StreamOperator<?> streamsamples =
        new MemSourceStreamOp(rows1, new String[] { "uid", "iid", "label" }).setMLEnvironmentId(envId);
    BatchOperator<?> model = train();

    AlsRateRecommStreamOp predictor1 =
        new AlsRateRecommStreamOp(model).setUserCol("uid").setItemCol("iid").setRecommCol("p");
    AlsItemsPerUserRecommStreamOp predictor2 =
        new AlsItemsPerUserRecommStreamOp(model).setUserCol("uid").setRecommCol("p");
    AlsUsersPerItemRecommStreamOp predictor3 =
        new AlsUsersPerItemRecommStreamOp(model).setItemCol("iid").setRecommCol("p");
    AlsSimilarUsersRecommStreamOp predictor4 =
        new AlsSimilarUsersRecommStreamOp(model).setUserCol("uid").setRecommCol("p");
    AlsSimilarItemsRecommStreamOp predictor5 =
        new AlsSimilarItemsRecommStreamOp(model).setItemCol("iid").setRecommCol("p");

    StreamOperator<?> result1 = predictor1.linkFrom(streamsamples);
    StreamOperator<?> result2 = predictor2.linkFrom(streamsamples);
    StreamOperator<?> result3 = predictor3.linkFrom(streamsamples);
    StreamOperator<?> result4 = predictor4.linkFrom(streamsamples);
    StreamOperator<?> result5 = predictor5.linkFrom(streamsamples);

    // Tag each recommender's rows with its origin before they are unioned.
    result2 = result2.select("*, 'AlsItemsPerUserRecommStreamOp' as rec_type");
    result3 = result3.select("*, 'AlsUsersPerItemRecommStreamOp' as rec_type");
    result4 = result4.select("*, 'AlsSimilarUsersRecommStreamOp' as rec_type");
    result5 = result5.select("*, 'AlsSimilarItemsRecommStreamOp' as rec_type");

    // result1 is printed only; it is not part of the collected union below.
    result1.print();
    CollectSinkStreamOp sop = new UnionAllStreamOp()
        .linkFrom(result2, result3, result4, result5)
        .link(new CollectSinkStreamOp());
    StreamOperator.execute();

    List<Row> res = sop.getAndRemoveValues();
    // JUnit convention is assertEquals(expected, actual); the order matters for failure messages.
    Assert.assertEquals(24, res.size());
}
Aggregations