use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class VectorSizeHintTest method pipelineStreamTest.
/**
 * Runs {@link VectorSizeHint} (selected column {@code c0}, output column
 * {@code filter_result}, size 8) over the stream operator returned by
 * {@code getData(false)}, collects the output through a
 * {@link CollectSinkStreamOp}, and checks the vector found in field 4 of the
 * first two rows after sorting on field 0.
 *
 * @throws Exception if building or executing the stream job fails
 */
@Test
public void pipelineStreamTest() throws Exception {
    // Wildcard types instead of raw types keep the generic signatures checked.
    StreamOperator<?> hinted = new VectorSizeHint()
        .setSelectedCol("c0")
        .setOutputCol("filter_result")
        .setSize(8)
        .transform((StreamOperator<?>) getData(false));
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(hinted);
    StreamOperator.execute();

    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    // Sort on field 0 so the row order is deterministic before indexing.
    result.sort(new RowComparator(0));

    // Both checked rows are expected to carry the same sparse vector.
    SparseVector expected = new SparseVector(8, new int[] { 1, 2, 7 }, new double[] { 2.0, 3.0, 4.3 });
    // JUnit convention is assertEquals(expected, actual); the swapped order
    // produced misleading "expected ... but was ..." failure messages.
    assertEquals(expected, VectorUtil.getSparseVector(result.get(0).getField(4)));
    assertEquals(expected, VectorUtil.getSparseVector(result.get(1).getField(4)));
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class VectorStandardScalerTest method testPipeline.
/**
 * Fits a {@link VectorStandardScaler} on three in-memory rows and verifies the
 * transformed {@code vec} column in both batch and stream mode.
 *
 * <p>NOTE(review): the hard-coded expected vectors are the mean-centred,
 * standard-deviation-scaled values of the input, so this helper appears to
 * pass only when both flags are {@code true} — confirm against the callers.
 *
 * @param withMean passed to {@code setWithMean} on the scaler
 * @param withStdv passed to {@code setWithStd} on the scaler
 * @throws Exception if fitting, collecting, or executing the stream job fails
 */
public static void testPipeline(boolean withMean, boolean withStdv) throws Exception {
    Row[] rowData = new Row[] { Row.of("0", "1.0 2.0"), Row.of("1", "-1.0 -3.0"), Row.of("2", "4.0 2.0") };
    TableSchema schema = new TableSchema(new String[] { "id", "vec" }, new TypeInformation<?>[] { Types.STRING, Types.STRING });
    // Wildcard types instead of raw types: avoids the unchecked List<Row>
    // assignment from collect() below.
    BatchOperator<?> batchData = new MemSourceBatchOp(Arrays.asList(rowData), schema);
    StreamOperator<?> streamData = new MemSourceStreamOp(Arrays.asList(rowData), schema);
    String selectedColName = "vec";
    VectorStandardScaler scaler = new VectorStandardScaler()
        .setSelectedCol(selectedColName)
        .setWithMean(withMean)
        .setWithStd(withStdv);
    VectorStandardScalerModel denseModel = scaler.fit(batchData);

    // Batch path. Sort on the id field so rows can be checked by index.
    BatchOperator<?> res = denseModel.transform(batchData);
    List<Row> rows = res.getDataSet().collect();
    rows.sort(new RowComparator(0));
    // JUnit convention is assertEquals(expected, actual); the swapped order
    // produced misleading failure messages.
    assertEquals(VectorUtil.getVector("-0.13245323570650433 0.5773502691896257"), rows.get(0).getField(1));
    assertEquals(VectorUtil.getVector("-0.9271726499455304 -1.1547005383792515"), rows.get(1).getField(1));
    assertEquals(VectorUtil.getVector("1.059625885652035 0.5773502691896257"), rows.get(2).getField(1));

    // Stream path: the same model applied to the same rows, gathered via a collect sink.
    CollectSinkStreamOp collectSinkStreamOp = new CollectSinkStreamOp().linkFrom(denseModel.transform(streamData));
    StreamOperator.execute();
    List<Row> result = collectSinkStreamOp.getAndRemoveValues();
    result.sort(new RowComparator(0));
    assertEquals(VectorUtil.getVector("-0.13245323570650433 0.5773502691896257"), VectorUtil.getVector(result.get(0).getField(1)));
    assertEquals(VectorUtil.getVector("-0.9271726499455304 -1.1547005383792515"), VectorUtil.getVector(result.get(1).getField(1)));
    assertEquals(VectorUtil.getVector("1.059625885652035 0.5773502691896257"), VectorUtil.getVector(result.get(2).getField(1)));
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class BucketizerTest method testBucketizer.
/**
 * Applies a {@link Bucketizer} with two cut arrays to six in-memory rows and
 * checks the {@code bucket1} output against the expected bucket indices, first
 * in batch mode and then in stream mode.
 *
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
@Test
public void testBucketizer() throws Exception {
    // Expected (id, bucket1) pairs.
    List<Row> expected = Arrays.asList(
        Row.of(1, 0L),
        Row.of(2, 0L),
        Row.of(3, 1L),
        Row.of(4, 1L),
        Row.of(5, 2L),
        Row.of(6, 3L));

    // Input rows: id, features1, features2.
    Row[] input = new Row[] {
        Row.of(1, -999.9, -999.9),
        Row.of(2, -0.5, -0.2),
        Row.of(3, -0.3, -0.1),
        Row.of(4, 0.0, 0.0),
        Row.of(5, 0.2, 0.4),
        Row.of(6, 999.9, 999.9)
    };
    // One cut array per selected column.
    double[][] cuts = new double[][] {
        { -0.5, 0.0, 0.5 },
        { -0.3, 0.0, 0.3, 0.4 }
    };

    BatchOperator<?> batchSource = new MemSourceBatchOp(input, new String[] { "id", "features1", "features2" });
    StreamOperator<?> streamSource = new MemSourceStreamOp(input, new String[] { "id", "features1", "features2" });

    Bucketizer bucketizer = new Bucketizer()
        .setSelectedCols(new String[] { "features1", "features2" })
        .setOutputCols(new String[] { "bucket1", "bucket2" })
        .setCutsArray(cuts);

    // Batch mode.
    BatchOperator<?> batchBuckets = bucketizer.transform(batchSource).select("id, bucket1");
    List<Row> batchRows = batchBuckets.collect();
    assertListRowEqual(expected, batchRows, 0);

    // Stream mode: link into a collect sink, run the job, then read the sink.
    StreamOperator<?> streamBuckets = bucketizer.transform(streamSource).select("id, bucket1");
    CollectSinkStreamOp sink = streamBuckets.link(new CollectSinkStreamOp());
    StreamOperator.execute();
    List<Row> streamRows = sink.getAndRemoveValues();
    assertListRowEqual(expected, streamRows, 0);
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class FeatureHasherTest method test.
/**
 * Hashes four mixed-type columns into a 100-dimensional sparse vector with
 * {@link FeatureHasher} and compares the {@code features} output to the
 * expected vectors, first in batch mode and then in stream mode.
 *
 * @throws Exception if collecting the batch result or executing the stream job fails
 */
@Test
public void test() throws Exception {
    // Expected (id, features) pairs.
    List<Row> expected = Arrays.asList(
        Row.of(1, new SparseVector(100, new int[] { 9, 38, 45, 95 }, new double[] { 1.0, 1.1, 1.0, 1.0 })),
        Row.of(2, new SparseVector(100, new int[] { 9, 30, 38, 76 }, new double[] { 1.0, 1.0, 1.1, 1.0 })),
        Row.of(3, new SparseVector(100, new int[] { 11, 38, 76, 95 }, new double[] { 1.0, 1.1, 1.0, 1.0 })),
        Row.of(4, new SparseVector(100, new int[] { 11, 38, 45, 95 }, new double[] { 1.0, 2.2, 1.0, 1.0 })));

    // Input rows: id, double, bool, number, str.
    Row[] input = new Row[] {
        Row.of(1, 1.1, true, "2", "A"),
        Row.of(2, 1.1, false, "2", "B"),
        Row.of(3, 1.1, true, "1", "B"),
        Row.of(4, 2.2, true, "1", "A")
    };

    BatchOperator<?> batchSource = new MemSourceBatchOp(input, new String[] { "id", "double", "bool", "number", "str" });
    StreamOperator<?> streamSource = new MemSourceStreamOp(input, new String[] { "id", "double", "bool", "number", "str" });

    FeatureHasher hasher = new FeatureHasher()
        .setSelectedCols(new String[] { "double", "bool", "number", "str" })
        .setNumFeatures(100)
        .setOutputCol("features");

    // Batch mode.
    BatchOperator<?> batchFeatures = hasher.transform(batchSource).select("id, features");
    List<Row> batchRows = batchFeatures.collect();
    assertListRowEqual(expected, batchRows, 0);

    // Stream mode: link into a collect sink, run the job, then read the sink.
    StreamOperator<?> streamFeatures = hasher.transform(streamSource).select("id, features");
    CollectSinkStreamOp sink = streamFeatures.link(new CollectSinkStreamOp());
    StreamOperator.execute();
    List<Row> streamRows = sink.getAndRemoveValues();
    assertListRowEqual(expected, streamRows, 0);
}
use of com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp in project Alink by alibaba.
the class KMeansTest method testKmeans.
/**
 * Fits a single-stage {@link Pipeline} containing a {@link KMeans} estimator
 * (k = 2) on {@code inputBatchOp}, then passes the {@code id} and
 * {@code distance} columns of the batch and stream predictions to
 * {@code verifyPredResult}.
 *
 * @throws Exception if fitting, collecting, or executing the stream job fails
 */
@Test
public void testKmeans() throws Exception {
    KMeans estimator = new KMeans()
        .setVectorCol("vector")
        .setPredictionCol("pred")
        .setPredictionDistanceCol("distance")
        .setK(2);
    Pipeline pipeline = new Pipeline().add(estimator);
    PipelineModel fitted = pipeline.fit(inputBatchOp);

    // Batch prediction.
    BatchOperator<?> batchPrediction = fitted
        .transform(inputBatchOp)
        .select(new String[] { "id", "distance" });
    verifyPredResult(batchPrediction.collect());

    // Stream prediction: link into a collect sink, run the job, then read the sink.
    StreamOperator<?> streamPrediction = fitted
        .transform(inputStreamOp)
        .select(new String[] { "id", "distance" });
    CollectSinkStreamOp collector = streamPrediction.link(new CollectSinkStreamOp());
    StreamOperator.execute();
    verifyPredResult(collector.getAndRemoveValues());
}
Aggregations