Example use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
From the class LassoRegressionTest, method regressionPipelineTest.
/**
 * Fits a pipeline of three {@link LassoRegression} stages (feature-column input,
 * "vec" vector input, and "svec" vector input) on the in-memory batch source and
 * checks the predictions produced by the fitted model in both batch and stream mode.
 *
 * @throws Exception if fitting, transforming, or executing the stream job fails
 */
@Test
public void regressionPipelineTest() throws Exception {
    BatchOperator<?> vecdata = new MemSourceBatchOp(Arrays.asList(vecRows), veccolNames);
    StreamOperator<?> svecdata = new MemSourceStreamOp(Arrays.asList(vecRows), veccolNames);
    String[] xVars = new String[] { "f0", "f1", "f2" };
    String yVar = "label";
    String vec = "vec";
    String svec = "svec";
    String[] outputCols = new String[] { "label", "linpred", "vlinpred", "svlinpred" };

    LassoRegression lasso = new LassoRegression()
        .setLabelCol(yVar)
        .setFeatureCols(xVars)
        .setLambda(0.01)
        .setMaxIter(20)
        .setOptimMethod("owlqn")
        .setPredictionCol("linpred");
    LassoRegression vlasso = new LassoRegression()
        .setLabelCol(yVar)
        .setVectorCol(vec)
        .setMaxIter(20)
        .setLambda(0.01)
        .setOptimMethod("newton")
        .setPredictionCol("vlinpred")
        .enableLazyPrintModelInfo();
    LassoRegression svlasso = new LassoRegression()
        .setLabelCol(yVar)
        .setVectorCol(svec)
        .setMaxIter(20)
        .setLambda(0.01)
        .setPredictionCol("svlinpred");

    Pipeline pl = new Pipeline().add(lasso).add(vlasso).add(svlasso);
    PipelineModel model = pl.fit(vecdata);

    // Batch prediction.
    BatchOperator<?> result = model.transform(vecdata).select(outputCols);
    assertLassoPredictions(result.collect());

    // Stream prediction: the sink only holds rows after the stream job has executed.
    CollectSinkStreamOp sop = model.transform(svecdata).select(outputCols).link(new CollectSinkStreamOp());
    StreamOperator.execute();
    assertLassoPredictions(sop.getAndRemoveValues());
}

/**
 * Checks the three prediction columns of every row whose label is 16.8 or 6.7
 * against the reference values, with a tolerance of 0.01. Rows with any other
 * label are not checked.
 *
 * @param rows rows laid out as (label, linpred, vlinpred, svlinpred)
 */
private static void assertLassoPredictions(List<Row> rows) {
    for (Row row : rows) {
        double label = (double) row.getField(0);
        // JUnit's signature is assertEquals(expected, actual, delta).
        if (label == 16.8000) {
            Assert.assertEquals(16.784611802507232, (double) row.getField(1), 0.01);
            Assert.assertEquals(16.784611802507232, (double) row.getField(2), 0.01);
            Assert.assertEquals(16.78209421260283, (double) row.getField(3), 0.01);
        } else if (label == 6.7000) {
            Assert.assertEquals(6.7713287283076, (double) row.getField(1), 0.01);
            Assert.assertEquals(6.7713287283076, (double) row.getField(2), 0.01);
            Assert.assertEquals(6.826846826823054, (double) row.getField(3), 0.01);
        }
    }
}
Example use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
From the class RidgeRegressionTest, method regressionPipelineTest.
/**
 * Fits a pipeline of three {@link RidgeRegression} stages (feature-column input,
 * "vec" vector input, and "svec" vector input) on the in-memory batch source and
 * checks the predictions produced by the fitted model in both batch and stream mode.
 *
 * @throws Exception if fitting, transforming, or executing the stream job fails
 */
@Test
public void regressionPipelineTest() throws Exception {
    BatchOperator<?> vecdata = new MemSourceBatchOp(Arrays.asList(vecrows), veccolNames);
    StreamOperator<?> svecdata = new MemSourceStreamOp(Arrays.asList(vecrows), veccolNames);
    String[] xVars = new String[] { "f0", "f1", "f2" };
    String yVar = "label";
    String vec = "vec";
    String svec = "svec";
    String[] outputCols = new String[] { "label", "linpred", "vlinpred", "svlinpred" };

    RidgeRegression ridge = new RidgeRegression()
        .setLabelCol(yVar)
        .setFeatureCols(xVars)
        .setLambda(0.01)
        .setMaxIter(10)
        .setPredictionCol("linpred");
    RidgeRegression vridge = new RidgeRegression()
        .setLabelCol(yVar)
        .setVectorCol(vec)
        .setLambda(0.01)
        .setMaxIter(10)
        .setOptimMethod("newton")
        .setPredictionCol("vlinpred");
    RidgeRegression svridge = new RidgeRegression()
        .setLabelCol(yVar)
        .setVectorCol(svec)
        .setLambda(0.01)
        .setMaxIter(10)
        .setPredictionCol("svlinpred");

    Pipeline pl = new Pipeline().add(ridge).add(vridge).add(svridge);
    PipelineModel model = pl.fit(vecdata);

    // Batch prediction.
    BatchOperator<?> result = model.transform(vecdata).select(outputCols);
    assertRidgePredictions(result.collect());

    // Stream prediction: the sink only holds rows after the stream job has executed.
    CollectSinkStreamOp sop = model.transform(svecdata).select(outputCols).link(new CollectSinkStreamOp());
    StreamOperator.execute();
    assertRidgePredictions(sop.getAndRemoveValues());
}

/**
 * Checks the three prediction columns of every row whose label is 16.8 or 6.7
 * against the reference values, with a tolerance of 0.01. Rows with any other
 * label are not checked.
 *
 * @param rows rows laid out as (label, linpred, vlinpred, svlinpred)
 */
private static void assertRidgePredictions(List<Row> rows) {
    for (Row row : rows) {
        double label = (double) row.getField(0);
        // JUnit's signature is assertEquals(expected, actual, delta).
        if (label == 16.8000) {
            Assert.assertEquals(16.77322547668301, (double) row.getField(1), 0.01);
            Assert.assertEquals(16.620448399254673, (double) row.getField(2), 0.01);
            Assert.assertEquals(16.384437074591887, (double) row.getField(3), 0.01);
        } else if (label == 6.7000) {
            Assert.assertEquals(6.932628087721653, (double) row.getField(1), 0.01);
            Assert.assertEquals(6.775060404865803, (double) row.getField(2), 0.01);
            Assert.assertEquals(7.425378715755974, (double) row.getField(3), 0.01);
        }
    }
}
Example use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
From the class GridSearchCVTest, method testSplit.
/**
 * Runs a train/validation-split grid search over a single-stage pipeline holding a
 * {@link RandomForestClassifier}, then verifies that transforming the training data
 * with the fitted model yields one output row per input row.
 *
 * @throws Exception if fitting or transforming fails
 */
@Test
public void testSplit() throws Exception {
    List<Row> rows = Arrays.asList(
        Row.of(1.0, "A", 0, 0, 0),
        Row.of(2.0, "B", 1, 1, 0),
        Row.of(3.0, "C", 2, 2, 1),
        Row.of(4.0, "D", 3, 3, 1),
        Row.of(1.0, "A", 0, 0, 0),
        Row.of(2.0, "B", 1, 1, 0),
        Row.of(3.0, "C", 2, 2, 1),
        Row.of(4.0, "D", 3, 3, 1),
        Row.of(1.0, "A", 0, 0, 0),
        Row.of(2.0, "B", 1, 1, 0),
        Row.of(3.0, "C", 2, 2, 1));
    String[] colNames = new String[] { "f0", "f1", "f2", "f3", "label" };
    MemSourceBatchOp data = new MemSourceBatchOp(rows, colNames);

    String[] featureColNames = new String[] { colNames[0], colNames[1], colNames[2], colNames[3] };
    String[] categoricalColNames = new String[] { colNames[1] };
    String labelColName = colNames[4];

    RandomForestClassifier rf = new RandomForestClassifier()
        .setFeatureCols(featureColNames)
        .setCategoricalCols(categoricalColNames)
        .setLabelCol(labelColName)
        .setPredictionCol("pred_result")
        .setPredictionDetailCol("pred_detail")
        .setSubsamplingRatio(1.0);
    Pipeline pipeline = new Pipeline(rf);

    // Each grid axis has a single candidate value, so one parameter combination is searched.
    ParamGrid paramGrid = new ParamGrid()
        .addGrid(rf, "SUBSAMPLING_RATIO", new Double[] { 1.0 })
        .addGrid(rf, "NUM_TREES", new Integer[] { 3 });
    BinaryClassificationTuningEvaluator tuningEvaluator = new BinaryClassificationTuningEvaluator()
        .setLabelCol(labelColName)
        .setPredictionDetailCol("pred_detail")
        .setTuningBinaryClassMetric("Accuracy");
    GridSearchTVSplit tvSplit = new GridSearchTVSplit()
        .setEstimator(pipeline)
        .setParamGrid(paramGrid)
        .setTuningEvaluator(tuningEvaluator)
        .setTrainRatio(0.8);

    ModelBase tunedModel = tvSplit.fit(data);

    // Assert on the output instead of only printing it, so the test can actually fail.
    // NOTE(review): assumes the fitted model emits exactly one row per input row — confirm.
    Assert.assertEquals(rows.size(), tunedModel.transform(data).collect().size());
}
Example use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
From the class GridSearchCVTest, method findBestCluster.
/**
 * Grid-searches the KMeans distance type (EUCLIDEAN vs. COSINE) with 2-fold
 * cross-validation over a ColumnsToVector + KMeans pipeline, scoring candidates
 * with {@code TuningClusterMetric.RI}, and checks that the fitted model produces
 * as many output rows as there are entries in {@code testArray}.
 */
@Test
public void findBestCluster() {
    // Pipeline stages: assemble the first two columns into "vector", then cluster on it.
    ColumnsToVector assembler = new ColumnsToVector()
        .setSelectedCols(colNames[0], colNames[1])
        .setVectorCol("vector");
    KMeans clusterer = new KMeans()
        .setVectorCol("vector")
        .setPredictionCol("pred");
    Pipeline stages = new Pipeline().add(assembler).add(clusterer);

    // Candidate distance types to search over.
    ParamGrid distanceGrid = new ParamGrid()
        .addGrid(clusterer, KMeans.DISTANCE_TYPE, new HasKMeansDistanceType.DistanceType[] { EUCLIDEAN, COSINE });

    ClusterTuningEvaluator evaluator = new ClusterTuningEvaluator()
        .setTuningClusterMetric(TuningClusterMetric.RI)
        .setPredictionCol("pred")
        .setVectorCol("vector")
        .setLabelCol("label");

    GridSearchCV search = new GridSearchCV()
        .setEstimator(stages)
        .setParamGrid(distanceGrid)
        .setNumFolds(2)
        .setTuningEvaluator(evaluator);
    GridSearchCVModel bestModel = search.fit(memSourceBatchOp);

    int expectedRowCount = testArray.length;
    int predictedRowCount = bestModel.transform(memSourceBatchOp).collect().size();
    Assert.assertEquals(expectedRowCount, predictedRowCount);
}
Example use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
From the class GridSearchTVSplitTest, method findBestCluster.
/**
 * Grid-searches the KMeans "distanceType" parameter (EUCLIDEAN vs. COSINE) using a
 * train/validation split with a train ratio of 0.5 over a ColumnsToVector + KMeans
 * pipeline, scoring candidates with {@code TuningClusterMetric.RI}, and checks that
 * the fitted model produces as many output rows as there are entries in
 * {@code testArray}.
 *
 * @throws Exception declared by the test signature
 */
@Test
public void findBestCluster() throws Exception {
    // Pipeline stages: assemble the first two columns into "vector", then cluster on it.
    ColumnsToVector assembler = new ColumnsToVector()
        .setSelectedCols(colNames[0], colNames[1])
        .setVectorCol("vector");
    KMeans clusterer = new KMeans()
        .setVectorCol("vector")
        .setPredictionCol("pred");
    Pipeline stages = new Pipeline().add(assembler).add(clusterer);

    // Candidate distance types, addressed by parameter name.
    ParamGrid distanceGrid = new ParamGrid()
        .addGrid(clusterer, "distanceType", new HasKMeansDistanceType.DistanceType[] { EUCLIDEAN, COSINE });

    ClusterTuningEvaluator evaluator = new ClusterTuningEvaluator()
        .setTuningClusterMetric(TuningClusterMetric.RI)
        .setPredictionCol("pred")
        .setVectorCol("vector")
        .setLabelCol("label");

    GridSearchTVSplit search = new GridSearchTVSplit()
        .setEstimator(stages)
        .setParamGrid(distanceGrid)
        .setTrainRatio(0.5)
        .setTuningEvaluator(evaluator);
    GridSearchTVSplitModel bestModel = search.fit(memSourceBatchOp);

    int expectedRowCount = testArray.length;
    int predictedRowCount = bestModel.transform(memSourceBatchOp).collect().size();
    Assert.assertEquals(expectedRowCount, predictedRowCount);
}
Aggregations