Use of com.alibaba.alink.pipeline.sql.Select in project Alink by alibaba.
From the class LocalPredictorTest, method getPipeline:
protected Pipeline getPipeline() {
    // model mapper
    QuantileDiscretizer quantileDiscretizer = new QuantileDiscretizer()
        .setNumBuckets(2).setSelectedCols("sepal_length");
    // SISO mapper
    Binarizer binarizer = new Binarizer()
        .setSelectedCol("petal_width").setOutputCol("bina")
        .setReservedCols("sepal_length", "petal_width", "petal_length", "category")
        .setThreshold(1.);
    // MISO Mapper
    VectorAssembler assembler = new VectorAssembler()
        .setSelectedCols("sepal_length", "petal_width").setOutputCol("assem")
        .setReservedCols("sepal_length", "petal_width", "petal_length", "category");
    // Lda
    Lda lda = new Lda()
        .setPredictionCol("lda_pred").setPredictionDetailCol("lda_pred_detail")
        .setSelectedCol("category").setTopicNum(2).setRandomSeed(0);
    Select select = new Select().setClause(
        "cast(sepal_length as double) as sepal_length, "
            + "cast(petal_width as double) as petal_width, "
            + "cast(petal_length as double) as petal_length, "
            + "category");
    // Glm
    GeneralizedLinearRegression glm = new GeneralizedLinearRegression()
        .setFeatureCols("sepal_length", "petal_width")
        .setLabelCol("petal_length").setPredictionCol("glm_pred");
    return new Pipeline().add(binarizer).add(assembler).add(quantileDiscretizer).add(glm);
}
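This pipeline is the fixture that LocalPredictorTest scores row by row. A minimal sketch of that usage, assuming an iris-style BatchOperator named source that provides the four columns referenced above; the schema string and the sample values are illustrative, not taken from the test:

// Sketch only: fit the pipeline above, collect a LocalPredictor, and score a single Row.
// `source` is an assumed iris-style BatchOperator; the values below are made up.
PipelineModel model = getPipeline().fit(source);
LocalPredictor predictor = model.collectLocalPredictor(
    "sepal_length double, petal_width double, petal_length double, category string");
Row result = predictor.map(Row.of(5.1, 0.2, 1.4, "Iris-setosa"));
System.out.println(result);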
Use of com.alibaba.alink.pipeline.sql.Select in project Alink by alibaba.
From the class Chap01, method c_5_4:
static void c_5_4() throws Exception {
    MemSourceBatchOp train_set = new MemSourceBatchOp(
        new Row[] {
            Row.of(2009, 0.5), Row.of(2010, 9.36), Row.of(2011, 52.0),
            Row.of(2012, 191.0), Row.of(2013, 350.0), Row.of(2014, 571.0),
            Row.of(2015, 912.0), Row.of(2016, 1207.0), Row.of(2017, 1682.0)
        },
        new String[] { "x", "gmv" });
    Pipeline pipeline = new Pipeline()
        .add(new Select().setClause("*, x*x AS x2"))
        .add(new LinearRegression()
            .setFeatureCols("x", "x2").setLabelCol("gmv").setPredictionCol("pred"));
    File file = new File(DATA_DIR + "gmv_pipeline.model");
    if (file.exists()) {
        file.delete();
    }
    pipeline.fit(train_set).save(DATA_DIR + "gmv_pipeline.model");
    BatchOperator.execute();
    PipelineModel pipelineModel = PipelineModel.load(DATA_DIR + "gmv_pipeline.model");
    BatchOperator<?> pred_batch = new MemSourceBatchOp(new Integer[] { 2018, 2019 }, "x");
    pipelineModel.transform(pred_batch).print();
    MemSourceStreamOp pred_stream = new MemSourceStreamOp(new Integer[] { 2018, 2019 }, "x");
    pipelineModel.transform(pred_stream).print();
    StreamOperator.execute();
}
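In this pipeline the Select stage only materializes the squared term, so the LinearRegression stage effectively fits gmv ≈ b0 + b1*x + b2*x^2. The same projection can also be applied on the operator side, without building a Pipeline; a minimal sketch assuming the train_set defined above (BatchOperator.select and the linkable SelectBatchOp are the two usual routes):

// Sketch only: build the "x2" column directly on the batch operator.
train_set.select("*, x*x AS x2").print();
// Equivalent form using the sql batch operator:
train_set.link(new SelectBatchOp().setClause("*, x*x AS x2")).print();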
Use of com.alibaba.alink.pipeline.sql.Select in project Alink by alibaba.
From the class Chap02, method c_6:
static void c_6() throws Exception {
    MemSourceBatchOp source = new MemSourceBatchOp(
        new Row[] {
            Row.of("sunny", 85.0, 85.0, false, "no"), Row.of("sunny", 80.0, 90.0, true, "no"),
            Row.of("overcast", 83.0, 78.0, false, "yes"), Row.of("rainy", 70.0, 96.0, false, "yes"),
            Row.of("rainy", 68.0, 80.0, false, "yes"), Row.of("rainy", 65.0, 70.0, true, "no"),
            Row.of("overcast", 64.0, 65.0, true, "yes"), Row.of("sunny", 72.0, 95.0, false, "no"),
            Row.of("sunny", 69.0, 70.0, false, "yes"), Row.of("rainy", 75.0, 80.0, false, "yes"),
            Row.of("sunny", 75.0, 70.0, true, "yes"), Row.of("overcast", 72.0, 90.0, true, "yes"),
            Row.of("overcast", 81.0, 75.0, false, "yes"), Row.of("rainy", 71.0, 80.0, true, "no")
        },
        new String[] { "outlook", "Temperature", "Humidity", "Windy", "play" });
    source.link(new C45TrainBatchOp()
            .setFeatureCols("outlook", "Temperature", "Humidity", "Windy")
            .setCategoricalCols("outlook", "Windy")
            .setLabelCol("play"))
        .link(new AkSinkBatchOp().setFilePath(DATA_DIR + TREE_MODEL_FILE).setOverwriteSink(true));
    BatchOperator.execute();
    new AkSourceBatchOp().setFilePath(DATA_DIR + TREE_MODEL_FILE)
        .link(new DecisionTreeModelInfoBatchOp()
            .lazyPrintModelInfo()
            .lazyCollectModelInfo(new Consumer<DecisionTreeModelInfo>() {
                @Override
                public void accept(DecisionTreeModelInfo decisionTreeModelInfo) {
                    try {
                        decisionTreeModelInfo.saveTreeAsImage(DATA_DIR + "tree_model.png", true);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }));
    BatchOperator.execute();
    MemSourceBatchOp train_set = new MemSourceBatchOp(
        new Row[] {
            Row.of(2009, 0.5), Row.of(2010, 9.36), Row.of(2011, 52.0),
            Row.of(2012, 191.0), Row.of(2013, 350.0), Row.of(2014, 571.0),
            Row.of(2015, 912.0), Row.of(2016, 1207.0), Row.of(2017, 1682.0)
        },
        new String[] { "x", "gmv" });
    Pipeline pipeline = new Pipeline()
        .add(new Select().setClause("*, x*x AS x2"))
        .add(new LinearRegression()
            .setFeatureCols("x", "x2").setLabelCol("gmv").setPredictionCol("pred"));
    pipeline.fit(train_set).save(DATA_DIR + PIPELINE_MODEL_FILE, true);
    BatchOperator.execute();
    PipelineModel pipelineModel = PipelineModel.load(DATA_DIR + PIPELINE_MODEL_FILE);
    TransformerBase<?>[] stages = pipelineModel.getTransformers();
    for (int i = 0; i < stages.length; i++) {
        System.out.println(i + "\t" + stages[i]);
    }
    ((LinearRegressionModel) stages[1]).getModelData()
        .link(new LinearRegModelInfoBatchOp().lazyPrintModelInfo());
    BatchOperator.execute();
}
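The cast to LinearRegressionModel above relies on the stage layout of the saved pipeline: the Select transformer is stored as-is, while the LinearRegression estimator is replaced by its fitted model, so stages[0] is expected to be the Select and stages[1] the LinearRegressionModel. A minimal sketch of checking that, assuming the getClause() getter generated from Select's clause parameter is available:

// Sketch only: confirm the first loaded stage is the Select transformer and echo its clause
// (getClause() is assumed to be generated from the clause parameter).
if (stages[0] instanceof Select) {
    System.out.println("select clause: " + ((Select) stages[0]).getClause());
}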