Use of com.alibaba.alink.pipeline.feature.Binarizer in project Alink by alibaba.
From class PipelineModelTest, method getPipeline:
protected Pipeline getPipeline() {
    // model mapper
    QuantileDiscretizer quantileDiscretizer = new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("sepal_length");
    // SISO mapper
    Binarizer binarizer = new Binarizer().setSelectedCol("petal_width").setOutputCol("bina")
        .setReservedCols("sepal_length", "petal_width", "petal_length", "category").setThreshold(1.);
    // MISO mapper
    VectorAssembler assembler = new VectorAssembler().setSelectedCols("sepal_length", "petal_width")
        .setOutputCol("assem").setReservedCols("sepal_length", "petal_width", "petal_length", "category");
    // Lda
    Lda lda = new Lda().setPredictionCol("lda_pred").setPredictionDetailCol("lda_pred_detail")
        .setSelectedCol("category").setTopicNum(2).setRandomSeed(0);
    return new Pipeline().add(binarizer).add(assembler).add(quantileDiscretizer).add(lda);
}
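A minimal sketch of how this pipeline could be fitted and applied. The iris CSV source and schema are copied from the other tests on this page; everything else (fit, transform, print) uses only calls that already appear here.

// Sketch: fit the pipeline returned by getPipeline() on the iris data and apply it.
CsvSourceBatchOp source = new CsvSourceBatchOp()
    .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
    .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
// fit() trains the model-backed stages (QuantileDiscretizer, Lda); the Binarizer and
// VectorAssembler stages are stateless mappers and are applied as-is.
PipelineModel model = getPipeline().fit(source);
model.transform(source).print();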
Use of com.alibaba.alink.pipeline.feature.Binarizer in project Alink by alibaba.
From class PipelineSaveAndLoadTest, method test2:
@Test
public void test2() throws Exception {
    String model_filename = "/tmp/model2.csv";
    CsvSourceBatchOp source = new CsvSourceBatchOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
    QuantileDiscretizerTrainBatchOp train = new QuantileDiscretizerTrainBatchOp()
        .setNumBuckets(2).setSelectedCols("petal_length").linkFrom(source);
    train.link(new AkSinkBatchOp().setFilePath(model_filename).setOverwriteSink(true));
    BatchOperator.execute();
    // # save pipeline model data to file
    String pipelineModelFilename = "/tmp/model23424.csv";
    QuantileDiscretizer stage1 = new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("sepal_length");
    Binarizer stage2 = new Binarizer().setSelectedCol("petal_width").setThreshold(1.);
    AkSourceBatchOp modelData = new AkSourceBatchOp().setFilePath(model_filename);
    QuantileDiscretizerModel stage3 = new QuantileDiscretizerModel().setSelectedCols("petal_length").setModelData(modelData);
    PipelineModel prevPipelineModel = new Pipeline(stage1, stage2, stage3).fit(source);
    prevPipelineModel.save(pipelineModelFilename, true);
    BatchOperator.execute();
}
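To round the example off, a short hedged sketch that continues from test2's local variables: it reloads the pipeline model that was just written and applies it, using the same PipelineModel.load / transform / print calls that appear in the neighboring tests.

// Sketch: continues from test2 (assumes /tmp/model23424.csv was written above).
PipelineModel reloaded = PipelineModel.load(pipelineModelFilename);
reloaded.transform(source).print();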
Use of com.alibaba.alink.pipeline.feature.Binarizer in project Alink by alibaba.
From class PipelineSaveAndLoadTest, method testPipelineModelLoadSaveNested:
@Test
public void testPipelineModelLoadSaveNested() throws Exception {
    CsvSourceBatchOp source = new CsvSourceBatchOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
    String pipeline_model_filename = "/tmp/model123.csv";
    QuantileDiscretizerModel model1 = new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("sepal_length").fit(source);
    Binarizer model2 = new Binarizer().setSelectedCol("petal_width").setThreshold(1.);
    PipelineModel pipeline_model = new PipelineModel(model1, model2);
    pipeline_model.save(pipeline_model_filename, true);
    BatchOperator.execute();
    pipeline_model = PipelineModel.load(pipeline_model_filename);
    BatchOperator<?> res = pipeline_model.transform(source);
    res.print();
}
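The snippet above saves and reloads a flat PipelineModel; the "nested" case would wrap that model as a stage of another PipelineModel. A hedged sketch, continuing from the variables above and assuming a PipelineModel can itself be used as a transformer stage; the extra Binarizer and its threshold are purely illustrative.

// Sketch: nest the loaded model inside another PipelineModel (assumption: a
// PipelineModel is itself a transformer and can be passed as a stage).
Binarizer extraStage = new Binarizer().setSelectedCol("sepal_width").setThreshold(3.);   // illustrative stage
PipelineModel nested = new PipelineModel(pipeline_model, extraStage);
nested.save(pipeline_model_filename, true);
BatchOperator.execute();
PipelineModel.load(pipeline_model_filename).transform(source).print();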
Use of com.alibaba.alink.pipeline.feature.Binarizer in project Alink by alibaba.
From class LocalPredictorTest, method getPipeline:
protected Pipeline getPipeline() {
    // model mapper
    QuantileDiscretizer quantileDiscretizer = new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("sepal_length");
    // SISO mapper
    Binarizer binarizer = new Binarizer().setSelectedCol("petal_width").setOutputCol("bina")
        .setReservedCols("sepal_length", "petal_width", "petal_length", "category").setThreshold(1.);
    // MISO mapper
    VectorAssembler assembler = new VectorAssembler().setSelectedCols("sepal_length", "petal_width")
        .setOutputCol("assem").setReservedCols("sepal_length", "petal_width", "petal_length", "category");
    // Lda
    Lda lda = new Lda().setPredictionCol("lda_pred").setPredictionDetailCol("lda_pred_detail")
        .setSelectedCol("category").setTopicNum(2).setRandomSeed(0);
    Select select = new Select().setClause("cast(sepal_length as double) as sepal_length, "
        + "cast(petal_width as double) as petal_width, "
        + "cast(petal_length as double) as petal_length, "
        + "category");
    // Glm
    GeneralizedLinearRegression glm = new GeneralizedLinearRegression()
        .setFeatureCols("sepal_length", "petal_width").setLabelCol("petal_length").setPredictionCol("glm_pred");
    return new Pipeline().add(binarizer).add(assembler).add(quantileDiscretizer).add(glm);
}
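Note that lda and select are constructed here but never added to the returned pipeline. Since this variant comes from LocalPredictorTest, a hedged sketch of per-row local prediction follows; it assumes PipelineModel.collectLocalPredictor(schemaStr) and LocalPredictor.map(Row) are available as in Alink's local-prediction API, and the sample row values are made up.

// Sketch (assumption: collectLocalPredictor / map exist as in Alink's local-prediction API).
CsvSourceBatchOp source = new CsvSourceBatchOp()
    .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
    .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
PipelineModel model = getPipeline().fit(source);
LocalPredictor predictor = model.collectLocalPredictor(
    "sepal_length double, sepal_width double, petal_length double, petal_width double, category string");
System.out.println(predictor.map(Row.of(5.1, 3.5, 1.4, 0.2, "Iris-setosa")));   // made-up iris row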
Use of com.alibaba.alink.pipeline.feature.Binarizer in project Alink by alibaba.
From class PipelineSaveAndLoadTest, method test3:
@Test
public void test3() throws Exception {
    // # save model data to file (ModelBase)
    String modelFilename = "/tmp/model12341.csv";
    CsvSourceBatchOp source = new CsvSourceBatchOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
    new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("petal_length").fit(source)
        .getModelData().link(new CsvSinkBatchOp().setFilePath(modelFilename).setOverwriteSink(true));
    BatchOperator.execute();
    // # save pipeline model data to file
    QuantileDiscretizerModel model1 = new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("sepal_length").fit(source);
    Binarizer model2 = new Binarizer().setSelectedCol("petal_width").setThreshold(1.);
    CsvSourceBatchOp modelData = new CsvSourceBatchOp().setFilePath(modelFilename).setSchemaStr("model_id BIGINT, model_info STRING");
    QuantileDiscretizerModel model3 = new QuantileDiscretizerModel().setSelectedCols("petal_length").setModelData(modelData);
    CsvSourceStreamOp streamSource = new CsvSourceStreamOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
    PipelineModel pipelineModel = new PipelineModel(model1, model2, model3);
    pipelineModel = PipelineModel.collectLoad(pipelineModel.save());
    pipelineModel.transform(streamSource).print();
    StreamOperator.execute();
}
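As a brief batch-side counterpart (a hedged sketch continuing from test3's variables): the collect-loaded model can also score the batch source directly, or be persisted and reloaded with the same save/load pair used in the other tests; the file path below is only illustrative.

// Sketch: continues from test3's variables.
pipelineModel.transform(source).print();
// Persist and reload; "/tmp/model_test3.csv" is an illustrative path.
pipelineModel.save("/tmp/model_test3.csv", true);
BatchOperator.execute();
PipelineModel.load("/tmp/model_test3.csv").transform(source).print();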