Search in sources:

Example 56 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class Chap02 method c_6.

/**
 * Walks through two model-inspection examples.
 *
 * <p>Part one trains a C4.5 tree on a small in-memory weather table, writes the model to
 * {@code DATA_DIR + TREE_MODEL_FILE}, reads it back, prints its model info and saves the tree
 * as {@code tree_model.png}. Part two fits a two-stage pipeline (SQL select adding
 * {@code x2 = x*x}, then linear regression on {@code x} and {@code x2}), saves it, reloads it,
 * lists its stages and prints the regression model info.
 *
 * @throws Exception propagated from the Alink calls made here
 */
static void c_6() throws Exception {
    // ---- Part 1: decision tree on the weather table ----
    Row[] weatherRows = {
        Row.of("sunny", 85.0, 85.0, false, "no"),
        Row.of("sunny", 80.0, 90.0, true, "no"),
        Row.of("overcast", 83.0, 78.0, false, "yes"),
        Row.of("rainy", 70.0, 96.0, false, "yes"),
        Row.of("rainy", 68.0, 80.0, false, "yes"),
        Row.of("rainy", 65.0, 70.0, true, "no"),
        Row.of("overcast", 64.0, 65.0, true, "yes"),
        Row.of("sunny", 72.0, 95.0, false, "no"),
        Row.of("sunny", 69.0, 70.0, false, "yes"),
        Row.of("rainy", 75.0, 80.0, false, "yes"),
        Row.of("sunny", 75.0, 70.0, true, "yes"),
        Row.of("overcast", 72.0, 90.0, true, "yes"),
        Row.of("overcast", 81.0, 75.0, false, "yes"),
        Row.of("rainy", 71.0, 80.0, true, "no")
    };
    String[] weatherCols = { "outlook", "Temperature", "Humidity", "Windy", "play" };
    MemSourceBatchOp source = new MemSourceBatchOp(weatherRows, weatherCols);

    // Train the tree, then sink the trained model to an .ak file (overwriting any previous one).
    BatchOperator<?> trainedTree = source.link(
        new C45TrainBatchOp()
            .setFeatureCols("outlook", "Temperature", "Humidity", "Windy")
            .setCategoricalCols("outlook", "Windy")
            .setLabelCol("play")
    );
    trainedTree.link(
        new AkSinkBatchOp()
            .setFilePath(DATA_DIR + TREE_MODEL_FILE)
            .setOverwriteSink(true)
    );
    BatchOperator.execute();

    // Callback that renders the collected tree to a PNG; an I/O failure is only reported.
    Consumer<DecisionTreeModelInfo> saveTreeImage = treeInfo -> {
        try {
            treeInfo.saveTreeAsImage(DATA_DIR + "tree_model.png", true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    };
    AkSourceBatchOp storedTree = new AkSourceBatchOp().setFilePath(DATA_DIR + TREE_MODEL_FILE);
    storedTree.link(
        new DecisionTreeModelInfoBatchOp()
            .lazyPrintModelInfo()
            .lazyCollectModelInfo(saveTreeImage)
    );
    BatchOperator.execute();

    // ---- Part 2: quadratic regression pipeline ----
    Row[] gmvRows = {
        Row.of(2009, 0.5),
        Row.of(2010, 9.36),
        Row.of(2011, 52.0),
        Row.of(2012, 191.0),
        Row.of(2013, 350.0),
        Row.of(2014, 571.0),
        Row.of(2015, 912.0),
        Row.of(2016, 1207.0),
        Row.of(2017, 1682.0)
    };
    String[] gmvCols = { "x", "gmv" };
    MemSourceBatchOp trainSet = new MemSourceBatchOp(gmvRows, gmvCols);

    Pipeline pipeline = new Pipeline()
        .add(new Select().setClause("*, x*x AS x2"))
        .add(
            new LinearRegression()
                .setFeatureCols("x", "x2")
                .setLabelCol("gmv")
                .setPredictionCol("pred")
        );
    PipelineModel fittedPipeline = pipeline.fit(trainSet);
    fittedPipeline.save(DATA_DIR + PIPELINE_MODEL_FILE, true);
    BatchOperator.execute();

    // Reload the saved pipeline model and list its stages as "<index>\t<stage>".
    PipelineModel pipelineModel = PipelineModel.load(DATA_DIR + PIPELINE_MODEL_FILE);
    TransformerBase<?>[] stages = pipelineModel.getTransformers();
    int position = 0;
    for (TransformerBase<?> stage : stages) {
        System.out.println(position + "\t" + stage);
        position++;
    }

    // Stage 1 is cast to the regression model (stage 0 being the Select added first).
    LinearRegressionModel regressionStage = (LinearRegressionModel) stages[1];
    regressionStage.getModelData().link(new LinearRegModelInfoBatchOp().lazyPrintModelInfo());
    BatchOperator.execute();
}
Also used : C45TrainBatchOp(com.alibaba.alink.operator.batch.classification.C45TrainBatchOp) LinearRegModelInfoBatchOp(com.alibaba.alink.operator.batch.regression.LinearRegModelInfoBatchOp) IOException(java.io.IOException) DecisionTreeModelInfoBatchOp(com.alibaba.alink.operator.batch.classification.DecisionTreeModelInfoBatchOp) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Consumer(java.util.function.Consumer) DecisionTreeModelInfo(com.alibaba.alink.operator.common.tree.TreeModelInfo.DecisionTreeModelInfo) Select(com.alibaba.alink.pipeline.sql.Select) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) LinearRegression(com.alibaba.alink.pipeline.regression.LinearRegression) TransformerBase(com.alibaba.alink.pipeline.TransformerBase)

Example 57 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class Chap08 method c_8.

/**
 * Compares three linear binary classifiers on polynomially expanded features.
 *
 * <p>The feature columns are assembled into a vector, expanded with degree 2, and the same
 * expanded train/test sets are then fed to LinearSvm, LogisticRegression, and
 * LogisticRegression with {@code OptimMethod.Newton}. Each prediction result is linked to an
 * {@code EvalBinaryClassBatchOp} (positive label {@code "1"}) that lazily prints its metrics.
 *
 * @throws Exception propagated from the Alink calls made here
 */
static void c_8() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Intermediate column holding the assembled (not yet expanded) vector.
    String assembledCol = VEC_COL_NAME + "_0";
    Pipeline expansionPipeline = new Pipeline()
        .add(
            new VectorAssembler()
                .setSelectedCols(FEATURE_COL_NAMES)
                .setOutputCol(assembledCol)
        )
        .add(
            new VectorPolynomialExpand()
                .setSelectedCol(assembledCol)
                .setOutputCol(VEC_COL_NAME)
                .setDegree(2)
        );
    PipelineModel featureExpand = expansionPipeline.fit(trainSet);

    // From here on both sets carry the expanded vector column.
    trainSet = featureExpand.transform(trainSet);
    testSet = featureExpand.transform(testSet);
    trainSet.lazyPrint(1);

    // 1) Linear SVM
    BatchOperator<?> svmPredictions = new LinearSvm()
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(trainSet)
        .transform(testSet);
    svmPredictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LinearSVM")
    );

    // 2) Logistic regression with its default optimizer settings
    BatchOperator<?> lrPredictions = new LogisticRegression()
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(trainSet)
        .transform(testSet);
    lrPredictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LogisticRegression")
    );

    // 3) Logistic regression using the Newton optimization method
    BatchOperator<?> newtonPredictions = new LogisticRegression()
        .setOptimMethod(OptimMethod.Newton)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(trainSet)
        .transform(testSet);
    newtonPredictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LogisticRegression + OptimMethod.Newton")
    );

    BatchOperator.execute();
}
Also used : VectorPolynomialExpand(com.alibaba.alink.pipeline.dataproc.vector.VectorPolynomialExpand) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) VectorAssembler(com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler) LinearSvm(com.alibaba.alink.pipeline.classification.LinearSvm) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Pipeline(com.alibaba.alink.pipeline.Pipeline) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 58 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class Chap10 method c_3_2.

/**
 * Fits a FeatureHasher + LogisticRegression pipeline on the training file and evaluates its
 * predictions on the test file as a binary classification with positive label {@code "2"}.
 *
 * @throws Exception propagated from the Alink calls made here
 */
static void c_3_2() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    Pipeline pipeline = new Pipeline()
        // Hash all feature columns into a single vector column.
        .add(
            new FeatureHasher()
                .setSelectedCols(FEATURE_COL_NAMES)
                .setCategoricalCols(CATEGORY_FEATURE_COL_NAMES)
                .setOutputCol(VEC_COL_NAME)
        )
        // Classify on the hashed vector.
        .add(
            new LogisticRegression()
                .setVectorCol(VEC_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        );

    BatchOperator<?> predictions = pipeline.fit(trainSet).transform(testSet);
    predictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("2")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics()
    );
    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) FeatureHasher(com.alibaba.alink.pipeline.feature.FeatureHasher) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) Pipeline(com.alibaba.alink.pipeline.Pipeline) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 59 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class Chap10 method c_3_1.

/**
 * Fits a OneHotEncoder + VectorAssembler + LogisticRegression pipeline on the training file
 * and evaluates its predictions on the test file as a binary classification with positive
 * label {@code "2"}.
 *
 * @throws Exception propagated from the Alink calls made here
 */
static void c_3_1() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    Pipeline pipeline = new Pipeline()
        // One-hot encode the categorical columns, emitting vectors.
        .add(
            new OneHotEncoder()
                .setSelectedCols(CATEGORY_FEATURE_COL_NAMES)
                .setEncode(Encode.VECTOR)
        )
        // Assemble the feature columns into one vector column.
        .add(
            new VectorAssembler()
                .setSelectedCols(FEATURE_COL_NAMES)
                .setOutputCol(VEC_COL_NAME)
        )
        // Classify on the assembled vector.
        .add(
            new LogisticRegression()
                .setVectorCol(VEC_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        );

    BatchOperator<?> predictions = pipeline.fit(trainSet).transform(testSet);
    predictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("2")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics()
    );
    BatchOperator.execute();
}
Also used : OneHotEncoder(com.alibaba.alink.pipeline.feature.OneHotEncoder) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) VectorAssembler(com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) Pipeline(com.alibaba.alink.pipeline.Pipeline) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 60 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class Chap18 method c_1.

/**
 * Times three clustering pipelines (KMeans with its default distance, KMeans with cosine
 * distance, BisectingKMeans; all with k = 10) on the dense and then the sparse training file.
 *
 * <p>For every pipeline/input combination the pipeline is fitted and applied to the same
 * input, the result is linked to an {@code EvalClusterBatchOp} that lazily prints its metrics
 * under a title of the form {@code "<name> DENSE"} or {@code "<name> SPARSE"}, the job is
 * executed, and the stopwatch's elapsed time span is printed.
 *
 * @throws Exception propagated from the Alink calls made here
 */
static void c_1() throws Exception {
    AkSourceBatchOp denseInput = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TRAIN_FILE);
    AkSourceBatchOp sparseInput = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    Stopwatch timer = new Stopwatch();

    ArrayList<Tuple2<String, Pipeline>> candidates = new ArrayList<>();
    candidates.add(new Tuple2<>(
        "KMeans EUCLIDEAN",
        new Pipeline().add(
            new KMeans()
                .setK(10)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
        )
    ));
    candidates.add(new Tuple2<>(
        "KMeans COSINE",
        new Pipeline().add(
            new KMeans()
                .setDistanceType(DistanceType.COSINE)
                .setK(10)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
        )
    ));
    candidates.add(new Tuple2<>(
        "BisectingKMeans",
        new Pipeline().add(
            new BisectingKMeans()
                .setK(10)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
        )
    ));

    // Parallel arrays: each input is paired with the suffix appended to the metrics title.
    AkSourceBatchOp[] inputs = { denseInput, sparseInput };
    String[] titleSuffixes = { " DENSE", " SPARSE" };

    for (Tuple2<String, Pipeline> candidate : candidates) {
        for (int k = 0; k < inputs.length; k++) {
            AkSourceBatchOp input = inputs[k];

            timer.reset();
            timer.start();
            BatchOperator<?> clustered = candidate.f1.fit(input).transform(input);
            clustered.link(
                new EvalClusterBatchOp()
                    .setVectorCol(VECTOR_COL_NAME)
                    .setPredictionCol(PREDICTION_COL_NAME)
                    .setLabelCol(LABEL_COL_NAME)
                    .lazyPrintMetrics(candidate.f0 + titleSuffixes[k])
            );
            BatchOperator.execute();
            timer.stop();
            System.out.println(timer.getElapsedTimeSpan());
        }
    }
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) BisectingKMeans(com.alibaba.alink.pipeline.clustering.BisectingKMeans) KMeans(com.alibaba.alink.pipeline.clustering.KMeans) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) ArrayList(java.util.ArrayList) BisectingKMeans(com.alibaba.alink.pipeline.clustering.BisectingKMeans) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Aggregations

Pipeline (com.alibaba.alink.pipeline.Pipeline) 63, Test (org.junit.Test) 38, PipelineModel (com.alibaba.alink.pipeline.PipelineModel) 34, LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression) 20, Row (org.apache.flink.types.Row) 18, BatchOperator (com.alibaba.alink.operator.batch.BatchOperator) 16, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp) 16, VectorAssembler (com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler) 11, AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp) 10, CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) 9, EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp) 8, MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp) 7, File (java.io.File) 5, ArrayList (java.util.ArrayList) 5, EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) 4, StandardScaler (com.alibaba.alink.pipeline.dataproc.StandardScaler) 4, Stopwatch (com.alibaba.alink.common.utils.Stopwatch) 3, CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) 3, KMeans (com.alibaba.alink.pipeline.clustering.KMeans) 3, VectorToTensor (com.alibaba.alink.pipeline.dataproc.VectorToTensor) 3