Search in sources:

Example 61 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

Class Chap19, method c_2.

/**
 * Runs a two-stage pipeline (StandardScaler followed by a covariance-based PCA
 * with k = 4) over the in-memory crime data set, expands the resulting vector
 * column into four numeric columns and lazily prints them next to the
 * {@code state} column.
 *
 * @throws Exception propagated from the Alink operators or from
 *                   {@link BatchOperator#execute()}
 */
static void c_2() throws Exception {
    // The scaler and the PCA stage both work on the same set of input columns.
    final String[] crimeCols = {
        "murder", "rape", "robbery", "assault", "burglary", "larceny", "auto"
    };

    MemSourceBatchOp crimeData = new MemSourceBatchOp(CRIME_ROWS_DATA, CRIME_COL_NAMES);

    StandardScaler scaler = new StandardScaler().setSelectedCols(crimeCols);
    PCA pca = new PCA()
        .setCalculationType(CalculationType.COV)
        .setK(4)
        .setSelectedCols(crimeCols)
        .setPredictionCol(VECTOR_COL_NAME)
        .enableLazyPrintModelInfo();
    Pipeline stdPca = new Pipeline().add(scaler).add(pca);

    // The same data set is used for fitting and for transforming.
    BatchOperator<?> projected = stdPca.fit(crimeData).transform(crimeData);

    // Split the PCA output vector into one double column per component,
    // keeping only "state" from the original columns.
    projected
        .link(
            new VectorToColumnsBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setSchemaStr("prin1 double, prin2 double, prin3 double, prin4 double")
                .setReservedCols("state")
        )
        .lazyPrint(10, "state with principle components");

    // Triggers the job; the lazy print callbacks registered above fire here.
    BatchOperator.execute();
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) VectorToColumnsBatchOp(com.alibaba.alink.operator.batch.dataproc.format.VectorToColumnsBatchOp) StandardScaler(com.alibaba.alink.pipeline.dataproc.StandardScaler) Pipeline(com.alibaba.alink.pipeline.Pipeline) PCA(com.alibaba.alink.pipeline.feature.PCA)

Example 62 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

Class Chap19, method c_4.

/**
 * Times six KNN (k = 3) classification experiments and prints the multi-class
 * evaluation metrics plus the elapsed time for each:
 * <ol>
 *   <li>plain KNN on the dense data,</li>
 *   <li>plain KNN on the sparse data,</li>
 *   <li>PCA (k = 39, covariance) + KNN on the dense data,</li>
 *   <li>PCA (k = 39, covariance) + KNN on the sparse data,</li>
 *   <li>a PCAModel loaded from {@code PCA_MODEL_FILE} + KNN on the dense data,</li>
 *   <li>the same loaded PCAModel + KNN on the sparse data.</li>
 * </ol>
 * A single stopwatch is reset and restarted before every experiment.
 *
 * @throws Exception propagated from the Alink operators or from
 *                   {@link BatchOperator#execute()}
 */
static void c_4() throws Exception {
    AkSourceBatchOp denseTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TRAIN_FILE);
    AkSourceBatchOp denseTest = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TEST_FILE);
    AkSourceBatchOp sparseTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp sparseTest = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE);

    Stopwatch timer = new Stopwatch();

    // --- 1. Plain KNN, dense vectors -------------------------------------
    timer.reset();
    timer.start();
    BatchOperator<?> knnDense = new KnnClassifier()
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .fit(denseTrain)
        .transform(denseTest);
    knnDense.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("KnnClassifier Dense")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // --- 2. Plain KNN, sparse vectors ------------------------------------
    timer.reset();
    timer.start();
    BatchOperator<?> knnSparse = new KnnClassifier()
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .fit(sparseTrain)
        .transform(sparseTest);
    knnSparse.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("KnnClassifier Sparse")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // --- 3. PCA trained in the pipeline + KNN, dense vectors -------------
    timer.reset();
    timer.start();
    // PCA overwrites the vector column in place (input col == prediction col).
    PCA densePca = new PCA()
        .setK(39)
        .setCalculationType(CalculationType.COV)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(VECTOR_COL_NAME);
    KnnClassifier densePcaKnn = new KnnClassifier()
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);
    BatchOperator<?> pcaKnnDense = new Pipeline()
        .add(densePca)
        .add(densePcaKnn)
        .fit(denseTrain)
        .transform(denseTest);
    pcaKnnDense.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("Knn with PCA Dense")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // --- 4. PCA trained in the pipeline + KNN, sparse vectors ------------
    timer.reset();
    timer.start();
    PCA sparsePca = new PCA()
        .setK(39)
        .setCalculationType(CalculationType.COV)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(VECTOR_COL_NAME);
    KnnClassifier sparsePcaKnn = new KnnClassifier()
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);
    BatchOperator<?> pcaKnnSparse = new Pipeline()
        .add(sparsePca)
        .add(sparsePcaKnn)
        .fit(sparseTrain)
        .transform(sparseTest);
    pcaKnnSparse.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("Knn with PCA Sparse")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // --- 5. PCA model loaded from file + KNN, dense vectors --------------
    timer.reset();
    timer.start();
    BatchOperator<?> modelKnnDense = new Pipeline()
        .add(
            new PCAModel()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(VECTOR_COL_NAME)
                .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + PCA_MODEL_FILE))
        )
        .add(
            new KnnClassifier()
                .setK(3)
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
        )
        .fit(denseTrain)
        .transform(denseTest);
    modelKnnDense.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("Knn PCAModel Dense")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // --- 6. PCA model loaded from file + KNN, sparse vectors -------------
    timer.reset();
    timer.start();
    BatchOperator<?> modelKnnSparse = new Pipeline()
        .add(
            new PCAModel()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(VECTOR_COL_NAME)
                .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + PCA_MODEL_FILE))
        )
        .add(
            new KnnClassifier()
                .setK(3)
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
        )
        .fit(sparseTrain)
        .transform(sparseTest);
    modelKnnSparse.link(
        new EvalMultiClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("Knn PCAModel Sparse")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : PCAModel(com.alibaba.alink.pipeline.feature.PCAModel) EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) KnnClassifier(com.alibaba.alink.pipeline.classification.KnnClassifier) Pipeline(com.alibaba.alink.pipeline.Pipeline) PCA(com.alibaba.alink.pipeline.feature.PCA)

Example 63 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

Class Chap20, method c_1.

/**
 * Tunes the L1 parameter of a logistic regression with a 5-fold grid search
 * (selecting by AUC), then evaluates the returned model on the test set and
 * lazily prints the binary-classification metrics.
 *
 * <p>Training and test data are read from the Chap10 files and passed through
 * {@code Chap10.CLAUSE_CREATE_FEATURES}; every resulting column except the
 * label column is used as a feature.
 *
 * @throws Exception propagated from the Alink operators or from
 *                   {@link BatchOperator#execute()}
 */
static void c_1() throws Exception {
    BatchOperator<?> trainData = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TRAIN_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testData = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TEST_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);

    // Feature columns = all selected columns minus the label.
    final String[] featureCols =
        ArrayUtils.removeElement(trainData.getColNames(), Chap10.LABEL_COL_NAME);

    LogisticRegression lr = new LogisticRegression()
        .setFeatureCols(featureCols)
        .setLabelCol(Chap10.LABEL_COL_NAME)
        .setPredictionCol(Chap10.PREDICTION_COL_NAME)
        .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME);
    Pipeline pipeline = new Pipeline().add(lr);

    // Candidate L1 values, from 1e-7 up to 10 in powers of ten.
    ParamGrid l1Grid = new ParamGrid().addGrid(
        lr,
        LogisticRegression.L_1,
        new Double[] { 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0 }
    );

    BinaryClassificationTuningEvaluator aucEvaluator = new BinaryClassificationTuningEvaluator()
        .setLabelCol(Chap10.LABEL_COL_NAME)
        .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME)
        .setTuningBinaryClassMetric(TuningBinaryClassMetric.AUC);

    GridSearchCV gridSearch = new GridSearchCV()
        .setNumFolds(5)
        .setEstimator(pipeline)
        .setParamGrid(l1Grid)
        .setTuningEvaluator(aucEvaluator)
        .enableLazyPrintTrainInfo();

    GridSearchCVModel bestModel = gridSearch.fit(trainData);

    bestModel
        .transform(testData)
        .link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString("2")
                .setLabelCol(Chap10.LABEL_COL_NAME)
                .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics("GridSearchCV")
        );

    // Triggers the job; the lazy print callbacks registered above fire here.
    BatchOperator.execute();
}
Also used : ParamGrid(com.alibaba.alink.pipeline.tuning.ParamGrid) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) GridSearchCV(com.alibaba.alink.pipeline.tuning.GridSearchCV) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) GridSearchCVModel(com.alibaba.alink.pipeline.tuning.GridSearchCVModel) BinaryClassificationTuningEvaluator(com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator) Pipeline(com.alibaba.alink.pipeline.Pipeline) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Aggregations

Pipeline (com.alibaba.alink.pipeline.Pipeline): 63, Test (org.junit.Test): 38, PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 34, LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 20, Row (org.apache.flink.types.Row): 18, BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 16, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 16, VectorAssembler (com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler): 11, AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 10, CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 9, EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 8, MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp): 7, File (java.io.File): 5, ArrayList (java.util.ArrayList): 5, EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp): 4, StandardScaler (com.alibaba.alink.pipeline.dataproc.StandardScaler): 4, Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 3, CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 3, KMeans (com.alibaba.alink.pipeline.clustering.KMeans): 3, VectorToTensor (com.alibaba.alink.pipeline.dataproc.VectorToTensor): 3