Search in sources :

Example 1 with ParamGrid

use of com.alibaba.alink.pipeline.tuning.ParamGrid in project Alink by alibaba.

In class Chap23, method c_1:

/**
 * Trains several binary text classifiers on the bag-of-words data read from
 * {@code labeledBow.feat} and prints evaluation metrics for each of them on the test split.
 *
 * <p>Steps, each followed by {@code BatchOperator.execute()}:
 * <ol>
 *   <li>NaiveBayesTextClassifier with model type "Multinomial";</li>
 *   <li>Binarizer followed by NaiveBayesTextClassifier with model type "Bernoulli";</li>
 *   <li>LogisticRegression with default settings;</li>
 *   <li>LogisticRegression whose MAX_ITER is chosen by 6-fold GridSearchCV on AUC.</li>
 * </ol>
 *
 * <p>The numeric label is collapsed to two classes first: {@code label > 5} becomes
 * {@code 'pos'}, everything else {@code 'neg'}.
 *
 * @throws Exception if building or executing any of the batch jobs fails
 */
static void c_1() throws Exception {
    // Single definition of the relabeling clause so the train and test splits
    // are guaranteed to be transformed identically.
    final String relabelClause = "CASE WHEN label>5 THEN 'pos' ELSE 'neg' END AS label, " + "features AS " + VECTOR_COL_NAME;

    BatchOperator<?> trainSet = new LibSvmSourceBatchOp()
        .setFilePath(ORIGIN_DATA_DIR + "train" + File.separator + "labeledBow.feat")
        .setStartIndex(0);
    trainSet.lazyPrint(1, "train_set");
    // Show how many rows carry each original label value before relabeling.
    trainSet
        .groupBy("label", "label, COUNT(label) AS cnt")
        .orderBy("label", 100)
        .lazyPrint(-1, "labels of train_set");

    BatchOperator<?> testSet = new LibSvmSourceBatchOp()
        .setFilePath(ORIGIN_DATA_DIR + "test" + File.separator + "labeledBow.feat")
        .setStartIndex(0);

    trainSet = trainSet.select(relabelClause);
    testSet = testSet.select(relabelClause);
    trainSet.lazyPrint(1, "train_set");

    // 1) Multinomial naive Bayes directly on the vector column.
    new NaiveBayesTextClassifier()
        .setModelType("Multinomial")
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .enableLazyPrintModelInfo()
        .fit(trainSet)
        .transform(testSet)
        .link(newPosNegEvaluation("NaiveBayesTextClassifier + Multinomial"));
    BatchOperator.execute();

    // 2) Bernoulli naive Bayes; the vector column is binarized first.
    new Pipeline()
        .add(new Binarizer()
            .setSelectedCol(VECTOR_COL_NAME)
            .enableLazyPrintTransformData(1, "After Binarizer"))
        .add(new NaiveBayesTextClassifier()
            .setModelType("Bernoulli")
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .enableLazyPrintModelInfo())
        .fit(trainSet)
        .transform(testSet)
        .link(newPosNegEvaluation("Binarizer + NaiveBayesTextClassifier + Bernoulli"));
    BatchOperator.execute();

    // 3) Logistic regression with its default hyper-parameters.
    new LogisticRegression()
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .enableLazyPrintTrainInfo("< LR train info >")
        .enableLazyPrintModelInfo("< LR model info >")
        .fit(trainSet)
        .transform(testSet)
        .link(newPosNegEvaluation("LogisticRegression"));
    BatchOperator.execute();

    // 4) Logistic regression with MAX_ITER selected by cross-validated grid search.
    AlinkGlobalConfiguration.setPrintProcessInfo(true);
    LogisticRegression lr = new LogisticRegression()
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    GridSearchCV gridSearch = new GridSearchCV()
        .setEstimator(new Pipeline().add(lr))
        .setParamGrid(new ParamGrid()
            .addGrid(lr, LogisticRegression.MAX_ITER, new Integer[] { 10, 20, 30, 40, 50, 60, 80, 100 }))
        .setTuningEvaluator(new BinaryClassificationTuningEvaluator()
            .setLabelCol(LABEL_COL_NAME)
            .setPositiveLabelValueString("pos")
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .setTuningBinaryClassMetric(TuningBinaryClassMetric.AUC))
        .setNumFolds(6)
        .enableLazyPrintTrainInfo();
    GridSearchCVModel bestModel = gridSearch.fit(trainSet);
    // Titled differently from step 3 so the two reports can be told apart in the output.
    bestModel
        .transform(testSet)
        .link(newPosNegEvaluation("GridSearchCV + LogisticRegression"));
    BatchOperator.execute();
}

/**
 * Builds the binary-classification evaluation used by every step of {@code c_1}:
 * positive label "pos", with the metrics printed lazily under the given title.
 *
 * @param title heading printed with the metrics
 * @return a freshly configured evaluation operator
 */
private static EvalBinaryClassBatchOp newPosNegEvaluation(String title) {
    return new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("pos")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics(title);
}
Also used : ParamGrid(com.alibaba.alink.pipeline.tuning.ParamGrid) LibSvmSourceBatchOp(com.alibaba.alink.operator.batch.source.LibSvmSourceBatchOp) GridSearchCV(com.alibaba.alink.pipeline.tuning.GridSearchCV) NaiveBayesTextClassifier(com.alibaba.alink.pipeline.classification.NaiveBayesTextClassifier) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) GridSearchCVModel(com.alibaba.alink.pipeline.tuning.GridSearchCVModel) Binarizer(com.alibaba.alink.pipeline.feature.Binarizer) BinaryClassificationTuningEvaluator(com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 2 with ParamGrid

use of com.alibaba.alink.pipeline.tuning.ParamGrid in project Alink by alibaba.

In class Chap20, method c_3:

/**
 * Runs a 4-fold cross-validated grid search for KMeans over the cluster count
 * (2 to 6) and the distance type (EUCLIDEAN, COSINE), scoring candidates with
 * {@code TuningClusterMetric.RI}. It then evaluates the selected model on the same
 * source data and prints the elapsed time.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_3() throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    AkSourceBatchOp vectorData = new AkSourceBatchOp()
        .setFilePath(Chap17.DATA_DIR + Chap17.VECTOR_FILE);

    KMeans kmeans = new KMeans()
        .setVectorCol(Chap17.VECTOR_COL_NAME)
        .setPredictionCol(Chap17.PREDICTION_COL_NAME);

    // Candidate hyper-parameters: every combination of K and distance type is tried.
    ParamGrid candidates = new ParamGrid()
        .addGrid(kmeans, KMeans.K, new Integer[] { 2, 3, 4, 5, 6 })
        .addGrid(kmeans, KMeans.DISTANCE_TYPE, new DistanceType[] { DistanceType.EUCLIDEAN, DistanceType.COSINE });

    GridSearchCV gridSearch = new GridSearchCV()
        .setNumFolds(4)
        .setEstimator(kmeans)
        .setParamGrid(candidates)
        .setTuningEvaluator(new ClusterTuningEvaluator()
            .setVectorCol(Chap17.VECTOR_COL_NAME)
            .setPredictionCol(Chap17.PREDICTION_COL_NAME)
            .setLabelCol(Chap17.LABEL_COL_NAME)
            .setTuningClusterMetric(TuningClusterMetric.RI))
        .enableLazyPrintTrainInfo();

    GridSearchCVModel tunedModel = gridSearch.fit(vectorData);

    // Score the selected model on the data it was tuned on.
    tunedModel
        .transform(vectorData)
        .link(new EvalClusterBatchOp()
            .setLabelCol(Chap17.LABEL_COL_NAME)
            .setVectorCol(Chap17.VECTOR_COL_NAME)
            .setPredictionCol(Chap17.PREDICTION_COL_NAME)
            .lazyPrintMetrics());
    BatchOperator.execute();

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : ParamGrid(com.alibaba.alink.pipeline.tuning.ParamGrid) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) KMeans(com.alibaba.alink.pipeline.clustering.KMeans) ClusterTuningEvaluator(com.alibaba.alink.pipeline.tuning.ClusterTuningEvaluator) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) GridSearchCV(com.alibaba.alink.pipeline.tuning.GridSearchCV) DistanceType(com.alibaba.alink.params.shared.clustering.HasKMeansDistanceType.DistanceType) GridSearchCVModel(com.alibaba.alink.pipeline.tuning.GridSearchCVModel) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 3 with ParamGrid

use of com.alibaba.alink.pipeline.tuning.ParamGrid in project Alink by alibaba.

In class Chap20, method c_1:

/**
 * Tunes the L1 regularization strength of a LogisticRegression with 5-fold
 * cross-validated grid search (metric: AUC), then evaluates the selected model
 * on the test data and prints the metrics under the title "GridSearchCV".
 *
 * <p>The positive class is the label value "2". It is passed to both the tuning
 * evaluator and the final evaluation so that the AUC used for model selection is
 * computed for the same class as the reported metrics, rather than relying on
 * whichever label the library would pick by default.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_1() throws Exception {
    // Shared by the tuning evaluator and the final evaluation; see method comment.
    final String positiveLabel = "2";

    BatchOperator<?> trainData = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TRAIN_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testData = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TEST_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);

    // Every column produced by the feature clause except the label is used as a feature.
    final String[] featureCols = ArrayUtils.removeElement(trainData.getColNames(), Chap10.LABEL_COL_NAME);

    LogisticRegression lr = new LogisticRegression()
        .setFeatureCols(featureCols)
        .setLabelCol(Chap10.LABEL_COL_NAME)
        .setPredictionCol(Chap10.PREDICTION_COL_NAME)
        .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME);
    Pipeline pipeline = new Pipeline().add(lr);

    GridSearchCV gridSearch = new GridSearchCV()
        .setNumFolds(5)
        .setEstimator(pipeline)
        .setParamGrid(new ParamGrid()
            .addGrid(lr, LogisticRegression.L_1, new Double[] { 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0 }))
        .setTuningEvaluator(new BinaryClassificationTuningEvaluator()
            .setLabelCol(Chap10.LABEL_COL_NAME)
            .setPositiveLabelValueString(positiveLabel)
            .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME)
            .setTuningBinaryClassMetric(TuningBinaryClassMetric.AUC))
        .enableLazyPrintTrainInfo();

    GridSearchCVModel bestModel = gridSearch.fit(trainData);
    bestModel
        .transform(testData)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString(positiveLabel)
            .setLabelCol(Chap10.LABEL_COL_NAME)
            .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("GridSearchCV"));
    BatchOperator.execute();
}
Also used : ParamGrid(com.alibaba.alink.pipeline.tuning.ParamGrid) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) GridSearchCV(com.alibaba.alink.pipeline.tuning.GridSearchCV) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) GridSearchCVModel(com.alibaba.alink.pipeline.tuning.GridSearchCVModel) BinaryClassificationTuningEvaluator(com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator) Pipeline(com.alibaba.alink.pipeline.Pipeline) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Aggregations

GridSearchCV (com.alibaba.alink.pipeline.tuning.GridSearchCV): 3; GridSearchCVModel (com.alibaba.alink.pipeline.tuning.GridSearchCVModel): 3; ParamGrid (com.alibaba.alink.pipeline.tuning.ParamGrid): 3; EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 2; AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 2; Pipeline (com.alibaba.alink.pipeline.Pipeline): 2; LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 2; BinaryClassificationTuningEvaluator (com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator): 2; Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 1; EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp): 1; LibSvmSourceBatchOp (com.alibaba.alink.operator.batch.source.LibSvmSourceBatchOp): 1; DistanceType (com.alibaba.alink.params.shared.clustering.HasKMeansDistanceType.DistanceType): 1; NaiveBayesTextClassifier (com.alibaba.alink.pipeline.classification.NaiveBayesTextClassifier): 1; KMeans (com.alibaba.alink.pipeline.clustering.KMeans): 1; Binarizer (com.alibaba.alink.pipeline.feature.Binarizer): 1; ClusterTuningEvaluator (com.alibaba.alink.pipeline.tuning.ClusterTuningEvaluator): 1