Use of com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator in project Alink by Alibaba.
From class Chap23, method c_1.
/**
 * Trains and evaluates several binary classifiers on the LibSVM-format
 * {@code labeledBow.feat} files found under {@code ORIGIN_DATA_DIR/train} and
 * {@code ORIGIN_DATA_DIR/test}.
 *
 * <p>The numeric {@code label} column is rewritten to {@code 'pos'} when it is greater than 5
 * and {@code 'neg'} otherwise; the {@code features} column is renamed to
 * {@code VECTOR_COL_NAME}. The following models are then fitted on the training set and scored
 * on the test set, each run being triggered by its own {@code BatchOperator.execute()}:
 * <ol>
 *   <li>{@code NaiveBayesTextClassifier} with model type "Multinomial";</li>
 *   <li>a {@code Binarizer} followed by {@code NaiveBayesTextClassifier} with model type
 *       "Bernoulli";</li>
 *   <li>{@code LogisticRegression} with its default settings;</li>
 *   <li>{@code LogisticRegression} wrapped in a 6-fold {@code GridSearchCV} over
 *       {@code MAX_ITER}, selected by AUC.</li>
 * </ol>
 *
 * @throws Exception propagated from the Alink operators / job execution
 */
static void c_1() throws Exception {
    final String featFileName = "labeledBow.feat";
    // Same projection is applied to both data sets: binary label + renamed vector column.
    final String relabelClause =
        "CASE WHEN label>5 THEN 'pos' ELSE 'neg' END AS label, " + "features AS " + VECTOR_COL_NAME;

    BatchOperator<?> train_set = new LibSvmSourceBatchOp()
        .setFilePath(ORIGIN_DATA_DIR + "train" + File.separator + featFileName)
        .setStartIndex(0);

    // Inspect the raw training data before the labels are collapsed to pos/neg.
    train_set.lazyPrint(1, "train_set");
    train_set
        .groupBy("label", "label, COUNT(label) AS cnt")
        .orderBy("label", 100)
        .lazyPrint(-1, "labels of train_set");

    BatchOperator<?> test_set = new LibSvmSourceBatchOp()
        .setFilePath(ORIGIN_DATA_DIR + "test" + File.separator + featFileName)
        .setStartIndex(0);

    train_set = train_set.select(relabelClause);
    test_set = test_set.select(relabelClause);

    train_set.lazyPrint(1, "train_set");

    // 1) Multinomial naive Bayes.
    new NaiveBayesTextClassifier()
        .setModelType("Multinomial")
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .enableLazyPrintModelInfo()
        .fit(train_set)
        .transform(test_set)
        .link(posLabelMetrics("NaiveBayesTextClassifier + Multinomial"));
    BatchOperator.execute();

    // 2) Bernoulli naive Bayes on binarized vectors.
    Pipeline bernoulliPipeline = new Pipeline()
        .add(
            new Binarizer()
                .setSelectedCol(VECTOR_COL_NAME)
                .enableLazyPrintTransformData(1, "After Binarizer")
        )
        .add(
            new NaiveBayesTextClassifier()
                .setModelType("Bernoulli")
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .enableLazyPrintModelInfo()
        );
    bernoulliPipeline
        .fit(train_set)
        .transform(test_set)
        .link(posLabelMetrics("Binarizer + NaiveBayesTextClassifier + Bernoulli"));
    BatchOperator.execute();

    // 3) Plain logistic regression, printing train and model info.
    new LogisticRegression()
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .enableLazyPrintTrainInfo("< LR train info >")
        .enableLazyPrintModelInfo("< LR model info >")
        .fit(train_set)
        .transform(test_set)
        .link(posLabelMetrics("LogisticRegression"));
    BatchOperator.execute();

    // 4) Logistic regression with MAX_ITER chosen by cross-validated grid search.
    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    LogisticRegression lr = new LogisticRegression()
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    GridSearchCV gridSearch = new GridSearchCV()
        .setEstimator(new Pipeline().add(lr))
        .setParamGrid(
            new ParamGrid()
                .addGrid(lr, LogisticRegression.MAX_ITER, new Integer[] { 10, 20, 30, 40, 50, 60, 80, 100 })
        )
        .setTuningEvaluator(
            new BinaryClassificationTuningEvaluator()
                .setLabelCol(LABEL_COL_NAME)
                .setPositiveLabelValueString("pos")
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .setTuningBinaryClassMetric(TuningBinaryClassMetric.AUC)
        )
        .setNumFolds(6)
        .enableLazyPrintTrainInfo();

    GridSearchCVModel bestModel = gridSearch.fit(train_set);

    // NOTE(review): this metrics title is identical to the untuned LogisticRegression run above.
    bestModel
        .transform(test_set)
        .link(posLabelMetrics("LogisticRegression"));
    BatchOperator.execute();
}

/**
 * Builds the evaluation operator shared by every model in {@code c_1}: a binary-class
 * evaluation with "pos" as the positive label, reading {@code LABEL_COL_NAME} and
 * {@code PRED_DETAIL_COL_NAME}, with a lazy metrics print registered under the given title.
 *
 * @param metricsTitle title passed to {@code lazyPrintMetrics}
 * @return the configured evaluation operator, ready to be linked after a prediction result
 */
private static BatchOperator<?> posLabelMetrics(String metricsTitle) {
    return new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("pos")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics(metricsTitle);
}
Use of com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator in project Alink by Alibaba.
From class Chap20, method c_1.
/**
 * Tunes the L1 parameter of a {@code LogisticRegression} with a 5-fold {@code GridSearchCV}
 * (selection metric: AUC) on the Chap10 training file, then evaluates the best model on the
 * Chap10 test file and lazily prints the binary-classification metrics under the title
 * "GridSearchCV".
 *
 * <p>Both data sets are read with {@code AkSourceBatchOp} and projected through
 * {@code Chap10.CLAUSE_CREATE_FEATURES}; every resulting column except
 * {@code Chap10.LABEL_COL_NAME} is used as a feature.
 *
 * @throws Exception propagated from the Alink operators / job execution
 */
static void c_1() throws Exception {
    // Label value treated as the positive class. It is set on BOTH the tuning evaluator and
    // the final evaluation so that model selection and reporting refer to the same class
    // instead of the tuning step relying on an implicit default.
    final String positiveLabel = "2";

    BatchOperator<?> train_data = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TRAIN_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);
    BatchOperator<?> test_data = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TEST_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);

    // Feature columns = all columns of the projected training data minus the label column.
    final String[] new_features = ArrayUtils.removeElement(train_data.getColNames(), Chap10.LABEL_COL_NAME);

    LogisticRegression lr = new LogisticRegression()
        .setFeatureCols(new_features)
        .setLabelCol(Chap10.LABEL_COL_NAME)
        .setPredictionCol(Chap10.PREDICTION_COL_NAME)
        .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME);

    Pipeline pipeline = new Pipeline().add(lr);

    GridSearchCV gridSearch = new GridSearchCV()
        .setNumFolds(5)
        .setEstimator(pipeline)
        .setParamGrid(
            new ParamGrid()
                .addGrid(
                    lr,
                    LogisticRegression.L_1,
                    new Double[] { 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0 }
                )
        )
        .setTuningEvaluator(
            new BinaryClassificationTuningEvaluator()
                .setLabelCol(Chap10.LABEL_COL_NAME)
                .setPositiveLabelValueString(positiveLabel)
                .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME)
                .setTuningBinaryClassMetric(TuningBinaryClassMetric.AUC)
        )
        .enableLazyPrintTrainInfo();

    GridSearchCVModel bestModel = gridSearch.fit(train_data);

    bestModel
        .transform(test_data)
        .link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString(positiveLabel)
                .setLabelCol(Chap10.LABEL_COL_NAME)
                .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics("GridSearchCV")
        );
    BatchOperator.execute();
}
Use of com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator in project Alink by Alibaba.
From class Chap20, method c_2.
/**
 * Tunes a {@code GbdtClassifier} with {@code RandomSearchTVSplit} (numIter = 20,
 * trainRatio = 0.8, selection metric: F1) on the Chap11 training sample, evaluates the best
 * model on the Chap11 test file, and prints the elapsed time of the whole run to stdout.
 *
 * <p>Searched parameters: {@code NUM_TREES}, {@code MAX_DEPTH}, {@code MAX_BINS} and
 * {@code LEARNING_RATE}. Every column of the training sample except
 * {@code Chap11.LABEL_COL_NAME} is used as a feature.
 *
 * @throws Exception propagated from the Alink operators / job execution
 */
static void c_2() throws Exception {
    // Label value treated as the positive class. F1 depends on which class is positive, so it
    // is set on BOTH the tuning evaluator and the final evaluation; otherwise the search could
    // optimize F1 for whatever class the evaluator picks by default.
    final String positiveLabel = "1";

    Stopwatch sw = new Stopwatch();
    sw.start();

    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    // Wildcard-typed (not raw) operators, matching the other tuning examples.
    BatchOperator<?> train_sample = new AkSourceBatchOp()
        .setFilePath(Chap11.DATA_DIR + Chap11.TRAIN_SAMPLE_FILE);
    BatchOperator<?> test_data = new AkSourceBatchOp()
        .setFilePath(Chap11.DATA_DIR + Chap11.TEST_FILE);

    // Feature columns = all columns of the training sample minus the label column.
    final String[] featuresColNames = ArrayUtils.removeElement(train_sample.getColNames(), Chap11.LABEL_COL_NAME);

    GbdtClassifier gbdt = new GbdtClassifier()
        .setFeatureCols(featuresColNames)
        .setLabelCol(Chap11.LABEL_COL_NAME)
        .setPredictionCol(Chap11.PREDICTION_COL_NAME)
        .setPredictionDetailCol(Chap11.PRED_DETAIL_COL_NAME);

    RandomSearchTVSplit randomSearch = new RandomSearchTVSplit()
        .setNumIter(20)
        .setTrainRatio(0.8)
        .setEstimator(gbdt)
        .setParamDist(
            new ParamDist()
                .addDist(gbdt, GbdtClassifier.NUM_TREES, ValueDist.randArray(new Integer[] { 50, 100 }))
                .addDist(gbdt, GbdtClassifier.MAX_DEPTH, ValueDist.randInteger(4, 10))
                .addDist(gbdt, GbdtClassifier.MAX_BINS, ValueDist.randArray(new Integer[] { 64, 128, 256, 512 }))
                .addDist(gbdt, GbdtClassifier.LEARNING_RATE, ValueDist.randArray(new Double[] { 0.3, 0.1, 0.01 }))
        )
        .setTuningEvaluator(
            new BinaryClassificationTuningEvaluator()
                .setLabelCol(Chap11.LABEL_COL_NAME)
                .setPositiveLabelValueString(positiveLabel)
                .setPredictionDetailCol(Chap11.PRED_DETAIL_COL_NAME)
                .setTuningBinaryClassMetric(TuningBinaryClassMetric.F1)
        )
        .enableLazyPrintTrainInfo();

    RandomSearchTVSplitModel bestModel = randomSearch.fit(train_sample);

    bestModel
        .transform(test_data)
        .link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString(positiveLabel)
                .setLabelCol(Chap11.LABEL_COL_NAME)
                .setPredictionDetailCol(Chap11.PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics()
        );
    BatchOperator.execute();

    sw.stop();
    System.out.println(sw.getElapsedTimeSpan());
}
Aggregations