Search in sources :

Example 1 with EvalBinaryClassBatchOp

Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.

From the class Chap08, method c_7.

/**
 * Evaluates two stored prediction files (LR and SVM) with {@code EvalBinaryClassBatchOp},
 * using "1" as the positive label, and saves ROC, recall-precision, lift and KS charts
 * for each of them under {@code DATA_DIR}.
 *
 * <p>The LR metrics are collected immediately and printed; the SVM metrics are printed and
 * turned into charts through lazy callbacks, which are registered before
 * {@code BatchOperator.execute()} is called at the end of the method.
 *
 * @throws Exception propagated from the Alink operators or from saving the LR charts
 */
static void c_7() throws Exception {
    // --- LR: configure the evaluator, feed it the stored predictions, collect eagerly ---
    EvalBinaryClassBatchOp lrEvaluator = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("1")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    AkSourceBatchOp lrPredictions = new AkSourceBatchOp().setFilePath(DATA_DIR + LR_PRED_FILE);
    BinaryClassMetrics lrMetrics = lrEvaluator.linkFrom(lrPredictions).collectMetrics();

    // One-line summary of the headline metrics, followed by the full metrics dump.
    String lrSummary = "< LR >\n"
        + "AUC : " + lrMetrics.getAuc()
        + "\t Accuracy : " + lrMetrics.getAccuracy()
        + "\t Precision : " + lrMetrics.getPrecision()
        + "\t Recall : " + lrMetrics.getRecall()
        + "\n";
    System.out.println(lrSummary);
    System.out.println(lrMetrics);

    lrMetrics.saveRocCurveAsImage(DATA_DIR + "lr_roc.jpg", true);
    lrMetrics.saveRecallPrecisionCurveAsImage(DATA_DIR + "lr_recallprec.jpg", true);
    lrMetrics.saveLiftChartAsImage(DATA_DIR + "lr_lift.jpg", true);
    lrMetrics.saveKSAsImage(DATA_DIR + "lr_ks.jpg", true);

    // --- SVM: same evaluation, but printing and chart export are registered as lazy callbacks ---
    AkSourceBatchOp svmPredictions = new AkSourceBatchOp().setFilePath(DATA_DIR + SVM_PRED_FILE);
    Consumer<BinaryClassMetrics> saveSvmCharts = svmMetrics -> {
        try {
            svmMetrics.saveRocCurveAsImage(DATA_DIR + "svm_roc.jpg", true);
            svmMetrics.saveRecallPrecisionCurveAsImage(DATA_DIR + "svm_recallprec.jpg", true);
            svmMetrics.saveLiftChartAsImage(DATA_DIR + "svm_lift.jpg", true);
            svmMetrics.saveKSAsImage(DATA_DIR + "svm_ks.jpg", true);
        } catch (IOException e) {
            // A failed chart export is only reported; it does not abort the callback's caller.
            e.printStackTrace();
        }
    };
    svmPredictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics()
            .lazyCollectMetrics(saveSvmCharts));

    BatchOperator.execute();
}
Also used : BinaryClassMetrics(com.alibaba.alink.operator.common.evaluation.BinaryClassMetrics) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Consumer(java.util.function.Consumer) IOException(java.io.IOException) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 2 with EvalBinaryClassBatchOp

Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.

From the class Chap09, method c_5.

/**
 * Trains one decision tree per tree type (GINI, INFOGAIN, INFOGAINRATIO) on the training
 * file, predicts on the test file and registers a lazy print of the binary-classification
 * metrics (positive label "p") for each tree type.
 *
 * <p>For every tree type the model info is printed lazily under a {@code "< TYPE >"} title
 * and the tree is saved as {@code tree_<TYPE>.jpg} under {@code DATA_DIR}. All lazy
 * callbacks are registered inside the loop; {@code BatchOperator.execute()} is called once
 * after the loop.
 *
 * @throws Exception propagated from the Alink operators
 */
static void c_5() throws Exception {
    // Wildcard-typed instead of raw BatchOperator, consistent with 'model' below,
    // so generic checking stays enabled on link/linkFrom.
    BatchOperator<?> train_data = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> test_data = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    for (TreeType treeType : new TreeType[] { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO }) {
        final String typeName = treeType.toString();
        final String title = "< " + typeName + " >";

        // Saves the trained tree as an image; an export failure is only reported.
        Consumer<DecisionTreeModelInfo> saveTreeImage = decisionTreeModelInfo -> {
            try {
                decisionTreeModelInfo.saveTreeAsImage(DATA_DIR + "tree_" + typeName + ".jpg", true);
            } catch (IOException e) {
                e.printStackTrace();
            }
        };

        BatchOperator<?> model = train_data.link(
            new DecisionTreeTrainBatchOp()
                .setTreeType(treeType)
                .setFeatureCols(FEATURE_COL_NAMES)
                .setCategoricalCols(FEATURE_COL_NAMES)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintModelInfo(title)
                .lazyCollectModelInfo(saveTreeImage));

        DecisionTreePredictBatchOp predictor = new DecisionTreePredictBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
        predictor.linkFrom(model, test_data);

        predictor.link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString("p")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics(title));
    }

    BatchOperator.execute();
}
Also used : TreeType(com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Consumer(java.util.function.Consumer) DecisionTreeModelInfo(com.alibaba.alink.operator.common.tree.TreeModelInfo.DecisionTreeModelInfo) IOException(java.io.IOException) DecisionTreePredictBatchOp(com.alibaba.alink.operator.batch.classification.DecisionTreePredictBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) DecisionTreeTrainBatchOp(com.alibaba.alink.operator.batch.classification.DecisionTreeTrainBatchOp) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 3 with EvalBinaryClassBatchOp

Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.

From the class Chap09, method c_4_a.

/**
 * Trains a naive Bayes model on the training file, predicts on the test file, and registers
 * lazy outputs: the model info, a per-feature dump for three selected features, the first
 * 10 prediction rows, and the binary-classification metrics (positive label "p").
 *
 * <p>Nothing is printed until {@code BatchOperator.execute()} runs at the end.
 *
 * @throws Exception propagated from the Alink operators
 */
static void c_4_a() throws Exception {
    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    NaiveBayesTrainBatchOp trainer = new NaiveBayesTrainBatchOp()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setCategoricalCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME);
    NaiveBayesPredictBatchOp predictor = new NaiveBayesPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // Wire the graph: train on the training set, predict on the test set with the trained model.
    trainSet.link(trainer);
    predictor.linkFrom(trainer, testSet);

    trainer.lazyPrintModelInfo();

    // Prints, for each selected feature, every entry of its category-feature map.
    Consumer<NaiveBayesModelInfo> dumpSelectedFeatures = modelInfo -> {
        StringBuilder report = new StringBuilder();
        String[] selected = { "odor", "spore_print_color", "gill_color" };
        for (int i = 0; i < selected.length; i++) {
            String feature = selected[i];
            HashMap<Object, HashMap<Object, Double>> infoByKey =
                modelInfo.getCategoryFeatureInfo().get(feature);
            report.append("\nfeature:" + feature);
            for (Entry<Object, HashMap<Object, Double>> item : infoByKey.entrySet()) {
                report.append("\n" + item.getKey() + " : " + item.getValue().toString());
            }
        }
        System.out.println(report.toString());
    };
    trainer.lazyCollectModelInfo(dumpSelectedFeatures);

    predictor.lazyPrint(10, "< Prediction >");
    predictor.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("p")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics());

    BatchOperator.execute();
}
Also used : HashMap(java.util.HashMap) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp) Entry(java.util.Map.Entry) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) NaiveBayesModelInfo(com.alibaba.alink.operator.batch.classification.NaiveBayesModelInfo) NaiveBayesPredictBatchOp(com.alibaba.alink.operator.batch.classification.NaiveBayesPredictBatchOp) NaiveBayesTrainBatchOp(com.alibaba.alink.operator.batch.classification.NaiveBayesTrainBatchOp)

Example 4 with EvalBinaryClassBatchOp

Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.

From the class Chap11, method c_7.

/**
 * Fits decision trees (one per tree type) and then a random forest on the training sample,
 * applies each to the test file and lazily prints binary-classification metrics.
 *
 * <p>Feature columns are all columns of the test data except {@code LABEL_COL_NAME}.
 * {@code BatchOperator.execute()} is called twice: once after the decision-tree loop and
 * once after the random-forest pipeline.
 *
 * @throws Exception propagated from the Alink operators
 */
static void c_7() throws Exception {
    AkSourceBatchOp trainSample = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_SAMPLE_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Every column except the label is used as a feature.
    String[] featureColNames = ArrayUtils.removeElement(testSet.getColNames(), LABEL_COL_NAME);

    TreeType[] treeTypes = { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO };
    for (TreeType treeType : treeTypes) {
        DecisionTreeClassifier tree = new DecisionTreeClassifier()
            .setTreeType(treeType)
            .setFeatureCols(featureColNames)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
        tree.fit(trainSample)
            .transform(testSet)
            .link(
                new EvalBinaryClassBatchOp()
                    .setPositiveLabelValueString("1")
                    .setLabelCol(LABEL_COL_NAME)
                    .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                    .lazyPrintMetrics(treeType.toString()));
    }
    BatchOperator.execute();

    RandomForestClassifier forest = new RandomForestClassifier()
        .setNumTrees(20)
        .setMaxDepth(4)
        .setMaxBins(512)
        .setFeatureCols(featureColNames)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    // NOTE(review): unlike the decision-tree evaluators above, no positive label value is set
    // here — confirm this is intentional.
    forest.fit(trainSample)
        .transform(testSet)
        .link(
            new EvalBinaryClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics("RandomForest with Stratified Sample"));
    BatchOperator.execute();
}
Also used : TreeType(com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) DecisionTreeClassifier(com.alibaba.alink.pipeline.classification.DecisionTreeClassifier) RandomForestClassifier(com.alibaba.alink.pipeline.classification.RandomForestClassifier) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 5 with EvalBinaryClassBatchOp

Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.

From the class FmClassifierTest, method testFm.

/**
 * Trains an FM classifier on a five-row in-memory data set, predicts on the same rows,
 * evaluates the predictions with {@code EvalBinaryClassBatchOp} and checks that the first
 * extracted field is 0.6 (within 0.01).
 */
@Test
public void testFm() {
    BatchOperator<?> trainData = new MemSourceBatchOp(
        new Object[][] {
            { "1.1 2.0", 1.0 },
            { "2.1 3.1", 1.0 },
            { "3.1 2.2", 1.0 },
            { "1.2 3.2", 0.0 },
            { "1.2 4.2", 0.0 }
        },
        new String[] { "vec", "label" });

    FmClassifierTrainBatchOp adagrad = new FmClassifierTrainBatchOp()
        .setVectorCol("vec")
        .setLabelCol("label")
        .setNumEpochs(10)
        .setInitStdev(0.01)
        .setLearnRate(0.01)
        .setEpsilon(0.0001)
        .linkFrom(trainData);
    adagrad.lazyPrintModelInfo();
    adagrad.lazyPrintTrainInfo();

    BatchOperator<?> result = new FmPredictBatchOp()
        .setVectorCol("vec")
        .setPredictionCol("pred")
        .setPredictionDetailCol("details")
        .linkFrom(adagrad, trainData);

    // The evaluator output is passed through JsonValueBatchOp, which reads the "Data" column
    // with the three JSON paths below and writes them to the listed output columns.
    List<Row> eval = new EvalBinaryClassBatchOp()
        .setLabelCol("label")
        .setPredictionDetailCol("details")
        .linkFrom(result)
        .link(
            new JsonValueBatchOp()
                .setSelectedCol("Data")
                .setReservedCols(new String[] { "Statistics" })
                .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
                .setJsonPath("$.Accuracy", "$.AUC", "$.ConfusionMatrix"))
        .collect();

    // presumably field 0 is the Accuracy value — TODO confirm the output column order
    double actual = Double.parseDouble(eval.get(0).getField(0).toString());
    // JUnit's argument order is (expected, actual, delta); the original call had the first two
    // swapped, which makes the failure message report the values the wrong way round.
    Assert.assertEquals(0.6, actual, 0.01);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) JsonValueBatchOp(com.alibaba.alink.operator.batch.dataproc.JsonValueBatchOp) Row(org.apache.flink.types.Row) FmPredictBatchOp(com.alibaba.alink.operator.common.fm.FmPredictBatchOp) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp) Test(org.junit.Test)

Aggregations

EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 23; AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 18; LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 9; Pipeline (com.alibaba.alink.pipeline.Pipeline): 8; Row (org.apache.flink.types.Row): 5; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 4; VectorAssembler (com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler): 4; DocCountVectorizer (com.alibaba.alink.pipeline.nlp.DocCountVectorizer): 3; RegexTokenizer (com.alibaba.alink.pipeline.nlp.RegexTokenizer): 3; BinaryClassificationTuningEvaluator (com.alibaba.alink.pipeline.tuning.BinaryClassificationTuningEvaluator): 3; File (java.io.File): 3; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 2; LogisticRegressionPredictBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp): 2; LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp): 2; NaiveBayesModelInfo (com.alibaba.alink.operator.batch.classification.NaiveBayesModelInfo): 2; NaiveBayesPredictBatchOp (com.alibaba.alink.operator.batch.classification.NaiveBayesPredictBatchOp): 2; NaiveBayesTrainBatchOp (com.alibaba.alink.operator.batch.classification.NaiveBayesTrainBatchOp): 2; JsonValueBatchOp (com.alibaba.alink.operator.batch.dataproc.JsonValueBatchOp): 2; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 2; BinaryClassMetrics (com.alibaba.alink.operator.common.evaluation.BinaryClassMetrics): 2