Search in sources :

Example 1 with TreeType

use of com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType in project Alink by alibaba.

the class Chap09 method c_5.

/**
 * Builds one decision tree per split criterion ({@code GINI}, {@code INFOGAIN},
 * {@code INFOGAINRATIO}) from the training file and, for each of them, registers:
 * a lazy print of the model info, a callback that saves the tree as a JPG under
 * {@code DATA_DIR}, and a lazy print of binary-classification metrics computed on
 * the test file. A single {@code BatchOperator.execute()} call follows the loop.
 *
 * @throws Exception anything raised by the operators or by {@code BatchOperator.execute()}
 */
static void c_5() throws Exception {
    AkSourceBatchOp trainSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    TreeType[] criteria = { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO };
    for (TreeType treeType : criteria) {
        // Same title is used for the model-info print and the metrics print.
        final String title = "< " + treeType.toString() + " >";

        // Callback that writes the trained tree to an image file named after the criterion.
        // NOTE(review): the boolean argument is presumably an overwrite flag — confirm.
        Consumer<DecisionTreeModelInfo> imageExporter = modelInfo -> {
            try {
                modelInfo.saveTreeAsImage(DATA_DIR + "tree_" + treeType.toString() + ".jpg", true);
            } catch (IOException e) {
                // A failed image export is only reported; it does not abort the run.
                e.printStackTrace();
            }
        };

        BatchOperator<?> model = trainSource.link(
            new DecisionTreeTrainBatchOp()
                .setTreeType(treeType)
                .setFeatureCols(FEATURE_COL_NAMES)
                // All feature columns are declared categorical.
                .setCategoricalCols(FEATURE_COL_NAMES)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintModelInfo(title)
                .lazyCollectModelInfo(imageExporter)
        );

        DecisionTreePredictBatchOp predictor = new DecisionTreePredictBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
        predictor.linkFrom(model, testSource);

        // Evaluate the predictions, treating label value "p" as the positive class.
        predictor.link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString("p")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics(title)
        );
    }

    BatchOperator.execute();
}
Also used : TreeType(com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Consumer(java.util.function.Consumer) DecisionTreeModelInfo(com.alibaba.alink.operator.common.tree.TreeModelInfo.DecisionTreeModelInfo) IOException(java.io.IOException) DecisionTreePredictBatchOp(com.alibaba.alink.operator.batch.classification.DecisionTreePredictBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) DecisionTreeTrainBatchOp(com.alibaba.alink.operator.batch.classification.DecisionTreeTrainBatchOp) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 2 with TreeType

use of com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType in project Alink by alibaba.

the class Chap11 method c_7.

/**
 * Fits a {@code DecisionTreeClassifier} for each split criterion ({@code GINI},
 * {@code INFOGAIN}, {@code INFOGAINRATIO}) on the training sample and prints
 * binary-classification metrics on the test data, then does the same for a
 * {@code RandomForestClassifier} (20 trees, max depth 4, 512 max bins).
 * {@code BatchOperator.execute()} is called once after the decision-tree loop
 * and once after the random forest.
 *
 * @throws Exception anything raised by the operators or by {@code BatchOperator.execute()}
 */
static void c_7() throws Exception {
    AkSourceBatchOp trainSample = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_SAMPLE_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Every column of the test data except the label column is used as a feature.
    String[] featureColNames = ArrayUtils.removeElement(testSet.getColNames(), LABEL_COL_NAME);

    TreeType[] criteria = { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO };
    for (TreeType treeType : criteria) {
        BatchOperator<?> treePredictions = new DecisionTreeClassifier()
            .setTreeType(treeType)
            .setFeatureCols(featureColNames)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .fit(trainSample)
            .transform(testSet);

        // Metrics are titled with the criterion name; "1" is the positive label.
        treePredictions.link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString("1")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics(treeType.toString())
        );
    }
    BatchOperator.execute();

    BatchOperator<?> forestPredictions = new RandomForestClassifier()
        .setNumTrees(20)
        .setMaxDepth(4)
        .setMaxBins(512)
        .setFeatureCols(featureColNames)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(trainSample)
        .transform(testSet);

    // No positive label value is set explicitly for this evaluation.
    forestPredictions.link(
        new EvalBinaryClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("RandomForest with Stratified Sample")
    );
    BatchOperator.execute();
}
Also used : TreeType(com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) DecisionTreeClassifier(com.alibaba.alink.pipeline.classification.DecisionTreeClassifier) RandomForestClassifier(com.alibaba.alink.pipeline.classification.RandomForestClassifier) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)

Example 3 with TreeType

use of com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType in project Alink by alibaba.

the class Chap13 method c_5.

/**
 * Compares decision trees and random forests on table-format data, timing each run.
 *
 * <p>If the table-format training file does not exist yet, the sparse-vector train
 * and test files are first expanded into 784 double columns ({@code c_0} ..
 * {@code c_783}) plus the label column and written to the table files. Afterwards a
 * {@code DecisionTreeClassifier} is trained for each split criterion and a
 * {@code RandomForestClassifier} for each tree count in {2, 4, ..., 128}; every run
 * prints multi-class metrics and the elapsed time to standard output.
 *
 * @throws Exception anything raised by the operators or by {@code BatchOperator.execute()}
 */
static void c_5() throws Exception {
    BatchOperator.setParallelism(4);

    // Only the training table is checked; both tables are (re)generated together.
    File trainTable = new File(DATA_DIR + TABLE_TRAIN_FILE);
    if (!trainTable.exists()) {
        AkSourceBatchOp sparseTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
        AkSourceBatchOp sparseTest = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE);

        // Schema "c_0 double, c_1 double, ..., c_783 double" for the expanded vector.
        StringBuilder schemaBuilder = new StringBuilder("c_0 double");
        for (int col = 1; col < 784; col++) {
            schemaBuilder.append(", c_").append(col).append(" double");
        }
        String schemaStr = schemaBuilder.toString();

        BatchOperator<?> trainColumns = new VectorToColumns()
            .setVectorCol(VECTOR_COL_NAME)
            .setSchemaStr(schemaStr)
            .setReservedCols(LABEL_COL_NAME)
            .transform(sparseTrain);
        trainColumns.link(new AkSinkBatchOp().setFilePath(DATA_DIR + TABLE_TRAIN_FILE));

        BatchOperator<?> testColumns = new VectorToColumns()
            .setVectorCol(VECTOR_COL_NAME)
            .setSchemaStr(schemaStr)
            .setReservedCols(LABEL_COL_NAME)
            .transform(sparseTest);
        testColumns.link(new AkSinkBatchOp().setFilePath(DATA_DIR + TABLE_TEST_FILE));

        BatchOperator.execute();
    }

    AkSourceBatchOp trainTableData = new AkSourceBatchOp().setFilePath(DATA_DIR + TABLE_TRAIN_FILE);
    AkSourceBatchOp testTableData = new AkSourceBatchOp().setFilePath(DATA_DIR + TABLE_TEST_FILE);

    // Every column of the training table except the label column is a feature.
    final String[] featureColNames = ArrayUtils.removeElement(trainTableData.getColNames(), LABEL_COL_NAME);
    trainTableData.lazyPrint(5);

    Stopwatch timer = new Stopwatch();

    // One timed train/evaluate run per split criterion.
    TreeType[] criteria = { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO };
    for (TreeType treeType : criteria) {
        timer.reset();
        timer.start();

        BatchOperator<?> treePredictions = new DecisionTreeClassifier()
            .setTreeType(treeType)
            .setFeatureCols(featureColNames)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo()
            .fit(trainTableData)
            .transform(testTableData);
        treePredictions.link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("DecisionTreeClassifier " + treeType.toString())
        );
        BatchOperator.execute();

        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }

    // One timed train/evaluate run per forest size.
    int[] forestSizes = { 2, 4, 8, 16, 32, 64, 128 };
    for (int numTrees : forestSizes) {
        timer.reset();
        timer.start();

        BatchOperator<?> forestPredictions = new RandomForestClassifier()
            .setSubsamplingRatio(0.6)
            .setNumTreesOfInfoGain(numTrees)
            .setFeatureCols(featureColNames)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .enableLazyPrintModelInfo()
            .fit(trainTableData)
            .transform(testTableData);
        forestPredictions.link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("RandomForestClassifier : " + numTrees)
        );
        BatchOperator.execute();

        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }
}
Also used : TreeType(com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType) EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) RandomForestClassifier(com.alibaba.alink.pipeline.classification.RandomForestClassifier) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) VectorToColumns(com.alibaba.alink.pipeline.dataproc.format.VectorToColumns) DecisionTreeClassifier(com.alibaba.alink.pipeline.classification.DecisionTreeClassifier) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File)

Aggregations

AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp)3 TreeType (com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType)3 EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)2 DecisionTreeClassifier (com.alibaba.alink.pipeline.classification.DecisionTreeClassifier)2 RandomForestClassifier (com.alibaba.alink.pipeline.classification.RandomForestClassifier)2 Stopwatch (com.alibaba.alink.common.utils.Stopwatch)1 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)1 DecisionTreePredictBatchOp (com.alibaba.alink.operator.batch.classification.DecisionTreePredictBatchOp)1 DecisionTreeTrainBatchOp (com.alibaba.alink.operator.batch.classification.DecisionTreeTrainBatchOp)1 EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp)1 AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)1 DecisionTreeModelInfo (com.alibaba.alink.operator.common.tree.TreeModelInfo.DecisionTreeModelInfo)1 VectorToColumns (com.alibaba.alink.pipeline.dataproc.format.VectorToColumns)1 File (java.io.File)1 IOException (java.io.IOException)1 Consumer (java.util.function.Consumer)1