use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap08 method c_7.
/**
 * Evaluates the stored LR and SVM prediction files as binary classification results.
 *
 * <p>The LR metrics are collected directly, summarized on stdout and written out as four
 * chart images. For the SVM predictions, printing and chart saving are registered as lazy
 * callbacks before the final {@code BatchOperator.execute()} call.
 *
 * @throws Exception propagated from metric collection, chart saving or job execution
 */
static void c_7() throws Exception {
    // --- LR: collect the metrics right away ---
    BinaryClassMetrics lrMetrics = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("1")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .linkFrom(new AkSourceBatchOp().setFilePath(DATA_DIR + LR_PRED_FILE))
        .collectMetrics();

    String lrSummary = "< LR >\n"
        + "AUC : " + lrMetrics.getAuc()
        + "\t Accuracy : " + lrMetrics.getAccuracy()
        + "\t Precision : " + lrMetrics.getPrecision()
        + "\t Recall : " + lrMetrics.getRecall()
        + "\n";
    System.out.println(lrSummary);
    System.out.println(lrMetrics);

    lrMetrics.saveRocCurveAsImage(DATA_DIR + "lr_roc.jpg", true);
    lrMetrics.saveRecallPrecisionCurveAsImage(DATA_DIR + "lr_recallprec.jpg", true);
    lrMetrics.saveLiftChartAsImage(DATA_DIR + "lr_lift.jpg", true);
    lrMetrics.saveKSAsImage(DATA_DIR + "lr_ks.jpg", true);

    // --- SVM: same charts, but produced from a lazily invoked callback ---
    Consumer<BinaryClassMetrics> saveSvmCharts = svmMetrics -> {
        try {
            svmMetrics.saveRocCurveAsImage(DATA_DIR + "svm_roc.jpg", true);
            svmMetrics.saveRecallPrecisionCurveAsImage(DATA_DIR + "svm_recallprec.jpg", true);
            svmMetrics.saveLiftChartAsImage(DATA_DIR + "svm_lift.jpg", true);
            svmMetrics.saveKSAsImage(DATA_DIR + "svm_ks.jpg", true);
        } catch (IOException e) {
            // A failed chart export is reported but does not abort the job.
            e.printStackTrace();
        }
    };

    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SVM_PRED_FILE)
        .link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString("1")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics()
                .lazyCollectMetrics(saveSvmCharts)
        );

    BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap09 method c_5.
/**
 * Trains one decision tree per split criterion (GINI, INFOGAIN, INFOGAINRATIO) on the
 * training file and evaluates each on the test file.
 *
 * <p>For every criterion the model info is printed and the tree is saved as
 * {@code tree_<criterion>.jpg} under {@code DATA_DIR}; binary classification metrics
 * (positive label {@code "p"}) are printed under the same title. All of this is registered
 * lazily and triggered by the single {@code BatchOperator.execute()} call at the end.
 *
 * @throws Exception propagated from operator linking or job execution
 */
static void c_5() throws Exception {
    // Wildcard-parameterized instead of raw types, matching the `model` declaration below.
    BatchOperator<?> trainData = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> testData = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    for (TreeType treeType : new TreeType[] { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO }) {
        // Shared heading for the model-info and metrics printouts of this criterion.
        String title = "< " + treeType.toString() + " >";

        Consumer<DecisionTreeModelInfo> saveTreeImage = new Consumer<DecisionTreeModelInfo>() {
            @Override
            public void accept(DecisionTreeModelInfo decisionTreeModelInfo) {
                try {
                    decisionTreeModelInfo.saveTreeAsImage(DATA_DIR + "tree_" + treeType.toString() + ".jpg", true);
                } catch (IOException e) {
                    // A failed image export is reported but does not abort the remaining work.
                    e.printStackTrace();
                }
            }
        };

        BatchOperator<?> model = trainData.link(
            new DecisionTreeTrainBatchOp()
                .setTreeType(treeType)
                .setFeatureCols(FEATURE_COL_NAMES)
                .setCategoricalCols(FEATURE_COL_NAMES)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintModelInfo(title)
                .lazyCollectModelInfo(saveTreeImage)
        );

        DecisionTreePredictBatchOp predictor = new DecisionTreePredictBatchOp()
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
        predictor.linkFrom(model, testData);

        predictor.link(
            new EvalBinaryClassBatchOp()
                .setPositiveLabelValueString("p")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics(title)
        );
    }

    BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap09 method c_4_a.
/**
 * Trains a naive Bayes model on the training file, predicts on the test file and
 * prints model details, sample predictions and binary classification metrics.
 *
 * <p>Besides the standard model-info printout, a callback dumps the category feature
 * info of the features {@code odor}, {@code spore_print_color} and {@code gill_color}
 * to stdout.
 *
 * @throws Exception propagated from operator linking or job execution
 */
static void c_4_a() throws Exception {
    AkSourceBatchOp trainSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSource = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    NaiveBayesTrainBatchOp nbTrainer = new NaiveBayesTrainBatchOp()
        .setFeatureCols(FEATURE_COL_NAMES)
        .setCategoricalCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME);
    NaiveBayesPredictBatchOp nbPredictor = new NaiveBayesPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    trainSource.link(nbTrainer);
    nbPredictor.linkFrom(nbTrainer, testSource);

    // Builds one text report covering the selected features and prints it in one go.
    Consumer<NaiveBayesModelInfo> printSelectedFeatureInfo = modelInfo -> {
        StringBuilder report = new StringBuilder();
        for (String feature : new String[] { "odor", "spore_print_color", "gill_color" }) {
            HashMap<Object, HashMap<Object, Double>> infoByKey =
                modelInfo.getCategoryFeatureInfo().get(feature);
            report.append("\nfeature:").append(feature);
            infoByKey.forEach((key, values) ->
                report.append("\n").append(key).append(" : ").append(values.toString()));
        }
        System.out.println(report.toString());
    };

    nbTrainer.lazyPrintModelInfo();
    nbTrainer.lazyCollectModelInfo(printSelectedFeatureInfo);

    nbPredictor.lazyPrint(10, "< Prediction >");
    nbPredictor.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("p")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics()
    );

    BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap09 method c_3.
/**
 * Loads the original CSV data, makes sure the train/test split files exist, and then
 * inspects the training file: a chi-square selector (top 3 features) prints its model
 * info, and the distinct values of the {@code veil_type} column are printed.
 *
 * @throws Exception propagated from reading, splitting or job execution
 */
static void c_3() throws Exception {
    String trainPath = DATA_DIR + TRAIN_FILE;
    String testPath = DATA_DIR + TEST_FILE;

    CsvSourceBatchOp originData = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(Utils.generateSchemaString(COL_NAMES, COL_TYPES));
    originData.lazyPrint(5, "< origin data >");

    // The split must happen before the training file is opened as a source below.
    Utils.splitTrainTestIfNotExist(originData, trainPath, testPath, 0.9);

    AkSourceBatchOp trainForSelector = new AkSourceBatchOp().setFilePath(trainPath);
    trainForSelector.link(
        new ChiSqSelectorBatchOp()
            .setSelectorType(SelectorType.NumTopFeatures)
            .setNumTopFeatures(3)
            .setSelectedCols(FEATURE_COL_NAMES)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintModelInfo("< Chi-Square Selector >")
    );

    AkSourceBatchOp trainForDistinct = new AkSourceBatchOp().setFilePath(trainPath);
    trainForDistinct.select("veil_type").distinct().lazyPrint(100);

    BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap11 method c_7.
/**
 * Compares tree-based classifiers trained on the sampled training file and evaluated on
 * the test file.
 *
 * <p>First, one decision tree per split criterion (GINI, INFOGAIN, INFOGAINRATIO) is
 * fitted and its binary classification metrics are printed; these run in the first
 * {@code BatchOperator.execute()}. Then a random forest (20 trees, max depth 4, 512 bins)
 * is fitted and evaluated in a second execution.
 *
 * <p>Feature columns are all columns of the test data except {@code LABEL_COL_NAME}.
 *
 * @throws Exception propagated from fitting, transforming or job execution
 */
static void c_7() throws Exception {
    AkSourceBatchOp train_sample = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_SAMPLE_FILE);
    AkSourceBatchOp test_data = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    String[] featureColNames = ArrayUtils.removeElement(test_data.getColNames(), LABEL_COL_NAME);

    for (TreeType treeType : new TreeType[] { TreeType.GINI, TreeType.INFOGAIN, TreeType.INFOGAINRATIO }) {
        new DecisionTreeClassifier()
            .setTreeType(treeType)
            .setFeatureCols(featureColNames)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .fit(train_sample)
            .transform(test_data)
            .link(
                new EvalBinaryClassBatchOp()
                    .setPositiveLabelValueString("1")
                    .setLabelCol(LABEL_COL_NAME)
                    .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                    .lazyPrintMetrics(treeType.toString())
            );
    }
    BatchOperator.execute();

    new RandomForestClassifier()
        .setNumTrees(20)
        .setMaxDepth(4)
        .setMaxBins(512)
        .setFeatureCols(featureColNames)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(train_sample)
        .transform(test_data)
        .link(
            new EvalBinaryClassBatchOp()
                // Positive label stated explicitly so the forest is scored against the same
                // positive class as the decision trees above, not a library default.
                .setPositiveLabelValueString("1")
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
                .lazyPrintMetrics("RandomForest with Stratified Sample")
        );
    BatchOperator.execute();
}
Aggregations