Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by Alibaba.
The class Chap10, method c_1.
/**
 * Trains a logistic regression model on the derived feature columns and evaluates it on the
 * test set.
 *
 * <p>The training and test sets are read from Ak files under {@code DATA_DIR} and projected
 * through {@code CLAUSE_CREATE_FEATURES}. Every resulting column except {@code LABEL_COL_NAME}
 * is passed to the trainer as a feature column. The method registers a preview of the first
 * five training rows, the training info, a {@code printImportance} callback, and the binary
 * classification metrics (positive label {@code "2"}) before running the batch job.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_1() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TRAIN_FILE)
        .select(CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TEST_FILE)
        .select(CLAUSE_CREATE_FEATURES);

    // All columns other than the label serve as features.
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);

    trainSet.lazyPrint(5, "< new features >");

    LogisticRegressionTrainBatchOp lrTrain = new LogisticRegressionTrainBatchOp()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME);
    LogisticRegressionPredictBatchOp lrPredict = new LogisticRegressionPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // Wire the graph: training data -> trainer, then (model, test data) -> predictor.
    trainSet.link(lrTrain);
    lrPredict.linkFrom(lrTrain, testSet);

    // Print the training info and hand the per-column importance to printImportance.
    lrTrain
        .lazyPrintTrainInfo()
        .lazyCollectTrainInfo(
            (LinearModelTrainInfo info) ->
                printImportance(info.getColNames(), info.getImportance()));

    lrPredict.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("2")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics());

    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by Alibaba.
The class Chap10, method c_2.
/**
 * Trains a logistic regression model with {@code setL1(0.01)} on the derived feature columns
 * and evaluates it on the test set.
 *
 * <p>The training and test sets are read from Ak files under {@code DATA_DIR} and projected
 * through {@code CLAUSE_CREATE_FEATURES}. Every resulting column except {@code LABEL_COL_NAME}
 * is passed to the trainer as a feature column. The trainer's L1 parameter is set to
 * {@code 0.01} (presumably the L1 regularization weight; confirm against the Alink docs).
 * The method registers a preview of the first five training rows, the training info, a
 * {@code printImportance} callback, and the binary classification metrics (positive label
 * {@code "2"}) before running the batch job.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_2() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TRAIN_FILE)
        .select(CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testSet = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TEST_FILE)
        .select(CLAUSE_CREATE_FEATURES);

    // All columns other than the label serve as features.
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);

    trainSet.lazyPrint(5, "< new features >");

    LogisticRegressionTrainBatchOp lrTrain = new LogisticRegressionTrainBatchOp()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setL1(0.01);
    LogisticRegressionPredictBatchOp lrPredict = new LogisticRegressionPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // Wire the graph: training data -> trainer, then (model, test data) -> predictor.
    trainSet.link(lrTrain);
    lrPredict.linkFrom(lrTrain, testSet);

    // Callback that forwards the per-column importance to printImportance.
    Consumer<LinearModelTrainInfo> importanceReporter =
        info -> printImportance(info.getColNames(), info.getImportance());
    lrTrain.lazyPrintTrainInfo().lazyCollectTrainInfo(importanceReporter);

    lrPredict.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("2")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics());

    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by Alibaba.
The class Chap10, method c_3_1.
/**
 * Fits a three-stage pipeline on the raw training data and evaluates it on the test data.
 *
 * <p>The stages are added in this order: a {@code OneHotEncoder} over
 * {@code CATEGORY_FEATURE_COL_NAMES} with {@code Encode.VECTOR}, a {@code VectorAssembler}
 * that writes {@code FEATURE_COL_NAMES} into {@code VEC_COL_NAME}, and a
 * {@code LogisticRegression} that reads {@code VEC_COL_NAME}. Binary classification metrics
 * (positive label {@code "2"}) are printed for the transformed test data.
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_3_1() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    Pipeline lrPipeline = new Pipeline()
        .add(
            new OneHotEncoder()
                .setSelectedCols(CATEGORY_FEATURE_COL_NAMES)
                .setEncode(Encode.VECTOR))
        .add(
            new VectorAssembler()
                .setSelectedCols(FEATURE_COL_NAMES)
                .setOutputCol(VEC_COL_NAME))
        .add(
            new LogisticRegression()
                .setVectorCol(VEC_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setPredictionDetailCol(PRED_DETAIL_COL_NAME));

    // Fit on the training data, then score the test data with the fitted model.
    BatchOperator<?> predictions = lrPipeline.fit(trainSet).transform(testSet);

    predictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("2")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics());

    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by Alibaba.
The class Chap11, method c_8.
/**
 * Trains a GBDT classifier on the sampled training file and evaluates it on the test file.
 *
 * <p>Feature columns are all columns of the test data except {@code LABEL_COL_NAME}. The
 * classifier is configured with 100 trees, max depth 5 and 256 max bins. Binary
 * classification metrics are printed under the title "GBDT with Stratified Sample".
 *
 * @throws Exception if building or executing the batch job fails
 */
static void c_8() throws Exception {
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    AkSourceBatchOp sampledTrainSet =
        new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_SAMPLE_FILE);

    // The feature list is derived from the test data's schema, minus the label column.
    String[] featureCols = ArrayUtils.removeElement(testSet.getColNames(), LABEL_COL_NAME);

    BatchOperator<?> predictions = new GbdtClassifier()
        .setNumTrees(100)
        .setMaxDepth(5)
        .setMaxBins(256)
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(sampledTrainSet)
        .transform(testSet);

    predictions.link(
        new EvalBinaryClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("GBDT with Stratified Sample"));

    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by Alibaba.
The class Chap11, method c_4.
/**
 * Compares logistic regression trained on the full training set with logistic regression
 * trained on a stratified sample of it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Load the feature/label file, register its statistics for printing and print the row
 *       count per {@code label} value.</li>
 *   <li>Call {@code Utils.splitTrainTestIfNotExist} with ratio 0.8 (presumably this writes
 *       the train/test files only when they are missing; confirm in {@code Utils}).</li>
 *   <li>Train and evaluate logistic regression on the full training file.</li>
 *   <li>If the sample file does not exist yet, write a stratified sample of the training
 *       data with strata ratios {@code "0:0.05,1:1.0"} on the label column.</li>
 *   <li>Train and evaluate logistic regression on the sampled training file.</li>
 * </ol>
 *
 * @throws Exception if building or executing any of the batch jobs fails
 */
static void c_4() throws Exception {
    AkSourceBatchOp allData = new AkSourceBatchOp().setFilePath(DATA_DIR + FEATURE_LABEL_FILE);

    allData
        .lazyPrintStatistics()
        .groupBy("label", "label, COUNT(*) AS cnt")
        .print();

    Utils.splitTrainTestIfNotExist(allData, DATA_DIR + TRAIN_FILE, DATA_DIR + TEST_FILE, 0.8);

    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // All columns other than the label serve as features.
    String[] featureCols = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);

    // Baseline: logistic regression on the full training set.
    BatchOperator<?> fullPredictions = new LogisticRegression()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(trainSet)
        .transform(testSet);
    fullPredictions.link(
        new EvalBinaryClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LogisticRegression"));
    BatchOperator.execute();

    // Create the stratified sample file once; later runs reuse the existing file.
    String samplePath = DATA_DIR + TRAIN_SAMPLE_FILE;
    boolean sampleMissing = !new File(samplePath).exists();
    if (sampleMissing) {
        trainSet
            .link(
                new StratifiedSampleBatchOp()
                    .setStrataRatios("0:0.05,1:1.0")
                    .setStrataCol(LABEL_COL_NAME))
            .link(new AkSinkBatchOp().setFilePath(samplePath));
        BatchOperator.execute();
    }

    // Same model configuration, trained on the stratified sample instead.
    AkSourceBatchOp sampledTrainSet = new AkSourceBatchOp().setFilePath(samplePath);
    BatchOperator<?> sampledPredictions = new LogisticRegression()
        .setFeatureCols(featureCols)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .fit(sampledTrainSet)
        .transform(testSet);
    sampledPredictions.link(
        new EvalBinaryClassBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LogisticRegression with Stratified Sample"));
    BatchOperator.execute();
}
Aggregations