Use of com.alibaba.alink.operator.batch.dataproc.StratifiedSampleBatchOp in the Alink project by Alibaba.
Class Chap07, method c_1_4.
/**
 * Runs stratified sampling over the CSV file at {@code DATA_DIR + ORIGIN_FILE},
 * once as a batch job and once as a stream job.
 *
 * <p>Both variants stratify on the {@code "category"} column and use the same
 * per-category ratio specification. The batch result is grouped by category and
 * the row count per category is printed; the stream result is printed as-is.
 *
 * @throws Exception if any of the underlying operator calls fails
 */
static void c_1_4() throws Exception {
    // Shared inputs for the batch and the stream variant.
    String originPath = DATA_DIR + ORIGIN_FILE;
    String strataRatios = "Iris-versicolor:0.2,Iris-setosa:0.4,Iris-virginica:0.8";

    // Batch: sample per category, then report how many rows each category kept.
    CsvSourceBatchOp batchSource = new CsvSourceBatchOp()
        .setFilePath(originPath)
        .setSchemaStr(SCHEMA_STRING);
    StratifiedSampleBatchOp batchSampler = new StratifiedSampleBatchOp()
        .setStrataCol("category")
        .setStrataRatios(strataRatios);
    batchSource
        .link(batchSampler)
        .groupBy("category", "category, COUNT(*) AS cnt")
        .print();

    // Stream: same sampling configuration applied to a streaming source.
    CsvSourceStreamOp streamSource = new CsvSourceStreamOp()
        .setFilePath(originPath)
        .setSchemaStr(SCHEMA_STRING);
    StratifiedSampleStreamOp streamSampler = new StratifiedSampleStreamOp()
        .setStrataCol("category")
        .setStrataRatios(strataRatios);
    streamSource.link(streamSampler).print();

    StreamOperator.execute();
}
Use of com.alibaba.alink.operator.batch.dataproc.StratifiedSampleBatchOp in the Alink project by Alibaba.
Class Chap11, method c_4.
/**
 * Trains and evaluates a logistic-regression classifier twice: first on the
 * full training split, then on a stratified sample of that split.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Load {@code DATA_DIR + FEATURE_LABEL_FILE}, request its statistics and
 *       print the row count per {@code label} value.</li>
 *   <li>Call {@code Utils.splitTrainTestIfNotExist} with a ratio of 0.8 to
 *       produce the train and test files.</li>
 *   <li>Fit on the train split, predict on the test split, and feed the result
 *       to a binary-classification evaluator titled "LogisticRegression".</li>
 *   <li>If the sampled training file does not exist yet, create it with
 *       strata ratios {@code "0:0.05,1:1.0"} on the label column.</li>
 *   <li>Repeat the fit/predict/evaluate step using the sampled training file,
 *       titled "LogisticRegression with Stratified Sample".</li>
 * </ol>
 *
 * @throws Exception if any of the underlying operator calls fails
 */
static void c_4() throws Exception {
    String sampledTrainPath = DATA_DIR + TRAIN_SAMPLE_FILE;

    // Inspect the complete data set before splitting it.
    AkSourceBatchOp fullSet = new AkSourceBatchOp().setFilePath(DATA_DIR + FEATURE_LABEL_FILE);
    fullSet
        .lazyPrintStatistics()
        .groupBy("label", "label, COUNT(*) AS cnt")
        .print();

    Utils.splitTrainTestIfNotExist(fullSet, DATA_DIR + TRAIN_FILE, DATA_DIR + TEST_FILE, 0.8);

    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Every column except the label is used as a feature.
    String[] featureColNames = ArrayUtils.removeElement(trainSet.getColNames(), LABEL_COL_NAME);

    // Baseline: train on the full training split.
    LogisticRegression baselineClassifier = new LogisticRegression()
        .setFeatureCols(featureColNames)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    EvalBinaryClassBatchOp baselineEvaluator = new EvalBinaryClassBatchOp()
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    baselineClassifier
        .fit(trainSet)
        .transform(testSet)
        .link(baselineEvaluator.lazyPrintMetrics("LogisticRegression"));
    BatchOperator.execute();

    // Build the stratified training sample only once; later runs reuse the file.
    if (!new File(sampledTrainPath).exists()) {
        StratifiedSampleBatchOp labelSampler = new StratifiedSampleBatchOp()
            .setStrataRatios("0:0.05,1:1.0")
            .setStrataCol(LABEL_COL_NAME);
        AkSinkBatchOp sampleSink = new AkSinkBatchOp().setFilePath(sampledTrainPath);
        trainSet.link(labelSampler).link(sampleSink);
        BatchOperator.execute();
    }

    // Same model configuration, trained on the stratified sample instead.
    AkSourceBatchOp sampledTrainSet = new AkSourceBatchOp().setFilePath(sampledTrainPath);
    LogisticRegression sampledClassifier = new LogisticRegression()
        .setFeatureCols(featureColNames)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    EvalBinaryClassBatchOp sampledEvaluator = new EvalBinaryClassBatchOp()
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    sampledClassifier
        .fit(sampledTrainSet)
        .transform(testSet)
        .link(sampledEvaluator.lazyPrintMetrics("LogisticRegression with Stratified Sample"));
    BatchOperator.execute();
}
Aggregations