Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class Chap02, method c_6.
/**
 * Demonstrates saving models to files and reading them back.
 *
 * <p>Part one trains a {@code C45TrainBatchOp} on fourteen in-memory weather rows, writes the
 * model to {@code DATA_DIR + TREE_MODEL_FILE}, then reads that file back, requests a lazy print
 * of the model info and saves the tree as {@code DATA_DIR + "tree_model.png"}.
 *
 * <p>Part two fits a pipeline ({@code Select} adding {@code x2 = x*x}, followed by
 * {@code LinearRegression}) on nine (x, gmv) rows, saves the fitted pipeline to
 * {@code DATA_DIR + PIPELINE_MODEL_FILE}, loads it again, prints every stage with its index and
 * requests a lazy print of the regression model info.
 *
 * @throws Exception if any of the invoked operations fails
 */
static void c_6() throws Exception {
    // ---- Part 1: decision tree model written to and read from an .ak file ----
    Row[] weatherRows = new Row[] {
        Row.of("sunny", 85.0, 85.0, false, "no"),
        Row.of("sunny", 80.0, 90.0, true, "no"),
        Row.of("overcast", 83.0, 78.0, false, "yes"),
        Row.of("rainy", 70.0, 96.0, false, "yes"),
        Row.of("rainy", 68.0, 80.0, false, "yes"),
        Row.of("rainy", 65.0, 70.0, true, "no"),
        Row.of("overcast", 64.0, 65.0, true, "yes"),
        Row.of("sunny", 72.0, 95.0, false, "no"),
        Row.of("sunny", 69.0, 70.0, false, "yes"),
        Row.of("rainy", 75.0, 80.0, false, "yes"),
        Row.of("sunny", 75.0, 70.0, true, "yes"),
        Row.of("overcast", 72.0, 90.0, true, "yes"),
        Row.of("overcast", 81.0, 75.0, false, "yes"),
        Row.of("rainy", 71.0, 80.0, true, "no")
    };
    String[] weatherCols = new String[] { "outlook", "Temperature", "Humidity", "Windy", "play" };
    MemSourceBatchOp source = new MemSourceBatchOp(weatherRows, weatherCols);

    C45TrainBatchOp treeTrainer = new C45TrainBatchOp()
        .setFeatureCols("outlook", "Temperature", "Humidity", "Windy")
        .setCategoricalCols("outlook", "Windy")
        .setLabelCol("play");
    source.link(treeTrainer);

    AkSinkBatchOp treeModelSink = new AkSinkBatchOp()
        .setFilePath(DATA_DIR + TREE_MODEL_FILE)
        .setOverwriteSink(true);
    treeTrainer.link(treeModelSink);
    BatchOperator.execute();

    // Callback that renders the collected tree model to a PNG; an I/O failure is only reported.
    Consumer<DecisionTreeModelInfo> saveTreeImage = new Consumer<DecisionTreeModelInfo>() {
        @Override
        public void accept(DecisionTreeModelInfo modelInfo) {
            try {
                modelInfo.saveTreeAsImage(DATA_DIR + "tree_model.png", true);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    };
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + TREE_MODEL_FILE)
        .link(new DecisionTreeModelInfoBatchOp()
            .lazyPrintModelInfo()
            .lazyCollectModelInfo(saveTreeImage));
    BatchOperator.execute();

    // ---- Part 2: pipeline model saved, reloaded and inspected ----
    Row[] gmvRows = new Row[] {
        Row.of(2009, 0.5),
        Row.of(2010, 9.36),
        Row.of(2011, 52.0),
        Row.of(2012, 191.0),
        Row.of(2013, 350.0),
        Row.of(2014, 571.0),
        Row.of(2015, 912.0),
        Row.of(2016, 1207.0),
        Row.of(2017, 1682.0)
    };
    MemSourceBatchOp trainSet = new MemSourceBatchOp(gmvRows, new String[] { "x", "gmv" });

    Select addSquareTerm = new Select().setClause("*, x*x AS x2");
    LinearRegression regression = new LinearRegression()
        .setFeatureCols("x", "x2")
        .setLabelCol("gmv")
        .setPredictionCol("pred");
    Pipeline pipeline = new Pipeline().add(addSquareTerm).add(regression);

    String pipelineModelPath = DATA_DIR + PIPELINE_MODEL_FILE;
    pipeline.fit(trainSet).save(pipelineModelPath, true);
    BatchOperator.execute();

    PipelineModel pipelineModel = PipelineModel.load(pipelineModelPath);
    TransformerBase<?>[] stages = pipelineModel.getTransformers();
    int stageIndex = 0;
    for (TransformerBase<?> stage : stages) {
        System.out.println(stageIndex + "\t" + stage);
        stageIndex++;
    }

    // The second stage (index 1) is the fitted regression model.
    LinearRegressionModel regressionModel = (LinearRegressionModel) stages[1];
    regressionModel.getModelData().link(new LinearRegModelInfoBatchOp().lazyPrintModelInfo());
    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class Chap08, method c_9.
/**
 * Fits an {@code FmClassifier} (10 epochs, learn rate 0.5, 2 factors) on the training file,
 * predicts on the test file and links the predictions to an {@code EvalBinaryClassBatchOp}
 * that uses "1" as the positive label and lazily prints its metrics under the title "FM".
 * Lazy printing of the train info and model info is enabled on the classifier as well.
 *
 * @throws Exception if any of the invoked operations fails
 */
static void c_9() throws Exception {
    AkSourceBatchOp trainData = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testData = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    FmClassifier classifier = new FmClassifier()
        .setNumEpochs(10)
        .setLearnRate(0.5)
        .setNumFactor(2)
        .setFeatureCols(FEATURE_COL_NAMES)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .enableLazyPrintTrainInfo()
        .enableLazyPrintModelInfo();

    BatchOperator<?> predictions = classifier.fit(trainData).transform(testData);

    EvalBinaryClassBatchOp evaluator = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("1")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics("FM");
    predictions.link(evaluator);

    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class Chap08, method c_8.
/**
 * Compares three linear classifiers on degree-2 polynomial features.
 *
 * <p>A two-stage pipeline ({@code VectorAssembler} into {@code VEC_COL_NAME + "_0"}, then
 * {@code VectorPolynomialExpand} with degree 2 into {@code VEC_COL_NAME}) is fitted on the
 * training file and applied to both the training and test data. One row of the expanded
 * training data is lazily printed. Then {@code LinearSvm}, {@code LogisticRegression} and
 * {@code LogisticRegression} with {@code OptimMethod.Newton} are each fitted on the expanded
 * training data and evaluated on the expanded test data with "1" as the positive label.
 *
 * @throws Exception if any of the invoked operations fails
 */
static void c_8() throws Exception {
    BatchOperator<?> rawTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> rawTest = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Feature expansion: assemble the feature columns into a vector, then expand it.
    Pipeline expansionPipeline = new Pipeline()
        .add(new VectorAssembler()
            .setSelectedCols(FEATURE_COL_NAMES)
            .setOutputCol(VEC_COL_NAME + "_0"))
        .add(new VectorPolynomialExpand()
            .setSelectedCol(VEC_COL_NAME + "_0")
            .setOutputCol(VEC_COL_NAME)
            .setDegree(2));
    PipelineModel featureExpand = expansionPipeline.fit(rawTrain);

    BatchOperator<?> expandedTrain = featureExpand.transform(rawTrain);
    BatchOperator<?> expandedTest = featureExpand.transform(rawTest);
    expandedTrain.lazyPrint(1);

    // Linear SVM.
    LinearSvm svm = new LinearSvm()
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    svm.fit(expandedTrain)
        .transform(expandedTest)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LinearSVM"));

    // Logistic regression with its default optimizer settings.
    LogisticRegression logistic = new LogisticRegression()
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    logistic.fit(expandedTrain)
        .transform(expandedTest)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LogisticRegression"));

    // Logistic regression with the Newton optimization method.
    LogisticRegression newtonLogistic = new LogisticRegression()
        .setOptimMethod(OptimMethod.Newton)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    newtonLogistic.fit(expandedTrain)
        .transform(expandedTest)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("1")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("LogisticRegression + OptimMethod.Newton"));

    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class Chap09, method c_4_b.
/**
 * Trains a naive Bayes model on the two categorical features "odor" and "gill_color",
 * prints the per-category feature info collected from the model, and evaluates predictions
 * on the test file with "p" as the positive label.
 *
 * <p>For each of the two features the model-info callback prints a {@code feature:<name>}
 * header followed by one {@code <key> : <value map>} line per entry of that feature's map.
 * Ten prediction rows are lazily printed under the title "&lt; Prediction &gt;".
 *
 * @throws Exception if any of the invoked operations fails
 */
static void c_4_b() throws Exception {
    AkSourceBatchOp trainData = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testData = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    NaiveBayesTrainBatchOp trainer = new NaiveBayesTrainBatchOp()
        .setFeatureCols("odor", "gill_color")
        .setCategoricalCols("odor", "gill_color")
        .setLabelCol(LABEL_COL_NAME);
    NaiveBayesPredictBatchOp predictor = new NaiveBayesPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

    // Wire the graph: training data -> trainer; (model, test data) -> predictor.
    trainData.link(trainer);
    predictor.linkFrom(trainer, testData);

    trainer.lazyCollectModelInfo(new Consumer<NaiveBayesModelInfo>() {
        @Override
        public void accept(NaiveBayesModelInfo modelInfo) {
            String[] features = new String[] { "odor", "gill_color" };
            StringBuilder report = new StringBuilder();
            for (String feature : features) {
                HashMap<Object, HashMap<Object, Double>> featureInfo =
                    modelInfo.getCategoryFeatureInfo().get(feature);
                report.append("\nfeature:").append(feature);
                // One line per entry of this feature's map.
                featureInfo.forEach((key, value) ->
                    report.append("\n").append(key).append(" : ").append(value.toString()));
            }
            System.out.println(report);
        }
    });

    EvalBinaryClassBatchOp evaluator = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("p")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics();
    predictor.lazyPrint(10, "< Prediction >").link(evaluator);

    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class Chap10, method c_3_2.
/**
 * Fits a two-stage pipeline on the training file: a {@code FeatureHasher} writing to
 * {@code VEC_COL_NAME}, followed by {@code LogisticRegression} on that vector column.
 * The fitted pipeline is applied to the test file and the result is linked to an
 * {@code EvalBinaryClassBatchOp} that uses "2" as the positive label and lazily prints
 * its metrics.
 *
 * @throws Exception if any of the invoked operations fails
 */
static void c_3_2() throws Exception {
    BatchOperator<?> trainData = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    BatchOperator<?> testData = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    FeatureHasher hasher = new FeatureHasher()
        .setSelectedCols(FEATURE_COL_NAMES)
        .setCategoricalCols(CATEGORY_FEATURE_COL_NAMES)
        .setOutputCol(VEC_COL_NAME);
    LogisticRegression classifier = new LogisticRegression()
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME);
    Pipeline pipeline = new Pipeline().add(hasher).add(classifier);

    BatchOperator<?> predictions = pipeline.fit(trainData).transform(testData);
    predictions.link(new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("2")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .lazyPrintMetrics());

    BatchOperator.execute();
}
Aggregations