use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.
the class Chap14 method c_4.
/**
 * Loads the saved feature pipeline model, declares the streaming training
 * source, and makes sure an initial logistic-regression model file exists.
 *
 * <p>When the file at {@code DATA_DIR + INIT_MODEL_FILE} is absent, a small
 * batch data set is passed through the feature pipeline, a logistic
 * regression model is trained on the result, and the model is written to
 * that file. When the file is already present, no training job is run.
 *
 * @throws Exception propagated from model loading or batch execution
 */
static void c_4() throws Exception {
    // Restore the previously saved feature-engineering pipeline.
    PipelineModel featurePipeline = PipelineModel.load(DATA_DIR + FEATURE_MODEL_FILE);

    // Streaming training source; it is only declared here and is not
    // consumed anywhere in the visible body of this method.
    CsvSourceStreamOp streamTrainData = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(SCHEMA_STRING);

    String initModelPath = DATA_DIR + INIT_MODEL_FILE;
    if (new File(initModelPath).exists()) {
        // The initial model has already been produced; nothing to train.
        return;
    }

    // Batch sample used to bootstrap the initial model.
    CsvSourceBatchOp batchTrainData = new CsvSourceBatchOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-small.csv")
        .setSchemaStr(SCHEMA_STRING);

    // Trainer for the initial batch model.
    LogisticRegressionTrainBatchOp initTrainer = new LogisticRegressionTrainBatchOp()
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setWithIntercept(true)
        .setMaxIter(10);

    // features -> trainer -> model file
    featurePipeline
        .transform(batchTrainData)
        .link(initTrainer)
        .link(new AkSinkBatchOp().setFilePath(initModelPath));

    BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.
the class Chap17 method c_2_2.
/**
 * Clusters the vectorized data set with KMeans (k = 2) twice and prints
 * evaluation metrics for each run.
 *
 * <p>Steps:
 * <ol>
 *   <li>If the file at {@code DATA_DIR + VECTOR_FILE} does not exist, read the
 *       original CSV, assemble {@code FEATURE_COL_NAMES} into
 *       {@code VECTOR_COL_NAME} (keeping only {@code LABEL_COL_NAME} as a
 *       reserved column) and write the result to that file.</li>
 *   <li>Train and apply a KMeans model through batch operators, print the
 *       model info, sample predictions, cluster metrics (titled
 *       "KMeans EUCLIDEAN") and the sorted predictions.</li>
 *   <li>Repeat through the {@code KMeans} pipeline estimator with
 *       {@code DistanceType.COSINE} and print its metrics
 *       (titled "KMeans COSINE").</li>
 * </ol>
 *
 * @throws Exception propagated from batch execution
 */
static void c_2_2() throws Exception {
    String vectorPath = DATA_DIR + VECTOR_FILE;

    // Build the vectorized data file once; later runs reuse it.
    if (!new File(vectorPath).exists()) {
        new CsvSourceBatchOp()
            .setFilePath(DATA_DIR + ORIGIN_FILE)
            .setSchemaStr(SCHEMA_STRING)
            .link(
                new VectorAssemblerBatchOp()
                    .setSelectedCols(FEATURE_COL_NAMES)
                    .setOutputCol(VECTOR_COL_NAME)
                    .setReservedCols(LABEL_COL_NAME)
            )
            .link(new AkSinkBatchOp().setFilePath(vectorPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp vectorData = new AkSourceBatchOp().setFilePath(vectorPath);
    vectorData.lazyPrint(5);

    // --- Run 1: explicit train / predict batch operators ---
    KMeansTrainBatchOp trainer = new KMeansTrainBatchOp()
        .setK(2)
        .setVectorCol(VECTOR_COL_NAME);
    KMeansPredictBatchOp predictor = new KMeansPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME);

    vectorData.link(trainer);
    predictor.linkFrom(trainer, vectorData);

    trainer.lazyPrintModelInfo();
    predictor.lazyPrint(5);

    predictor.link(
        new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics("KMeans EUCLIDEAN")
    );

    // Print the predictions ordered by predicted cluster, then by label.
    String sortClause = PREDICTION_COL_NAME + ", " + LABEL_COL_NAME;
    predictor
        .orderBy(sortClause, 200, false)
        .lazyPrint(-1, "all data");

    BatchOperator.execute();

    // --- Run 2: pipeline estimator with cosine distance ---
    new KMeans()
        .setK(2)
        .setDistanceType(DistanceType.COSINE)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .enableLazyPrintModelInfo()
        .fit(vectorData)
        .transform(vectorData)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("KMeans COSINE")
        );

    BatchOperator.execute();
}
Aggregations