use of com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp in project Alink by alibaba.
the class Chap14 method c_6.
/**
 * Runs FTRL online training on a CSV stream, passes the produced models through a
 * model filter, and uses the filtered models for streaming prediction and evaluation.
 *
 * <p>Three outputs are printed, each tagged through an {@code out_type} column:
 * the filtered models ({@code 'Model'}), a sample of the predictions
 * ({@code 'Pred Sample'}) and the evaluation metrics ({@code 'Eval Metric'}).
 *
 * @throws Exception if any of the invoked operators or the stream execution fails
 */
static void c_6() throws Exception {
    // Streaming CSV source; the first line of the file is ignored.
    CsvSourceStreamOp source = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(SCHEMA_STRING)
        .setIgnoreFirstLine(true);

    // Previously saved feature-engineering pipeline model.
    PipelineModel featureModel = PipelineModel.load(DATA_DIR + FEATURE_MODEL_FILE);

    // Split with fraction 0.5: the main output is used for training,
    // side output 0 for prediction/evaluation.
    SplitStreamOp splitter = new SplitStreamOp()
        .setFraction(0.5)
        .linkFrom(source);
    StreamOperator<?> trainStream = featureModel.transform(splitter);
    StreamOperator<?> testStream = featureModel.transform(splitter.getSideOutput(0));

    // Initial model, read from the .ak file.
    AkSourceBatchOp initialModel = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + INIT_MODEL_FILE);

    // FTRL online training, seeded with the initial model.
    FtrlTrainStreamOp modelStream = new FtrlTrainStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(NUM_HASH_FEATURES)
        .linkFrom(trainStream);

    // Model filter configured with accuracy/AUC thresholds; it receives the model
    // stream together with the training stream.
    // NOTE(review): presumably only models meeting both thresholds are emitted - confirm.
    FtrlModelFilterStreamOp filteredModels = new FtrlModelFilterStreamOp()
        .setPositiveLabelValueString("1")
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setAccuracyThreshold(0.83)
        .setAucThreshold(0.71)
        .linkFrom(modelStream, trainStream);
    filteredModels
        .select("'Model' AS out_type, *")
        .print();

    // Streaming prediction on the test stream, driven by the filtered models.
    FtrlPredictStreamOp predictions = new FtrlPredictStreamOp(initialModel)
        .setVectorCol(VEC_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setReservedCols(new String[] { LABEL_COL_NAME })
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .linkFrom(filteredModels, testStream);
    predictions
        .sample(0.0001)
        .select("'Pred Sample' AS out_type, *")
        .print();

    // Binary-classification evaluation; selected metrics are extracted from the
    // JSON held in the "Data" column.
    EvalBinaryClassStreamOp evaluator = new EvalBinaryClassStreamOp()
        .setPositiveLabelValueString("1")
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .setTimeInterval(10);
    JsonValueStreamOp metricExtractor = new JsonValueStreamOp()
        .setSelectedCol("Data")
        .setReservedCols(new String[] { "Statistics" })
        .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
        .setJsonPath(new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" });
    predictions
        .link(evaluator)
        .link(metricExtractor)
        .select("'Eval Metric' AS out_type, *")
        .print();

    StreamOperator.execute();
}
use of com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp in project Alink by alibaba.
the class FTRLExample method main.
/**
 * End-to-end FTRL example: fits a feature-engineering pipeline and an initial
 * logistic-regression model on a batch CSV, then trains FTRL online on a CSV stream,
 * predicts on the other half of the stream and prints evaluation metrics.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if any of the invoked operators or the job execution fails
 */
public static void main(String[] args) throws Exception {
    String schemaStr = "id string, click string, dt string, C1 string, banner_pos int, site_id string, site_domain string, "
        + "site_category string, app_id string, app_domain string, app_category string, device_id string, "
        + "device_ip string, device_model string, device_type string, device_conn_type string, C14 int, C15 int, "
        + "C16 int, C17 int, C18 int, C19 int, C20 int, C21 int";

    // Batch data used to fit the feature pipeline and the initial model.
    CsvSourceBatchOp trainBatchData = new CsvSourceBatchOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-small.csv")
        .setSchemaStr(schemaStr);
    trainBatchData.firstN(10).print();

    String labelColName = "click";
    String[] selectedColNames = new String[] { "C1", "banner_pos", "site_category", "app_domain", "app_category", "device_type", "device_conn_type", "C14", "C15", "C16", "C17", "C18", "C19", "C20", "C21", "site_id", "site_domain", "device_id", "device_model" };
    String[] categoryColNames = new String[] { "C1", "banner_pos", "site_category", "app_domain", "app_category", "device_type", "device_conn_type", "site_id", "site_domain", "device_id", "device_model" };
    String[] numericalColNames = new String[] { "C14", "C15", "C16", "C17", "C18", "C19", "C20", "C21" };

    // result column name of feature engineering
    String vecColName = "vec";
    int numHashFeatures = 30000;

    // setup feature engineering pipeline: scale the numerical columns, then hash
    // the selected columns into a single vector column
    Pipeline featurePipeline = new Pipeline()
        .add(new StandardScaler().setSelectedCols(numericalColNames))
        .add(new FeatureHasher()
            .setSelectedCols(selectedColNames)
            .setCategoricalCols(categoryColNames)
            .setOutputCol(vecColName)
            .setNumFeatures(numHashFeatures));

    // fit feature pipeline model
    PipelineModel featurePipelineModel = featurePipeline.fit(trainBatchData);

    // prepare stream train data
    CsvSourceStreamOp data = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(schemaStr)
        .setIgnoreFirstLine(true);

    // split stream to train and eval data (main output / side output 0)
    SplitStreamOp splitter = new SplitStreamOp()
        .setFraction(0.5)
        .linkFrom(data);

    // train initial batch model
    LogisticRegressionTrainBatchOp lr = new LogisticRegressionTrainBatchOp()
        .setVectorCol(vecColName)
        .setLabelCol(labelColName)
        .setWithIntercept(true)
        .setMaxIter(10);
    BatchOperator<?> initModel = featurePipelineModel.transform(trainBatchData).link(lr);

    // ftrl train
    FtrlTrainStreamOp model = new FtrlTrainStreamOp(initModel)
        .setVectorCol(vecColName)
        .setLabelCol(labelColName)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(numHashFeatures)
        .linkFrom(featurePipelineModel.transform(splitter));

    // ftrl predict
    FtrlPredictStreamOp predictResult = new FtrlPredictStreamOp(initModel)
        .setVectorCol(vecColName)
        .setPredictionCol("pred")
        .setReservedCols(new String[] { labelColName })
        .setPredictionDetailCol("details")
        .linkFrom(model, featurePipelineModel.transform(splitter.getSideOutput(0)));

    // ftrl eval: selected metrics are extracted from the JSON in the "Data" column
    predictResult
        .link(new EvalBinaryClassStreamOp()
            .setLabelCol(labelColName)
            .setPredictionCol("pred")
            .setPredictionDetailCol("details")
            .setTimeInterval(10))
        .link(new JsonValueStreamOp()
            .setSelectedCol("Data")
            .setReservedCols(new String[] { "Statistics" })
            .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
            .setJsonPath(new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" }))
        .print();

    // Start the streaming job: the stream operators above are only wired together
    // and do not run until execute() is called. The fully-qualified name is used so
    // that no additional import is required.
    com.alibaba.alink.operator.stream.StreamOperator.execute();
}
use of com.alibaba.alink.operator.stream.onlinelearning.FtrlTrainStreamOp in project Alink by alibaba.
the class Chap14 method c_5.
/**
 * Runs FTRL online training on a CSV stream, starting from a saved initial model,
 * and uses the produced models for streaming prediction and evaluation.
 *
 * <p>Two outputs are printed, each tagged through an {@code out_type} column:
 * a sample of the predictions ({@code 'Pred Sample'}) and the evaluation metrics
 * ({@code 'Eval Metric'}).
 *
 * @throws Exception if any of the invoked operators or the stream execution fails
 */
static void c_5() throws Exception {
    // load pipeline model
    PipelineModel featurePipelineModel = PipelineModel.load(DATA_DIR + FEATURE_MODEL_FILE);

    // Initial model read from the .ak file. Declared with a wildcard type
    // instead of a raw type to avoid unchecked warnings.
    BatchOperator<?> initModel = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + INIT_MODEL_FILE);

    // prepare stream train data; the first line of the file is ignored
    CsvSourceStreamOp data = new CsvSourceStreamOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-ctr-train-8M.csv")
        .setSchemaStr(SCHEMA_STRING)
        .setIgnoreFirstLine(true);

    // split stream to train and eval data (main output / side output 0)
    SplitStreamOp splitter = new SplitStreamOp()
        .setFraction(0.5)
        .linkFrom(data);
    StreamOperator<?> trainStreamData = featurePipelineModel.transform(splitter);
    StreamOperator<?> testStreamData = featurePipelineModel.transform(splitter.getSideOutput(0));

    // ftrl train
    FtrlTrainStreamOp model = new FtrlTrainStreamOp(initModel)
        .setVectorCol(VEC_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setWithIntercept(true)
        .setAlpha(0.1)
        .setBeta(0.1)
        .setL1(0.01)
        .setL2(0.01)
        .setTimeInterval(10)
        .setVectorSize(NUM_HASH_FEATURES)
        .linkFrom(trainStreamData);

    // ftrl predict
    FtrlPredictStreamOp predResult = new FtrlPredictStreamOp(initModel)
        .setVectorCol(VEC_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setReservedCols(new String[] { LABEL_COL_NAME })
        .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
        .linkFrom(model, testStreamData);
    predResult
        .sample(0.0001)
        .select("'Pred Sample' AS out_type, *")
        .print();

    // ftrl eval: selected metrics are extracted from the JSON in the "Data" column
    predResult
        .link(new EvalBinaryClassStreamOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .setTimeInterval(10))
        .link(new JsonValueStreamOp()
            .setSelectedCol("Data")
            .setReservedCols(new String[] { "Statistics" })
            .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
            .setJsonPath(new String[] { "$.Accuracy", "$.AUC", "$.ConfusionMatrix" }))
        .select("'Eval Metric' AS out_type, *")
        .print();

    StreamOperator.execute();
}
Aggregations