Example usage of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in the Alink project by Alibaba.
Taken from the class AkExample, method main.
/**
 * End-to-end demo of moving Ak-format data between file systems:
 * <ol>
 *   <li>read the iris CSV from a public URL and write it to OSS in Ak format;</li>
 *   <li>copy the Ak file from OSS to HDFS;</li>
 *   <li>read it back from HDFS and print the first rows to stdout.</li>
 * </ol>
 *
 * @param args command-line arguments (unused)
 * @throws Exception if any of the batch jobs fails
 */
public static void main(String[] args) throws Exception {
    final String csvUrl = "https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/iris.csv";
    final String schemaStr =
        "sepal_length double, sepal_width double, petal_length double, petal_width double, category string";

    // NOTE: replace the placeholder arguments below with a real OSS configuration.
    BaseFileSystem<?> ossFileSystem =
        new OssFileSystem("OssVersion", "OssEndPoint", "OssBucket", "OssId", "OssKey");
    // NOTE: replace the placeholder arguments below with a real HDFS configuration.
    BaseFileSystem<?> hadoopFileSystem =
        new HadoopFileSystem("HadoopVersion", "HdfsFileSystemUri");

    // Step 1: CSV source -> Ak file on OSS.
    CsvSourceBatchOp csvSource = new CsvSourceBatchOp()
        .setFilePath(csvUrl)
        .setSchemaStr(schemaStr);
    AkSinkBatchOp ossSink = new AkSinkBatchOp()
        .setFilePath(new FilePath("iris", ossFileSystem))
        .setOverwriteSink(true);
    csvSource.link(ossSink);
    BatchOperator.execute();

    // Step 2: Ak file on OSS -> Ak file on HDFS.
    AkSourceBatchOp ossSource = new AkSourceBatchOp()
        .setFilePath(new FilePath("iris", ossFileSystem));
    AkSinkBatchOp hdfsSink = new AkSinkBatchOp()
        .setFilePath(new FilePath("iris", hadoopFileSystem))
        .setOverwriteSink(true);
    ossSource.link(hdfsSink);
    BatchOperator.execute();

    // Step 3: Ak file on HDFS -> first 10 rows to stdout.
    AkSourceBatchOp hdfsSource = new AkSourceBatchOp()
        .setFilePath(new FilePath("iris", hadoopFileSystem));
    hdfsSource.firstN(10).print();
}
Example usage of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in the Alink project by Alibaba.
Taken from the class Chap21, method c_7.
/**
 * LDA topic-modeling demo: segments and stop-word-filters the text corpus,
 * trains (or reuses a cached) LDA model, evaluates the predicted topics as a
 * clustering against the original labels, and prints the top words per topic.
 *
 * @throws Exception if a batch job fails
 */
private static void c_7() throws Exception {
    // Segment the raw text and drop stop words before topic modeling.
    BatchOperator<?> corpus = getSource()
        .select(LABEL_COL_NAME + ", " + TXT_COL_NAME)
        .link(new SegmentBatchOp().setSelectedCol(TXT_COL_NAME))
        .link(new StopWordsRemoverBatchOp().setSelectedCol(TXT_COL_NAME));
    corpus.lazyPrint(10);

    // Train the LDA model only once; later runs reuse the saved Ak files.
    if (!new File(DATA_DIR + LDA_MODEL_FILE).exists()) {
        LdaTrainBatchOp ldaTrainer = new LdaTrainBatchOp()
            .setTopicNum(10)
            .setNumIter(200)
            .setVocabSize(20000)
            .setSelectedCol(TXT_COL_NAME)
            .setRandomSeed(123);
        corpus.link(ldaTrainer);
        ldaTrainer.lazyPrintModelInfo();
        ldaTrainer.link(new AkSinkBatchOp().setFilePath(DATA_DIR + LDA_MODEL_FILE));
        // Side output 0 is saved as the "PWZ" table — presumably the word-topic
        // weights, given the (word, topic_i) columns queried below; confirm in docs.
        ldaTrainer.getSideOutput(0).link(new AkSinkBatchOp().setFilePath(DATA_DIR + LDA_PWZ_FILE));
        BatchOperator.execute();
    }

    // Predict topics for the corpus and evaluate the result as a clustering.
    new LdaPredictBatchOp()
        .setSelectedCol(TXT_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol("predinfo")
        .linkFrom(new AkSourceBatchOp().setFilePath(DATA_DIR + LDA_MODEL_FILE), corpus)
        .lazyPrint(5)
        .link(new EvalClusterBatchOp()
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .lazyPrintMetrics());

    // Inspect the word-topic table: a small random sample plus the 20 top-weighted
    // words for each of the 10 topics.
    AkSourceBatchOp pwz = new AkSourceBatchOp().setFilePath(DATA_DIR + LDA_PWZ_FILE);
    pwz.sample(0.001).lazyPrint(10);
    for (int topicId = 0; topicId < 10; topicId++) {
        pwz.select("word, topic_" + topicId)
            .orderBy("topic_" + topicId, 20, false)
            .lazyPrint(-1, "topic" + topicId);
    }
    BatchOperator.execute();
}
Example usage of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in the Alink project by Alibaba.
Taken from the class Chap22, method c_2.
/**
 * Word2Vec demo on a Chinese novel: prints word counts for the corpus,
 * trains (or reuses a cached) Word2Vec model, then shows the 20 nearest
 * neighbors for a set of main-character names.
 *
 * @throws Exception if a batch job fails
 */
static void c_2() throws Exception {
    BatchOperator.setParallelism(1);

    TextSourceBatchOp source = new TextSourceBatchOp().setFilePath(DATA_DIR + ORIGIN_FILE);
    source.lazyPrint(8);

    // User dictionary so multi-character person names survive segmentation as single tokens.
    final String[] characterDict = new String[] { "曹操", "孔明", "玄德", "刘玄德", "刘备", "关羽", "张飞", "赵云", "曹孟德", "诸葛亮", "张郃", "孙权", "张辽", "鲁肃" };

    // Top-100 word counts after segmentation and default stop-word removal.
    source
        .link(new SegmentBatchOp().setSelectedCol("text").setUserDefinedDict(characterDict))
        .link(new StopWordsRemoverBatchOp().setSelectedCol("text"))
        .link(new WordCountBatchOp().setSelectedCol("text"))
        .orderBy("cnt", 100, false)
        .print();

    // Train the Word2Vec model only once; later runs reuse the saved Ak file.
    // The extra stop-word list filters high-frequency function words specific to this corpus.
    if (!new File(DATA_DIR + W2V_MODEL_FILE).exists()) {
        source
            .link(new SegmentBatchOp().setSelectedCol("text").setUserDefinedDict(characterDict))
            .link(new StopWordsRemoverBatchOp().setSelectedCol("text").setStopWords("亦", "曰", "遂", "吾", "已", "去", "二人", "今", "使", "中", "知", "不", "见", "都", "令", "却", "欲", "请", "人", "谓", "不可", "闻", "前", "后", "皆", "便", "问", "日", "时", "耳", "不敢", "问", "回", "才", "之事", "之人", "之时", "料", "今日", "令人", "受", "说", "出", "已毕", "不得", "使人", "众", "何不", "不知", "再", "处", "无", "即日", "诸", "此时", "只", "下", "还", "上", "杀", "将军", "却说", "兵", "汝", "走", "言", "寨", "不能", "斩", "死", "商议", "听", "军士", "军", "左右", "军马", "引兵", "次日", "二", "看", "耶", "退", "更", "毕", "正", "一人", "原来", "大笑", "车胄", "口", "引", "大喜", "其事", "助", "事", "未", "大", "至此", "讫", "心中", "敢"))
            .link(new Word2VecTrainBatchOp().setSelectedCol("text").setMinCount(10).setNumIter(50))
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + W2V_MODEL_FILE));
        BatchOperator.execute();
    }

    // Query the trained vectors: 20 nearest neighbors for each selected character name.
    AkSourceBatchOp word2vec = new AkSourceBatchOp().setFilePath(DATA_DIR + W2V_MODEL_FILE);
    new VectorNearestNeighbor()
        .setIdCol("word")
        .setSelectedCol("vec")
        .setTopN(20)
        .setOutputCol("similar_words")
        .fit(word2vec)
        .transform(word2vec.filter("word IN ('曹操', '操', '玄德', '刘备', '孔明', '亮', '卧龙', '周瑜', '吕布', '貂蝉', '云长', '孙权')"))
        .select("word, similar_words")
        .print();
}
Example usage of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in the Alink project by Alibaba.
Taken from the class Chap24, method c_5.
/**
 * ItemCF recommendation demo: trains (or reuses a cached) ItemCF model, then
 * shows both batch-transform recommendation and the in-process LocalPredictor
 * API, including a variant that excludes items the user has already rated.
 *
 * @throws Exception if a batch job fails
 */
static void c_5() throws Exception {
    // Train the ItemCF model only once and cache it as an Ak file.
    if (!new File(DATA_DIR + ITEMCF_MODEL_FILE).exists()) {
        getSourceRatings()
            .link(new ItemCfTrainBatchOp()
                .setUserCol(USER_COL)
                .setItemCol(ITEM_COL)
                .setRateCol(RATING_COL))
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + ITEMCF_MODEL_FILE));
        BatchOperator.execute();
    }

    // Batch recommendation for a single user (user_id = 1).
    MemSourceBatchOp testData = new MemSourceBatchOp(new Long[] { 1L }, "user_id");
    new ItemCfItemsPerUserRecommender()
        .setUserCol(USER_COL)
        .setRecommCol(RECOMM_COL)
        .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + ITEMCF_MODEL_FILE))
        .transform(testData)
        .print();

    // LocalPredictor variant: top-20 items per user, evaluated in-process.
    LocalPredictor recommPredictor = new ItemCfItemsPerUserRecommender()
        .setUserCol(USER_COL)
        .setRecommCol(RECOMM_COL)
        .setK(20)
        .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + ITEMCF_MODEL_FILE))
        .collectLocalPredictor("user_id long");
    System.out.println(recommPredictor.getOutputSchema());

    // Key-value lookup predictor mapping item_id to its title.
    LocalPredictor kvPredictor = new Lookup()
        .setSelectedCols(ITEM_COL)
        .setOutputCols("item_name")
        .setModelData(getSourceItems())
        .setMapKeyCols("item_id")
        .setMapValueCols("title")
        .collectLocalPredictor("item_id long");
    System.out.println(kvPredictor.getOutputSchema());

    // Field 1 of the prediction row holds the recommendation MTable.
    MTable recommResult = (MTable) recommPredictor.map(Row.of(1L)).getField(1);
    System.out.println(recommResult);

    // Titles the user already rated highly, for comparison with the recommendations.
    new Lookup()
        .setSelectedCols(ITEM_COL)
        .setOutputCols("item_name")
        .setModelData(getSourceItems())
        .setMapKeyCols("item_id")
        .setMapValueCols("title")
        .transform(getSourceRatings().filter("user_id=1 AND rating>4"))
        .select("item_name")
        .orderBy("item_name", 1000)
        .lazyPrint(-1);

    // Same local predictor, but excluding items the user has already rated.
    LocalPredictor excludeKnownPredictor = new ItemCfItemsPerUserRecommender()
        .setUserCol(USER_COL)
        .setRecommCol(RECOMM_COL)
        .setK(20)
        .setExcludeKnown(true)
        .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + ITEMCF_MODEL_FILE))
        .collectLocalPredictor("user_id long");
    recommResult = (MTable) excludeKnownPredictor.map(Row.of(1L)).getField(1);
    System.out.println(recommResult);
}
Example usage of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in the Alink project by Alibaba.
Taken from the class Chap24, method c_4.
/**
 * ALS recommendation demo: trains (or reuses a cached) ALS model on the
 * training ratings, prints predicted ratings with item titles for user 1,
 * then evaluates predicted vs. actual ratings on the test set with
 * regression metrics.
 *
 * @throws Exception if a batch job fails
 */
static void c_4() throws Exception {
    TsvSourceBatchOp trainSet = new TsvSourceBatchOp()
        .setFilePath(DATA_DIR + RATING_TRAIN_FILE)
        .setSchemaStr(RATING_SCHEMA_STRING);
    TsvSourceBatchOp testSet = new TsvSourceBatchOp()
        .setFilePath(DATA_DIR + RATING_TEST_FILE)
        .setSchemaStr(RATING_SCHEMA_STRING);

    // Train the ALS model only once and cache it as an Ak file.
    if (!new File(DATA_DIR + ALS_MODEL_FILE).exists()) {
        trainSet
            .link(new AlsTrainBatchOp()
                .setUserCol(USER_COL)
                .setItemCol(ITEM_COL)
                .setRateCol(RATING_COL)
                .setLambda(0.1)
                .setRank(10)
                .setNumIter(10))
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + ALS_MODEL_FILE));
        BatchOperator.execute();
    }

    // Predict ratings for user 1 and join item titles for readability.
    new PipelineModel(
            new AlsRateRecommender()
                .setUserCol(USER_COL)
                .setItemCol(ITEM_COL)
                .setRecommCol(RECOMM_COL)
                .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + ALS_MODEL_FILE)),
            new Lookup()
                .setSelectedCols(ITEM_COL)
                .setOutputCols("item_name")
                .setModelData(getSourceItems())
                .setMapKeyCols("item_id")
                .setMapValueCols("title"))
        .transform(testSet.filter("user_id=1"))
        .select("user_id, rating, recomm, item_name")
        .orderBy("rating, recomm", 1000)
        .lazyPrint(-1);
    BatchOperator.execute();

    // Evaluate predicted vs. actual ratings over the whole test set.
    new AlsRateRecommender()
        .setUserCol(USER_COL)
        .setItemCol(ITEM_COL)
        .setRecommCol(RECOMM_COL)
        .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + ALS_MODEL_FILE))
        .transform(testSet)
        .link(new EvalRegressionBatchOp()
            .setLabelCol(RATING_COL)
            .setPredictionCol(RECOMM_COL)
            .lazyPrintMetrics());
    BatchOperator.execute();
}
Aggregations