use of com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp in project Alink by alibaba.
the class Chap05 method c_2_2.
static void c_2_2() throws Exception {
BatchOperator<?> ratings = Chap24.getSourceRatings();
BatchOperator<?> items = Chap24.getSourceItems();
BatchOperator left_ratings = ratings.filter("user_id<3 AND item_id<4").select("user_id, item_id, rating");
BatchOperator right_movies = items.select("item_id AS movie_id, title").filter("movie_id < 6 AND MOD(movie_id, 2) = 1");
System.out.println("# left_ratings #");
left_ratings.print();
System.out.println("\n# right_movies #");
right_movies.print();
System.out.println("# JOIN #");
new JoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
System.out.println("\n# LEFT OUTER JOIN #");
new LeftOuterJoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
System.out.println("\n# RIGHT OUTER JOIN #");
new RightOuterJoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
System.out.println("\n# FULL OUTER JOIN #");
new FullOuterJoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
}
use of com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp in project Alink by alibaba.
the class Chap11 method c_3.
static void c_3() throws Exception {
BatchOperator<?> source = getSource();
BatchOperator t1 = source.filter("ts < CAST('2014-07-16 00:00:00' AS TIMESTAMP)");
BatchOperator t2 = source.filter("ts >= CAST('2014-07-16 00:00:00' AS TIMESTAMP)");
t1.lazyPrint(3, "[ ts < '2014-07-16 00:00:00' ]").lazyPrintStatistics();
t2.lazyPrint(3, "[ ts >= '2014-07-16 00:00:00' ]").lazyPrintStatistics();
BatchOperator.execute();
String clausePreProc = "user_id, brand_id, type, ts, past_days," + "case when type=0 then 1 else 0 end AS is_click," + "case when type=1 then 1 else 0 end AS is_buy," + "case when type=2 then 1 else 0 end AS is_collect," + "case when type=3 then 1 else 0 end AS is_cart," + "case when type=0 and past_days<=30 then 1 else 0 end AS is_click_1m," + "case when type=1 and past_days<=30 then 1 else 0 end AS is_buy_1m," + "case when type=2 and past_days<=30 then 1 else 0 end AS is_collect_1m," + "case when type=3 and past_days<=30 then 1 else 0 end AS is_cart_1m," + "case when type=0 and past_days<=60 then 1 else 0 end AS is_click_2m," + "case when type=1 and past_days<=60 then 1 else 0 end AS is_buy_2m," + "case when type=2 and past_days<=60 then 1 else 0 end AS is_collect_2m," + "case when type=3 and past_days<=60 then 1 else 0 end AS is_cart_2m," + "case when type=0 and past_days<=90 then 1 else 0 end AS is_click_3m," + "case when type=1 and past_days<=90 then 1 else 0 end AS is_buy_3m," + "case when type=2 and past_days<=90 then 1 else 0 end AS is_collect_3m," + "case when type=3 and past_days<=90 then 1 else 0 end AS is_cart_3m," + "case when type=0 and past_days>30 and past_days<=60 then 1 else 0 end AS is_click_m2nd," + "case when type=1 and past_days>30 and past_days<=60 then 1 else 0 end AS is_buy_m2nd," + "case when type=2 and past_days>30 and past_days<=60 then 1 else 0 end AS is_collect_m2nd," + "case when type=3 and past_days>30 and past_days<=60 then 1 else 0 end AS is_cart_m2nd," + "case when type=0 and past_days>60 and past_days<=90 then 1 else 0 end AS is_click_m3th," + "case when type=1 and past_days>60 and past_days<=90 then 1 else 0 end AS is_buy_m3th," + "case when type=2 and past_days>60 and past_days<=90 then 1 else 0 end AS is_collect_m3th," + "case when type=3 and past_days>60 and past_days<=90 then 1 else 0 end AS is_cart_m3th," + "case when type=0 and past_days<=3 then 1 else 0 end AS is_click_3d," + "case when type=1 and past_days<=3 then 1 else 0 end AS is_buy_3d," + "case when type=2 and past_days<=3 then 1 else 0 end AS is_collect_3d," + "case when type=3 and past_days<=3 then 1 else 0 end AS is_cart_3d," + "case when type=0 and past_days>3 and past_days<=6 then 1 else 0 end AS is_click_3d2nd," + "case when type=1 and past_days>3 and past_days<=6 then 1 else 0 end AS is_buy_3d2nd," + "case when type=2 and past_days>3 and past_days<=6 then 1 else 0 end AS is_collect_3d2nd," + "case when type=3 and past_days>3 and past_days<=6 then 1 else 0 end AS is_cart_3d2nd," + "case when type=0 and past_days>6 and past_days<=9 then 1 else 0 end AS is_click_3d3th," + "case when type=1 and past_days>6 and past_days<=9 then 1 else 0 end AS is_buy_3d3th," + "case when type=2 and past_days>6 and past_days<=9 then 1 else 0 end AS is_collect_3d3th," + "case when type=3 and past_days>6 and past_days<=9 then 1 else 0 end AS is_cart_3d3th," + "case when type=0 and past_days<=7 then 1 else 0 end AS is_click_1w," + "case when type=1 and past_days<=7 then 1 else 0 end AS is_buy_1w," + "case when type=2 and past_days<=7 then 1 else 0 end AS is_collect_1w," + "case when type=3 and past_days<=7 then 1 else 0 end AS is_cart_1w," + "case when type=0 and past_days>7 and past_days<=14 then 1 else 0 end AS is_click_w2nd," + "case when type=1 and past_days>7 and past_days<=14 then 1 else 0 end AS is_buy_w2nd," + "case when type=2 and past_days>7 and past_days<=14 then 1 else 0 end AS is_collect_w2nd," + "case when type=3 and past_days>7 and past_days<=14 then 1 else 0 end AS is_cart_w2nd," + "case when type=0 and past_days>14 and past_days<=21 then 1 else 0 end AS is_click_w3th," + "case when type=1 and past_days>14 and past_days<=21 then 1 else 0 end AS is_buy_w3th," + "case when type=2 and past_days>14 and past_days<=21 then 1 else 0 end AS is_collect_w3th," + "case when type=3 and past_days>14 and past_days<=21 then 1 else 0 end AS is_cart_w3th";
BatchOperator t1_preproc = t1.select("user_id, brand_id, type, ts, " + "TIMESTAMPDIFF(DAY, ts, TIMESTAMP '2014-07-16 00:00:00') AS past_days").select(clausePreProc);
String clauseUserBrand = "user_id, brand_id, SUM(is_click) as cnt_click, SUM(is_buy) as cnt_buy, " + "SUM(is_collect) as cnt_collect, SUM(is_cart) as cnt_cart, " + "SUM(is_click_1m) as cnt_click_1m, SUM(is_buy_1m) as cnt_buy_1m, " + "SUM(is_collect_1m) as cnt_collect_1m, SUM(is_cart_1m) as cnt_cart_1m, " + "SUM(is_click_2m) as cnt_click_2m, SUM(is_buy_2m) as cnt_buy_2m, " + "SUM(is_collect_2m) as cnt_collect_2m, SUM(is_cart_2m) as cnt_cart_2m, " + "SUM(is_click_3m) as cnt_click_3m, SUM(is_buy_3m) as cnt_buy_3m, " + "SUM(is_collect_3m) as cnt_collect_3m, SUM(is_cart_3m) as cnt_cart_3m, " + "SUM(is_click_m2nd) as cnt_click_m2nd, SUM(is_buy_m2nd) as cnt_buy_m2nd, " + "SUM(is_collect_m2nd) as cnt_collect_m2nd, SUM(is_cart_m2nd) as cnt_cart_m2nd, " + "SUM(is_click_m3th) as cnt_click_m3th, SUM(is_buy_m3th) as cnt_buy_m3th, " + "SUM(is_collect_m3th) as cnt_collect_m3th, SUM(is_cart_m3th) as cnt_cart_m3th, " + "SUM(is_click_3d) as cnt_click_3d, SUM(is_buy_3d) as cnt_buy_3d, " + "SUM(is_collect_3d) as cnt_collect_3d, SUM(is_cart_3d) as cnt_cart_3d, " + "SUM(is_click_3d2nd) as cnt_click_3d2nd, SUM(is_buy_3d2nd) as cnt_buy_3d2nd, " + "SUM(is_collect_3d2nd) as cnt_collect_3d2nd, SUM(is_cart_3d2nd) as cnt_cart_3d2nd, " + "SUM(is_click_3d3th) as cnt_click_3d3th, SUM(is_buy_3d3th) as cnt_buy_3d3th, " + "SUM(is_collect_3d3th) as cnt_collect_3d3th, SUM(is_cart_3d3th) as cnt_cart_3d3th, " + "SUM(is_click_1w) as cnt_click_1w, SUM(is_buy_1w) as cnt_buy_1w, " + "SUM(is_collect_1w) as cnt_collect_1w, SUM(is_cart_1w) as cnt_cart_1w, " + "SUM(is_click_w2nd) as cnt_click_w2nd, SUM(is_buy_w2nd) as cnt_buy_w2nd, " + "SUM(is_collect_w2nd) as cnt_collect_w2nd, SUM(is_cart_w2nd) as cnt_cart_w2nd, " + "SUM(is_click_w3th) as cnt_click_w3th, SUM(is_buy_w3th) as cnt_buy_w3th, " + "SUM(is_collect_w3th) as cnt_collect_w3th, SUM(is_cart_w3th) as cnt_cart_w3th";
BatchOperator t1_userbrand = t1_preproc.groupBy("user_id, brand_id", clauseUserBrand);
String clauseUserBrand_Rate = "user_id,brand_id," + "cnt_click,cnt_buy,cnt_collect,cnt_cart," + "cnt_click_1m,cnt_buy_1m,cnt_collect_1m,cnt_cart_1m," + "cnt_click_2m,cnt_buy_2m,cnt_collect_2m,cnt_cart_2m," + "cnt_click_3m,cnt_buy_3m,cnt_collect_3m,cnt_cart_3m," + "cnt_click_m2nd,cnt_buy_m2nd,cnt_collect_m2nd,cnt_cart_m2nd," + "cnt_click_m3th,cnt_buy_m3th,cnt_collect_m3th,cnt_cart_m3th," + "cnt_click_3d,cnt_buy_3d,cnt_collect_3d,cnt_cart_3d," + "cnt_click_3d2nd,cnt_buy_3d2nd,cnt_collect_3d2nd,cnt_cart_3d2nd," + "cnt_click_3d3th,cnt_buy_3d3th,cnt_collect_3d3th,cnt_cart_3d3th," + "cnt_click_1w,cnt_buy_1w,cnt_collect_1w,cnt_cart_1w," + "cnt_click_w2nd,cnt_buy_w2nd,cnt_collect_w2nd,cnt_cart_w2nd," + "cnt_click_w3th,cnt_buy_w3th,cnt_collect_w3th,cnt_cart_w3th," + "case when cnt_buy>cnt_click then 1.0 when cnt_buy=0 then 0.0 else cnt_buy*1.0/cnt_click end AS " + "rt_click2buy," + "case when cnt_buy>cnt_collect then 1.0 when cnt_buy=0 then 0.0 else cnt_buy*1.0/cnt_collect end AS " + "rt_collect2buy," + "case when cnt_buy>cnt_cart then 1.0 when cnt_buy=0 then 0.0 else cnt_buy*1.0/cnt_cart end AS " + "rt_cart2buy," + "case when cnt_buy_3d>cnt_click_3d then 1.0 when cnt_buy_3d=0 then 0.0 else cnt_buy_3d*1.0/cnt_click_3d" + " end AS rt_click2buy_3d," + "case when cnt_buy_3d>cnt_collect_3d then 1.0 when cnt_buy_3d=0 then 0.0 else " + "cnt_buy_3d*1.0/cnt_collect_3d end AS rt_collect2buy_3d," + "case when cnt_buy_3d>cnt_cart_3d then 1.0 when cnt_buy_3d=0 then 0.0 else cnt_buy_3d*1.0/cnt_cart_3d " + "end AS rt_cart2buy_3d," + "case when cnt_buy_1w>cnt_click_1w then 1.0 when cnt_buy_1w=0 then 0.0 else cnt_buy_1w*1.0/cnt_click_1w" + " end AS rt_click2buy_1w," + "case when cnt_buy_1w>cnt_collect_1w then 1.0 when cnt_buy_1w=0 then 0.0 else " + "cnt_buy_1w*1.0/cnt_collect_1w end AS rt_collect2buy_1w," + "case when cnt_buy_1w>cnt_cart_1w then 1.0 when cnt_buy_1w=0 then 0.0 else cnt_buy_1w*1.0/cnt_cart_1w " + "end AS rt_cart2buy_1w," + "case when cnt_buy_1m>cnt_click_1m then 1.0 when cnt_buy_1m=0 then 0.0 else cnt_buy_1m*1.0/cnt_click_1m" + " end AS rt_click2buy_1m," + "case when cnt_buy_1m>cnt_collect_1m then 1.0 when cnt_buy_1m=0 then 0.0 else " + "cnt_buy_1m*1.0/cnt_collect_1m end AS rt_collect2buy_1m," + "case when cnt_buy_1m>cnt_cart_1m then 1.0 when cnt_buy_1m=0 then 0.0 else cnt_buy_1m*1.0/cnt_cart_1m " + "end AS rt_cart2buy_1m," + "case when cnt_click_3d=0 then 0.0 when cnt_click_3d>=120.0*cnt_click_3d2nd then 120.0 else " + "cnt_click_3d*1.0/cnt_click_3d2nd end AS rt_click_3d," + "case when cnt_buy_3d=0 then 0.0 when cnt_buy_3d>=10.0*cnt_buy_3d2nd then 10.0 else " + "cnt_buy_3d*1.0/cnt_buy_3d2nd end AS rt_buy_3d," + "case when cnt_collect_3d=0 then 0.0 when cnt_collect_3d>=10.0*cnt_collect_3d2nd then 10.0 else " + "cnt_collect_3d*1.0/cnt_collect_3d2nd end AS rt_collect_3d," + "case when cnt_cart_3d=0 then 0.0 when cnt_cart_3d>=20.0*cnt_cart_3d2nd then 20.0 else " + "cnt_cart_3d*1.0/cnt_cart_3d2nd end AS rt_cart_3d," + "case when cnt_click_1w=0 then 0.0 when cnt_click_1w>=300.0*cnt_click_w2nd then 300.0 else " + "cnt_click_1w*1.0/cnt_click_w2nd end AS rt_click_1w," + "case when cnt_buy_1w=0 then 0.0 when cnt_buy_1w>=15.0*cnt_buy_w2nd then 15.0 else " + "cnt_buy_1w*1.0/cnt_buy_w2nd end AS rt_buy_1w," + "case when cnt_collect_1w=0 then 0.0 when cnt_collect_1w>=15.0*cnt_collect_w2nd then 15.0 else " + "cnt_collect_1w*1.0/cnt_collect_w2nd end AS rt_collect_1w," + "case when cnt_cart_1w=0 then 0.0 when cnt_cart_1w>=40.0*cnt_cart_w2nd then 40.0 else " + "cnt_cart_1w*1.0/cnt_cart_w2nd end AS rt_cart_1w," + "case when cnt_click_1m=0 then 0.0 when cnt_click_1m>=500.0*cnt_click_m2nd then 500.0 else " + "cnt_click_1m*1.0/cnt_click_m2nd end AS rt_click_1m," + "case when cnt_buy_1m=0 then 0.0 when cnt_buy_1m>=30.0*cnt_buy_m2nd then 30.0 else " + "cnt_buy_1m*1.0/cnt_buy_m2nd end AS rt_buy_1m," + "case when cnt_collect_1m=0 then 0.0 when cnt_collect_1m>=30.0*cnt_collect_m2nd then 30.0 else " + "cnt_collect_1m*1.0/cnt_collect_m2nd end AS rt_collect_1m," + "case when cnt_cart_1m=0 then 0.0 when cnt_cart_1m>=50.0*cnt_cart_m2nd then 50.0 else " + "cnt_cart_1m*1.0/cnt_cart_m2nd end AS rt_cart_1m";
t1_userbrand = t1_userbrand.select(clauseUserBrand_Rate);
String clauseUser = "user_id, " + "SUM(is_click) as user_cnt_click, SUM(is_buy) as user_cnt_buy, " + "SUM(is_collect) as user_cnt_collect, SUM(is_cart) as user_cnt_cart, " + "SUM(is_click_1m) as user_cnt_click_1m, SUM(is_buy_1m) as user_cnt_buy_1m, " + "SUM(is_collect_1m) as user_cnt_collect_1m, SUM(is_cart_1m) as user_cnt_cart_1m, " + "SUM(is_click_2m) as user_cnt_click_2m, SUM(is_buy_2m) as user_cnt_buy_2m, " + "SUM(is_collect_2m) as user_cnt_collect_2m, SUM(is_cart_2m) as user_cnt_cart_2m, " + "SUM(is_click_3m) as user_cnt_click_3m, SUM(is_buy_3m) as user_cnt_buy_3m, " + "SUM(is_collect_3m) as user_cnt_collect_3m, SUM(is_cart_3m) as user_cnt_cart_3m, " + "SUM(is_click_m2nd) as user_cnt_click_m2nd, SUM(is_buy_m2nd) as user_cnt_buy_m2nd, " + "SUM(is_collect_m2nd) as user_cnt_collect_m2nd, SUM(is_cart_m2nd) as user_cnt_cart_m2nd, " + "SUM(is_click_m3th) as user_cnt_click_m3th, SUM(is_buy_m3th) as user_cnt_buy_m3th, " + "SUM(is_collect_m3th) as user_cnt_collect_m3th, SUM(is_cart_m3th) as user_cnt_cart_m3th, " + "SUM(is_click_3d) as user_cnt_click_3d, SUM(is_buy_3d) as user_cnt_buy_3d, " + "SUM(is_collect_3d) as user_cnt_collect_3d, SUM(is_cart_3d) as user_cnt_cart_3d, " + "SUM(is_click_3d2nd) as user_cnt_click_3d2nd, SUM(is_buy_3d2nd) as user_cnt_buy_3d2nd, " + "SUM(is_collect_3d2nd) as user_cnt_collect_3d2nd, SUM(is_cart_3d2nd) as user_cnt_cart_3d2nd, " + "SUM(is_click_3d3th) as user_cnt_click_3d3th, SUM(is_buy_3d3th) as user_cnt_buy_3d3th, " + "SUM(is_collect_3d3th) as user_cnt_collect_3d3th, SUM(is_cart_3d3th) as user_cnt_cart_3d3th, " + "SUM(is_click_1w) as user_cnt_click_1w, SUM(is_buy_1w) as user_cnt_buy_1w, " + "SUM(is_collect_1w) as user_cnt_collect_1w, SUM(is_cart_1w) as user_cnt_cart_1w, " + "SUM(is_click_w2nd) as user_cnt_click_w2nd, SUM(is_buy_w2nd) as user_cnt_buy_w2nd, " + "SUM(is_collect_w2nd) as user_cnt_collect_w2nd, SUM(is_cart_w2nd) as user_cnt_cart_w2nd, " + "SUM(is_click_w3th) as user_cnt_click_w3th, SUM(is_buy_w3th) as user_cnt_buy_w3th, " + "SUM(is_collect_w3th) as user_cnt_collect_w3th, SUM(is_cart_w3th) as user_cnt_cart_w3th";
BatchOperator t1_user = t1_preproc.groupBy("user_id", clauseUser);
String clauseUser_Rate = "user_id AS user_id4join," + "user_cnt_click,user_cnt_buy,user_cnt_collect,user_cnt_cart," + "user_cnt_click_1m,user_cnt_buy_1m,user_cnt_collect_1m,user_cnt_cart_1m," + "user_cnt_click_2m,user_cnt_buy_2m,user_cnt_collect_2m,user_cnt_cart_2m," + "user_cnt_click_3m,user_cnt_buy_3m,user_cnt_collect_3m,user_cnt_cart_3m," + "user_cnt_click_m2nd,user_cnt_buy_m2nd,user_cnt_collect_m2nd,user_cnt_cart_m2nd," + "user_cnt_click_m3th,user_cnt_buy_m3th,user_cnt_collect_m3th,user_cnt_cart_m3th," + "user_cnt_click_3d,user_cnt_buy_3d,user_cnt_collect_3d,user_cnt_cart_3d," + "user_cnt_click_3d2nd,user_cnt_buy_3d2nd,user_cnt_collect_3d2nd,user_cnt_cart_3d2nd," + "user_cnt_click_3d3th,user_cnt_buy_3d3th,user_cnt_collect_3d3th,user_cnt_cart_3d3th," + "user_cnt_click_1w,user_cnt_buy_1w,user_cnt_collect_1w,user_cnt_cart_1w," + "user_cnt_click_w2nd,user_cnt_buy_w2nd,user_cnt_collect_w2nd,user_cnt_cart_w2nd," + "user_cnt_click_w3th,user_cnt_buy_w3th,user_cnt_collect_w3th,user_cnt_cart_w3th," + "case when user_cnt_buy>user_cnt_click then 1.0 when user_cnt_buy=0 then 0.0 else " + "user_cnt_buy*1.0/user_cnt_click end AS user_rt_click2buy," + "case when user_cnt_buy>user_cnt_collect then 1.0 when user_cnt_buy=0 then 0.0 else " + "user_cnt_buy*1.0/user_cnt_collect end AS user_rt_collect2buy," + "case when user_cnt_buy>user_cnt_cart then 1.0 when user_cnt_buy=0 then 0.0 else " + "user_cnt_buy*1.0/user_cnt_cart end AS user_rt_cart2buy," + "case when user_cnt_buy_3d>user_cnt_click_3d then 1.0 when user_cnt_buy_3d=0 then 0.0 else " + "user_cnt_buy_3d*1.0/user_cnt_click_3d end AS user_rt_click2buy_3d," + "case when user_cnt_buy_3d>user_cnt_collect_3d then 1.0 when user_cnt_buy_3d=0 then 0.0 else " + "user_cnt_buy_3d*1.0/user_cnt_collect_3d end AS user_rt_collect2buy_3d," + "case when user_cnt_buy_3d>user_cnt_cart_3d then 1.0 when user_cnt_buy_3d=0 then 0.0 else " + "user_cnt_buy_3d*1.0/user_cnt_cart_3d end AS user_rt_cart2buy_3d," + "case when user_cnt_buy_1w>user_cnt_click_1w then 1.0 when user_cnt_buy_1w=0 then 0.0 else " + "user_cnt_buy_1w*1.0/user_cnt_click_1w end AS user_rt_click2buy_1w," + "case when user_cnt_buy_1w>user_cnt_collect_1w then 1.0 when user_cnt_buy_1w=0 then 0.0 else " + "user_cnt_buy_1w*1.0/user_cnt_collect_1w end AS user_rt_collect2buy_1w," + "case when user_cnt_buy_1w>user_cnt_cart_1w then 1.0 when user_cnt_buy_1w=0 then 0.0 else " + "user_cnt_buy_1w*1.0/user_cnt_cart_1w end AS user_rt_cart2buy_1w," + "case when user_cnt_buy_1m>user_cnt_click_1m then 1.0 when user_cnt_buy_1m=0 then 0.0 else " + "user_cnt_buy_1m*1.0/user_cnt_click_1m end AS user_rt_click2buy_1m," + "case when user_cnt_buy_1m>user_cnt_collect_1m then 1.0 when user_cnt_buy_1m=0 then 0.0 else " + "user_cnt_buy_1m*1.0/user_cnt_collect_1m end AS user_rt_collect2buy_1m," + "case when user_cnt_buy_1m>user_cnt_cart_1m then 1.0 when user_cnt_buy_1m=0 then 0.0 else " + "user_cnt_buy_1m*1.0/user_cnt_cart_1m end AS user_rt_cart2buy_1m";
t1_user = t1_user.select(clauseUser_Rate);
String clauseBrand = "brand_id, " + "SUM(is_click) as brand_cnt_click, SUM(is_buy) as brand_cnt_buy, " + "SUM(is_collect) as brand_cnt_collect, SUM(is_cart) as brand_cnt_cart, " + "SUM(is_click_1m) as brand_cnt_click_1m, SUM(is_buy_1m) as brand_cnt_buy_1m, " + "SUM(is_collect_1m) as brand_cnt_collect_1m, SUM(is_cart_1m) as brand_cnt_cart_1m, " + "SUM(is_click_2m) as brand_cnt_click_2m, SUM(is_buy_2m) as brand_cnt_buy_2m, " + "SUM(is_collect_2m) as brand_cnt_collect_2m, SUM(is_cart_2m) as brand_cnt_cart_2m, " + "SUM(is_click_3m) as brand_cnt_click_3m, SUM(is_buy_3m) as brand_cnt_buy_3m, " + "SUM(is_collect_3m) as brand_cnt_collect_3m, SUM(is_cart_3m) as brand_cnt_cart_3m, " + "SUM(is_click_m2nd) as brand_cnt_click_m2nd, SUM(is_buy_m2nd) as brand_cnt_buy_m2nd, " + "SUM(is_collect_m2nd) as brand_cnt_collect_m2nd, SUM(is_cart_m2nd) as brand_cnt_cart_m2nd, " + "SUM(is_click_m3th) as brand_cnt_click_m3th, SUM(is_buy_m3th) as brand_cnt_buy_m3th, " + "SUM(is_collect_m3th) as brand_cnt_collect_m3th, SUM(is_cart_m3th) as brand_cnt_cart_m3th, " + "SUM(is_click_3d) as brand_cnt_click_3d, SUM(is_buy_3d) as brand_cnt_buy_3d, " + "SUM(is_collect_3d) as brand_cnt_collect_3d, SUM(is_cart_3d) as brand_cnt_cart_3d, " + "SUM(is_click_3d2nd) as brand_cnt_click_3d2nd, SUM(is_buy_3d2nd) as brand_cnt_buy_3d2nd, " + "SUM(is_collect_3d2nd) as brand_cnt_collect_3d2nd, SUM(is_cart_3d2nd) as brand_cnt_cart_3d2nd, " + "SUM(is_click_3d3th) as brand_cnt_click_3d3th, SUM(is_buy_3d3th) as brand_cnt_buy_3d3th, " + "SUM(is_collect_3d3th) as brand_cnt_collect_3d3th, SUM(is_cart_3d3th) as brand_cnt_cart_3d3th, " + "SUM(is_click_1w) as brand_cnt_click_1w, SUM(is_buy_1w) as brand_cnt_buy_1w, " + "SUM(is_collect_1w) as brand_cnt_collect_1w, SUM(is_cart_1w) as brand_cnt_cart_1w, " + "SUM(is_click_w2nd) as brand_cnt_click_w2nd, SUM(is_buy_w2nd) as brand_cnt_buy_w2nd, " + "SUM(is_collect_w2nd) as brand_cnt_collect_w2nd, SUM(is_cart_w2nd) as brand_cnt_cart_w2nd, " + "SUM(is_click_w3th) as brand_cnt_click_w3th, SUM(is_buy_w3th) as brand_cnt_buy_w3th, " + "SUM(is_collect_w3th) as brand_cnt_collect_w3th, SUM(is_cart_w3th) as brand_cnt_cart_w3th";
BatchOperator t1_brand = t1_preproc.groupBy("brand_id", clauseBrand);
String clauseBrand_Rate = "brand_id AS brand_id4join," + "brand_cnt_click,brand_cnt_buy,brand_cnt_collect,brand_cnt_cart," + "brand_cnt_click_1m,brand_cnt_buy_1m,brand_cnt_collect_1m,brand_cnt_cart_1m," + "brand_cnt_click_2m,brand_cnt_buy_2m,brand_cnt_collect_2m,brand_cnt_cart_2m," + "brand_cnt_click_3m,brand_cnt_buy_3m,brand_cnt_collect_3m,brand_cnt_cart_3m," + "brand_cnt_click_m2nd,brand_cnt_buy_m2nd,brand_cnt_collect_m2nd,brand_cnt_cart_m2nd," + "brand_cnt_click_m3th,brand_cnt_buy_m3th,brand_cnt_collect_m3th,brand_cnt_cart_m3th," + "brand_cnt_click_3d,brand_cnt_buy_3d,brand_cnt_collect_3d,brand_cnt_cart_3d," + "brand_cnt_click_3d2nd,brand_cnt_buy_3d2nd,brand_cnt_collect_3d2nd,brand_cnt_cart_3d2nd," + "brand_cnt_click_3d3th,brand_cnt_buy_3d3th,brand_cnt_collect_3d3th,brand_cnt_cart_3d3th," + "brand_cnt_click_1w,brand_cnt_buy_1w,brand_cnt_collect_1w,brand_cnt_cart_1w," + "brand_cnt_click_w2nd,brand_cnt_buy_w2nd,brand_cnt_collect_w2nd,brand_cnt_cart_w2nd," + "brand_cnt_click_w3th,brand_cnt_buy_w3th,brand_cnt_collect_w3th,brand_cnt_cart_w3th," + "case when brand_cnt_buy>brand_cnt_click then 1.0 when brand_cnt_buy=0 then 0.0 else " + "brand_cnt_buy*1.0/brand_cnt_click end AS brand_rt_click2buy," + "case when brand_cnt_buy>brand_cnt_collect then 1.0 when brand_cnt_buy=0 then 0.0 else " + "brand_cnt_buy*1.0/brand_cnt_collect end AS brand_rt_collect2buy," + "case when brand_cnt_buy>brand_cnt_cart then 1.0 when brand_cnt_buy=0 then 0.0 else " + "brand_cnt_buy*1.0/brand_cnt_cart end AS brand_rt_cart2buy," + "case when brand_cnt_buy_3d>brand_cnt_click_3d then 1.0 when brand_cnt_buy_3d=0 then 0.0 else " + "brand_cnt_buy_3d*1.0/brand_cnt_click_3d end AS brand_rt_click2buy_3d," + "case when brand_cnt_buy_3d>brand_cnt_collect_3d then 1.0 when brand_cnt_buy_3d=0 then 0.0 else " + "brand_cnt_buy_3d*1.0/brand_cnt_collect_3d end AS brand_rt_collect2buy_3d," + "case when brand_cnt_buy_3d>brand_cnt_cart_3d then 1.0 when brand_cnt_buy_3d=0 then 0.0 else " + "brand_cnt_buy_3d*1.0/brand_cnt_cart_3d end AS brand_rt_cart2buy_3d," + "case when brand_cnt_buy_1w>brand_cnt_click_1w then 1.0 when brand_cnt_buy_1w=0 then 0.0 else " + "brand_cnt_buy_1w*1.0/brand_cnt_click_1w end AS brand_rt_click2buy_1w," + "case when brand_cnt_buy_1w>brand_cnt_collect_1w then 1.0 when brand_cnt_buy_1w=0 then 0.0 else " + "brand_cnt_buy_1w*1.0/brand_cnt_collect_1w end AS brand_rt_collect2buy_1w," + "case when brand_cnt_buy_1w>brand_cnt_cart_1w then 1.0 when brand_cnt_buy_1w=0 then 0.0 else " + "brand_cnt_buy_1w*1.0/brand_cnt_cart_1w end AS brand_cart2buy_1w," + "case when brand_cnt_buy_1m>brand_cnt_click_1m then 1.0 when brand_cnt_buy_1m=0 then 0.0 else " + "brand_cnt_buy_1m*1.0/brand_cnt_click_1m end AS brand_rt_click2buy_1m," + "case when brand_cnt_buy_1m>brand_cnt_collect_1m then 1.0 when brand_cnt_buy_1m=0 then 0.0 else " + "brand_cnt_buy_1m*1.0/brand_cnt_collect_1m end AS brand_rt_collect2buy_1m," + "case when brand_cnt_buy_1m>brand_cnt_cart_1m then 1.0 when brand_cnt_buy_1m=0 then 0.0 else " + "brand_cnt_buy_1m*1.0/brand_cnt_cart_1m end AS brand_rt_cart2buy_1m";
t1_brand = t1_brand.select(clauseBrand_Rate);
BatchOperator t1_join = new JoinBatchOp().setSelectClause("*").setJoinPredicate("user_id=user_id4join").linkFrom(t1_userbrand, t1_user);
t1_join = new JoinBatchOp().setSelectClause("*").setJoinPredicate("brand_id=brand_id4join").linkFrom(t1_join, t1_brand);
BatchOperator t2_label = t2.filter("type=1").select("user_id AS user_id4label, brand_id AS brand_id4label, 1 as label").distinct();
BatchOperator feature_label = new LeftOuterJoinBatchOp().setSelectClause("*").setJoinPredicate("user_id = user_id4label AND brand_id = brand_id4label").linkFrom(t1_join, t2_label);
Imputer imputer = new Imputer().setStrategy("value").setFillValue("0").setSelectedCols("label");
feature_label = imputer.fit(feature_label).transform(feature_label);
System.out.println(feature_label.getSchema());
String[] featureColNames = ArrayUtils.removeElements(feature_label.getColNames(), new String[] { "user_id", "brand_id", "user_id4join", "brand_id4join", "user_id4label", "brand_id4label", LABEL_COL_NAME });
StringBuilder sbd = new StringBuilder();
for (String name : featureColNames) {
sbd.append("CAST(").append(name).append(" AS DOUBLE) AS ").append(name).append(", ");
}
sbd.append(LABEL_COL_NAME);
feature_label.select(sbd.toString()).link(new AkSinkBatchOp().setFilePath(DATA_DIR + FEATURE_LABEL_FILE).setOverwriteSink(true));
BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp in project Alink by alibaba.
the class FmRecommTrainBatchOp method linkFrom.
/**
* There are 3 input tables: 1) user-item-label table, 2) user features table, 3) item features table.
* If user or item features table is missing, then use their IDs as features.
*/
@Override
public FmRecommTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
BatchOperator<?> samplesOp = inputs[0];
final Long envId = samplesOp.getMLEnvironmentId();
BatchOperator<?> userFeaturesOp = inputs.length >= 2 ? inputs[1] : null;
BatchOperator<?> itemFeaturesOp = inputs.length >= 3 ? inputs[2] : null;
Params params = getParams();
String userCol = params.get(USER_COL);
String itemCol = params.get(ITEM_COL);
String labelCol = params.get(RATE_COL);
String[] userFeatureCols = params.get(USER_FEATURE_COLS);
String[] itemFeatureCols = params.get(ITEM_FEATURE_COLS);
String[] userCateFeatureCols = params.get(USER_CATEGORICAL_FEATURE_COLS);
String[] itemCateFeatureCols = params.get(ITEM_CATEGORICAL_FEATURE_COLS);
if (userFeaturesOp == null) {
userFeaturesOp = samplesOp.select("`" + userCol + "`").distinct();
userFeatureCols = new String[] { userCol };
userCateFeatureCols = new String[] { userCol };
} else {
Preconditions.checkArgument(TableUtil.findColTypeWithAssert(userFeaturesOp.getSchema(), userCol).equals(TableUtil.findColTypeWithAssert(samplesOp.getSchema(), userCol)), "user column type mismatch");
}
if (itemFeaturesOp == null) {
itemFeaturesOp = samplesOp.select("`" + itemCol + "`").distinct();
itemFeatureCols = new String[] { itemCol };
itemCateFeatureCols = new String[] { itemCol };
} else {
Preconditions.checkArgument(TableUtil.findColTypeWithAssert(itemFeaturesOp.getSchema(), itemCol).equals(TableUtil.findColTypeWithAssert(samplesOp.getSchema(), itemCol)), "item column type mismatch");
}
BatchOperator<?> history = samplesOp.select(new String[] { userCol, itemCol });
userFeaturesOp = createFeatureVectors(userFeaturesOp, userCol, userFeatureCols, userCateFeatureCols);
itemFeaturesOp = createFeatureVectors(itemFeaturesOp, itemCol, itemFeatureCols, itemCateFeatureCols);
LeftOuterJoinBatchOp joinOp1 = new LeftOuterJoinBatchOp().setMLEnvironmentId(envId).setJoinPredicate("a.`" + userCol + "`=" + "b.`" + userCol + "`").setSelectClause("a.*, b.__fm_features__ as __user_features__");
LeftOuterJoinBatchOp joinOp2 = new LeftOuterJoinBatchOp().setMLEnvironmentId(envId).setJoinPredicate("a.`" + itemCol + "`=" + "b.`" + itemCol + "`").setSelectClause("a.*, b.__fm_features__ as __item_features__");
samplesOp = joinOp1.linkFrom(samplesOp, userFeaturesOp);
samplesOp = joinOp2.linkFrom(samplesOp, itemFeaturesOp);
samplesOp = samplesOp.udf("__user_features__", "__user_features__", new CheckNotNull());
samplesOp = samplesOp.udf("__item_features__", "__item_features__", new CheckNotNull());
VectorAssembler va = new VectorAssembler().setMLEnvironmentId(envId).setSelectedCols("__user_features__", "__item_features__").setOutputCol("__alink_features__").setReservedCols(labelCol);
samplesOp = va.transform(samplesOp);
BatchOperator<?> fmModel;
if (!implicitFeedback) {
fmModel = new FmRegressorTrainBatchOp(params).setLabelCol(params.get(RATE_COL)).setVectorCol("__alink_features__").setMLEnvironmentId(envId);
} else {
fmModel = new FmClassifierTrainBatchOp(params).setLabelCol(params.get(RATE_COL)).setVectorCol("__alink_features__").setMLEnvironmentId(envId);
}
fmModel.linkFrom(samplesOp);
BatchOperator<?> model = PackBatchOperatorUtil.packBatchOps(new BatchOperator<?>[] { fmModel, userFeaturesOp, itemFeaturesOp, history });
setOutputTable(model.getOutputTable());
return this;
}
use of com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp in project Alink by alibaba.
the class HugeMfAlsImpl method factorize.
public static Tuple2<BatchOperator<?>, BatchOperator<?>> factorize(BatchOperator<?> data, Params params, boolean implicit) {
final Long envId = data.getMLEnvironmentId();
final String userColName = params.get(AlsTrainParams.USER_COL);
final String itemColName = params.get(AlsTrainParams.ITEM_COL);
final String rateColName = params.get(AlsTrainParams.RATE_COL);
final double lambda = params.get(AlsTrainParams.LAMBDA);
final int rank = params.get(AlsTrainParams.RANK);
final int numIter = params.get(AlsTrainParams.NUM_ITER);
final boolean nonNegative = params.get(AlsTrainParams.NON_NEGATIVE);
final double alpha = params.get(AlsImplicitTrainParams.ALPHA);
final int numMiniBatches = params.get(AlsTrainParams.NUM_BLOCKS);
final int userColIdx = TableUtil.findColIndexWithAssert(data.getColNames(), userColName);
final int itemColIdx = TableUtil.findColIndexWithAssert(data.getColNames(), itemColName);
final int rateColIdx = TableUtil.findColIndexWithAssert(data.getColNames(), rateColName);
TypeInformation<?> userType = data.getColTypes()[userColIdx];
TypeInformation<?> itemType = data.getColTypes()[itemColIdx];
boolean isLongTypeId = userType.equals(Types.LONG) && itemType.equals(Types.LONG);
BatchOperator<?> distinctUsers = null;
BatchOperator<?> distinctItems = null;
if (!isLongTypeId) {
distinctUsers = data.select("`" + userColName + "`").distinct();
distinctItems = data.select("`" + itemColName + "`").distinct();
MultiStringIndexerTrainBatchOp msi = new MultiStringIndexerTrainBatchOp().setMLEnvironmentId(envId).setSelectedCols(userColName, itemColName).linkFrom(data);
MultiStringIndexerPredictBatchOp msiPredictor = new MultiStringIndexerPredictBatchOp().setMLEnvironmentId(envId).setSelectedCols(userColName, itemColName);
data = msiPredictor.linkFrom(msi, data);
distinctUsers = new MultiStringIndexerPredictBatchOp().setMLEnvironmentId(envId).setSelectedCols(userColName).setOutputCols("__user_index").linkFrom(msi, distinctUsers);
distinctItems = new MultiStringIndexerPredictBatchOp().setMLEnvironmentId(envId).setSelectedCols(itemColName).setOutputCols("__item_index").linkFrom(msi, distinctItems);
}
// tuple3: userId, itemId, rating
DataSet<Tuple3<Long, Long, Float>> alsInput = data.getDataSet().map(new MapFunction<Row, Tuple3<Long, Long, Float>>() {
private static final long serialVersionUID = 6671683813980584160L;
@Override
public Tuple3<Long, Long, Float> map(Row value) {
Object user = value.getField(userColIdx);
Object item = value.getField(itemColIdx);
Object rating = value.getField(rateColIdx);
Preconditions.checkNotNull(user, "user is null");
Preconditions.checkNotNull(item, "item is null");
Preconditions.checkNotNull(rating, "rating is null");
return new Tuple3<>(((Number) user).longValue(), ((Number) item).longValue(), ((Number) rating).floatValue());
}
});
AlsTrain als = new AlsTrain(rank, numIter, lambda, implicit, alpha, numMiniBatches, nonNegative);
DataSet<Tuple3<Byte, Long, float[]>> factors = als.fit(alsInput);
BatchOperator<?> userFactors = getFactors(envId, factors, userColName, (byte) 0);
BatchOperator<?> itemFactors = getFactors(envId, factors, itemColName, (byte) 1);
if (!isLongTypeId) {
BatchOperator<?> joinUser = new LeftOuterJoinBatchOp().setMLEnvironmentId(envId).setJoinPredicate(String.format("a.`%s`=b.__user_index", userColName)).setSelectClause(String.format("b.`%s`, a.`%s`", userColName, "factors"));
userFactors = joinUser.linkFrom(userFactors, distinctUsers);
BatchOperator<?> joinItem = new LeftOuterJoinBatchOp().setMLEnvironmentId(envId).setJoinPredicate(String.format("a.`%s`=b.__item_index", itemColName)).setSelectClause(String.format("b.`%s`, a.`%s`", itemColName, "factors"));
itemFactors = joinItem.linkFrom(itemFactors, distinctItems);
}
return Tuple2.of(userFactors, itemFactors);
}
Aggregations