Search in sources :

Example 1 with JoinBatchOp

use of com.alibaba.alink.operator.batch.sql.JoinBatchOp in project Alink by alibaba.

the class Chap05 method c_2_2.

static void c_2_2() throws Exception {
    BatchOperator<?> ratings = Chap24.getSourceRatings();
    BatchOperator<?> items = Chap24.getSourceItems();
    BatchOperator left_ratings = ratings.filter("user_id<3 AND item_id<4").select("user_id, item_id, rating");
    BatchOperator right_movies = items.select("item_id AS movie_id, title").filter("movie_id < 6 AND MOD(movie_id, 2) = 1");
    System.out.println("# left_ratings #");
    left_ratings.print();
    System.out.println("\n# right_movies #");
    right_movies.print();
    System.out.println("# JOIN #");
    new JoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
    System.out.println("\n# LEFT OUTER JOIN #");
    new LeftOuterJoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
    System.out.println("\n# RIGHT OUTER JOIN #");
    new RightOuterJoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
    System.out.println("\n# FULL OUTER JOIN #");
    new FullOuterJoinBatchOp().setJoinPredicate("item_id = movie_id").setSelectClause("user_id, item_id, title, rating").linkFrom(left_ratings, right_movies).print();
}
Also used : LeftOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp) FullOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.FullOuterJoinBatchOp) LeftOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp) JoinBatchOp(com.alibaba.alink.operator.batch.sql.JoinBatchOp) FullOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.FullOuterJoinBatchOp) RightOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.RightOuterJoinBatchOp) RightOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.RightOuterJoinBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator)

Example 2 with JoinBatchOp

use of com.alibaba.alink.operator.batch.sql.JoinBatchOp in project Alink by alibaba.

the class Chap11 method c_3.

static void c_3() throws Exception {
    BatchOperator<?> source = getSource();
    BatchOperator t1 = source.filter("ts < CAST('2014-07-16 00:00:00' AS TIMESTAMP)");
    BatchOperator t2 = source.filter("ts >= CAST('2014-07-16 00:00:00' AS TIMESTAMP)");
    t1.lazyPrint(3, "[ ts < '2014-07-16 00:00:00' ]").lazyPrintStatistics();
    t2.lazyPrint(3, "[ ts >= '2014-07-16 00:00:00' ]").lazyPrintStatistics();
    BatchOperator.execute();
    String clausePreProc = "user_id, brand_id, type, ts, past_days," + "case when type=0 then 1 else 0 end AS is_click," + "case when type=1 then 1 else 0 end AS is_buy," + "case when type=2 then 1 else 0 end AS is_collect," + "case when type=3 then 1 else 0 end AS is_cart," + "case when type=0 and past_days<=30 then 1 else 0 end AS is_click_1m," + "case when type=1 and past_days<=30 then 1 else 0 end AS is_buy_1m," + "case when type=2 and past_days<=30 then 1 else 0 end AS is_collect_1m," + "case when type=3 and past_days<=30 then 1 else 0 end AS is_cart_1m," + "case when type=0 and past_days<=60 then 1 else 0 end AS is_click_2m," + "case when type=1 and past_days<=60 then 1 else 0 end AS is_buy_2m," + "case when type=2 and past_days<=60 then 1 else 0 end AS is_collect_2m," + "case when type=3 and past_days<=60 then 1 else 0 end AS is_cart_2m," + "case when type=0 and past_days<=90 then 1 else 0 end AS is_click_3m," + "case when type=1 and past_days<=90 then 1 else 0 end AS is_buy_3m," + "case when type=2 and past_days<=90 then 1 else 0 end AS is_collect_3m," + "case when type=3 and past_days<=90 then 1 else 0 end AS is_cart_3m," + "case when type=0 and past_days>30 and past_days<=60 then 1 else 0 end AS is_click_m2nd," + "case when type=1 and past_days>30 and past_days<=60 then 1 else 0 end AS is_buy_m2nd," + "case when type=2 and past_days>30 and past_days<=60 then 1 else 0 end AS is_collect_m2nd," + "case when type=3 and past_days>30 and past_days<=60 then 1 else 0 end AS is_cart_m2nd," + "case when type=0 and past_days>60 and past_days<=90 then 1 else 0 end AS is_click_m3th," + "case when type=1 and past_days>60 and past_days<=90 then 1 else 0 end AS is_buy_m3th," + "case when type=2 and past_days>60 and past_days<=90 then 1 else 0 end AS is_collect_m3th," + "case when type=3 and past_days>60 and past_days<=90 then 1 else 0 end AS is_cart_m3th," + "case when type=0 and past_days<=3 then 1 else 0 end AS is_click_3d," + "case when type=1 and past_days<=3 then 1 else 0 end AS is_buy_3d," + "case when type=2 and past_days<=3 then 1 else 0 end AS is_collect_3d," + "case when type=3 and past_days<=3 then 1 else 0 end AS is_cart_3d," + "case when type=0 and past_days>3 and past_days<=6 then 1 else 0 end AS is_click_3d2nd," + "case when type=1 and past_days>3 and past_days<=6 then 1 else 0 end AS is_buy_3d2nd," + "case when type=2 and past_days>3 and past_days<=6 then 1 else 0 end AS is_collect_3d2nd," + "case when type=3 and past_days>3 and past_days<=6 then 1 else 0 end AS is_cart_3d2nd," + "case when type=0 and past_days>6 and past_days<=9 then 1 else 0 end AS is_click_3d3th," + "case when type=1 and past_days>6 and past_days<=9 then 1 else 0 end AS is_buy_3d3th," + "case when type=2 and past_days>6 and past_days<=9 then 1 else 0 end AS is_collect_3d3th," + "case when type=3 and past_days>6 and past_days<=9 then 1 else 0 end AS is_cart_3d3th," + "case when type=0 and past_days<=7 then 1 else 0 end AS is_click_1w," + "case when type=1 and past_days<=7 then 1 else 0 end AS is_buy_1w," + "case when type=2 and past_days<=7 then 1 else 0 end AS is_collect_1w," + "case when type=3 and past_days<=7 then 1 else 0 end AS is_cart_1w," + "case when type=0 and past_days>7 and past_days<=14 then 1 else 0 end AS is_click_w2nd," + "case when type=1 and past_days>7 and past_days<=14 then 1 else 0 end AS is_buy_w2nd," + "case when type=2 and past_days>7 and past_days<=14 then 1 else 0 end AS is_collect_w2nd," + "case when type=3 and past_days>7 and past_days<=14 then 1 else 0 end AS is_cart_w2nd," + "case when type=0 and past_days>14 and past_days<=21 then 1 else 0 end AS is_click_w3th," + "case when type=1 and past_days>14 and past_days<=21 then 1 else 0 end AS is_buy_w3th," + "case when type=2 and past_days>14 and past_days<=21 then 1 else 0 end AS is_collect_w3th," + "case when type=3 and past_days>14 and past_days<=21 then 1 else 0 end AS is_cart_w3th";
    BatchOperator t1_preproc = t1.select("user_id, brand_id, type, ts, " + "TIMESTAMPDIFF(DAY, ts, TIMESTAMP '2014-07-16 00:00:00') AS past_days").select(clausePreProc);
    String clauseUserBrand = "user_id, brand_id, SUM(is_click) as cnt_click, SUM(is_buy) as cnt_buy, " + "SUM(is_collect) as cnt_collect, SUM(is_cart) as cnt_cart, " + "SUM(is_click_1m) as cnt_click_1m, SUM(is_buy_1m) as cnt_buy_1m, " + "SUM(is_collect_1m) as cnt_collect_1m, SUM(is_cart_1m) as cnt_cart_1m, " + "SUM(is_click_2m) as cnt_click_2m, SUM(is_buy_2m) as cnt_buy_2m, " + "SUM(is_collect_2m) as cnt_collect_2m, SUM(is_cart_2m) as cnt_cart_2m, " + "SUM(is_click_3m) as cnt_click_3m, SUM(is_buy_3m) as cnt_buy_3m, " + "SUM(is_collect_3m) as cnt_collect_3m, SUM(is_cart_3m) as cnt_cart_3m, " + "SUM(is_click_m2nd) as cnt_click_m2nd, SUM(is_buy_m2nd) as cnt_buy_m2nd, " + "SUM(is_collect_m2nd) as cnt_collect_m2nd, SUM(is_cart_m2nd) as cnt_cart_m2nd, " + "SUM(is_click_m3th) as cnt_click_m3th, SUM(is_buy_m3th) as cnt_buy_m3th, " + "SUM(is_collect_m3th) as cnt_collect_m3th, SUM(is_cart_m3th) as cnt_cart_m3th, " + "SUM(is_click_3d) as cnt_click_3d, SUM(is_buy_3d) as cnt_buy_3d, " + "SUM(is_collect_3d) as cnt_collect_3d, SUM(is_cart_3d) as cnt_cart_3d, " + "SUM(is_click_3d2nd) as cnt_click_3d2nd, SUM(is_buy_3d2nd) as cnt_buy_3d2nd, " + "SUM(is_collect_3d2nd) as cnt_collect_3d2nd, SUM(is_cart_3d2nd) as cnt_cart_3d2nd, " + "SUM(is_click_3d3th) as cnt_click_3d3th, SUM(is_buy_3d3th) as cnt_buy_3d3th, " + "SUM(is_collect_3d3th) as cnt_collect_3d3th, SUM(is_cart_3d3th) as cnt_cart_3d3th, " + "SUM(is_click_1w) as cnt_click_1w, SUM(is_buy_1w) as cnt_buy_1w, " + "SUM(is_collect_1w) as cnt_collect_1w, SUM(is_cart_1w) as cnt_cart_1w, " + "SUM(is_click_w2nd) as cnt_click_w2nd, SUM(is_buy_w2nd) as cnt_buy_w2nd, " + "SUM(is_collect_w2nd) as cnt_collect_w2nd, SUM(is_cart_w2nd) as cnt_cart_w2nd, " + "SUM(is_click_w3th) as cnt_click_w3th, SUM(is_buy_w3th) as cnt_buy_w3th, " + "SUM(is_collect_w3th) as cnt_collect_w3th, SUM(is_cart_w3th) as cnt_cart_w3th";
    BatchOperator t1_userbrand = t1_preproc.groupBy("user_id, brand_id", clauseUserBrand);
    String clauseUserBrand_Rate = "user_id,brand_id," + "cnt_click,cnt_buy,cnt_collect,cnt_cart," + "cnt_click_1m,cnt_buy_1m,cnt_collect_1m,cnt_cart_1m," + "cnt_click_2m,cnt_buy_2m,cnt_collect_2m,cnt_cart_2m," + "cnt_click_3m,cnt_buy_3m,cnt_collect_3m,cnt_cart_3m," + "cnt_click_m2nd,cnt_buy_m2nd,cnt_collect_m2nd,cnt_cart_m2nd," + "cnt_click_m3th,cnt_buy_m3th,cnt_collect_m3th,cnt_cart_m3th," + "cnt_click_3d,cnt_buy_3d,cnt_collect_3d,cnt_cart_3d," + "cnt_click_3d2nd,cnt_buy_3d2nd,cnt_collect_3d2nd,cnt_cart_3d2nd," + "cnt_click_3d3th,cnt_buy_3d3th,cnt_collect_3d3th,cnt_cart_3d3th," + "cnt_click_1w,cnt_buy_1w,cnt_collect_1w,cnt_cart_1w," + "cnt_click_w2nd,cnt_buy_w2nd,cnt_collect_w2nd,cnt_cart_w2nd," + "cnt_click_w3th,cnt_buy_w3th,cnt_collect_w3th,cnt_cart_w3th," + "case when cnt_buy>cnt_click then 1.0 when cnt_buy=0 then 0.0 else cnt_buy*1.0/cnt_click end AS " + "rt_click2buy," + "case when cnt_buy>cnt_collect then 1.0 when cnt_buy=0 then 0.0 else cnt_buy*1.0/cnt_collect end AS " + "rt_collect2buy," + "case when cnt_buy>cnt_cart then 1.0 when cnt_buy=0 then 0.0 else cnt_buy*1.0/cnt_cart end AS " + "rt_cart2buy," + "case when cnt_buy_3d>cnt_click_3d then 1.0 when cnt_buy_3d=0 then 0.0 else cnt_buy_3d*1.0/cnt_click_3d" + " end AS rt_click2buy_3d," + "case when cnt_buy_3d>cnt_collect_3d then 1.0 when cnt_buy_3d=0 then 0.0 else " + "cnt_buy_3d*1.0/cnt_collect_3d end AS rt_collect2buy_3d," + "case when cnt_buy_3d>cnt_cart_3d then 1.0 when cnt_buy_3d=0 then 0.0 else cnt_buy_3d*1.0/cnt_cart_3d " + "end AS rt_cart2buy_3d," + "case when cnt_buy_1w>cnt_click_1w then 1.0 when cnt_buy_1w=0 then 0.0 else cnt_buy_1w*1.0/cnt_click_1w" + " end AS rt_click2buy_1w," + "case when cnt_buy_1w>cnt_collect_1w then 1.0 when cnt_buy_1w=0 then 0.0 else " + "cnt_buy_1w*1.0/cnt_collect_1w end AS rt_collect2buy_1w," + "case when cnt_buy_1w>cnt_cart_1w then 1.0 when cnt_buy_1w=0 then 0.0 else cnt_buy_1w*1.0/cnt_cart_1w " + "end AS rt_cart2buy_1w," + "case when cnt_buy_1m>cnt_click_1m then 1.0 when cnt_buy_1m=0 then 0.0 else cnt_buy_1m*1.0/cnt_click_1m" + " end AS rt_click2buy_1m," + "case when cnt_buy_1m>cnt_collect_1m then 1.0 when cnt_buy_1m=0 then 0.0 else " + "cnt_buy_1m*1.0/cnt_collect_1m end AS rt_collect2buy_1m," + "case when cnt_buy_1m>cnt_cart_1m then 1.0 when cnt_buy_1m=0 then 0.0 else cnt_buy_1m*1.0/cnt_cart_1m " + "end AS rt_cart2buy_1m," + "case when cnt_click_3d=0 then 0.0 when cnt_click_3d>=120.0*cnt_click_3d2nd then 120.0 else " + "cnt_click_3d*1.0/cnt_click_3d2nd end AS rt_click_3d," + "case when cnt_buy_3d=0 then 0.0 when cnt_buy_3d>=10.0*cnt_buy_3d2nd then 10.0 else " + "cnt_buy_3d*1.0/cnt_buy_3d2nd end AS rt_buy_3d," + "case when cnt_collect_3d=0 then 0.0 when cnt_collect_3d>=10.0*cnt_collect_3d2nd then 10.0 else " + "cnt_collect_3d*1.0/cnt_collect_3d2nd end AS rt_collect_3d," + "case when cnt_cart_3d=0 then 0.0 when cnt_cart_3d>=20.0*cnt_cart_3d2nd then 20.0 else " + "cnt_cart_3d*1.0/cnt_cart_3d2nd end AS rt_cart_3d," + "case when cnt_click_1w=0 then 0.0 when cnt_click_1w>=300.0*cnt_click_w2nd then 300.0 else " + "cnt_click_1w*1.0/cnt_click_w2nd end AS rt_click_1w," + "case when cnt_buy_1w=0 then 0.0 when cnt_buy_1w>=15.0*cnt_buy_w2nd then 15.0 else " + "cnt_buy_1w*1.0/cnt_buy_w2nd end AS rt_buy_1w," + "case when cnt_collect_1w=0 then 0.0 when cnt_collect_1w>=15.0*cnt_collect_w2nd then 15.0 else " + "cnt_collect_1w*1.0/cnt_collect_w2nd end AS rt_collect_1w," + "case when cnt_cart_1w=0 then 0.0 when cnt_cart_1w>=40.0*cnt_cart_w2nd then 40.0 else " + "cnt_cart_1w*1.0/cnt_cart_w2nd end AS rt_cart_1w," + "case when cnt_click_1m=0 then 0.0 when cnt_click_1m>=500.0*cnt_click_m2nd then 500.0 else " + "cnt_click_1m*1.0/cnt_click_m2nd end AS rt_click_1m," + "case when cnt_buy_1m=0 then 0.0 when cnt_buy_1m>=30.0*cnt_buy_m2nd then 30.0 else " + "cnt_buy_1m*1.0/cnt_buy_m2nd end AS rt_buy_1m," + "case when cnt_collect_1m=0 then 0.0 when cnt_collect_1m>=30.0*cnt_collect_m2nd then 30.0 else " + "cnt_collect_1m*1.0/cnt_collect_m2nd end AS rt_collect_1m," + "case when cnt_cart_1m=0 then 0.0 when cnt_cart_1m>=50.0*cnt_cart_m2nd then 50.0 else " + "cnt_cart_1m*1.0/cnt_cart_m2nd end AS rt_cart_1m";
    t1_userbrand = t1_userbrand.select(clauseUserBrand_Rate);
    String clauseUser = "user_id, " + "SUM(is_click) as user_cnt_click, SUM(is_buy) as user_cnt_buy, " + "SUM(is_collect) as user_cnt_collect, SUM(is_cart) as user_cnt_cart, " + "SUM(is_click_1m) as user_cnt_click_1m, SUM(is_buy_1m) as user_cnt_buy_1m, " + "SUM(is_collect_1m) as user_cnt_collect_1m, SUM(is_cart_1m) as user_cnt_cart_1m, " + "SUM(is_click_2m) as user_cnt_click_2m, SUM(is_buy_2m) as user_cnt_buy_2m, " + "SUM(is_collect_2m) as user_cnt_collect_2m, SUM(is_cart_2m) as user_cnt_cart_2m, " + "SUM(is_click_3m) as user_cnt_click_3m, SUM(is_buy_3m) as user_cnt_buy_3m, " + "SUM(is_collect_3m) as user_cnt_collect_3m, SUM(is_cart_3m) as user_cnt_cart_3m, " + "SUM(is_click_m2nd) as user_cnt_click_m2nd, SUM(is_buy_m2nd) as user_cnt_buy_m2nd, " + "SUM(is_collect_m2nd) as user_cnt_collect_m2nd, SUM(is_cart_m2nd) as user_cnt_cart_m2nd, " + "SUM(is_click_m3th) as user_cnt_click_m3th, SUM(is_buy_m3th) as user_cnt_buy_m3th, " + "SUM(is_collect_m3th) as user_cnt_collect_m3th, SUM(is_cart_m3th) as user_cnt_cart_m3th, " + "SUM(is_click_3d) as user_cnt_click_3d, SUM(is_buy_3d) as user_cnt_buy_3d, " + "SUM(is_collect_3d) as user_cnt_collect_3d, SUM(is_cart_3d) as user_cnt_cart_3d, " + "SUM(is_click_3d2nd) as user_cnt_click_3d2nd, SUM(is_buy_3d2nd) as user_cnt_buy_3d2nd, " + "SUM(is_collect_3d2nd) as user_cnt_collect_3d2nd, SUM(is_cart_3d2nd) as user_cnt_cart_3d2nd, " + "SUM(is_click_3d3th) as user_cnt_click_3d3th, SUM(is_buy_3d3th) as user_cnt_buy_3d3th, " + "SUM(is_collect_3d3th) as user_cnt_collect_3d3th, SUM(is_cart_3d3th) as user_cnt_cart_3d3th, " + "SUM(is_click_1w) as user_cnt_click_1w, SUM(is_buy_1w) as user_cnt_buy_1w, " + "SUM(is_collect_1w) as user_cnt_collect_1w, SUM(is_cart_1w) as user_cnt_cart_1w, " + "SUM(is_click_w2nd) as user_cnt_click_w2nd, SUM(is_buy_w2nd) as user_cnt_buy_w2nd, " + "SUM(is_collect_w2nd) as user_cnt_collect_w2nd, SUM(is_cart_w2nd) as user_cnt_cart_w2nd, " + "SUM(is_click_w3th) as user_cnt_click_w3th, SUM(is_buy_w3th) as user_cnt_buy_w3th, " + "SUM(is_collect_w3th) as user_cnt_collect_w3th, SUM(is_cart_w3th) as user_cnt_cart_w3th";
    BatchOperator t1_user = t1_preproc.groupBy("user_id", clauseUser);
    String clauseUser_Rate = "user_id AS user_id4join," + "user_cnt_click,user_cnt_buy,user_cnt_collect,user_cnt_cart," + "user_cnt_click_1m,user_cnt_buy_1m,user_cnt_collect_1m,user_cnt_cart_1m," + "user_cnt_click_2m,user_cnt_buy_2m,user_cnt_collect_2m,user_cnt_cart_2m," + "user_cnt_click_3m,user_cnt_buy_3m,user_cnt_collect_3m,user_cnt_cart_3m," + "user_cnt_click_m2nd,user_cnt_buy_m2nd,user_cnt_collect_m2nd,user_cnt_cart_m2nd," + "user_cnt_click_m3th,user_cnt_buy_m3th,user_cnt_collect_m3th,user_cnt_cart_m3th," + "user_cnt_click_3d,user_cnt_buy_3d,user_cnt_collect_3d,user_cnt_cart_3d," + "user_cnt_click_3d2nd,user_cnt_buy_3d2nd,user_cnt_collect_3d2nd,user_cnt_cart_3d2nd," + "user_cnt_click_3d3th,user_cnt_buy_3d3th,user_cnt_collect_3d3th,user_cnt_cart_3d3th," + "user_cnt_click_1w,user_cnt_buy_1w,user_cnt_collect_1w,user_cnt_cart_1w," + "user_cnt_click_w2nd,user_cnt_buy_w2nd,user_cnt_collect_w2nd,user_cnt_cart_w2nd," + "user_cnt_click_w3th,user_cnt_buy_w3th,user_cnt_collect_w3th,user_cnt_cart_w3th," + "case when user_cnt_buy>user_cnt_click then 1.0 when user_cnt_buy=0 then 0.0 else " + "user_cnt_buy*1.0/user_cnt_click end AS user_rt_click2buy," + "case when user_cnt_buy>user_cnt_collect then 1.0 when user_cnt_buy=0 then 0.0 else " + "user_cnt_buy*1.0/user_cnt_collect end AS user_rt_collect2buy," + "case when user_cnt_buy>user_cnt_cart then 1.0 when user_cnt_buy=0 then 0.0 else " + "user_cnt_buy*1.0/user_cnt_cart end AS user_rt_cart2buy," + "case when user_cnt_buy_3d>user_cnt_click_3d then 1.0 when user_cnt_buy_3d=0 then 0.0 else " + "user_cnt_buy_3d*1.0/user_cnt_click_3d end AS user_rt_click2buy_3d," + "case when user_cnt_buy_3d>user_cnt_collect_3d then 1.0 when user_cnt_buy_3d=0 then 0.0 else " + "user_cnt_buy_3d*1.0/user_cnt_collect_3d end AS user_rt_collect2buy_3d," + "case when user_cnt_buy_3d>user_cnt_cart_3d then 1.0 when user_cnt_buy_3d=0 then 0.0 else " + "user_cnt_buy_3d*1.0/user_cnt_cart_3d end AS user_rt_cart2buy_3d," + "case when user_cnt_buy_1w>user_cnt_click_1w then 1.0 when user_cnt_buy_1w=0 then 0.0 else " + "user_cnt_buy_1w*1.0/user_cnt_click_1w end AS user_rt_click2buy_1w," + "case when user_cnt_buy_1w>user_cnt_collect_1w then 1.0 when user_cnt_buy_1w=0 then 0.0 else " + "user_cnt_buy_1w*1.0/user_cnt_collect_1w end AS user_rt_collect2buy_1w," + "case when user_cnt_buy_1w>user_cnt_cart_1w then 1.0 when user_cnt_buy_1w=0 then 0.0 else " + "user_cnt_buy_1w*1.0/user_cnt_cart_1w end AS user_rt_cart2buy_1w," + "case when user_cnt_buy_1m>user_cnt_click_1m then 1.0 when user_cnt_buy_1m=0 then 0.0 else " + "user_cnt_buy_1m*1.0/user_cnt_click_1m end AS user_rt_click2buy_1m," + "case when user_cnt_buy_1m>user_cnt_collect_1m then 1.0 when user_cnt_buy_1m=0 then 0.0 else " + "user_cnt_buy_1m*1.0/user_cnt_collect_1m end AS user_rt_collect2buy_1m," + "case when user_cnt_buy_1m>user_cnt_cart_1m then 1.0 when user_cnt_buy_1m=0 then 0.0 else " + "user_cnt_buy_1m*1.0/user_cnt_cart_1m end AS user_rt_cart2buy_1m";
    t1_user = t1_user.select(clauseUser_Rate);
    String clauseBrand = "brand_id, " + "SUM(is_click) as brand_cnt_click, SUM(is_buy) as brand_cnt_buy, " + "SUM(is_collect) as brand_cnt_collect, SUM(is_cart) as brand_cnt_cart, " + "SUM(is_click_1m) as brand_cnt_click_1m, SUM(is_buy_1m) as brand_cnt_buy_1m, " + "SUM(is_collect_1m) as brand_cnt_collect_1m, SUM(is_cart_1m) as brand_cnt_cart_1m, " + "SUM(is_click_2m) as brand_cnt_click_2m, SUM(is_buy_2m) as brand_cnt_buy_2m, " + "SUM(is_collect_2m) as brand_cnt_collect_2m, SUM(is_cart_2m) as brand_cnt_cart_2m, " + "SUM(is_click_3m) as brand_cnt_click_3m, SUM(is_buy_3m) as brand_cnt_buy_3m, " + "SUM(is_collect_3m) as brand_cnt_collect_3m, SUM(is_cart_3m) as brand_cnt_cart_3m, " + "SUM(is_click_m2nd) as brand_cnt_click_m2nd, SUM(is_buy_m2nd) as brand_cnt_buy_m2nd, " + "SUM(is_collect_m2nd) as brand_cnt_collect_m2nd, SUM(is_cart_m2nd) as brand_cnt_cart_m2nd, " + "SUM(is_click_m3th) as brand_cnt_click_m3th, SUM(is_buy_m3th) as brand_cnt_buy_m3th, " + "SUM(is_collect_m3th) as brand_cnt_collect_m3th, SUM(is_cart_m3th) as brand_cnt_cart_m3th, " + "SUM(is_click_3d) as brand_cnt_click_3d, SUM(is_buy_3d) as brand_cnt_buy_3d, " + "SUM(is_collect_3d) as brand_cnt_collect_3d, SUM(is_cart_3d) as brand_cnt_cart_3d, " + "SUM(is_click_3d2nd) as brand_cnt_click_3d2nd, SUM(is_buy_3d2nd) as brand_cnt_buy_3d2nd, " + "SUM(is_collect_3d2nd) as brand_cnt_collect_3d2nd, SUM(is_cart_3d2nd) as brand_cnt_cart_3d2nd, " + "SUM(is_click_3d3th) as brand_cnt_click_3d3th, SUM(is_buy_3d3th) as brand_cnt_buy_3d3th, " + "SUM(is_collect_3d3th) as brand_cnt_collect_3d3th, SUM(is_cart_3d3th) as brand_cnt_cart_3d3th, " + "SUM(is_click_1w) as brand_cnt_click_1w, SUM(is_buy_1w) as brand_cnt_buy_1w, " + "SUM(is_collect_1w) as brand_cnt_collect_1w, SUM(is_cart_1w) as brand_cnt_cart_1w, " + "SUM(is_click_w2nd) as brand_cnt_click_w2nd, SUM(is_buy_w2nd) as brand_cnt_buy_w2nd, " + "SUM(is_collect_w2nd) as brand_cnt_collect_w2nd, SUM(is_cart_w2nd) as brand_cnt_cart_w2nd, " + "SUM(is_click_w3th) as brand_cnt_click_w3th, SUM(is_buy_w3th) as brand_cnt_buy_w3th, " + "SUM(is_collect_w3th) as brand_cnt_collect_w3th, SUM(is_cart_w3th) as brand_cnt_cart_w3th";
    BatchOperator t1_brand = t1_preproc.groupBy("brand_id", clauseBrand);
    String clauseBrand_Rate = "brand_id AS brand_id4join," + "brand_cnt_click,brand_cnt_buy,brand_cnt_collect,brand_cnt_cart," + "brand_cnt_click_1m,brand_cnt_buy_1m,brand_cnt_collect_1m,brand_cnt_cart_1m," + "brand_cnt_click_2m,brand_cnt_buy_2m,brand_cnt_collect_2m,brand_cnt_cart_2m," + "brand_cnt_click_3m,brand_cnt_buy_3m,brand_cnt_collect_3m,brand_cnt_cart_3m," + "brand_cnt_click_m2nd,brand_cnt_buy_m2nd,brand_cnt_collect_m2nd,brand_cnt_cart_m2nd," + "brand_cnt_click_m3th,brand_cnt_buy_m3th,brand_cnt_collect_m3th,brand_cnt_cart_m3th," + "brand_cnt_click_3d,brand_cnt_buy_3d,brand_cnt_collect_3d,brand_cnt_cart_3d," + "brand_cnt_click_3d2nd,brand_cnt_buy_3d2nd,brand_cnt_collect_3d2nd,brand_cnt_cart_3d2nd," + "brand_cnt_click_3d3th,brand_cnt_buy_3d3th,brand_cnt_collect_3d3th,brand_cnt_cart_3d3th," + "brand_cnt_click_1w,brand_cnt_buy_1w,brand_cnt_collect_1w,brand_cnt_cart_1w," + "brand_cnt_click_w2nd,brand_cnt_buy_w2nd,brand_cnt_collect_w2nd,brand_cnt_cart_w2nd," + "brand_cnt_click_w3th,brand_cnt_buy_w3th,brand_cnt_collect_w3th,brand_cnt_cart_w3th," + "case when brand_cnt_buy>brand_cnt_click then 1.0 when brand_cnt_buy=0 then 0.0 else " + "brand_cnt_buy*1.0/brand_cnt_click end AS brand_rt_click2buy," + "case when brand_cnt_buy>brand_cnt_collect then 1.0 when brand_cnt_buy=0 then 0.0 else " + "brand_cnt_buy*1.0/brand_cnt_collect end AS brand_rt_collect2buy," + "case when brand_cnt_buy>brand_cnt_cart then 1.0 when brand_cnt_buy=0 then 0.0 else " + "brand_cnt_buy*1.0/brand_cnt_cart end AS brand_rt_cart2buy," + "case when brand_cnt_buy_3d>brand_cnt_click_3d then 1.0 when brand_cnt_buy_3d=0 then 0.0 else " + "brand_cnt_buy_3d*1.0/brand_cnt_click_3d end AS brand_rt_click2buy_3d," + "case when brand_cnt_buy_3d>brand_cnt_collect_3d then 1.0 when brand_cnt_buy_3d=0 then 0.0 else " + "brand_cnt_buy_3d*1.0/brand_cnt_collect_3d end AS brand_rt_collect2buy_3d," + "case when brand_cnt_buy_3d>brand_cnt_cart_3d then 1.0 when brand_cnt_buy_3d=0 then 0.0 else " + "brand_cnt_buy_3d*1.0/brand_cnt_cart_3d end AS brand_rt_cart2buy_3d," + "case when brand_cnt_buy_1w>brand_cnt_click_1w then 1.0 when brand_cnt_buy_1w=0 then 0.0 else " + "brand_cnt_buy_1w*1.0/brand_cnt_click_1w end AS brand_rt_click2buy_1w," + "case when brand_cnt_buy_1w>brand_cnt_collect_1w then 1.0 when brand_cnt_buy_1w=0 then 0.0 else " + "brand_cnt_buy_1w*1.0/brand_cnt_collect_1w end AS brand_rt_collect2buy_1w," + "case when brand_cnt_buy_1w>brand_cnt_cart_1w then 1.0 when brand_cnt_buy_1w=0 then 0.0 else " + "brand_cnt_buy_1w*1.0/brand_cnt_cart_1w end AS brand_cart2buy_1w," + "case when brand_cnt_buy_1m>brand_cnt_click_1m then 1.0 when brand_cnt_buy_1m=0 then 0.0 else " + "brand_cnt_buy_1m*1.0/brand_cnt_click_1m end AS brand_rt_click2buy_1m," + "case when brand_cnt_buy_1m>brand_cnt_collect_1m then 1.0 when brand_cnt_buy_1m=0 then 0.0 else " + "brand_cnt_buy_1m*1.0/brand_cnt_collect_1m end AS brand_rt_collect2buy_1m," + "case when brand_cnt_buy_1m>brand_cnt_cart_1m then 1.0 when brand_cnt_buy_1m=0 then 0.0 else " + "brand_cnt_buy_1m*1.0/brand_cnt_cart_1m end AS brand_rt_cart2buy_1m";
    t1_brand = t1_brand.select(clauseBrand_Rate);
    BatchOperator t1_join = new JoinBatchOp().setSelectClause("*").setJoinPredicate("user_id=user_id4join").linkFrom(t1_userbrand, t1_user);
    t1_join = new JoinBatchOp().setSelectClause("*").setJoinPredicate("brand_id=brand_id4join").linkFrom(t1_join, t1_brand);
    BatchOperator t2_label = t2.filter("type=1").select("user_id AS user_id4label, brand_id AS brand_id4label, 1 as label").distinct();
    BatchOperator feature_label = new LeftOuterJoinBatchOp().setSelectClause("*").setJoinPredicate("user_id = user_id4label AND brand_id = brand_id4label").linkFrom(t1_join, t2_label);
    Imputer imputer = new Imputer().setStrategy("value").setFillValue("0").setSelectedCols("label");
    feature_label = imputer.fit(feature_label).transform(feature_label);
    System.out.println(feature_label.getSchema());
    String[] featureColNames = ArrayUtils.removeElements(feature_label.getColNames(), new String[] { "user_id", "brand_id", "user_id4join", "brand_id4join", "user_id4label", "brand_id4label", LABEL_COL_NAME });
    StringBuilder sbd = new StringBuilder();
    for (String name : featureColNames) {
        sbd.append("CAST(").append(name).append(" AS DOUBLE) AS ").append(name).append(", ");
    }
    sbd.append(LABEL_COL_NAME);
    feature_label.select(sbd.toString()).link(new AkSinkBatchOp().setFilePath(DATA_DIR + FEATURE_LABEL_FILE).setOverwriteSink(true));
    BatchOperator.execute();
}
Also used : LeftOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp) Imputer(com.alibaba.alink.pipeline.dataproc.Imputer) LeftOuterJoinBatchOp(com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp) JoinBatchOp(com.alibaba.alink.operator.batch.sql.JoinBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator)

Example 3 with JoinBatchOp

use of com.alibaba.alink.operator.batch.sql.JoinBatchOp in project Alink by alibaba.

the class Node2VecImpl method linkFrom.

@Override
public T linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> in = checkAndGetFirst(inputs);
    BatchOperator[] transResult = GraphEmbedding.trans2Index(in, null, this.getParams());
    BatchOperator vocab = transResult[0];
    BatchOperator indexedGraph = transResult[1];
    try {
        this.setOutputTable(new JoinBatchOp("word=" + GraphEmbedding.NODE_INDEX_COL, GraphEmbedding.NODE_COL + ",vec").setMLEnvironmentId(getMLEnvironmentId()).linkFrom(indexedGraph.link(new Node2VecWalkBatchOp(this.getParams().clone()).setSourceCol(GraphEmbedding.SOURCE_COL).setTargetCol(GraphEmbedding.TARGET_COL).setWeightCol(GraphEmbedding.WEIGHT_COL)).link(new Word2VecImpl<>(this.getParams().clone(), checkpoint).setSelectedCol(Node2VecWalkBatchOp.PATH_COL_NAME)).select("CAST(word AS BIGINT) AS word, vec"), vocab).getOutputTable());
        return (T) this;
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : Node2VecWalkBatchOp(com.alibaba.alink.operator.batch.graph.Node2VecWalkBatchOp) JoinBatchOp(com.alibaba.alink.operator.batch.sql.JoinBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator)

Example 4 with JoinBatchOp

use of com.alibaba.alink.operator.batch.sql.JoinBatchOp in project Alink by alibaba.

the class DeepWalkImpl method linkFrom.

@Override
public T linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> in = checkAndGetFirst(inputs);
    BatchOperator[] transResult = GraphEmbedding.trans2Index(in, null, this.getParams());
    BatchOperator vocab = transResult[0];
    BatchOperator indexedGraph = transResult[1];
    try {
        this.setOutputTable(new JoinBatchOp("word=" + GraphEmbedding.NODE_INDEX_COL, GraphEmbedding.NODE_COL + ",vec").setMLEnvironmentId(getMLEnvironmentId()).linkFrom(indexedGraph.link(new RandomWalkBatchOp(this.getParams().clone()).setSourceCol(GraphEmbedding.SOURCE_COL).setTargetCol(GraphEmbedding.TARGET_COL).setWeightCol(GraphEmbedding.WEIGHT_COL)).link(new Word2VecImpl<>(this.getParams().clone(), checkpoint).setSelectedCol(RandomWalkBatchOp.PATH_COL_NAME)).select("CAST(word AS BIGINT) AS word, vec"), vocab).getOutputTable());
        return (T) this;
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : JoinBatchOp(com.alibaba.alink.operator.batch.sql.JoinBatchOp) RandomWalkBatchOp(com.alibaba.alink.operator.batch.graph.RandomWalkBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator)

Example 5 with JoinBatchOp

use of com.alibaba.alink.operator.batch.sql.JoinBatchOp in project Alink by alibaba.

the class MetaPath2VecImpl method linkFrom.

@Override
public T linkFrom(BatchOperator<?>... inputs) {
    checkOpSize(2, inputs);
    BatchOperator<?> in1 = inputs[0];
    BatchOperator<?> in2 = inputs[1];
    BatchOperator[] transResult = GraphEmbedding.trans2Index(in1, in2, this.getParams());
    BatchOperator vocab = transResult[0];
    BatchOperator indexedGraph = transResult[1];
    BatchOperator indexWithType = transResult[2];
    try {
        BatchOperator metaPathWalk = new MetaPathWalkBatchOp(this.getParams().clone()).setSourceCol(GraphEmbedding.SOURCE_COL).setTargetCol(GraphEmbedding.TARGET_COL).setWeightCol(GraphEmbedding.WEIGHT_COL).setVertexCol(GraphEmbedding.NODE_INDEX_COL).setTypeCol(GraphEmbedding.NODE_TYPE_COL).linkFrom(indexedGraph, indexWithType);
        BatchOperator<?> word2vec;
        if (getMode().equals(Mode.METAPATH2VECPP)) {
            word2vec = new LabeledWord2VecImpl<>(this.getParams().clone(), checkpoint).setSelectedCol(RandomWalkBatchOp.PATH_COL_NAME).setVertexCol(GraphEmbedding.NODE_INDEX_COL).setTypeCol(GraphEmbedding.NODE_TYPE_COL).linkFrom(metaPathWalk, indexWithType.select("CAST(" + GraphEmbedding.NODE_INDEX_COL + " AS VARCHAR) AS " + GraphEmbedding.NODE_INDEX_COL + ", " + GraphEmbedding.NODE_TYPE_COL));
        } else {
            word2vec = new Word2VecImpl<>(this.getParams().clone(), checkpoint).setSelectedCol(RandomWalkBatchOp.PATH_COL_NAME).linkFrom(metaPathWalk);
        }
        this.setOutputTable(new JoinBatchOp("word=" + GraphEmbedding.NODE_INDEX_COL, GraphEmbedding.NODE_COL + ",vec").setMLEnvironmentId(getMLEnvironmentId()).linkFrom(word2vec.select("CAST(word AS BIGINT) AS word, vec"), vocab).getOutputTable());
        return (T) this;
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : MetaPathWalkBatchOp(com.alibaba.alink.operator.batch.graph.MetaPathWalkBatchOp) JoinBatchOp(com.alibaba.alink.operator.batch.sql.JoinBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator)

Aggregations

BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)7 JoinBatchOp (com.alibaba.alink.operator.batch.sql.JoinBatchOp)7 LeftOuterJoinBatchOp (com.alibaba.alink.operator.batch.sql.LeftOuterJoinBatchOp)2 Row (org.apache.flink.types.Row)2 OutputColsHelper (com.alibaba.alink.common.utils.OutputColsHelper)1 MetaPathWalkBatchOp (com.alibaba.alink.operator.batch.graph.MetaPathWalkBatchOp)1 Node2VecWalkBatchOp (com.alibaba.alink.operator.batch.graph.Node2VecWalkBatchOp)1 RandomWalkBatchOp (com.alibaba.alink.operator.batch.graph.RandomWalkBatchOp)1 AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)1 TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp)1 FullOuterJoinBatchOp (com.alibaba.alink.operator.batch.sql.FullOuterJoinBatchOp)1 RightOuterJoinBatchOp (com.alibaba.alink.operator.batch.sql.RightOuterJoinBatchOp)1 Method (com.alibaba.alink.operator.common.nlp.Method)1 KeywordsExtractionParams (com.alibaba.alink.params.nlp.KeywordsExtractionParams)1 Imputer (com.alibaba.alink.pipeline.dataproc.Imputer)1 ArrayList (java.util.ArrayList)1 Comparator (java.util.Comparator)1 List (java.util.List)1 MapFunction (org.apache.flink.api.common.functions.MapFunction)1 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)1