Search in sources :

Example 6 with GroupByBatchOp

Use of com.alibaba.alink.operator.batch.sql.GroupByBatchOp in project Alink by alibaba.

The class AutoArimaBatchOpTest, method test.

/**
 * Smoke test for {@code AutoArimaBatchOp}: ten rows sharing id 1 are grouped by id, aggregated
 * with {@code mtable_agg(ts, val)} into a column named "data", passed to the ARIMA operator
 * with predictNum 12, and the result is printed. No assertions are made on the output.
 */
@Test
public void test() throws Exception {
    // Timestamps 1..10 (epoch millis) paired with values 10.0..19.0, all under id 1.
    Row[] series = new Row[10];
    for (int i = 0; i < series.length; i++) {
        series[i] = Row.of(1, new Timestamp(i + 1), 10.0 + i);
    }
    List<Row> mTableData = Arrays.asList(series);
    String[] colNames = new String[] { "id", "ts", "val" };
    MemSourceBatchOp source = new MemSourceBatchOp(mTableData, colNames);
    // Collapse the series into one MTable-valued column per id.
    GroupByBatchOp grouped = source.link(
        new GroupByBatchOp()
            .setGroupByPredicate("id")
            .setSelectClause("mtable_agg(ts, val) as data"));
    AutoArimaBatchOp predicted = grouped.link(
        new AutoArimaBatchOp()
            .setValueCol("data")
            .setPredictionCol("pred")
            .setPredictNum(12));
    predicted.print();
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) GroupByBatchOp(com.alibaba.alink.operator.batch.sql.GroupByBatchOp) Row(org.apache.flink.types.Row) Timestamp(java.sql.Timestamp) Test(org.junit.Test)

Example 7 with GroupByBatchOp

Use of com.alibaba.alink.operator.batch.sql.GroupByBatchOp in project Alink by alibaba.

The class AutoGarchBatchOpTest, method test.

/**
 * Smoke test for {@code AutoGarchBatchOp}: ten rows sharing id 1 are grouped by id, aggregated
 * with {@code mtable_agg(ts, val)} into a column named "data", passed to the GARCH operator
 * configured below, and the result is printed. No assertions are made on the output.
 */
@Test
public void test() throws Exception {
    // Timestamps 1..10 (epoch millis) paired with values 10.0..19.0, all under id 1.
    Row[] series = new Row[10];
    for (int i = 0; i < series.length; i++) {
        series[i] = Row.of(1, new Timestamp(i + 1), 10.0 + i);
    }
    List<Row> mTableData = Arrays.asList(series);
    String[] colNames = new String[] { "id", "ts", "val" };
    MemSourceBatchOp source = new MemSourceBatchOp(mTableData, colNames);
    // Collapse the series into one MTable-valued column per id.
    GroupByBatchOp grouped = source.link(
        new GroupByBatchOp()
            .setGroupByPredicate("id")
            .setSelectClause("mtable_agg(ts, val) as data"));
    AutoGarchBatchOp predicted = grouped.link(
        new AutoGarchBatchOp()
            .setValueCol("data")
            .setIcType("AIC")
            .setPredictNum(10)
            .setMaxOrder(4)
            .setIfGARCH11(false)
            .setMinusMean(false)
            .setPredictionCol("pred"));
    predicted.print();
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) GroupByBatchOp(com.alibaba.alink.operator.batch.sql.GroupByBatchOp) Row(org.apache.flink.types.Row) Timestamp(java.sql.Timestamp) Test(org.junit.Test)

Example 8 with GroupByBatchOp

Use of com.alibaba.alink.operator.batch.sql.GroupByBatchOp in project Alink by alibaba.

The class FlattenMTableTest, method test2.

/**
 * Groups the (f0, f1) pairs by id into an MTable column, flattens that column back out with
 * the schema "f0 string, f1 int", and asserts that the field at index 2 of every collected
 * row equals 2.
 */
@Test
public void test2() throws Exception {
    // Fixture rows in insertion order: id, f0, f1. Only the first f1 value is non-integral.
    Object[][] fixture = {
        { "a1", "11L", 2.2 },
        { "a1", "12L", 2.0 },
        { "a2", "11L", 2.0 },
        { "a2", "12L", 2.0 },
        { "a3", "12L", 2.0 },
        { "a3", "13L", 2.0 },
        { "a4", "13L", 2.0 },
        { "a4", "14L", 2.0 },
        { "a5", "14L", 2.0 },
        { "a5", "15L", 2.0 },
        { "a6", "15L", 2.0 },
        { "a6", "16L", 2.0 },
    };
    List<Row> rows = new ArrayList<>();
    for (Object[] cells : fixture) {
        rows.add(Row.of(cells[0], cells[1], cells[2]));
    }
    BatchOperator<?> input = new MemSourceBatchOp(rows, "id string, f0 string, f1 double");

    GroupByBatchOp zip = new GroupByBatchOp()
        .setGroupByPredicate("id")
        .setSelectClause("id, mtable_agg(f0, f1) as m_table_col");
    // f1 is declared as int here although the source column is double.
    FlattenMTableBatchOp flatten = new FlattenMTableBatchOp()
        .setReservedCols("id")
        .setSelectedCol("m_table_col")
        .setSchemaStr("f0 string, f1 int");

    List<Row> res = zip.linkFrom(input).link(flatten).collect();
    res.forEach(row -> Assert.assertEquals(2, row.getField(2)));
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) ArrayList(java.util.ArrayList) GroupByBatchOp(com.alibaba.alink.operator.batch.sql.GroupByBatchOp) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 9 with GroupByBatchOp

Use of com.alibaba.alink.operator.batch.sql.GroupByBatchOp in project Alink by alibaba.

The class DeepARTrainBatchOpTest, method testSingleVar.

/**
 * End-to-end run of DeepAR on a single value column: trains on a random batch table summed
 * per floored-hour timestamp, then wires the trained model into a streaming pipeline
 * (tumbling window, hopping window with MTABLE_AGG, prediction) that is written to an .ak
 * file under the test's temporary folder. The test only executes the job; it asserts nothing
 * about the output.
 */
@Test
public void testSingleVar() throws Exception {
    BatchOperator.setParallelism(1);
    final String timeColName = "ts";
    final int hoursPerDay = 24;
    final int windowHours = hoursPerDay * 7;
    final int secondsPerHour = 3600;

    // ---- batch side: build the training input ----
    BatchOperator<?> source = new RandomTableSourceBatchOp().setNumRows(1000L).setNumCols(1);
    String colName = source.getColNames()[0];
    // Clause shared by the batch and stream SELECT steps: keeps the value column and turns
    // the appended id into a timestamp floored to the hour.
    String hourFloorClause = String.format("%s, FLOOR(TO_TIMESTAMP(%s * 3600000) TO HOUR) as %s", colName, timeColName, timeColName);
    String sumPart = String.format(", SUM(%s) as %s", colName, colName);

    AppendIdBatchOp appendIdBatchOp = new AppendIdBatchOp().setIdCol(timeColName).linkFrom(source);
    BatchOperator<?> timeBatchOp = new SelectBatchOp().setClause(hourFloorClause).linkFrom(appendIdBatchOp);
    BatchOperator<?> groupedTimeBatchOp = new GroupByBatchOp()
        .setSelectClause(timeColName + sumPart)
        .setGroupByPredicate(timeColName)
        .linkFrom(timeBatchOp);
    BatchOperator<?> deepArTrainBatchOp = new DeepARTrainBatchOp()
        .setSelectedCol(colName)
        .setTimeCol(timeColName)
        .setWindow(windowHours)
        .setStride(hoursPerDay)
        .setNumEpochs(1)
        .linkFrom(groupedTimeBatchOp);

    // ---- stream side: same preprocessing, then windowing and prediction ----
    StreamOperator<?> sourceStreamOp = new RandomTableSourceStreamOp().setNumCols(1).setMaxRows(1000L);
    AppendIdStreamOp appendIdStreamOp = new AppendIdStreamOp().setIdCol(timeColName).linkFrom(sourceStreamOp);
    StreamOperator<?> timeStreamOp = new SelectStreamOp().setClause(hourFloorClause).linkFrom(appendIdStreamOp);
    String selectClausePred = String.format("TUMBLE_START() as %s", timeColName) + sumPart;
    TumbleTimeWindowStreamOp timeWindowStreamOp = new TumbleTimeWindowStreamOp()
        .setWindowTime(secondsPerHour)
        .setTimeCol(timeColName)
        .setClause(selectClausePred)
        .linkFrom(timeStreamOp);
    HopTimeWindowStreamOp hopTimeWindowStreamOp = new HopTimeWindowStreamOp()
        .setTimeCol(timeColName)
        .setClause(String.format("MTABLE_AGG(%s, %s) as %s", timeColName, colName, "mt"))
        .setHopTime(hoursPerDay * secondsPerHour)
        .setWindowTime((windowHours - hoursPerDay) * secondsPerHour)
        .linkFrom(timeWindowStreamOp);
    DeepARPredictStreamOp deepARPredictStreamOp = new DeepARPredictStreamOp(deepArTrainBatchOp)
        .setValueCol("mt")
        .setPredictionCol("pred")
        .setPredictNum(hoursPerDay)
        .linkFrom(hopTimeWindowStreamOp);

    // ---- sink and run ----
    Path resultPath = new Path(folder.getRoot().getPath(), "deepar_test_stream_single_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    deepARPredictStreamOp.link(new AkSinkStreamOp().setOverwriteSink(true).setFilePath(tmpAkFile));
    StreamOperator.execute();
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) DeepARPredictStreamOp(com.alibaba.alink.operator.stream.timeseries.DeepARPredictStreamOp) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AppendIdStreamOp(com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) TumbleTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.TumbleTimeWindowStreamOp) SelectBatchOp(com.alibaba.alink.operator.batch.sql.SelectBatchOp) GroupByBatchOp(com.alibaba.alink.operator.batch.sql.GroupByBatchOp) RandomTableSourceBatchOp(com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) AppendIdBatchOp(com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) RandomTableSourceStreamOp(com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) SelectStreamOp(com.alibaba.alink.operator.stream.sql.SelectStreamOp) HopTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.HopTimeWindowStreamOp) Test(org.junit.Test)

Example 10 with GroupByBatchOp

Use of com.alibaba.alink.operator.batch.sql.GroupByBatchOp in project Alink by alibaba.

The class DeepARTrainBatchOpTest, method testMultiVar.

/**
 * End-to-end run of DeepAR on a vector column: trains on a random ten-column batch table
 * summed per floored-hour timestamp and assembled into a vector, then wires the trained model
 * into a streaming pipeline (tumbling window, columns-to-vector, hopping window with
 * MTABLE_AGG, prediction) that is written to an .ak file under the test's temporary folder.
 * The test only executes the job; it asserts nothing about the output.
 */
@Test
public void testMultiVar() throws Exception {
    BatchOperator.setParallelism(1);
    final String timeColName = "ts";
    final int numCols = 10;
    final String vecColName = "vec";
    final int hoursPerDay = 24;
    final int windowHours = hoursPerDay * 7;
    final int secondsPerHour = 3600;

    // ---- batch side: build the training input ----
    BatchOperator<?> source = new RandomTableSourceBatchOp().setNumRows(1000L).setNumCols(numCols);
    String[] colNames = source.getColNames();
    // Clause shared by the batch and stream SELECT steps: keeps every value column and turns
    // the appended id into a timestamp floored to the hour.
    String hourFloorClause = String.format("%s, FLOOR(TO_TIMESTAMP(%s * 3600000) TO HOUR) as %s", Joiner.on(",").join(colNames), timeColName, timeColName);

    AppendIdBatchOp appendIdBatchOp = new AppendIdBatchOp().setIdCol(timeColName).linkFrom(source);
    BatchOperator<?> timeBatchOp = new SelectBatchOp().setClause(hourFloorClause).linkFrom(appendIdBatchOp);
    BatchOperator<?> groupedTimeBatchOp = new GroupByBatchOp()
        .setSelectClause(sumClauseWithPrefix(timeColName, colNames, numCols))
        .setGroupByPredicate(timeColName)
        .linkFrom(timeBatchOp);
    ColumnsToVectorBatchOp columnsToVectorBatchOp = new ColumnsToVectorBatchOp()
        .setSelectedCols(colNames)
        .setVectorCol(vecColName)
        .linkFrom(groupedTimeBatchOp);
    BatchOperator<?> deepArTrainBatchOp = new DeepARTrainBatchOp()
        .setVectorCol(vecColName)
        .setTimeCol(timeColName)
        .setWindow(windowHours)
        .setStride(hoursPerDay)
        .setNumEpochs(1)
        .linkFrom(columnsToVectorBatchOp);

    // ---- stream side: same preprocessing, then windowing and prediction ----
    StreamOperator<?> sourceStreamOp = new RandomTableSourceStreamOp().setNumCols(numCols).setMaxRows(1000L);
    AppendIdStreamOp appendIdStreamOp = new AppendIdStreamOp().setIdCol(timeColName).linkFrom(sourceStreamOp);
    StreamOperator<?> timeStreamOp = new SelectStreamOp().setClause(hourFloorClause).linkFrom(appendIdStreamOp);
    String selectClausePred = sumClauseWithPrefix(String.format("TUMBLE_START() as %s", timeColName), colNames, numCols);
    TumbleTimeWindowStreamOp timeWindowStreamOp = new TumbleTimeWindowStreamOp()
        .setWindowTime(secondsPerHour)
        .setTimeCol(timeColName)
        .setClause(selectClausePred)
        .linkFrom(timeStreamOp);
    ColumnsToVectorStreamOp columnsToVectorStreamOp = new ColumnsToVectorStreamOp()
        .setSelectedCols(colNames)
        .setVectorCol(vecColName)
        .linkFrom(timeWindowStreamOp);
    HopTimeWindowStreamOp hopTimeWindowStreamOp = new HopTimeWindowStreamOp()
        .setTimeCol(timeColName)
        .setClause(String.format("MTABLE_AGG(%s, %s) as %s", timeColName, vecColName, "mt"))
        .setHopTime(hoursPerDay * secondsPerHour)
        .setWindowTime((windowHours - hoursPerDay) * secondsPerHour)
        .linkFrom(columnsToVectorStreamOp);
    DeepARPredictStreamOp deepARPredictStreamOp = new DeepARPredictStreamOp(deepArTrainBatchOp)
        .setValueCol("mt")
        .setPredictionCol("pred")
        .linkFrom(hopTimeWindowStreamOp);

    // ---- sink and run ----
    Path resultPath = new Path(folder.getRoot().getPath(), "deepar_test_stream_multi_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    deepARPredictStreamOp.link(new AkSinkStreamOp().setOverwriteSink(true).setFilePath(tmpAkFile));
    StreamOperator.execute();
}

/**
 * Builds a select clause consisting of {@code prefix} followed by ", SUM(c) as c" for each of
 * the first {@code count} entries of {@code cols}.
 */
private static String sumClauseWithPrefix(String prefix, String[] cols, int count) {
    StringBuilder clause = new StringBuilder();
    clause.append(prefix);
    int idx = 0;
    while (idx < count) {
        clause.append(", ").append(String.format("SUM(%s) as %s", cols[idx], cols[idx]));
        idx++;
    }
    return clause.toString();
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) DeepARPredictStreamOp(com.alibaba.alink.operator.stream.timeseries.DeepARPredictStreamOp) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AppendIdStreamOp(com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) TumbleTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.TumbleTimeWindowStreamOp) SelectBatchOp(com.alibaba.alink.operator.batch.sql.SelectBatchOp) GroupByBatchOp(com.alibaba.alink.operator.batch.sql.GroupByBatchOp) RandomTableSourceBatchOp(com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) ColumnsToVectorStreamOp(com.alibaba.alink.operator.stream.dataproc.format.ColumnsToVectorStreamOp) AppendIdBatchOp(com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) RandomTableSourceStreamOp(com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) SelectStreamOp(com.alibaba.alink.operator.stream.sql.SelectStreamOp) ColumnsToVectorBatchOp(com.alibaba.alink.operator.batch.dataproc.format.ColumnsToVectorBatchOp) HopTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.HopTimeWindowStreamOp) Test(org.junit.Test)

Aggregations

GroupByBatchOp (com.alibaba.alink.operator.batch.sql.GroupByBatchOp): 13; Test (org.junit.Test): 13; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 11; Row (org.apache.flink.types.Row): 11; Timestamp (java.sql.Timestamp): 7; FilePath (com.alibaba.alink.common.io.filesystem.FilePath): 2; AppendIdBatchOp (com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp): 2; RandomTableSourceBatchOp (com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp): 2; SelectBatchOp (com.alibaba.alink.operator.batch.sql.SelectBatchOp): 2; AppendIdStreamOp (com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp): 2; HopTimeWindowStreamOp (com.alibaba.alink.operator.stream.feature.HopTimeWindowStreamOp): 2; TumbleTimeWindowStreamOp (com.alibaba.alink.operator.stream.feature.TumbleTimeWindowStreamOp): 2; AkSinkStreamOp (com.alibaba.alink.operator.stream.sink.AkSinkStreamOp): 2; RandomTableSourceStreamOp (com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp): 2; SelectStreamOp (com.alibaba.alink.operator.stream.sql.SelectStreamOp): 2; DeepARPredictStreamOp (com.alibaba.alink.operator.stream.timeseries.DeepARPredictStreamOp): 2; Path (org.apache.flink.core.fs.Path): 2; FlattenMTableBatchOp (com.alibaba.alink.operator.batch.dataproc.FlattenMTableBatchOp): 1; ColumnsToVectorBatchOp (com.alibaba.alink.operator.batch.dataproc.format.ColumnsToVectorBatchOp): 1; ColumnsToVectorStreamOp (com.alibaba.alink.operator.stream.dataproc.format.ColumnsToVectorStreamOp): 1