Search in sources :

Example 1 with SelectBatchOp

use of com.alibaba.alink.operator.batch.sql.SelectBatchOp in project Alink by alibaba.

the class LSTNetTrainBatchOpTest method testStreamMultiVar.

/**
 * Trains an LSTNet model on a vector column in batch mode and uses it for streaming prediction.
 *
 * <p>Batch side: random columns get an id column appended, are packed into a vector column, and the
 * id column is converted with {@code TO_TIMESTAMP} before training. Stream side: random columns are
 * packed into a vector column, an id column is appended and converted the same way, the preceding
 * rows are aggregated with {@code MTABLE_AGG_PRECEDING}, and the predictions are written to an Ak
 * file. The method makes no assertions on the produced output.
 *
 * @throws Exception propagated from building or executing the pipelines
 */
@Test
public void testStreamMultiVar() throws Exception {
    BatchOperator.setParallelism(1);

    final int numCols = 10;
    final String timeColName = "ts";
    final String vecColName = "vec";
    // The same projection is applied to both the batch and the stream pipeline.
    final String selectClause = String.format("TO_TIMESTAMP(%s) as %s, %s", timeColName, timeColName, vecColName);

    // ---- batch pipeline: source -> id -> vector -> timestamp -> train ----
    BatchOperator<?> batchSource = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(numCols);
    String[] featureCols = batchSource.getColNames();
    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(batchSource);
    ColumnsToVectorBatchOp batchVectorized = new ColumnsToVectorBatchOp()
        .setSelectedCols(featureCols)
        .setVectorCol(vecColName)
        .linkFrom(batchWithId);
    BatchOperator<?> batchTimestamped = new SelectBatchOp()
        .setClause(selectClause)
        .linkFrom(batchVectorized);
    LSTNetTrainBatchOp trainOp = new LSTNetTrainBatchOp()
        .setVectorCol(vecColName)
        .setTimeCol(timeColName)
        .setWindow(24 * 7)
        .setHorizon(12)
        .setNumEpochs(1)
        .linkFrom(batchTimestamped);

    // ---- stream pipeline: source -> vector -> id -> timestamp -> window -> predict ----
    StreamOperator<?> streamSource = new RandomTableSourceStreamOp()
        .setNumCols(numCols)
        .setMaxRows(1000L);
    ColumnsToVectorStreamOp streamVectorized = new ColumnsToVectorStreamOp()
        .setSelectedCols(featureCols)
        .setVectorCol(vecColName)
        .linkFrom(streamSource);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(streamVectorized);
    StreamOperator<?> streamTimestamped = new SelectStreamOp()
        .setClause(selectClause)
        .linkFrom(streamWithId);
    OverCountWindowStreamOp precedingRowsOp = new OverCountWindowStreamOp()
        .setClause("MTABLE_AGG_PRECEDING(" + timeColName + ", " + vecColName + ") as col_agg")
        .setTimeCol(timeColName)
        .setPrecedingRows(24 * 7)
        .linkFrom(streamTimestamped);
    LSTNetPredictStreamOp predictStreamOp = new LSTNetPredictStreamOp(trainOp)
        .setValueCol("col_agg")
        .setPredictionCol("pred")
        .setReservedCols(timeColName)
        .linkFrom(precedingRowsOp);

    // NOTE(review): `folder` is declared outside this snippet; presumably a JUnit TemporaryFolder rule.
    Path resultPath = new Path(folder.getRoot().getPath(), "lstnet_test_stream_multi_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp resultSink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    predictStreamOp.link(resultSink);

    StreamOperator.execute();
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AppendIdStreamOp(com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) SelectBatchOp(com.alibaba.alink.operator.batch.sql.SelectBatchOp) RandomTableSourceBatchOp(com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) ColumnsToVectorStreamOp(com.alibaba.alink.operator.stream.dataproc.format.ColumnsToVectorStreamOp) AppendIdBatchOp(com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) RandomTableSourceStreamOp(com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) SelectStreamOp(com.alibaba.alink.operator.stream.sql.SelectStreamOp) OverCountWindowStreamOp(com.alibaba.alink.operator.stream.feature.OverCountWindowStreamOp) ColumnsToVectorBatchOp(com.alibaba.alink.operator.batch.dataproc.format.ColumnsToVectorBatchOp) LSTNetPredictStreamOp(com.alibaba.alink.operator.stream.timeseries.LSTNetPredictStreamOp) Test(org.junit.Test)

Example 2 with SelectBatchOp

use of com.alibaba.alink.operator.batch.sql.SelectBatchOp in project Alink by alibaba.

the class SelectMapper method prepareIoSchema.

/**
 * Derives the output schema of the configured select clause.
 *
 * <p>An empty in-memory batch source carrying {@code dataSchema} is linked to a
 * {@link SelectBatchOp} configured with the clause, and the schema of that operator is read back.
 *
 * @param dataSchema schema of the incoming data
 * @param params parameters from which {@code SelectParams.CLAUSE} is read
 * @return a tuple of: the input field names, the output field names, the output field types, and
 *     an empty string array (presumably the reserved columns; confirm against the base class)
 */
@Override
protected Tuple4<String[], String[], TypeInformation<?>[], String[]> prepareIoSchema(TableSchema dataSchema, Params params) {
    final String clause = params.get(SelectParams.CLAUSE);

    // No rows are needed: only the schema of the linked select operator is inspected.
    MemSourceBatchOp emptySource = new MemSourceBatchOp(Collections.emptyList(), dataSchema);
    SelectBatchOp selectOp = new SelectBatchOp().setClause(clause);
    TableSchema selectedSchema = emptySource.linkTo(selectOp).getSchema();

    String[] inputNames = dataSchema.getFieldNames();
    String[] outputNames = selectedSchema.getFieldNames();
    TypeInformation<?>[] outputTypes = selectedSchema.getFieldTypes();
    return Tuple4.of(inputNames, outputNames, outputTypes, new String[0]);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) TableSchema(org.apache.flink.table.api.TableSchema) SelectBatchOp(com.alibaba.alink.operator.batch.sql.SelectBatchOp)

Example 3 with SelectBatchOp

use of com.alibaba.alink.operator.batch.sql.SelectBatchOp in project Alink by alibaba.

the class DeepARTrainBatchOpTest method testSingleVar.

/**
 * Trains a DeepAR model on a single random column in batch mode and uses it for streaming
 * prediction.
 *
 * <p>Both pipelines append an id column and turn it into an hour-floored timestamp via
 * {@code FLOOR(TO_TIMESTAMP(id * 3600000) TO HOUR)}. The batch side sums the value column per
 * timestamp before training. The stream side sums it in tumbling windows, collects the rows with
 * {@code MTABLE_AGG} in hopping windows, predicts, and writes the result to an Ak file. The method
 * makes no assertions on the produced output.
 *
 * @throws Exception propagated from building or executing the pipelines
 */
@Test
public void testSingleVar() throws Exception {
    BatchOperator.setParallelism(1);

    final String timeColName = "ts";

    // ---- batch pipeline: source -> id -> timestamp -> group by -> train ----
    BatchOperator<?> batchSource = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(1);
    String colName = batchSource.getColNames()[0];
    // Same projection for batch and stream: keep the value column, rebuild the time column.
    String timeSelectClause = String.format("%s, FLOOR(TO_TIMESTAMP(%s * 3600000) TO HOUR) as %s", colName, timeColName, timeColName);
    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(batchSource);
    BatchOperator<?> batchTimed = new SelectBatchOp()
        .setClause(timeSelectClause)
        .linkFrom(batchWithId);
    String groupedSelectClause = String.format("%s, SUM(%s) as %s", timeColName, colName, colName);
    BatchOperator<?> batchGrouped = new GroupByBatchOp()
        .setSelectClause(groupedSelectClause)
        .setGroupByPredicate(timeColName)
        .linkFrom(batchTimed);
    BatchOperator<?> deepArTrainBatchOp = new DeepARTrainBatchOp()
        .setSelectedCol(colName)
        .setTimeCol(timeColName)
        .setWindow(24 * 7)
        .setStride(24)
        .setNumEpochs(1)
        .linkFrom(batchGrouped);

    // ---- stream pipeline: source -> id -> timestamp -> tumble -> hop -> predict ----
    StreamOperator<?> streamSource = new RandomTableSourceStreamOp()
        .setNumCols(1)
        .setMaxRows(1000L);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(streamSource);
    StreamOperator<?> streamTimed = new SelectStreamOp()
        .setClause(timeSelectClause)
        .linkFrom(streamWithId);
    String tumbleClause = String.format("TUMBLE_START() as %s, SUM(%s) as %s", timeColName, colName, colName);
    TumbleTimeWindowStreamOp tumbleOp = new TumbleTimeWindowStreamOp()
        .setWindowTime(3600)
        .setTimeCol(timeColName)
        .setClause(tumbleClause)
        .linkFrom(streamTimed);
    HopTimeWindowStreamOp hopOp = new HopTimeWindowStreamOp()
        .setTimeCol(timeColName)
        .setClause(String.format("MTABLE_AGG(%s, %s) as %s", timeColName, colName, "mt"))
        .setHopTime(24 * 3600)
        .setWindowTime((24 * 7 - 24) * 3600)
        .linkFrom(tumbleOp);
    DeepARPredictStreamOp deepARPredictStreamOp = new DeepARPredictStreamOp(deepArTrainBatchOp)
        .setValueCol("mt")
        .setPredictionCol("pred")
        .setPredictNum(24)
        .linkFrom(hopOp);

    // NOTE(review): `folder` is declared outside this snippet; presumably a JUnit TemporaryFolder rule.
    Path resultPath = new Path(folder.getRoot().getPath(), "deepar_test_stream_single_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp resultSink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    deepARPredictStreamOp.link(resultSink);

    StreamOperator.execute();
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) DeepARPredictStreamOp(com.alibaba.alink.operator.stream.timeseries.DeepARPredictStreamOp) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AppendIdStreamOp(com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) TumbleTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.TumbleTimeWindowStreamOp) SelectBatchOp(com.alibaba.alink.operator.batch.sql.SelectBatchOp) GroupByBatchOp(com.alibaba.alink.operator.batch.sql.GroupByBatchOp) RandomTableSourceBatchOp(com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) AppendIdBatchOp(com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) RandomTableSourceStreamOp(com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) SelectStreamOp(com.alibaba.alink.operator.stream.sql.SelectStreamOp) HopTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.HopTimeWindowStreamOp) Test(org.junit.Test)

Example 4 with SelectBatchOp

use of com.alibaba.alink.operator.batch.sql.SelectBatchOp in project Alink by alibaba.

the class DeepARTrainBatchOpTest method testMultiVar.

/**
 * Trains a DeepAR model on a vector column in batch mode and uses it for streaming prediction.
 *
 * <p>Both pipelines append an id column and turn it into an hour-floored timestamp via
 * {@code FLOOR(TO_TIMESTAMP(id * 3600000) TO HOUR)}. The batch side sums every value column per
 * timestamp and packs the sums into a vector column before training. The stream side sums the
 * columns in tumbling windows, packs them into a vector column, collects the rows with
 * {@code MTABLE_AGG} in hopping windows, predicts, and writes the result to an Ak file. The method
 * makes no assertions on the produced output.
 *
 * @throws Exception propagated from building or executing the pipelines
 */
@Test
public void testMultiVar() throws Exception {
    BatchOperator.setParallelism(1);

    final String timeColName = "ts";
    final int numCols = 10;
    final String vecColName = "vec";

    // ---- batch pipeline: source -> id -> timestamp -> group by -> vector -> train ----
    BatchOperator<?> batchSource = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(numCols);
    String[] colNames = batchSource.getColNames();
    // Same projection for batch and stream: keep all value columns, rebuild the time column.
    String timeSelectClause = String.format("%s, FLOOR(TO_TIMESTAMP(%s * 3600000) TO HOUR) as %s", Joiner.on(",").join(colNames), timeColName, timeColName);
    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(batchSource);
    BatchOperator<?> batchTimed = new SelectBatchOp()
        .setClause(timeSelectClause)
        .linkFrom(batchWithId);

    // ", SUM(c) as c" for each value column; shared by the group-by and the tumble-window clause.
    StringBuilder sumSuffix = new StringBuilder();
    for (int idx = 0; idx < numCols; ++idx) {
        String col = colNames[idx];
        sumSuffix.append(", ").append(String.format("SUM(%s) as %s", col, col));
    }

    String groupedSelectClause = timeColName + sumSuffix;
    BatchOperator<?> batchGrouped = new GroupByBatchOp()
        .setSelectClause(groupedSelectClause)
        .setGroupByPredicate(timeColName)
        .linkFrom(batchTimed);
    ColumnsToVectorBatchOp batchVectorized = new ColumnsToVectorBatchOp()
        .setSelectedCols(colNames)
        .setVectorCol(vecColName)
        .linkFrom(batchGrouped);
    BatchOperator<?> deepArTrainBatchOp = new DeepARTrainBatchOp()
        .setVectorCol(vecColName)
        .setTimeCol(timeColName)
        .setWindow(24 * 7)
        .setStride(24)
        .setNumEpochs(1)
        .linkFrom(batchVectorized);

    // ---- stream pipeline: source -> id -> timestamp -> tumble -> vector -> hop -> predict ----
    StreamOperator<?> streamSource = new RandomTableSourceStreamOp()
        .setNumCols(numCols)
        .setMaxRows(1000L);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(streamSource);
    StreamOperator<?> streamTimed = new SelectStreamOp()
        .setClause(timeSelectClause)
        .linkFrom(streamWithId);
    String tumbleClause = String.format("TUMBLE_START() as %s", timeColName) + sumSuffix;
    TumbleTimeWindowStreamOp tumbleOp = new TumbleTimeWindowStreamOp()
        .setWindowTime(3600)
        .setTimeCol(timeColName)
        .setClause(tumbleClause)
        .linkFrom(streamTimed);
    ColumnsToVectorStreamOp streamVectorized = new ColumnsToVectorStreamOp()
        .setSelectedCols(colNames)
        .setVectorCol(vecColName)
        .linkFrom(tumbleOp);
    HopTimeWindowStreamOp hopOp = new HopTimeWindowStreamOp()
        .setTimeCol(timeColName)
        .setClause(String.format("MTABLE_AGG(%s, %s) as %s", timeColName, vecColName, "mt"))
        .setHopTime(24 * 3600)
        .setWindowTime((24 * 7 - 24) * 3600)
        .linkFrom(streamVectorized);
    DeepARPredictStreamOp deepARPredictStreamOp = new DeepARPredictStreamOp(deepArTrainBatchOp)
        .setValueCol("mt")
        .setPredictionCol("pred")
        .linkFrom(hopOp);

    // NOTE(review): `folder` is declared outside this snippet; presumably a JUnit TemporaryFolder rule.
    Path resultPath = new Path(folder.getRoot().getPath(), "deepar_test_stream_multi_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp resultSink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    deepARPredictStreamOp.link(resultSink);

    StreamOperator.execute();
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) DeepARPredictStreamOp(com.alibaba.alink.operator.stream.timeseries.DeepARPredictStreamOp) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AppendIdStreamOp(com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) TumbleTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.TumbleTimeWindowStreamOp) SelectBatchOp(com.alibaba.alink.operator.batch.sql.SelectBatchOp) GroupByBatchOp(com.alibaba.alink.operator.batch.sql.GroupByBatchOp) RandomTableSourceBatchOp(com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) ColumnsToVectorStreamOp(com.alibaba.alink.operator.stream.dataproc.format.ColumnsToVectorStreamOp) AppendIdBatchOp(com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) RandomTableSourceStreamOp(com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) SelectStreamOp(com.alibaba.alink.operator.stream.sql.SelectStreamOp) ColumnsToVectorBatchOp(com.alibaba.alink.operator.batch.dataproc.format.ColumnsToVectorBatchOp) HopTimeWindowStreamOp(com.alibaba.alink.operator.stream.feature.HopTimeWindowStreamOp) Test(org.junit.Test)

Example 5 with SelectBatchOp

use of com.alibaba.alink.operator.batch.sql.SelectBatchOp in project Alink by alibaba.

the class LSTNetTrainBatchOpTest method testStreamSingleVar.

/**
 * Trains an LSTNet model on a single random column in batch mode and uses it for streaming
 * prediction.
 *
 * <p>Both pipelines append an id column and convert it with {@code TO_TIMESTAMP}. The stream side
 * then aggregates the preceding rows with {@code MTABLE_AGG_PRECEDING}, predicts with four
 * threads, and writes the result to an Ak file. The method makes no assertions on the produced
 * output.
 *
 * @throws Exception propagated from building or executing the pipelines
 */
@Test
public void testStreamSingleVar() throws Exception {
    BatchOperator.setParallelism(1);

    final int numCols = 1;
    final String timeColName = "ts";

    // ---- batch pipeline: source -> id -> timestamp -> train ----
    BatchOperator<?> batchSource = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(numCols);
    String colName = batchSource.getColNames()[0];
    // The same projection is applied to both the batch and the stream pipeline.
    final String selectClause = String.format("TO_TIMESTAMP(%s) as %s, %s", timeColName, timeColName, colName);
    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(batchSource);
    BatchOperator<?> batchTimestamped = new SelectBatchOp()
        .setClause(selectClause)
        .linkFrom(batchWithId);
    LSTNetTrainBatchOp trainOp = new LSTNetTrainBatchOp()
        .setSelectedCol(colName)
        .setTimeCol(timeColName)
        .setWindow(24 * 7)
        .setHorizon(12)
        .setNumEpochs(1)
        .linkFrom(batchTimestamped);

    // ---- stream pipeline: source -> id -> timestamp -> window -> predict ----
    StreamOperator<?> streamSource = new RandomTableSourceStreamOp()
        .setNumCols(numCols)
        .setMaxRows(6000L);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(streamSource);
    StreamOperator<?> streamTimestamped = new SelectStreamOp()
        .setClause(selectClause)
        .linkFrom(streamWithId);
    OverCountWindowStreamOp precedingRowsOp = new OverCountWindowStreamOp()
        .setClause("MTABLE_AGG_PRECEDING(" + timeColName + ", " + colName + ") as col_agg")
        .setTimeCol(timeColName)
        .setPrecedingRows(24 * 7)
        .linkFrom(streamTimestamped);
    LSTNetPredictStreamOp predictStreamOp = new LSTNetPredictStreamOp(trainOp)
        .setValueCol("col_agg")
        .setPredictionCol("pred")
        .setReservedCols(timeColName)
        .setNumThreads(4)
        .linkFrom(precedingRowsOp);

    // NOTE(review): `folder` is declared outside this snippet; presumably a JUnit TemporaryFolder rule.
    Path resultPath = new Path(folder.getRoot().getPath(), "lstnet_test_stream_single_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp resultSink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    predictStreamOp.link(resultSink);

    StreamOperator.execute();
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) Path(org.apache.flink.core.fs.Path) FilePath(com.alibaba.alink.common.io.filesystem.FilePath) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AppendIdStreamOp(com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) SelectBatchOp(com.alibaba.alink.operator.batch.sql.SelectBatchOp) RandomTableSourceBatchOp(com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) AppendIdBatchOp(com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) RandomTableSourceStreamOp(com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) SelectStreamOp(com.alibaba.alink.operator.stream.sql.SelectStreamOp) OverCountWindowStreamOp(com.alibaba.alink.operator.stream.feature.OverCountWindowStreamOp) LSTNetPredictStreamOp(com.alibaba.alink.operator.stream.timeseries.LSTNetPredictStreamOp) Test(org.junit.Test)

Aggregations

SelectBatchOp (com.alibaba.alink.operator.batch.sql.SelectBatchOp) 5, FilePath (com.alibaba.alink.common.io.filesystem.FilePath) 4, AppendIdBatchOp (com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp) 4, RandomTableSourceBatchOp (com.alibaba.alink.operator.batch.source.RandomTableSourceBatchOp) 4, AppendIdStreamOp (com.alibaba.alink.operator.stream.dataproc.AppendIdStreamOp) 4, AkSinkStreamOp (com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) 4, RandomTableSourceStreamOp (com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp) 4, SelectStreamOp (com.alibaba.alink.operator.stream.sql.SelectStreamOp) 4, Path (org.apache.flink.core.fs.Path) 4, Test (org.junit.Test) 4, ColumnsToVectorBatchOp (com.alibaba.alink.operator.batch.dataproc.format.ColumnsToVectorBatchOp) 2, GroupByBatchOp (com.alibaba.alink.operator.batch.sql.GroupByBatchOp) 2, ColumnsToVectorStreamOp (com.alibaba.alink.operator.stream.dataproc.format.ColumnsToVectorStreamOp) 2, HopTimeWindowStreamOp (com.alibaba.alink.operator.stream.feature.HopTimeWindowStreamOp) 2, OverCountWindowStreamOp (com.alibaba.alink.operator.stream.feature.OverCountWindowStreamOp) 2, TumbleTimeWindowStreamOp (com.alibaba.alink.operator.stream.feature.TumbleTimeWindowStreamOp) 2, DeepARPredictStreamOp (com.alibaba.alink.operator.stream.timeseries.DeepARPredictStreamOp) 2, LSTNetPredictStreamOp (com.alibaba.alink.operator.stream.timeseries.LSTNetPredictStreamOp) 2, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp) 1, TableSchema (org.apache.flink.table.api.TableSchema) 1