use of com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp in project Alink by alibaba.
the class LSTNetTrainBatchOpTest method testStreamMultiVar.
/**
 * Runs an LSTNet train-then-stream-predict pipeline over a multi-column vector input.
 *
 * <p>Batch side: a random table gets an id column named "ts", its original columns are packed
 * into a "vec" column, "ts" is converted with TO_TIMESTAMP, and the result feeds the trainer.
 * Stream side: a random stream is given the same "vec" and "ts" columns, the preceding rows are
 * aggregated into "col_agg", and the prediction is written to "pred" and sunk into an .ak file
 * under the root of the test's {@code folder}. The method makes no assertions on the output.
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testStreamMultiVar() throws Exception {
    BatchOperator.setParallelism(1);

    final String timeColName = "ts";
    final String vecColName = "vec";
    final int numCols = 10;
    // One week of hourly steps; used both as the training window and as the preceding-row count.
    final int windowSize = 24 * 7;
    final String selectClause =
        String.format("TO_TIMESTAMP(%s) as %s, %s", timeColName, timeColName, vecColName);

    // ---- batch side: build the training table and train the model ----
    BatchOperator<?> source = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(numCols);
    String[] selectedColNames = source.getColNames();

    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(source);
    ColumnsToVectorBatchOp batchWithVec = new ColumnsToVectorBatchOp()
        .setSelectedCols(selectedColNames)
        .setVectorCol(vecColName)
        .linkFrom(batchWithId);
    BatchOperator<?> timeBatchOp = new SelectBatchOp()
        .setClause(selectClause)
        .linkFrom(batchWithVec);
    LSTNetTrainBatchOp trainOp = new LSTNetTrainBatchOp()
        .setVectorCol(vecColName)
        .setTimeCol(timeColName)
        .setWindow(windowSize)
        .setHorizon(12)
        .setNumEpochs(1)
        .linkFrom(timeBatchOp);

    // ---- stream side: build the prediction input and predict ----
    StreamOperator<?> sourceStreamOp = new RandomTableSourceStreamOp()
        .setNumCols(numCols)
        .setMaxRows(1000L);
    ColumnsToVectorStreamOp streamWithVec = new ColumnsToVectorStreamOp()
        .setSelectedCols(selectedColNames)
        .setVectorCol(vecColName)
        .linkFrom(sourceStreamOp);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(streamWithVec);
    StreamOperator<?> timestampStreamOp = new SelectStreamOp()
        .setClause(selectClause)
        .linkFrom(streamWithId);

    final String aggClause =
        String.format("MTABLE_AGG_PRECEDING(%s, %s) as col_agg", timeColName, vecColName);
    OverCountWindowStreamOp precedingRowsOp = new OverCountWindowStreamOp()
        .setClause(aggClause)
        .setTimeCol(timeColName)
        .setPrecedingRows(windowSize)
        .linkFrom(timestampStreamOp);
    LSTNetPredictStreamOp predictStreamOp = new LSTNetPredictStreamOp(trainOp)
        .setValueCol("col_agg")
        .setPredictionCol("pred")
        .setReservedCols(timeColName)
        .linkFrom(precedingRowsOp);

    // ---- sink and run ----
    Path resultPath = new Path(folder.getRoot().getPath(), "lstnet_test_stream_multi_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp sink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    predictStreamOp.link(sink);
    StreamOperator.execute();
}
use of com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp in project Alink by alibaba.
the class VectorStandardScalerTest method test.
/**
 * Exercises vector standard scaling for every combination of the withMean / withStd flags.
 *
 * <p>For each combination the scaler is fitted on the batch data, the batch result is collected
 * and sorted, and three rows are compared against expected vectors via {@code assertDv}. Each
 * model is also applied to the stream data and printed; the stream job is executed once at the end.
 *
 * @throws Exception if fitting, collecting or executing the job fails
 */
@Test
public void test() throws Exception {
    // Shared ordering for all collected results: ascending by the long at field index 1
    // (presumably the id column appended below -- confirm against GenerateData's schema).
    // Sorting makes the positional assertions independent of the collection order.
    final Comparator<Row> byFieldOne = new Comparator<Row>() {
        @Override
        public int compare(Row o1, Row o2) {
            return Long.compare((long) o1.getField(1), (long) o2.getField(1));
        }
    };

    // Wildcard-parameterized operator types instead of raw types, so collect() yields List<Row>
    // without an unchecked conversion.
    BatchOperator<?> batchData = new TableSourceBatchOp(GenerateData.getDenseBatch())
        .link(new AppendIdBatchOp().setIdCol("id"));
    StreamOperator<?> streamData = new TableSourceStreamOp(GenerateData.getDenseStream());

    // withMean = true, withStd = true (operator API).
    VectorStandardScalerTrainBatchOp op = new VectorStandardScalerTrainBatchOp()
        .setWithMean(true)
        .setWithStd(true)
        .setSelectedCol("vec")
        .linkFrom(batchData);
    BatchOperator<?> res = new VectorStandardScalerPredictBatchOp()
        .setOutputCol("vec_1")
        .linkFrom(op, batchData);
    List<Row> list = res.collect();
    Collections.sort(list, byFieldOne);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -0.9272, -1.1547 }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { -0.1325, 0.5774 }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 1.0596, 0.5774 }));
    new VectorStandardScalerPredictStreamOp(op).setOutputCol("vec_1").linkFrom(streamData).print();

    // withMean = true, withStd = false (pipeline API).
    VectorStandardScalerModel model1 = new VectorStandardScaler()
        .setWithMean(true)
        .setWithStd(false)
        .setSelectedCol("vec")
        .setOutputCol("vec_1")
        .fit(batchData);
    list = model1.transform(batchData).collect();
    Collections.sort(list, byFieldOne);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -2.3333, -3.3333 }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { -0.3333, 1.6666 }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 2.6666, 1.6666 }));
    model1.transform(streamData).print();

    // withMean = false, withStd = true.
    VectorStandardScalerModel model2 = new VectorStandardScaler()
        .setWithMean(false)
        .setWithStd(true)
        .setSelectedCol("vec")
        .setOutputCol("vec_1")
        .fit(batchData);
    list = model2.transform(batchData).collect();
    Collections.sort(list, byFieldOne);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -0.3974, -1.0392 }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { 0.3974, 0.6928 }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 1.5894, 0.6928 }));
    model2.transform(streamData).print();

    // withMean = false, withStd = false.
    VectorStandardScalerModel model3 = new VectorStandardScaler()
        .setWithMean(false)
        .setWithStd(false)
        .setSelectedCol("vec")
        .setOutputCol("vec_1")
        .fit(batchData);
    list = model3.transform(batchData).collect();
    Collections.sort(list, byFieldOne);
    assertDv(VectorUtil.getDenseVector(list.get(1).getField(2)), new DenseVector(new double[] { -1., -3. }));
    assertDv(VectorUtil.getDenseVector(list.get(0).getField(2)), new DenseVector(new double[] { 1., 2. }));
    assertDv(VectorUtil.getDenseVector(list.get(2).getField(2)), new DenseVector(new double[] { 4., 2. }));
    model3.transform(streamData).print();

    // Runs every stream branch registered by the print() calls above.
    StreamOperator.execute();
}
use of com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp in project Alink by alibaba.
the class DeepARTrainBatchOpTest method testSingleVar.
/**
 * Runs a DeepAR train-then-stream-predict pipeline over a single value column.
 *
 * <p>Batch side: a one-column random table gets an id column "ts", which is turned into an
 * hour-floored timestamp; values are summed per timestamp and fed to the trainer.
 * Stream side: the same timestamp conversion is applied, values are summed per tumbling window,
 * collected into an "mt" column per hopping window, and predicted into "pred". The result is sunk
 * into an .ak file under the root of the test's {@code folder}. The method makes no assertions
 * on the output.
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testSingleVar() throws Exception {
    BatchOperator.setParallelism(1);
    final String timeColName = "ts";

    // ---- batch side ----
    BatchOperator<?> source = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(1);
    String colName = source.getColNames()[0];

    // Clause fragments shared by the batch and stream pipelines.
    final String hourlyTimeClause = String.format(
        "%s, FLOOR(TO_TIMESTAMP(%s * 3600000) TO HOUR) as %s", colName, timeColName, timeColName);
    final String sumSuffix = String.format(", SUM(%s) as %s", colName, colName);

    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(source);
    BatchOperator<?> timeBatchOp = new SelectBatchOp()
        .setClause(hourlyTimeClause)
        .linkFrom(batchWithId);
    BatchOperator<?> groupedTimeBatchOp = new GroupByBatchOp()
        .setSelectClause(timeColName + sumSuffix)
        .setGroupByPredicate(timeColName)
        .linkFrom(timeBatchOp);
    BatchOperator<?> deepArTrainBatchOp = new DeepARTrainBatchOp()
        .setSelectedCol(colName)
        .setTimeCol(timeColName)
        .setWindow(24 * 7)
        .setStride(24)
        .setNumEpochs(1)
        .linkFrom(groupedTimeBatchOp);

    // ---- stream side ----
    StreamOperator<?> sourceStreamOp = new RandomTableSourceStreamOp()
        .setNumCols(1)
        .setMaxRows(1000L);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(sourceStreamOp);
    StreamOperator<?> timeStreamOp = new SelectStreamOp()
        .setClause(hourlyTimeClause)
        .linkFrom(streamWithId);

    String selectClausePred = String.format("TUMBLE_START() as %s", timeColName) + sumSuffix;
    TumbleTimeWindowStreamOp tumbleOp = new TumbleTimeWindowStreamOp()
        .setWindowTime(3600)
        .setTimeCol(timeColName)
        .setClause(selectClausePred)
        .linkFrom(timeStreamOp);
    HopTimeWindowStreamOp hopOp = new HopTimeWindowStreamOp()
        .setTimeCol(timeColName)
        .setClause(String.format("MTABLE_AGG(%s, %s) as %s", timeColName, colName, "mt"))
        .setHopTime(24 * 3600)
        .setWindowTime((24 * 7 - 24) * 3600)
        .linkFrom(tumbleOp);
    DeepARPredictStreamOp predictOp = new DeepARPredictStreamOp(deepArTrainBatchOp)
        .setValueCol("mt")
        .setPredictionCol("pred")
        .setPredictNum(24)
        .linkFrom(hopOp);

    // ---- sink and run ----
    Path resultPath = new Path(folder.getRoot().getPath(), "deepar_test_stream_single_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp sink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    predictOp.link(sink);
    StreamOperator.execute();
}
use of com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp in project Alink by alibaba.
the class DeepARTrainBatchOpTest method testMultiVar.
/**
 * Runs a DeepAR train-then-stream-predict pipeline over a multi-column vector input.
 *
 * <p>Batch side: a random table gets an id column "ts", which is turned into an hour-floored
 * timestamp; every source column is summed per timestamp, the sums are packed into "vec", and the
 * result feeds the trainer. Stream side: the same conversion is applied, columns are summed per
 * tumbling window, packed into "vec", collected into an "mt" column per hopping window, and
 * predicted into "pred". The result is sunk into an .ak file under the root of the test's
 * {@code folder}. The method makes no assertions on the output.
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testMultiVar() throws Exception {
    BatchOperator.setParallelism(1);
    final String timeColName = "ts";
    final String vecColName = "vec";
    final int numCols = 10;

    // ---- batch side ----
    BatchOperator<?> source = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(numCols);
    String[] colNames = source.getColNames();

    // Clause fragments shared by the batch and stream pipelines.
    final String hourlyTimeClause = String.format(
        "%s, FLOOR(TO_TIMESTAMP(%s * 3600000) TO HOUR) as %s",
        Joiner.on(",").join(colNames), timeColName, timeColName);
    // ", SUM(c) as c" for each of the first numCols source columns.
    StringBuilder sumSuffixBuilder = new StringBuilder();
    for (int idx = 0; idx < numCols; ++idx) {
        String name = colNames[idx];
        sumSuffixBuilder.append(", ").append(String.format("SUM(%s) as %s", name, name));
    }
    final String sumSuffix = sumSuffixBuilder.toString();

    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(source);
    BatchOperator<?> timeBatchOp = new SelectBatchOp()
        .setClause(hourlyTimeClause)
        .linkFrom(batchWithId);
    BatchOperator<?> groupedTimeBatchOp = new GroupByBatchOp()
        .setSelectClause(timeColName + sumSuffix)
        .setGroupByPredicate(timeColName)
        .linkFrom(timeBatchOp);
    ColumnsToVectorBatchOp batchWithVec = new ColumnsToVectorBatchOp()
        .setSelectedCols(colNames)
        .setVectorCol(vecColName)
        .linkFrom(groupedTimeBatchOp);
    BatchOperator<?> deepArTrainBatchOp = new DeepARTrainBatchOp()
        .setVectorCol(vecColName)
        .setTimeCol(timeColName)
        .setWindow(24 * 7)
        .setStride(24)
        .setNumEpochs(1)
        .linkFrom(batchWithVec);

    // ---- stream side ----
    StreamOperator<?> sourceStreamOp = new RandomTableSourceStreamOp()
        .setNumCols(numCols)
        .setMaxRows(1000L);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(sourceStreamOp);
    StreamOperator<?> timeStreamOp = new SelectStreamOp()
        .setClause(hourlyTimeClause)
        .linkFrom(streamWithId);

    String selectClausePred = String.format("TUMBLE_START() as %s", timeColName) + sumSuffix;
    TumbleTimeWindowStreamOp tumbleOp = new TumbleTimeWindowStreamOp()
        .setWindowTime(3600)
        .setTimeCol(timeColName)
        .setClause(selectClausePred)
        .linkFrom(timeStreamOp);
    ColumnsToVectorStreamOp streamWithVec = new ColumnsToVectorStreamOp()
        .setSelectedCols(colNames)
        .setVectorCol(vecColName)
        .linkFrom(tumbleOp);
    HopTimeWindowStreamOp hopOp = new HopTimeWindowStreamOp()
        .setTimeCol(timeColName)
        .setClause(String.format("MTABLE_AGG(%s, %s) as %s", timeColName, vecColName, "mt"))
        .setHopTime(24 * 3600)
        .setWindowTime((24 * 7 - 24) * 3600)
        .linkFrom(streamWithVec);
    DeepARPredictStreamOp predictOp = new DeepARPredictStreamOp(deepArTrainBatchOp)
        .setValueCol("mt")
        .setPredictionCol("pred")
        .linkFrom(hopOp);

    // ---- sink and run ----
    Path resultPath = new Path(folder.getRoot().getPath(), "deepar_test_stream_multi_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp sink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    predictOp.link(sink);
    StreamOperator.execute();
}
use of com.alibaba.alink.operator.batch.dataproc.AppendIdBatchOp in project Alink by alibaba.
the class LSTNetTrainBatchOpTest method testStreamSingleVar.
/**
 * Runs an LSTNet train-then-stream-predict pipeline over a single value column.
 *
 * <p>Batch side: a one-column random table gets an id column "ts", which is converted with
 * TO_TIMESTAMP, and the result feeds the trainer. Stream side: a random stream is prepared the
 * same way, the preceding rows are aggregated into "col_agg", and the prediction (configured with
 * 4 threads) is written to "pred" and sunk into an .ak file under the root of the test's
 * {@code folder}. The method makes no assertions on the output.
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testStreamSingleVar() throws Exception {
    BatchOperator.setParallelism(1);

    final String timeColName = "ts";
    final int numCols = 1;
    // One week of hourly steps; used both as the training window and as the preceding-row count.
    final int windowSize = 24 * 7;

    // ---- batch side: build the training table and train the model ----
    BatchOperator<?> source = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(numCols);
    String colName = source.getColNames()[0];
    final String selectClause =
        String.format("TO_TIMESTAMP(%s) as %s, %s", timeColName, timeColName, colName);

    AppendIdBatchOp batchWithId = new AppendIdBatchOp()
        .setIdCol(timeColName)
        .linkFrom(source);
    BatchOperator<?> timeBatchOp = new SelectBatchOp()
        .setClause(selectClause)
        .linkFrom(batchWithId);
    LSTNetTrainBatchOp trainOp = new LSTNetTrainBatchOp()
        .setSelectedCol(colName)
        .setTimeCol(timeColName)
        .setWindow(windowSize)
        .setHorizon(12)
        .setNumEpochs(1)
        .linkFrom(timeBatchOp);

    // ---- stream side: build the prediction input and predict ----
    StreamOperator<?> sourceStreamOp = new RandomTableSourceStreamOp()
        .setNumCols(numCols)
        .setMaxRows(6000L);
    AppendIdStreamOp streamWithId = new AppendIdStreamOp()
        .setIdCol(timeColName)
        .linkFrom(sourceStreamOp);
    StreamOperator<?> timestampStreamOp = new SelectStreamOp()
        .setClause(selectClause)
        .linkFrom(streamWithId);

    final String aggClause =
        String.format("MTABLE_AGG_PRECEDING(%s, %s) as col_agg", timeColName, colName);
    OverCountWindowStreamOp precedingRowsOp = new OverCountWindowStreamOp()
        .setClause(aggClause)
        .setTimeCol(timeColName)
        .setPrecedingRows(windowSize)
        .linkFrom(timestampStreamOp);
    LSTNetPredictStreamOp predictStreamOp = new LSTNetPredictStreamOp(trainOp)
        .setValueCol("col_agg")
        .setPredictionCol("pred")
        .setReservedCols(timeColName)
        .setNumThreads(4)
        .linkFrom(precedingRowsOp);

    // ---- sink and run ----
    Path resultPath = new Path(folder.getRoot().getPath(), "lstnet_test_stream_single_var_result.ak");
    FilePath tmpAkFile = new FilePath(resultPath);
    AkSinkStreamOp sink = new AkSinkStreamOp()
        .setOverwriteSink(true)
        .setFilePath(tmpAkFile);
    predictStreamOp.link(sink);
    StreamOperator.execute();
}
Aggregations