Use of com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp in the project Alink by alibaba.
From the class LSTNetTrainBatchOpTest, method testStreamMultiVar:
/**
 * Trains an LSTNet model on a randomly generated batch table and then applies
 * it to a randomly generated stream, writing the predictions to a temporary
 * .ak file under the test folder.
 */
@Test
public void testStreamMultiVar() throws Exception {
    BatchOperator.setParallelism(1);

    final int numCols = 10;
    final String timeColName = "ts";
    final String vecColName = "vec";
    // Cast the appended id column to a timestamp and keep only it plus the vector column.
    final String selectClause = "TO_TIMESTAMP(" + timeColName + ") as " + timeColName + ", " + vecColName;

    // --- Batch side: assemble training data and fit the model. ---
    BatchOperator<?> randomBatch = new RandomTableSourceBatchOp()
        .setNumRows(1000L)
        .setNumCols(numCols);
    String[] featureColNames = randomBatch.getColNames();
    BatchOperator<?> trainInput = randomBatch
        .link(new AppendIdBatchOp().setIdCol(timeColName))
        .link(new ColumnsToVectorBatchOp().setSelectedCols(featureColNames).setVectorCol(vecColName))
        .link(new SelectBatchOp().setClause(selectClause));
    LSTNetTrainBatchOp model = new LSTNetTrainBatchOp()
        .setVectorCol(vecColName)
        .setTimeCol(timeColName)
        .setWindow(24 * 7)
        .setHorizon(12)
        .setNumEpochs(1)
        .linkFrom(trainInput);

    // --- Stream side: replay random rows through the fitted model. ---
    // NOTE: here the id is appended AFTER vectorization, unlike the batch side;
    // preserved as in the original test.
    StreamOperator<?> windowedStream = new RandomTableSourceStreamOp()
        .setNumCols(numCols)
        .setMaxRows(1000L)
        .link(new ColumnsToVectorStreamOp().setSelectedCols(featureColNames).setVectorCol(vecColName))
        .link(new AppendIdStreamOp().setIdCol(timeColName))
        .link(new SelectStreamOp().setClause(selectClause))
        .link(new OverCountWindowStreamOp()
            .setClause("MTABLE_AGG_PRECEDING(" + timeColName + ", " + vecColName + ") as col_agg")
            .setTimeCol(timeColName)
            .setPrecedingRows(24 * 7));
    StreamOperator<?> prediction = new LSTNetPredictStreamOp(model)
        .setValueCol("col_agg")
        .setPredictionCol("pred")
        .setReservedCols(timeColName)
        .linkFrom(windowedStream);

    FilePath resultPath = new FilePath(new Path(folder.getRoot().getPath(), "lstnet_test_stream_multi_var_result.ak"));
    prediction.link(new AkSinkStreamOp().setOverwriteSink(true).setFilePath(resultPath));
    StreamOperator.execute();
}
Use of com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp in the project Alink by alibaba.
From the class TFTableModelPredictStreamOpTest, method test:
/**
 * Trains a TF DNN model in batch mode and then runs streaming inference with
 * the trained model, printing the predicted logits.
 *
 * NOTE(review): the stream parallelism is saved and restored, but only the
 * batch parallelism is changed in between — confirm whether
 * {@code StreamOperator.setParallelism(3)} was intended as well.
 */
@Category(DLTest.class)
@Test
public void test() throws Exception {
    int previousStreamParallelism = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment().getParallelism();
    BatchOperator.setParallelism(3);

    BatchOperator<?> trainSource = new RandomTableSourceBatchOp().setNumRows(100L).setNumCols(10);
    String[] featureColNames = trainSource.getColNames();
    // Attach a random binary label so the training script has a target column.
    trainSource = trainSource.select("*, case when RAND() > 0.5 then 1. else 0. end as label");

    Map<String, Object> userParams = new HashMap<>();
    userParams.put("featureCols", JsonConverter.toJson(featureColNames));
    userParams.put("labelCol", "label");
    userParams.put("batch_size", 16);
    userParams.put("num_epochs", 1);

    TFTableModelTrainBatchOp trainOp = new TFTableModelTrainBatchOp()
        .setUserFiles(new String[] { "res:///tf_dnn_train.py" })
        .setMainScriptFile("res:///tf_dnn_train.py")
        .setUserParams(JsonConverter.toJson(userParams))
        .setNumWorkers(2)
        .setNumPSs(1)
        .linkFrom(trainSource);

    StreamOperator<?> inferSource = new RandomTableSourceStreamOp().setNumCols(10).setMaxRows(100L);
    StreamOperator<?> prediction = new TFTableModelPredictStreamOp(trainOp)
        .setOutputSchemaStr("logits double")
        .setOutputSignatureDefs(new String[] { "logits" })
        .setSignatureDefKey("predict")
        .setSelectedCols(featureColNames)
        .linkFrom(inferSource);
    prediction.print();
    StreamOperator.execute();

    StreamOperator.setParallelism(previousStreamParallelism);
}
Use of com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp in the project Alink by alibaba.
From the class TensorFlow2StreamOpTest, method testAllReduce:
/**
 * Runs a distributed TF2 job with 3 workers and no parameter servers
 * (all-reduce strategy) over a random stream and prints the emitted model rows.
 */
@Test
public void testAllReduce() throws Exception {
    int previousStreamParallelism = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment().getParallelism();
    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    // Ensure the TF 2.3.1 runtime plugin is available locally before launching.
    RegisterKey registerKey = DLEnvConfig.getRegisterKey(Version.TF231);
    AlinkGlobalConfiguration.getPluginDownloader()
        .downloadPlugin(registerKey.getName(), registerKey.getVersion());

    StreamOperator.setParallelism(3);
    // Allow extra time for the DL cluster to come up before data flows in.
    DLLauncherStreamOp.DL_CLUSTER_START_TIME = 30 * 1000;

    StreamOperator<?> source = new RandomTableSourceStreamOp().setMaxRows(1000L).setNumCols(10);
    String[] featureColNames = source.getColNames();
    // Attach a random binary label and force the "num" column to DOUBLE.
    source = source.select("*, case when RAND() > 0.5 then 1. else 0. end as label");
    source = source.link(new TypeConvertStreamOp().setSelectedCols("num").setTargetType(TargetType.DOUBLE));

    Map<String, Object> userParams = new HashMap<>();
    userParams.put("featureCols", JsonConverter.toJson(featureColNames));
    userParams.put("labelCol", "label");
    userParams.put("batch_size", 16);
    userParams.put("num_epochs", 1);

    StreamOperator<?> trained = new TensorFlow2StreamOp()
        .setUserFiles(new String[] { "res:///tf_dnn_stream.py" })
        .setMainScriptFile("res:///tf_dnn_stream.py")
        .setUserParams(JsonConverter.toJson(userParams))
        .setNumWorkers(3)
        .setNumPSs(0)
        .setOutputSchemaStr("model_id long, model_info string")
        .linkFrom(source);
    trained.print();
    StreamOperator.execute();

    StreamOperator.setParallelism(previousStreamParallelism);
}
Use of com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp in the project Alink by alibaba.
From the class TensorFlow2StreamOpTest, method testWithAutoWorkersPSs:
/**
 * Runs a distributed TF2 job over a random stream while letting Alink decide
 * the worker/PS split automatically (no explicit setNumWorkers/setNumPSs),
 * and prints the emitted model rows.
 */
@Test
public void testWithAutoWorkersPSs() throws Exception {
    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    // Ensure the TF 2.3.1 runtime plugin is available locally before launching.
    RegisterKey registerKey = DLEnvConfig.getRegisterKey(Version.TF231);
    AlinkGlobalConfiguration.getPluginDownloader()
        .downloadPlugin(registerKey.getName(), registerKey.getVersion());

    int previousStreamParallelism = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment().getParallelism();
    StreamOperator.setParallelism(3);
    // Allow extra time for the DL cluster to come up before data flows in.
    DLLauncherStreamOp.DL_CLUSTER_START_TIME = 30 * 1000;

    StreamOperator<?> source = new RandomTableSourceStreamOp().setMaxRows(1000L).setNumCols(10);
    String[] featureColNames = source.getColNames();
    // Attach a random binary label and force the "num" column to DOUBLE.
    source = source.select("*, case when RAND() > 0.5 then 1. else 0. end as label");
    source = source.link(new TypeConvertStreamOp().setSelectedCols("num").setTargetType(TargetType.DOUBLE));

    Map<String, Object> userParams = new HashMap<>();
    userParams.put("featureCols", JsonConverter.toJson(featureColNames));
    userParams.put("labelCol", "label");
    userParams.put("batch_size", 16);
    userParams.put("num_epochs", 1);

    StreamOperator<?> trained = new TensorFlow2StreamOp()
        .setUserFiles(new String[] { "res:///tf_dnn_stream.py" })
        .setMainScriptFile("res:///tf_dnn_stream.py")
        .setUserParams(JsonConverter.toJson(userParams))
        .setOutputSchemaStr("model_id long, model_info string")
        .linkFrom(source);
    trained.print();
    StreamOperator.execute();

    StreamOperator.setParallelism(previousStreamParallelism);
}
Use of com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp in the project Alink by alibaba.
From the class TensorFlowStreamOpTest, method testWithAutoWorkersPSs:
/**
 * Runs a distributed TF1 (1.15) job over a random stream while letting Alink
 * decide the worker/PS split automatically (no explicit
 * setNumWorkers/setNumPSs), and prints the emitted model rows.
 */
@Test
public void testWithAutoWorkersPSs() throws Exception {
    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    // Ensure the TF 1.15 runtime plugin is available locally before launching.
    RegisterKey registerKey = DLEnvConfig.getRegisterKey(Version.TF115);
    AlinkGlobalConfiguration.getPluginDownloader()
        .downloadPlugin(registerKey.getName(), registerKey.getVersion());

    int previousStreamParallelism = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment().getParallelism();
    StreamOperator.setParallelism(3);
    // Allow extra time for the DL cluster to come up before data flows in.
    DLLauncherStreamOp.DL_CLUSTER_START_TIME = 30 * 1000;

    StreamOperator<?> source = new RandomTableSourceStreamOp().setMaxRows(1000L).setNumCols(10);
    String[] featureColNames = source.getColNames();
    // Attach a random binary label and force the "num" column to DOUBLE.
    source = source.select("*, case when RAND() > 0.5 then 1. else 0. end as label");
    source = source.link(new TypeConvertStreamOp().setSelectedCols("num").setTargetType(TargetType.DOUBLE));

    Map<String, Object> userParams = new HashMap<>();
    userParams.put("featureCols", JsonConverter.toJson(featureColNames));
    userParams.put("labelCol", "label");
    userParams.put("batch_size", 16);
    userParams.put("num_epochs", 1);

    StreamOperator<?> trained = new TensorFlowStreamOp()
        .setUserFiles(new String[] { "res:///tf_dnn_stream.py" })
        .setMainScriptFile("res:///tf_dnn_stream.py")
        .setUserParams(JsonConverter.toJson(userParams))
        .setOutputSchemaStr("model_id long, model_info string")
        .linkFrom(source);
    trained.print();
    StreamOperator.execute();

    StreamOperator.setParallelism(previousStreamParallelism);
}
Aggregations