
Example 1 with TypeConvertStreamOp

Use of com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp in project Alink by Alibaba.

The class DLTypeUtils, method doubleColumnsToFloat:

public static StreamOperator<?> doubleColumnsToFloat(StreamOperator<?> input) {
    List<String> doubleColNames = new ArrayList<>();
    String[] colNames = input.getColNames();
    TypeInformation<?>[] colTypes = input.getColTypes();
    for (int i = 0; i < colTypes.length; i += 1) {
        if (colTypes[i].equals(Types.DOUBLE)) {
            doubleColNames.add(colNames[i]);
        }
    }
    if (doubleColNames.size() > 0) {
        TypeConvertStreamOp typeConvertStreamOp = new TypeConvertStreamOp()
            .setTargetType(TargetType.FLOAT)
            .setSelectedCols(doubleColNames.toArray(new String[0]))
            .setMLEnvironmentId(input.getMLEnvironmentId());
        input = typeConvertStreamOp.linkFrom(input);
    }
    return input;
}
Also used: TypeConvertStreamOp (com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp), ArrayList (java.util.ArrayList), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)
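
A minimal usage sketch of the helper above, assuming a RandomTableSourceStreamOp (the source used in the tests below) whose generated feature columns are DOUBLE:

StreamOperator<?> source = new RandomTableSourceStreamOp()
    .setMaxRows(100L)
    .setNumCols(4);
// Converts every DOUBLE column of the input to FLOAT before it is fed to a deep-learning operator.
StreamOperator<?> floatInput = DLTypeUtils.doubleColumnsToFloat(source);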

Example 2 with TypeConvertStreamOp

Use of com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp in project Alink by Alibaba.

The class BaseDLStreamOp, method linkFrom:

// The user's main script is renamed to `userScriptMainFileName`, and `main` is called.
@Override
public T linkFrom(StreamOperator<?>... inputs) {
    initDLSystemParams();
    StreamOperator<?> in = checkAndGetFirst(inputs);
    if (null != getSelectedCols()) {
        in = in.select(getSelectedCols());
    }
    in = DLTypeUtils.doubleColumnsToFloat(in);
    List<String> doubleColNames = new ArrayList<>();
    String[] colNames = in.getColNames();
    TypeInformation<?>[] colTypes = in.getColTypes();
    for (int i = 0; i < colTypes.length; i += 1) {
        if (colTypes[i].equals(Types.DOUBLE)) {
            doubleColNames.add(colNames[i]);
        }
    }
    if (doubleColNames.size() > 0) {
        TypeConvertStreamOp typeConvertStreamOp = new TypeConvertStreamOp()
            .setTargetType(TargetType.FLOAT)
            .setSelectedCols(doubleColNames.toArray(new String[0]));
        in = typeConvertStreamOp.linkFrom(in);
    }
    ExternalFilesConfig externalFilesConfig = getUserFiles()
        .addFilePaths(resPyFiles)
        .addRenameMap(getMainScriptFile(), userScriptMainFileName);
    DLLauncherStreamOp dlLauncherStreamOp = new DLLauncherStreamOp()
        .setOutputSchemaStr(getOutputSchemaStr())
        .setEntryFunc(entryFuncName)
        .setMainScriptFile(mainScriptFileName)
        .setUserFiles(externalFilesConfig)
        .setUserParams(getUserParams())
        .setNumWorkers(getNumWorkers())
        .setNumPSs(numPss)
        .setPythonEnv(getPythonEnv())
        .linkFrom(in);
    setOutputTable(dlLauncherStreamOp.getOutputTable());
    return (T) this;
}
Also used: TypeConvertStreamOp (com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp), ArrayList (java.util.ArrayList), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)
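
A quick way to confirm that the conversion in linkFrom took effect is to inspect the column types through the same accessors used above. A minimal sketch, assuming hypothetical DOUBLE columns f0 and f1 on an upstream operator named input:

// Convert two DOUBLE columns to FLOAT and print the resulting column types.
StreamOperator<?> converted = new TypeConvertStreamOp()
    .setTargetType(TargetType.FLOAT)
    .setSelectedCols("f0", "f1")
    .linkFrom(input);
for (TypeInformation<?> type : converted.getColTypes()) {
    // The selected columns should now report FLOAT instead of DOUBLE.
    System.out.println(type);
}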

Example 3 with TypeConvertStreamOp

Use of com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp in project Alink by Alibaba.

The class TensorFlow2StreamOpTest, method testAllReduce:

@Test
public void testAllReduce() throws Exception {
    int savedStreamParallelism = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment().getParallelism();
    AlinkGlobalConfiguration.setPrintProcessInfo(true);
    PluginDownloader pluginDownloader = AlinkGlobalConfiguration.getPluginDownloader();
    RegisterKey registerKey = DLEnvConfig.getRegisterKey(Version.TF231);
    pluginDownloader.downloadPlugin(registerKey.getName(), registerKey.getVersion());
    StreamOperator.setParallelism(3);
    DLLauncherStreamOp.DL_CLUSTER_START_TIME = 30 * 1000;
    StreamOperator<?> source = new RandomTableSourceStreamOp().setMaxRows(1000L).setNumCols(10);
    String[] colNames = source.getColNames();
    source = source.select("*, case when RAND() > 0.5 then 1. else 0. end as label");
    source = source.link(new TypeConvertStreamOp().setSelectedCols("num").setTargetType(TargetType.DOUBLE));
    String label = "label";
    Map<String, Object> userParams = new HashMap<>();
    userParams.put("featureCols", JsonConverter.toJson(colNames));
    userParams.put("labelCol", label);
    userParams.put("batch_size", 16);
    userParams.put("num_epochs", 1);
    TensorFlow2StreamOp tensorFlow2StreamOp = new TensorFlow2StreamOp()
        .setUserFiles(new String[] { "res:///tf_dnn_stream.py" })
        .setMainScriptFile("res:///tf_dnn_stream.py")
        .setUserParams(JsonConverter.toJson(userParams))
        .setNumWorkers(3)
        .setNumPSs(0)
        .setOutputSchemaStr("model_id long, model_info string")
        .linkFrom(source);
    tensorFlow2StreamOp.print();
    StreamOperator.execute();
    StreamOperator.setParallelism(savedStreamParallelism);
}
Also used: TypeConvertStreamOp (com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp), PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader), HashMap (java.util.HashMap), RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey), RandomTableSourceStreamOp (com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp), Test (org.junit.Test), DLTest (com.alibaba.alink.testutil.categories.DLTest)
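
The only substantive difference between this test and testWithAutoWorkersPSs below is the cluster layout: here the worker and parameter-server counts are pinned, while the next test omits both setters and leaves the choice to the launcher. A minimal sketch of just that delta, with all other parameters assumed to be configured as in the tests:

// Explicit layout (as in testAllReduce): three workers, no parameter servers, i.e. all-reduce style training.
TensorFlow2StreamOp explicitLayout = new TensorFlow2StreamOp()
    .setNumWorkers(3)
    .setNumPSs(0);
// Automatic layout (as in testWithAutoWorkersPSs): simply omit setNumWorkers and setNumPSs.
TensorFlow2StreamOp autoLayout = new TensorFlow2StreamOp();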

Example 4 with TypeConvertStreamOp

Use of com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp in project Alink by Alibaba.

The class TensorFlow2StreamOpTest, method testWithAutoWorkersPSs:

@Test
public void testWithAutoWorkersPSs() throws Exception {
    AlinkGlobalConfiguration.setPrintProcessInfo(true);
    PluginDownloader pluginDownloader = AlinkGlobalConfiguration.getPluginDownloader();
    RegisterKey registerKey = DLEnvConfig.getRegisterKey(Version.TF231);
    pluginDownloader.downloadPlugin(registerKey.getName(), registerKey.getVersion());
    int savedStreamParallelism = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment().getParallelism();
    StreamOperator.setParallelism(3);
    DLLauncherStreamOp.DL_CLUSTER_START_TIME = 30 * 1000;
    StreamOperator<?> source = new RandomTableSourceStreamOp().setMaxRows(1000L).setNumCols(10);
    String[] colNames = source.getColNames();
    source = source.select("*, case when RAND() > 0.5 then 1. else 0. end as label");
    source = source.link(new TypeConvertStreamOp().setSelectedCols("num").setTargetType(TargetType.DOUBLE));
    String label = "label";
    Map<String, Object> userParams = new HashMap<>();
    userParams.put("featureCols", JsonConverter.toJson(colNames));
    userParams.put("labelCol", label);
    userParams.put("batch_size", 16);
    userParams.put("num_epochs", 1);
    TensorFlow2StreamOp tensorFlow2StreamOp = new TensorFlow2StreamOp()
        .setUserFiles(new String[] { "res:///tf_dnn_stream.py" })
        .setMainScriptFile("res:///tf_dnn_stream.py")
        .setUserParams(JsonConverter.toJson(userParams))
        .setOutputSchemaStr("model_id long, model_info string")
        .linkFrom(source);
    tensorFlow2StreamOp.print();
    StreamOperator.execute();
    StreamOperator.setParallelism(savedStreamParallelism);
}
Also used: TypeConvertStreamOp (com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp), PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader), HashMap (java.util.HashMap), RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey), RandomTableSourceStreamOp (com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp), Test (org.junit.Test), DLTest (com.alibaba.alink.testutil.categories.DLTest)

Example 5 with TypeConvertStreamOp

Use of com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp in project Alink by Alibaba.

The class TensorFlowStreamOpTest, method testWithAutoWorkersPSs:

@Test
public void testWithAutoWorkersPSs() throws Exception {
    AlinkGlobalConfiguration.setPrintProcessInfo(true);
    PluginDownloader pluginDownloader = AlinkGlobalConfiguration.getPluginDownloader();
    RegisterKey registerKey = DLEnvConfig.getRegisterKey(Version.TF115);
    pluginDownloader.downloadPlugin(registerKey.getName(), registerKey.getVersion());
    int savedStreamParallelism = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment().getParallelism();
    StreamOperator.setParallelism(3);
    DLLauncherStreamOp.DL_CLUSTER_START_TIME = 30 * 1000;
    StreamOperator<?> source = new RandomTableSourceStreamOp().setMaxRows(1000L).setNumCols(10);
    String[] colNames = source.getColNames();
    source = source.select("*, case when RAND() > 0.5 then 1. else 0. end as label");
    source = source.link(new TypeConvertStreamOp().setSelectedCols("num").setTargetType(TargetType.DOUBLE));
    String label = "label";
    Map<String, Object> userParams = new HashMap<>();
    userParams.put("featureCols", JsonConverter.toJson(colNames));
    userParams.put("labelCol", label);
    userParams.put("batch_size", 16);
    userParams.put("num_epochs", 1);
    TensorFlowStreamOp tensorFlowStreamOp = new TensorFlowStreamOp()
        .setUserFiles(new String[] { "res:///tf_dnn_stream.py" })
        .setMainScriptFile("res:///tf_dnn_stream.py")
        .setUserParams(JsonConverter.toJson(userParams))
        .setOutputSchemaStr("model_id long, model_info string")
        .linkFrom(source);
    tensorFlowStreamOp.print();
    StreamOperator.execute();
    StreamOperator.setParallelism(savedStreamParallelism);
}
Also used: TypeConvertStreamOp (com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp), PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader), HashMap (java.util.HashMap), RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey), RandomTableSourceStreamOp (com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp), Test (org.junit.Test), DLTest (com.alibaba.alink.testutil.categories.DLTest)

Aggregations

TypeConvertStreamOp (com.alibaba.alink.operator.stream.dataproc.TypeConvertStreamOp): 5 usages
PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader): 3 usages
RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey): 3 usages
RandomTableSourceStreamOp (com.alibaba.alink.operator.stream.source.RandomTableSourceStreamOp): 3 usages
DLTest (com.alibaba.alink.testutil.categories.DLTest): 3 usages
HashMap (java.util.HashMap): 3 usages
Test (org.junit.Test): 3 usages
ArrayList (java.util.ArrayList): 2 usages
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 2 usages