use of org.apache.zeppelin.interpreter.InterpreterResultMessage in project zeppelin by apache.
the class IPyFlinkInterpreterTest method testBatchPyFlink.
public static void testBatchPyFlink(Interpreter pyflinkInterpreter, LazyOpenInterpreter flinkScalaInterpreter) throws InterpreterException, IOException {
InterpreterContext context = createInterpreterContext();
InterpreterResult result = pyflinkInterpreter.interpret("import tempfile\n" + "import os\n" + "import shutil\n" + "sink_path = tempfile.gettempdir() + '/batch.csv'\n" + "if os.path.exists(sink_path):\n" + "  if os.path.isfile(sink_path):\n" + "    os.remove(sink_path)\n" + "  else:\n" + "    shutil.rmtree(sink_path)\n" + "b_env.set_parallelism(1)\n" + "t = bt_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" + "bt_env.connect(FileSystem().path(sink_path)) \\\n" + " .with_format(OldCsv()\n" + " .field_delimiter(',')\n" + " .field(\"a\", DataTypes.BIGINT())\n" + " .field(\"b\", DataTypes.STRING())\n" + " .field(\"c\", DataTypes.STRING())) \\\n" + " .with_schema(Schema()\n" + " .field(\"a\", DataTypes.BIGINT())\n" + " .field(\"b\", DataTypes.STRING())\n" + " .field(\"c\", DataTypes.STRING())) \\\n" + " .create_temporary_table(\"batch_sink\")\n" + "t.select(\"a + 1, b, c\").insert_into(\"batch_sink\")\n" + "bt_env.execute(\"batch_job\")", context);
assertEquals(result.toString(), InterpreterResult.Code.SUCCESS, result.code());
// use group by
context = createInterpreterContext();
result = pyflinkInterpreter.interpret("import tempfile\n" + "import os\n" + "import shutil\n" + "sink_path = tempfile.gettempdir() + '/streaming.csv'\n" + "if os.path.exists(sink_path):\n" + "  if os.path.isfile(sink_path):\n" + "    os.remove(sink_path)\n" + "  else:\n" + "    shutil.rmtree(sink_path)\n" + "b_env.set_parallelism(1)\n" + "t = bt_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" + "bt_env.connect(FileSystem().path(sink_path)) \\\n" + " .with_format(OldCsv()\n" + " .field_delimiter(',')\n" + " .field(\"a\", DataTypes.STRING())\n" + " .field(\"b\", DataTypes.BIGINT())\n" + " .field(\"c\", DataTypes.BIGINT())) \\\n" + " .with_schema(Schema()\n" + " .field(\"a\", DataTypes.STRING())\n" + " .field(\"b\", DataTypes.BIGINT())\n" + " .field(\"c\", DataTypes.BIGINT())) \\\n" + " .create_temporary_table(\"batch_sink4\")\n" + "t.group_by(\"c\").select(\"c, sum(a), count(b)\").insert_into(\"batch_sink4\")\n" + "bt_env.execute(\"batch_job4\")", context);
assertEquals(result.toString(), InterpreterResult.Code.SUCCESS, result.code());
// use scala udf in pyflink
// define scala udf
result = flinkScalaInterpreter.interpret("class AddOne extends ScalarFunction {\n" + " def eval(a: java.lang.Long): String = a + \"\1\"\n" + "}", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
result = flinkScalaInterpreter.interpret("btenv.registerFunction(\"addOne\", new AddOne())", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = createInterpreterContext();
result = pyflinkInterpreter.interpret("import tempfile\n" + "import os\n" + "import shutil\n" + "sink_path = tempfile.gettempdir() + '/streaming.csv'\n" + "if os.path.exists(sink_path):\n" + "  if os.path.isfile(sink_path):\n" + "    os.remove(sink_path)\n" + "  else:\n" + "    shutil.rmtree(sink_path)\n" + "b_env.set_parallelism(1)\n" + "t = bt_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" + "bt_env.connect(FileSystem().path(sink_path)) \\\n" + " .with_format(OldCsv()\n" + " .field_delimiter(',')\n" + " .field(\"a\", DataTypes.BIGINT())\n" + " .field(\"b\", DataTypes.STRING())\n" + " .field(\"c\", DataTypes.STRING())) \\\n" + " .with_schema(Schema()\n" + " .field(\"a\", DataTypes.BIGINT())\n" + " .field(\"b\", DataTypes.STRING())\n" + " .field(\"c\", DataTypes.STRING())) \\\n" + " .create_temporary_table(\"batch_sink3\")\n" + "t.select(\"a, addOne(a), c\").insert_into(\"batch_sink3\")\n" + "bt_env.execute(\"batch_job3\")", context);
assertEquals(result.toString(), InterpreterResult.Code.SUCCESS, result.code());
// z.show
context = createInterpreterContext();
result = pyflinkInterpreter.interpret("import tempfile\n" + "import os\n" + "import shutil\n" + "sink_path = tempfile.gettempdir() + '/streaming.csv'\n" + "if os.path.exists(sink_path):\n" + "  if os.path.isfile(sink_path):\n" + "    os.remove(sink_path)\n" + "  else:\n" + "    shutil.rmtree(sink_path)\n" + "b_env.set_parallelism(1)\n" + "t = bt_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" + "z.show(t)", context);
assertEquals(result.toString(), InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
FlinkVersion flinkVersion = ((FlinkInterpreter) flinkScalaInterpreter.getInnerInterpreter()).getFlinkVersion();
if (flinkVersion.isAfterFlink114()) {
assertEquals(InterpreterResult.Type.TEXT, resultMessages.get(0).getType());
assertEquals("z.show(DataSet) is not supported after Flink 1.14", resultMessages.get(0).getData());
} else {
assertEquals(context.out.toString(), 1, resultMessages.size());
assertEquals(context.out.toString(), InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertEquals(context.out.toString(), "a\tb\tc\n1\thi\thello\n2\thi\thello\n", resultMessages.get(0).getData());
}
}
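The test above obtains each InterpreterContext from a createInterpreterContext() helper that is not part of this snippet. A minimal sketch of what such a helper could look like, assuming Zeppelin's InterpreterContext.builder() API and an in-memory InterpreterOutput; the ids are placeholders and the real test may set additional fields (event client, angular object registry, local properties):
// Hypothetical reconstruction, not the actual helper from IPyFlinkInterpreterTest.
// It only needs to yield a context whose output buffer can later be read back
// through context.out.toInterpreterResultMessage().
private static InterpreterContext createInterpreterContext() {
  return InterpreterContext.builder()
          .setNoteId("note_1")                            // placeholder id
          .setParagraphId("paragraph_1")                  // placeholder id
          .setInterpreterOut(new InterpreterOutput(null)) // buffered output, no flush listener
          .build();
}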
use of org.apache.zeppelin.interpreter.InterpreterResultMessage in project zeppelin by apache.
the class FlinkStreamSqlInterpreterTest method testUpdateStreamTableApi.
@Test
public void testUpdateStreamTableApi() throws IOException, InterpreterException {
String initStreamScalaScript = getInitStreamScript(100);
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript, getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
InterpreterContext context = getInterpreterContext();
String code = "val table = stenv.sqlQuery(\"select url, count(1) as pv from log group by url\")\nz.show(table, streamType=\"update\")";
result = flinkInterpreter.interpret(code, context);
assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(), resultMessages.get(0).getData().contains("url\tpv\n"));
}
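getInitStreamScript(100) is defined elsewhere in FlinkStreamSqlInterpreterTest; judging from the comment in the disabled save-point test further down ("check init_stream.scala"), it loads a Scala script from the test resources and parameterizes how fast the generated log stream is produced. A rough sketch under those assumptions, using commons-io's IOUtils; the resource path and the placeholder token are guesses:
// Hypothetical reconstruction: read init_stream.scala from the test classpath and
// substitute the sleep interval that paces the generated log stream.
public static String getInitStreamScript(int sleepInterval) throws IOException {
  return IOUtils.toString(
          FlinkStreamSqlInterpreterTest.class.getResource("/init_stream.scala"),
          StandardCharsets.UTF_8)
      .replace("{{sleep_interval}}", String.valueOf(sleepInterval)); // token name assumed
}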
use of org.apache.zeppelin.interpreter.InterpreterResultMessage in project zeppelin by apache.
the class FlinkStreamSqlInterpreterTest method testSingleStreamTableApi.
@Test
public void testSingleStreamTableApi() throws IOException, InterpreterException {
String initStreamScalaScript = getInitStreamScript(100);
InterpreterContext context = getInterpreterContext();
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = getInterpreterContext();
String code = "val table = stenv.sqlQuery(\"select max(rowtime), count(1) from log\")\nz.show(table,streamType=\"single\", configs = Map(\"template\" -> \"Total Count: {1} <br/> {0}\"))";
result = flinkInterpreter.interpret(code, context);
assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.ANGULAR, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(), resultMessages.get(0).getData().contains("Total Count"));
context = getInterpreterContext();
result = sqlInterpreter.interpret("show tables", context);
assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, result.code());
resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertEquals("table\nlog\n", resultMessages.get(0).getData());
}
use of org.apache.zeppelin.interpreter.InterpreterResultMessage in project zeppelin by apache.
the class FlinkStreamSqlInterpreterTest method testResumeStreamSqlFromSavePoint.
// TODO(zjffdu) flaky test
// @Test
public void testResumeStreamSqlFromSavePoint() throws IOException, InterpreterException, InterruptedException, TimeoutException {
String initStreamScalaScript = getInitStreamScript(1000);
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript, getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
File savePointDir = FileUtils.getTempDirectory();
final Waiter waiter = new Waiter();
Thread thread = new Thread(() -> {
try {
InterpreterContext context = getInterpreterContext();
context.getLocalProperties().put("savePointDir", savePointDir.getAbsolutePath());
context.getLocalProperties().put("parallelism", "1");
context.getLocalProperties().put("maxParallelism", "10");
InterpreterResult result2 = sqlInterpreter.interpret("select url, count(1) as pv from " + "log group by url", context);
System.out.println("------------" + context.out.toString());
System.out.println("------------" + result2);
waiter.assertTrue(context.out.toString().contains("url\tpv\n"));
waiter.assertEquals(InterpreterResult.Code.SUCCESS, result2.code());
} catch (Exception e) {
e.printStackTrace();
waiter.fail("Should not fail here");
}
waiter.resume();
});
thread.start();
// the streaming job will run for 20 seconds. check init_stream.scala
// sleep 10 seconds to make sure the job is started but not finished
Thread.sleep(10 * 1000);
InterpreterContext context = getInterpreterContext();
context.getLocalProperties().put("savePointDir", savePointDir.getAbsolutePath());
context.getLocalProperties().put("parallelism", "2");
context.getLocalProperties().put("maxParallelism", "10");
sqlInterpreter.cancel(context);
waiter.await(10 * 1000);
// resume job from savepoint
sqlInterpreter.interpret("select url, count(1) as pv from " + "log group by url", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(), resultMessages.get(0).getData().contains("url\tpv\n"));
}
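The Waiter used above comes from the ConcurrentUnit library (net.jodah.concurrentunit): assertions made on the background thread are recorded by the Waiter, resume() signals that the worker is done, and await(timeout) blocks the test thread and rethrows any recorded failure. A minimal, self-contained illustration of that pattern, independent of Flink:
import net.jodah.concurrentunit.Waiter;

public class WaiterPatternExample {
  public static void main(String[] args) throws Exception {
    final Waiter waiter = new Waiter();
    new Thread(() -> {
      waiter.assertTrue(1 + 1 == 2); // recorded on the worker thread
      waiter.resume();               // unblocks await() below
    }).start();
    waiter.await(5000);              // rethrows the worker's failure here, if any
  }
}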
use of org.apache.zeppelin.interpreter.InterpreterResultMessage in project zeppelin by apache.
the class FlinkStreamSqlInterpreterTest method testAppendStreamSql.
@Test
public void testAppendStreamSql() throws IOException, InterpreterException {
String initStreamScalaScript = getInitStreamScript(100);
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript, getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
InterpreterContext context = getInterpreterContext();
context.getLocalProperties().put("type", "append");
result = sqlInterpreter.interpret("select TUMBLE_START(rowtime, INTERVAL '5' SECOND) as " + "start_time, url, count(1) as pv from log group by " + "TUMBLE(rowtime, INTERVAL '5' SECOND), url", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(), resultMessages.get(0).getData().contains("url\tpv\n"));
}
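Together with testUpdateStreamTableApi and testSingleStreamTableApi above, this test covers the three streaming result modes exercised in this class: append (window results appended over time), update (a continuously refreshed aggregate), and single (one row rendered through an Angular template). Recapping how each mode is requested, based only on the tests shown here:
// For %flink.ssql paragraphs the mode is a paragraph-local property:
InterpreterContext context = getInterpreterContext();
context.getLocalProperties().put("type", "append");   // testAppendStreamSql
// For Table API results, z.show selects the mode inside the Scala paragraph:
//   z.show(table, streamType="update")                                             // testUpdateStreamTableApi
//   z.show(table, streamType="single", configs = Map("template" -> "Total Count: {1} <br/> {0}"))  // testSingleStreamTableApi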