Search in sources :

Example 11 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class ParquetTableProviderTest method testWriteAndReadTable.

@Test
public void testWriteAndReadTable() {
    File destinationFile = new File(tempFolder.getRoot(), "person-info/");
    BeamSqlEnv env = BeamSqlEnv.inMemory(new ParquetTableProvider());
    env.executeDdl(String.format("CREATE EXTERNAL TABLE PersonInfo %s TYPE parquet LOCATION '%s'", FIELD_NAMES, destinationFile.getAbsolutePath()));
    BeamSqlRelUtils.toPCollection(writePipeline, env.parseQuery("INSERT INTO PersonInfo VALUES ('Alan', 22, 'England'), ('John', 42, 'USA')"));
    writePipeline.run().waitUntilFinish();
    PCollection<Row> rows = BeamSqlRelUtils.toPCollection(readPipeline, env.parseQuery("SELECT * FROM PersonInfo"));
    PAssert.that(rows).containsInAnyOrder(Row.withSchema(TABLE_SCHEMA).addValues("Alan", 22L, "England").build(), Row.withSchema(TABLE_SCHEMA).addValues("John", 42L, "USA").build());
    PCollection<Row> filtered = BeamSqlRelUtils.toPCollection(readPipeline, env.parseQuery("SELECT * FROM PersonInfo WHERE age > 25"));
    PAssert.that(filtered).containsInAnyOrder(Row.withSchema(TABLE_SCHEMA).addValues("John", 42L, "USA").build());
    PCollection<Row> projected = BeamSqlRelUtils.toPCollection(readPipeline, env.parseQuery("SELECT age, country FROM PersonInfo"));
    PAssert.that(projected).containsInAnyOrder(Row.withSchema(PROJECTED_SCHEMA).addValues(22L, "England").build(), Row.withSchema(PROJECTED_SCHEMA).addValues(42L, "USA").build());
    PCollection<Row> filteredAndProjected = BeamSqlRelUtils.toPCollection(readPipeline, env.parseQuery("SELECT age, country FROM PersonInfo WHERE age > 25"));
    PAssert.that(filteredAndProjected).containsInAnyOrder(Row.withSchema(PROJECTED_SCHEMA).addValues(42L, "USA").build());
    PipelineResult.State state = readPipeline.run().waitUntilFinish();
    assertEquals(State.DONE, state);
}
Also used : State(org.apache.beam.sdk.PipelineResult.State) PipelineResult(org.apache.beam.sdk.PipelineResult) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) File(java.io.File) Test(org.junit.Test)

Example 12 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class BigQueryReadWriteIT method testSQLTypes.

@Test
public void testSQLTypes() {
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());
    String createTableStatement = "CREATE EXTERNAL TABLE TEST( \n" + "   c_bigint BIGINT, \n" + "   c_tinyint TINYINT, \n" + "   c_smallint SMALLINT, \n" + "   c_integer INTEGER, \n" + "   c_float FLOAT, \n" + "   c_double DOUBLE, \n" + "   c_boolean BOOLEAN, \n" + "   c_timestamp TIMESTAMP, \n" + "   c_varchar VARCHAR, \n " + "   c_char CHAR, \n" + "   c_arr ARRAY<VARCHAR> \n" + ") \n" + "TYPE 'bigquery' \n" + "LOCATION '" + bigQueryTestingTypes.tableSpec() + "'";
    sqlEnv.executeDdl(createTableStatement);
    String insertStatement = "INSERT INTO TEST VALUES (" + "9223372036854775807, " + "127, " + "32767, " + "2147483647, " + "1.0, " + "1.0, " + "TRUE, " + "TIMESTAMP '2018-05-28 20:17:40.123', " + "'varchar', " + "'char', " + "ARRAY['123', '456']" + ")";
    sqlEnv.parseQuery(insertStatement);
    BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
    pipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    assertThat(bigQueryTestingTypes.getFlatJsonRows(SOURCE_SCHEMA_TWO), containsInAnyOrder(row(SOURCE_SCHEMA_TWO, 9223372036854775807L, (byte) 127, (short) 32767, 2147483647, (float) 1.0, 1.0, true, parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"), "varchar", "char", Arrays.asList("123", "456"))));
}
Also used : BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Test(org.junit.Test)

Example 13 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class BigQueryReadWriteIT method testSQLRead_withDirectRead_withProjectPushDown.

@Test
public void testSQLRead_withDirectRead_withProjectPushDown() {
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());
    String createTableStatement = "CREATE EXTERNAL TABLE TEST( \n" + "   c_bigint BIGINT, \n" + "   c_tinyint TINYINT, \n" + "   c_smallint SMALLINT, \n" + "   c_integer INTEGER, \n" + "   c_float FLOAT, \n" + "   c_double DOUBLE, \n" + "   c_boolean BOOLEAN, \n" + "   c_timestamp TIMESTAMP, \n" + "   c_varchar VARCHAR, \n " + "   c_char CHAR, \n" + "   c_arr ARRAY<VARCHAR> \n" + ") \n" + "TYPE 'bigquery' \n" + "LOCATION '" + bigQueryTestingTypes.tableSpec() + "' \n" + "TBLPROPERTIES " + "'{ " + METHOD_PROPERTY + ": \"" + Method.DIRECT_READ.toString() + "\" }'";
    sqlEnv.executeDdl(createTableStatement);
    String insertStatement = "INSERT INTO TEST VALUES (" + "9223372036854775807, " + "127, " + "32767, " + "2147483647, " + "1.0, " + "1.0, " + "TRUE, " + "TIMESTAMP '2018-05-28 20:17:40.123', " + "'varchar', " + "'char', " + "ARRAY['123', '456']" + ")";
    sqlEnv.parseQuery(insertStatement);
    BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
    pipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    String selectTableStatement = "SELECT c_integer, c_varchar, c_tinyint FROM TEST";
    BeamRelNode relNode = sqlEnv.parseQuery(selectTableStatement);
    PCollection<Row> output = BeamSqlRelUtils.toPCollection(readPipeline, relNode);
    assertThat(relNode, instanceOf(BeamPushDownIOSourceRel.class));
    // IO projects fields in the same order they are defined in the schema.
    assertThat(relNode.getRowType().getFieldNames(), containsInAnyOrder("c_tinyint", "c_integer", "c_varchar"));
    // Field reordering is done in a Calc
    assertThat(output.getSchema(), equalTo(Schema.builder().addNullableField("c_integer", INT32).addNullableField("c_varchar", STRING).addNullableField("c_tinyint", BYTE).build()));
    PAssert.that(output).containsInAnyOrder(row(output.getSchema(), 2147483647, "varchar", (byte) 127));
    PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    assertThat(state, equalTo(State.DONE));
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamPushDownIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel) State(org.apache.beam.sdk.PipelineResult.State) PipelineResult(org.apache.beam.sdk.PipelineResult) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 14 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class BigQueryReadWriteIT method testSQLWriteAndRead.

@Test
public void testSQLWriteAndRead() {
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());
    String createTableStatement = "CREATE EXTERNAL TABLE TEST( \n" + "   c_bigint BIGINT, \n" + "   c_tinyint TINYINT, \n" + "   c_smallint SMALLINT, \n" + "   c_integer INTEGER, \n" + "   c_float FLOAT, \n" + "   c_double DOUBLE, \n" + "   c_boolean BOOLEAN, \n" + "   c_timestamp TIMESTAMP, \n" + "   c_varchar VARCHAR, \n " + "   c_char CHAR, \n" + "   c_arr ARRAY<VARCHAR> \n" + ") \n" + "TYPE 'bigquery' \n" + "LOCATION '" + bigQueryTestingTypes.tableSpec() + "'";
    sqlEnv.executeDdl(createTableStatement);
    String insertStatement = "INSERT INTO TEST VALUES (" + "9223372036854775807, " + "127, " + "32767, " + "2147483647, " + "1.0, " + "1.0, " + "TRUE, " + "TIMESTAMP '2018-05-28 20:17:40.123', " + "'varchar', " + "'char', " + "ARRAY['123', '456']" + ")";
    sqlEnv.parseQuery(insertStatement);
    BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
    pipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    String selectTableStatement = "SELECT * FROM TEST";
    PCollection<Row> output = BeamSqlRelUtils.toPCollection(readPipeline, sqlEnv.parseQuery(selectTableStatement));
    PAssert.that(output).containsInAnyOrder(row(SOURCE_SCHEMA_TWO, 9223372036854775807L, (byte) 127, (short) 32767, 2147483647, (float) 1.0, 1.0, true, parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"), "varchar", "char", Arrays.asList("123", "456")));
    PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    assertThat(state, equalTo(State.DONE));
}
Also used : State(org.apache.beam.sdk.PipelineResult.State) PipelineResult(org.apache.beam.sdk.PipelineResult) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 15 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class BigQueryReadWriteIT method testInsertSelect.

@Test
public void testInsertSelect() throws Exception {
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider(pipeline, "ORDERS_IN_MEMORY", row(SOURCE_SCHEMA, 1L, "foo", Arrays.asList("111", "aaa")), row(SOURCE_SCHEMA, 2L, "bar", Arrays.asList("222", "bbb")), row(SOURCE_SCHEMA, 3L, "baz", Arrays.asList("333", "ccc"))), new BigQueryTableProvider());
    String createTableStatement = "CREATE EXTERNAL TABLE ORDERS_BQ( \n" + "   id BIGINT, \n" + "   name VARCHAR, \n " + "   arr ARRAY<VARCHAR> \n" + ") \n" + "TYPE 'bigquery' \n" + "LOCATION '" + bigQuery.tableSpec() + "'";
    sqlEnv.executeDdl(createTableStatement);
    String insertStatement = "INSERT INTO ORDERS_BQ \n" + " SELECT \n" + "    id as `id`, \n" + "    name as `name`, \n" + "    arr as `arr` \n" + " FROM ORDERS_IN_MEMORY";
    BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
    pipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    List<Row> allJsonRows = bigQuery.getFlatJsonRows(SOURCE_SCHEMA);
    assertThat(allJsonRows, containsInAnyOrder(row(SOURCE_SCHEMA, 1L, "foo", Arrays.asList("111", "aaa")), row(SOURCE_SCHEMA, 2L, "bar", Arrays.asList("222", "bbb")), row(SOURCE_SCHEMA, 3L, "baz", Arrays.asList("333", "ccc"))));
}
Also used : BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Aggregations

BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv)61 Test (org.junit.Test)54 Row (org.apache.beam.sdk.values.Row)36 TestTableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider)16 PipelineResult (org.apache.beam.sdk.PipelineResult)10 State (org.apache.beam.sdk.PipelineResult.State)10 Schema (org.apache.beam.sdk.schemas.Schema)10 TableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider)8 Arrays (java.util.Arrays)6 List (java.util.List)6 Map (java.util.Map)6 ExecutorService (java.util.concurrent.ExecutorService)6 Collectors (java.util.stream.Collectors)6 PayloadMessages (org.apache.beam.sdk.extensions.protobuf.PayloadMessages)6 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 IOException (java.io.IOException)5 Serializable (java.io.Serializable)5 StandardCharsets (java.nio.charset.StandardCharsets)5