Search in sources:

Example 56 with BeamRelNode

use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode in project beam by apache.

the class BigQueryReadWriteIT method testSQLRead_withDirectRead_withProjectPushDown.

/**
 * Writes a single row into an external table declared with the {@code DIRECT_READ} method
 * property, then reads three of its columns back through SQL.
 *
 * <p>The test asserts that the planned root node is a {@link BeamPushDownIOSourceRel}, that the
 * plan exposes exactly the three selected fields, that the output schema lists them in the order
 * of the SELECT clause, and that the read pipeline finishes in {@code State.DONE}.
 */
@Test
public void testSQLRead_withDirectRead_withProjectPushDown() {
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());
    String createTableStatement =
        "CREATE EXTERNAL TABLE TEST( \n"
            + "   c_bigint BIGINT, \n"
            + "   c_tinyint TINYINT, \n"
            + "   c_smallint SMALLINT, \n"
            + "   c_integer INTEGER, \n"
            + "   c_float FLOAT, \n"
            + "   c_double DOUBLE, \n"
            + "   c_boolean BOOLEAN, \n"
            + "   c_timestamp TIMESTAMP, \n"
            + "   c_varchar VARCHAR, \n "
            + "   c_char CHAR, \n"
            + "   c_arr ARRAY<VARCHAR> \n"
            + ") \n"
            + "TYPE 'bigquery' \n"
            + "LOCATION '"
            + bigQueryTestingTypes.tableSpec()
            + "' \n"
            + "TBLPROPERTIES "
            + "'{ "
            + METHOD_PROPERTY
            + ": \""
            + Method.DIRECT_READ.toString()
            + "\" }'";
    sqlEnv.executeDdl(createTableStatement);

    String insertStatement =
        "INSERT INTO TEST VALUES ("
            + "9223372036854775807, "
            + "127, "
            + "32767, "
            + "2147483647, "
            + "1.0, "
            + "1.0, "
            + "TRUE, "
            + "TIMESTAMP '2018-05-28 20:17:40.123', "
            + "'varchar', "
            + "'char', "
            + "ARRAY['123', '456']"
            + ")";
    // Parse the INSERT exactly once; the parsed plan is applied to the write pipeline directly.
    BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
    pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

    String selectTableStatement = "SELECT c_integer, c_varchar, c_tinyint FROM TEST";
    BeamRelNode relNode = sqlEnv.parseQuery(selectTableStatement);
    PCollection<Row> output = BeamSqlRelUtils.toPCollection(readPipeline, relNode);

    assertThat(relNode, instanceOf(BeamPushDownIOSourceRel.class));
    // IO projects fields in the same order they are defined in the schema.
    assertThat(
        relNode.getRowType().getFieldNames(),
        containsInAnyOrder("c_tinyint", "c_integer", "c_varchar"));
    // Field reordering is done in a Calc
    assertThat(
        output.getSchema(),
        equalTo(
            Schema.builder()
                .addNullableField("c_integer", INT32)
                .addNullableField("c_varchar", STRING)
                .addNullableField("c_tinyint", BYTE)
                .build()));

    PAssert.that(output)
        .containsInAnyOrder(row(output.getSchema(), 2147483647, "varchar", (byte) 127));
    PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    assertThat(state, equalTo(State.DONE));
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamPushDownIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel) State(org.apache.beam.sdk.PipelineResult.State) PipelineResult(org.apache.beam.sdk.PipelineResult) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 57 with BeamRelNode

use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode in project beam by apache.

the class TestTableProviderWithFilterAndProjectPushDown method testIOSourceRel_predicateSimple.

/**
 * Runs a single-column SELECT with an equality predicate on {@code id} and checks that the
 * planned root is a {@link BeamIOSourceRel}, that the output schema holds only {@code name},
 * and that the one expected row is produced.
 */
@Test
public void testIOSourceRel_predicateSimple() {
    final String query = "SELECT name FROM TEST where id=2";
    final BeamRelNode planRoot = sqlEnv.parseQuery(query);
    final PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipeline, planRoot);

    // The root of the plan itself must be the IO source node.
    assertThat(planRoot, instanceOf(BeamIOSourceRel.class));

    final Schema expectedSchema = Schema.builder().addStringField("name").build();
    assertEquals(expectedSchema, rows.getSchema());

    PAssert.that(rows).containsInAnyOrder(row(rows.getSchema(), "two"));

    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 58 with BeamRelNode

use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode in project beam by apache.

the class TestTableProviderWithFilterAndProjectPushDown method testIOSourceRel_selectFieldsInRandomOrder_withRename_withSupportedFilter.

/**
 * Selects three renamed columns in a non-schema order with a filter on {@code id} and checks
 * that the planned root is a {@link BeamIOSourceRel} exposing the renamed fields, that the
 * output schema matches the SELECT order, and that the one expected row is produced.
 */
@Test
public void testIOSourceRel_selectFieldsInRandomOrder_withRename_withSupportedFilter() {
    final String query =
        "SELECT name as new_name, id as new_id, unused1 as new_unused1 FROM TEST where 1<id";
    final BeamRelNode planRoot = sqlEnv.parseQuery(query);
    final PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipeline, planRoot);

    assertThat(planRoot, instanceOf(BeamIOSourceRel.class));

    // Verify project push-down: the root node's row type carries the renamed fields.
    final List<String> projectedNames = planRoot.getRowType().getFieldNames();
    assertThat(projectedNames, containsInAnyOrder("new_name", "new_id", "new_unused1"));

    final Schema expectedSchema =
        Schema.builder()
            .addStringField("new_name")
            .addInt32Field("new_id")
            .addInt32Field("new_unused1")
            .build();
    assertEquals(expectedSchema, rows.getSchema());

    PAssert.that(rows).containsInAnyOrder(row(rows.getSchema(), "two", 2, 200));

    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 59 with BeamRelNode

use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode in project beam by apache.

the class TestTableProviderWithFilterAndProjectPushDown method testIOSourceRel_selectOneFieldsMoreThanOnce.

/**
 * Selects the same column {@code b} five times and checks that the plan keeps a
 * {@link BeamCalcRel} on top of a {@link BeamIOSourceRel}, that the source exposes only
 * {@code b}, and that the output schema has five boolean fields named {@code b}, {@code b0},
 * {@code b1}, {@code b2} and {@code b3}.
 */
@Test
public void testIOSourceRel_selectOneFieldsMoreThanOnce() {
    final String query = "SELECT b, b, b, b, b FROM TEST";
    final BeamRelNode planRoot = sqlEnv.parseQuery(query);
    final PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipeline, planRoot);

    // The Calc has to survive planning, sitting directly above the IO source.
    assertThat(planRoot, instanceOf(BeamCalcRel.class));
    assertThat(planRoot.getInput(0), instanceOf(BeamIOSourceRel.class));

    // Verify project push-down: the source node exposes just the one selected field.
    final List<String> sourceFieldNames = planRoot.getInput(0).getRowType().getFieldNames();
    assertThat(sourceFieldNames, containsInAnyOrder("b"));

    final Schema expectedSchema =
        Schema.builder()
            .addBooleanField("b")
            .addBooleanField("b0")
            .addBooleanField("b1")
            .addBooleanField("b2")
            .addBooleanField("b3")
            .build();
    assertEquals(expectedSchema, rows.getSchema());

    PAssert.that(rows)
        .containsInAnyOrder(
            row(rows.getSchema(), true, true, true, true, true),
            row(rows.getSchema(), false, false, false, false, false));

    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : BeamCalcRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamCalcRel) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 60 with BeamRelNode

use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode in project beam by apache.

the class TestTableProviderWithFilterAndProjectPushDown method testIOSourceRel_withComplexProjects_withSupportedFilter.

/**
 * Selects a renamed column plus an arithmetic expression with a filter on {@code id} and
 * checks that the plan is a {@link BeamCalcRel} on top of a {@link BeamIOSourceRel}, that the
 * source exposes the three referenced fields, and that the one expected row is produced.
 */
@Test
public void testIOSourceRel_withComplexProjects_withSupportedFilter() {
    final String query =
        "SELECT name as new_name, unused1+10-id as new_id FROM TEST where 1<id";
    final BeamRelNode planRoot = sqlEnv.parseQuery(query);
    final PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipeline, planRoot);

    assertThat(planRoot, instanceOf(BeamCalcRel.class));
    assertThat(planRoot.getInput(0), instanceOf(BeamIOSourceRel.class));

    // Verify project push-down: the source node exposes the fields the query references.
    final List<String> sourceFieldNames = planRoot.getInput(0).getRowType().getFieldNames();
    assertThat(sourceFieldNames, containsInAnyOrder("name", "unused1", "id"));

    final Schema expectedSchema =
        Schema.builder().addStringField("new_name").addInt32Field("new_id").build();
    assertEquals(expectedSchema, rows.getSchema());

    PAssert.that(rows).containsInAnyOrder(row(rows.getSchema(), "two", 208));

    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : BeamCalcRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamCalcRel) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Aggregations

BeamRelNode (org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode): 246; Test (org.junit.Test): 241; Row (org.apache.beam.sdk.values.Row): 207; Schema (org.apache.beam.sdk.schemas.Schema): 54; BeamIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel): 38; ByteString (com.google.protobuf.ByteString): 24; BeamCalcRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamCalcRel): 22; Matchers.containsString (org.hamcrest.Matchers.containsString): 13; Value (com.google.zetasql.Value): 9; DateTime (org.joda.time.DateTime): 8; BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv): 7; BeamPushDownIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel): 6; Calc (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Calc): 6; PipelineResult (org.apache.beam.sdk.PipelineResult): 5; TestTableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider): 5; DateTimeUtils.parseDateToValue (org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.parseDateToValue): 5; DateTimeUtils.parseTimeToValue (org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.parseTimeToValue): 5; DateTimeUtils.parseTimestampWithTZToValue (org.apache.beam.sdk.extensions.sql.zetasql.DateTimeUtils.parseTimestampWithTZToValue): 5; Ignore (org.junit.Ignore): 5; BeamAggregationRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamAggregationRel): 4