Search in sources :

Example 46 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class ThreeTablesSchema method testSystemNotReorderingWithoutRules.

@Test
public void testSystemNotReorderingWithoutRules() {
    TestTableProvider tableProvider = new TestTableProvider();
    createThreeTables(tableProvider);
    List<RelOptRule> ruleSet = BeamRuleSets.getRuleSets().stream().flatMap(rules -> StreamSupport.stream(rules.spliterator(), false)).filter(rule -> !(rule instanceof BeamJoinPushThroughJoinRule)).filter(rule -> !(rule instanceof BeamJoinAssociateRule)).filter(rule -> !(rule instanceof JoinCommuteRule)).collect(Collectors.toList());
    BeamSqlEnv env = BeamSqlEnv.builder(tableProvider).setPipelineOptions(PipelineOptionsFactory.create()).setRuleSets(ImmutableList.of(RuleSets.ofList(ruleSet))).build();
    // This is Join(Join(medium, large), small) which should be converted to a join that large table
    // is on the top.
    BeamRelNode parsedQuery = env.parseQuery("select * from medium_table " + " JOIN large_table on large_table.medium_key = medium_table.large_key " + " JOIN small_table on medium_table.small_key = small_table.medium_key ");
    assertTopTableInJoins(parsedQuery, "small_table");
}
Also used : TestTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider) Linq4j(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.linq4j.Linq4j) RelNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode) Arrays(java.util.Arrays) AbstractTable(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.impl.AbstractTable) Enumerable(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.linq4j.Enumerable) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamRuleSets(org.apache.beam.sdk.extensions.sql.impl.planner.BeamRuleSets) Frameworks(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.Frameworks) RelFieldCollation(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelFieldCollation) ImmutableBitSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet) Planner(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.Planner) Map(java.util.Map) EnumerableConvention(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.adapter.enumerable.EnumerableConvention) JoinCommuteRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.rules.JoinCommuteRule) RelRoot(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelRoot) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) TestTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider) Collectors(java.util.stream.Collectors) Table(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table) TableScan(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.TableScan) Statistic(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Statistic) List(java.util.List) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) RelCollationTraitDef(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollationTraitDef) AbstractSchema(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.impl.AbstractSchema) RuleSets(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.RuleSets) RuleSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.RuleSet) Programs(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.Programs) RelDataTypeFactory(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataTypeFactory) FrameworkConfig(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.FrameworkConfig) RelCollations(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollations) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Statistics(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Statistics) RelOptRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptRule) Join(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join) ScannableTable(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.ScannableTable) StreamSupport(java.util.stream.StreamSupport) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) SqlNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlNode) SqlParser(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.parser.SqlParser) EnumerableRules(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.adapter.enumerable.EnumerableRules) Test(org.junit.Test) ConventionTraitDef(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.ConventionTraitDef) RelDataType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType) RelTraitSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelTraitSet) DataContext(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.DataContext) SchemaPlus(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.SchemaPlus) Assert(org.junit.Assert) CoreRules(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.rules.CoreRules) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) JoinCommuteRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.rules.JoinCommuteRule) RelOptRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptRule) Test(org.junit.Test)

Example 47 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class ThreeTablesSchema method testSystemReorderingMediumLargeSmall.

@Test
public void testSystemReorderingMediumLargeSmall() {
    TestTableProvider tableProvider = new TestTableProvider();
    createThreeTables(tableProvider);
    BeamSqlEnv env = BeamSqlEnv.withTableProvider(tableProvider);
    // This is Join(Join(medium, large), small) which should be converted to a join that large table
    // is on the top.
    BeamRelNode parsedQuery = env.parseQuery("select * from medium_table " + " JOIN large_table on large_table.medium_key = medium_table.large_key " + " JOIN small_table on medium_table.small_key = small_table.medium_key ");
    assertTopTableInJoins(parsedQuery, "large_table");
}
Also used : TestTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Test(org.junit.Test)

Example 48 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class ThreeTablesSchema method testSystemNotReorderingSmallMediumLarge.

@Test
public void testSystemNotReorderingSmallMediumLarge() {
    TestTableProvider tableProvider = new TestTableProvider();
    createThreeTables(tableProvider);
    BeamSqlEnv env = BeamSqlEnv.withTableProvider(tableProvider);
    // This is a correct ordered join because large table is on the top. It should not change that.
    BeamRelNode parsedQuery = env.parseQuery("select * from small_table " + " JOIN medium_table on medium_table.small_key = small_table.medium_key " + " JOIN large_table on large_table.medium_key = medium_table.large_key ");
    assertTopTableInJoins(parsedQuery, "large_table");
}
Also used : TestTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Test(org.junit.Test)

Example 49 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class BigQueryReadWriteIT method testSQLWrite.

@Test
public void testSQLWrite() {
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());
    String createTableStatement = "CREATE EXTERNAL TABLE TEST( \n" + "   c_bigint BIGINT, \n" + "   c_tinyint TINYINT, \n" + "   c_smallint SMALLINT, \n" + "   c_integer INTEGER, \n" + "   c_float FLOAT, \n" + "   c_double DOUBLE, \n" + "   c_boolean BOOLEAN, \n" + "   c_timestamp TIMESTAMP, \n" + "   c_varchar VARCHAR, \n " + "   c_char CHAR, \n" + "   c_arr ARRAY<VARCHAR> \n" + ") \n" + "TYPE 'bigquery' \n" + "LOCATION '" + bigQueryTestingTypes.tableSpec() + "'";
    sqlEnv.executeDdl(createTableStatement);
    String insertStatement = "INSERT INTO TEST VALUES (" + "9223372036854775807, " + "127, " + "32767, " + "2147483647, " + "1.0, " + "1.0, " + "TRUE, " + "TIMESTAMP '2018-05-28 20:17:40.123', " + "'varchar', " + "'char', " + "ARRAY['123', '456']" + ")";
    sqlEnv.parseQuery(insertStatement);
    BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
    pipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    bigQueryTestingTypes.assertThatAllRows(SOURCE_SCHEMA_TWO).now(containsInAnyOrder(row(SOURCE_SCHEMA_TWO, 9223372036854775807L, (byte) 127, (short) 32767, 2147483647, (float) 1.0, 1.0, true, parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"), "varchar", "char", Arrays.asList("123", "456"))));
}
Also used : BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Test(org.junit.Test)

Example 50 with BeamSqlEnv

use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.

the class BigQueryReadWriteIT method testSQLRead_withExport.

@Test
public void testSQLRead_withExport() throws IOException {
    bigQueryTestingTypes.insertRows(SOURCE_SCHEMA_TWO, row(SOURCE_SCHEMA_TWO, 9223372036854775807L, (byte) 127, (short) 32767, 2147483647, (float) 1.0, 1.0, true, parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"), "varchar", "char", Arrays.asList("123", "456")));
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());
    String createTableStatement = "CREATE EXTERNAL TABLE TEST( \n" + "   c_bigint BIGINT, \n" + "   c_tinyint TINYINT, \n" + "   c_smallint SMALLINT, \n" + "   c_integer INTEGER, \n" + "   c_float FLOAT, \n" + "   c_double DOUBLE, \n" + "   c_boolean BOOLEAN, \n" + "   c_timestamp TIMESTAMP, \n" + "   c_varchar VARCHAR, \n " + "   c_char CHAR, \n" + "   c_arr ARRAY<VARCHAR> \n" + ") \n" + "TYPE 'bigquery' \n" + "LOCATION '" + bigQueryTestingTypes.tableSpec() + "'" + "TBLPROPERTIES " + "'{ " + METHOD_PROPERTY + ": \"" + Method.EXPORT.toString() + "\" }'";
    sqlEnv.executeDdl(createTableStatement);
    String selectTableStatement = "SELECT * FROM TEST";
    PCollection<Row> output = BeamSqlRelUtils.toPCollection(readPipeline, sqlEnv.parseQuery(selectTableStatement));
    PAssert.that(output).containsInAnyOrder(row(SOURCE_SCHEMA_TWO, 9223372036854775807L, (byte) 127, (short) 32767, 2147483647, (float) 1.0, 1.0, true, parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"), "varchar", "char", Arrays.asList("123", "456")));
    PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
    assertThat(state, equalTo(State.DONE));
}
Also used : State(org.apache.beam.sdk.PipelineResult.State) PipelineResult(org.apache.beam.sdk.PipelineResult) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Aggregations

BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv)61 Test (org.junit.Test)54 Row (org.apache.beam.sdk.values.Row)36 TestTableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider)16 PipelineResult (org.apache.beam.sdk.PipelineResult)10 State (org.apache.beam.sdk.PipelineResult.State)10 Schema (org.apache.beam.sdk.schemas.Schema)10 TableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider)8 Arrays (java.util.Arrays)6 List (java.util.List)6 Map (java.util.Map)6 ExecutorService (java.util.concurrent.ExecutorService)6 Collectors (java.util.stream.Collectors)6 PayloadMessages (org.apache.beam.sdk.extensions.protobuf.PayloadMessages)6 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 IOException (java.io.IOException)5 Serializable (java.io.Serializable)5 StandardCharsets (java.nio.charset.StandardCharsets)5