Search in sources :

Example 11 with AND

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in project beam by apache.

the class PubsubTableProviderIT method testSQLSelectsArrayAttributes.

@Ignore("https://issues.apache.org/jira/browse/BEAM-12320")
@Test
public void testSQLSelectsArrayAttributes() throws Exception {
    String createTableString = String.format("CREATE EXTERNAL TABLE message (\n" + "event_timestamp TIMESTAMP, \n" + "attributes ARRAY<ROW<key VARCHAR, `value` VARCHAR>>, \n" + "payload ROW< \n" + "             id INTEGER, \n" + "             name VARCHAR \n" + "           > \n" + ") \n" + "TYPE '%s' \n" + "LOCATION '%s' \n" + "TBLPROPERTIES '{ " + "%s" + "\"protoClass\" : \"%s\", " + "\"timestampAttributeKey\" : \"ts\" }'", tableProvider.getTableType(), eventsTopic.topicPath(), payloadFormatParam(), PayloadMessages.SimpleMessage.class.getName());
    String queryString = "SELECT message.payload.id, attributes[1].key AS a1, attributes[2].key AS a2 FROM message";
    // Initialize SQL environment and create the pubsub table
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new PubsubTableProvider());
    sqlEnv.executeDdl(createTableString);
    // Apply the PTransform to query the pubsub topic
    PCollection<Row> queryOutput = query(sqlEnv, pipeline, queryString);
    // Observe the query results and send success signal after seeing the expected messages
    queryOutput.apply("waitForSuccess", resultSignal.signalSuccessWhen(SchemaCoder.of(PAYLOAD_SCHEMA), observedRows -> {
        Map<Integer, String> entries = new HashMap<>();
        for (Row row : observedRows) {
            if ("ts".equals(row.getString("a1"))) {
                entries.put(row.getInt32("id"), row.getString("a2"));
            } else {
                entries.put(row.getInt32("id"), row.getString("a1"));
            }
        }
        LOG.info("Entries: {}", entries);
        return entries.equals(ImmutableMap.of(3, "foo", 5, "bar", 7, "baz"));
    }));
    // Start the pipeline
    pipeline.run();
    // Block until a subscription for this topic exists
    eventsTopic.assertSubscriptionEventuallyCreated(pipeline.getOptions().as(GcpOptions.class).getProject(), Duration.standardMinutes(5));
    // Start publishing the messages when main pipeline is started and signaling topic is ready
    eventsTopic.publish(ImmutableList.of(objectsProvider.messageIdName(ts(1), 3, "foo"), objectsProvider.messageIdName(ts(2), 5, "bar"), objectsProvider.messageIdName(ts(3), 7, "baz")));
    // Poll the signaling topic for success message
    resultSignal.waitForSuccess(timeout);
}
Also used : Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Future(java.util.concurrent.Future) TestPubsub(org.apache.beam.sdk.io.gcp.pubsub.TestPubsub) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestPubsubSignal(org.apache.beam.sdk.io.gcp.pubsub.TestPubsubSignal) Parameterized(org.junit.runners.Parameterized) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Matchers.allOf(org.hamcrest.Matchers.allOf) Collection(java.util.Collection) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) SchemaIOTableProviderWrapper(org.apache.beam.sdk.extensions.sql.meta.provider.SchemaIOTableProviderWrapper) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) ImmutableSet(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableSet) Serializable(java.io.Serializable) List(java.util.List) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JdbcDriver(org.apache.beam.sdk.extensions.sql.impl.JdbcDriver) ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) JsonMatcher.jsonBytesLike(org.apache.beam.sdk.testing.JsonMatcher.jsonBytesLike) ByteArrayOutputStream(java.io.ByteArrayOutputStream) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) Matchers.hasProperty(org.hamcrest.Matchers.hasProperty) BeamSqlRelUtils(org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) AvroUtils(org.apache.beam.sdk.schemas.utils.AvroUtils) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Parameter(org.junit.runners.Parameterized.Parameter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JdbcConnection(org.apache.beam.sdk.extensions.sql.impl.JdbcConnection) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Schema(org.apache.beam.sdk.schemas.Schema) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) PayloadMessages(org.apache.beam.sdk.extensions.protobuf.PayloadMessages) Rule(org.junit.Rule) Ignore(org.junit.Ignore) Matcher(org.hamcrest.Matcher) Instant(org.joda.time.Instant) Statement(java.sql.Statement) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) CalciteConnection(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.jdbc.CalciteConnection) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Map(java.util.Map) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 12 with AND

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in project beam by apache.

the class PubsubTableProviderIT method testUsesDlq.

@Test
@SuppressWarnings("unchecked")
public void testUsesDlq() throws Exception {
    String createTableString = String.format("CREATE EXTERNAL TABLE message (\n" + "event_timestamp TIMESTAMP, \n" + "attributes MAP<VARCHAR, VARCHAR>, \n" + "payload ROW< \n" + "             id INTEGER, \n" + "             name VARCHAR \n" + "           > \n" + ") \n" + "TYPE '%s' \n" + "LOCATION '%s' \n" + "TBLPROPERTIES " + "    '{ " + "       %s" + "       \"timestampAttributeKey\" : \"ts\", " + "       \"deadLetterQueue\" : \"%s\", " + "       \"protoClass\" : \"%s\" " + "     }'", tableProvider.getTableType(), eventsTopic.topicPath(), payloadFormatParam(), dlqTopic.topicPath(), PayloadMessages.SimpleMessage.class.getName());
    String queryString = "SELECT message.payload.id, message.payload.name from message";
    // Initialize SQL environment and create the pubsub table
    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new PubsubTableProvider());
    sqlEnv.executeDdl(createTableString);
    // Apply the PTransform to query the pubsub topic
    PCollection<Row> queryOutput = query(sqlEnv, pipeline, queryString);
    // Observe the query results and send success signal after seeing the expected messages
    queryOutput.apply("waitForSuccess", resultSignal.signalSuccessWhen(SchemaCoder.of(PAYLOAD_SCHEMA), observedRows -> observedRows.equals(ImmutableSet.of(row(PAYLOAD_SCHEMA, 3, "foo"), row(PAYLOAD_SCHEMA, 5, "bar"), row(PAYLOAD_SCHEMA, 7, "baz")))));
    // Start the pipeline
    pipeline.run();
    // Block until a subscription for this topic exists
    eventsTopic.assertSubscriptionEventuallyCreated(pipeline.getOptions().as(GcpOptions.class).getProject(), Duration.standardMinutes(5));
    // Start publishing the messages when main pipeline is started and signaling topics are ready
    eventsTopic.publish(ImmutableList.of(objectsProvider.messageIdName(ts(1), 3, "foo"), objectsProvider.messageIdName(ts(2), 5, "bar"), objectsProvider.messageIdName(ts(3), 7, "baz"), // invalid message, will go to DLQ
    messagePayload(ts(4), "{ - }", ImmutableMap.of()), // invalid message, will go to DLQ
    messagePayload(ts(5), "{ + }", ImmutableMap.of())));
    // Poll the signaling topic for success message
    resultSignal.waitForSuccess(timeout);
    dlqTopic.assertThatTopicEventuallyReceives(matcherPayload(ts(4), "{ - }"), matcherPayload(ts(5), "{ + }")).waitForUpTo(Duration.standardSeconds(40));
}
Also used : Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Future(java.util.concurrent.Future) TestPubsub(org.apache.beam.sdk.io.gcp.pubsub.TestPubsub) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestPubsubSignal(org.apache.beam.sdk.io.gcp.pubsub.TestPubsubSignal) Parameterized(org.junit.runners.Parameterized) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Matchers.allOf(org.hamcrest.Matchers.allOf) Collection(java.util.Collection) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) SchemaIOTableProviderWrapper(org.apache.beam.sdk.extensions.sql.meta.provider.SchemaIOTableProviderWrapper) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) ImmutableSet(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableSet) Serializable(java.io.Serializable) List(java.util.List) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JdbcDriver(org.apache.beam.sdk.extensions.sql.impl.JdbcDriver) ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) JsonMatcher.jsonBytesLike(org.apache.beam.sdk.testing.JsonMatcher.jsonBytesLike) ByteArrayOutputStream(java.io.ByteArrayOutputStream) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) Matchers.hasProperty(org.hamcrest.Matchers.hasProperty) BeamSqlRelUtils(org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) AvroUtils(org.apache.beam.sdk.schemas.utils.AvroUtils) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Parameter(org.junit.runners.Parameterized.Parameter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JdbcConnection(org.apache.beam.sdk.extensions.sql.impl.JdbcConnection) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Schema(org.apache.beam.sdk.schemas.Schema) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) PayloadMessages(org.apache.beam.sdk.extensions.protobuf.PayloadMessages) Rule(org.junit.Rule) Ignore(org.junit.Ignore) Matcher(org.hamcrest.Matcher) Instant(org.joda.time.Instant) Statement(java.sql.Statement) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) CalciteConnection(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.jdbc.CalciteConnection) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 13 with AND

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in project beam by apache.

the class BeamZetaSqlCatalog method addTableToLeafCatalog.

/**
 * Assume last element in tablePath is a table name, and everything before is catalogs. So the
 * logic is to create nested catalogs until the last level, then add a table at the last level.
 *
 * <p>Table schema is extracted from Calcite schema based on the table name resolution strategy,
 * e.g. either by drilling down the schema.getSubschema() path or joining the table name with dots
 * to construct a single compound identifier (e.g. Data Catalog use case).
 */
private void addTableToLeafCatalog(List<String> tablePath, QueryTrait queryTrait) {
    SimpleCatalog leafCatalog = createNestedCatalogs(zetaSqlCatalog, tablePath);
    org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table calciteTable = TableResolution.resolveCalciteTable(calciteSchema, tablePath);
    if (calciteTable == null) {
        throw new ZetaSqlException("Wasn't able to resolve the path " + tablePath + " in schema: " + calciteSchema.getName());
    }
    RelDataType rowType = calciteTable.getRowType(typeFactory);
    TableResolution.SimpleTableWithPath tableWithPath = TableResolution.SimpleTableWithPath.of(tablePath);
    queryTrait.addResolvedTable(tableWithPath);
    addFieldsToTable(tableWithPath, rowType);
    leafCatalog.addSimpleTable(tableWithPath.getTable());
}
Also used : SimpleCatalog(com.google.zetasql.SimpleCatalog) RelDataType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType)

Example 14 with AND

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in project beam by apache.

the class JdbcDriverTest method testDriverManager_simple.

@Test
public void testDriverManager_simple() throws Exception {
    Connection connection = DriverManager.getConnection(JdbcDriver.CONNECT_STRING_PREFIX);
    Statement statement = connection.createStatement();
    // SELECT 1 is a special case and does not reach the parser
    assertTrue(statement.execute("SELECT 1"));
}
Also used : Statement(java.sql.Statement) Connection(java.sql.Connection) CalciteConnection(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.jdbc.CalciteConnection) Test(org.junit.Test)

Example 15 with AND

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in project beam by apache.

the class BeamIOPushDownRule method canDropCalc.

/**
 * Perform a series of checks to determine whether a Calc can be dropped. Following conditions
 * need to be met in order for that to happen (logical AND):<br>
 * 1. Program should do simple projects, project each field once, and project fields in the same
 * order when field reordering is not supported.<br>
 * 2. Predicate can be completely pushed-down.<br>
 * 3. Project push-down is supported by the IO or all fields are projected by a Calc.
 *
 * @param program A {@code RexProgram} of a {@code Calc}.
 * @param projectSupport An enum containing information about IO project push-down capabilities.
 * @param tableFilter A class containing information about IO predicate push-down capabilities.
 * @return True when Calc can be dropped, false otherwise.
 */
private boolean canDropCalc(RexProgram program, ProjectSupport projectSupport, BeamSqlTableFilter tableFilter) {
    RelDataType calcInputRowType = program.getInputRowType();
    // Program should do simple projects, project each field once, and project fields in the same
    // order when field reordering is not supported.
    boolean fieldReorderingSupported = projectSupport.withFieldReordering();
    if (!isProjectRenameOnlyProgram(program, fieldReorderingSupported)) {
        return false;
    }
    // Predicate can be completely pushed-down
    if (!tableFilter.getNotSupported().isEmpty()) {
        return false;
    }
    // Project push-down is supported by the IO or all fields are projected by a Calc.
    boolean isProjectSupported = projectSupport.isSupported();
    boolean allFieldsProjected = program.getProjectList().stream().map(ref -> program.getInputRowType().getFieldList().get(ref.getIndex()).getName()).collect(Collectors.toList()).equals(calcInputRowType.getFieldNames());
    return isProjectSupported || allFieldsProjected;
}
Also used : RelDataType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType)

Aggregations

RexNode (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode)16 ArrayList (java.util.ArrayList)11 Test (org.junit.Test)10 RelNode (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode)9 List (java.util.List)8 Collectors (java.util.stream.Collectors)8 Serializable (java.io.Serializable)7 Arrays (java.util.Arrays)7 RelDataType (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType)7 IOException (java.io.IOException)6 Statement (java.sql.Statement)6 HashMap (java.util.HashMap)6 Map (java.util.Map)6 Schema (org.apache.beam.sdk.schemas.Schema)6 PCollection (org.apache.beam.sdk.values.PCollection)6 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 StandardCharsets (java.nio.charset.StandardCharsets)5 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)5