Search in sources:

Example 11 with TableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider in project beam by apache.

In the class KafkaTableProviderIT, method testFakeNested.

/**
 * Creates an external Kafka table with a nested {@code payload} row, selects the three payload
 * fields through Beam SQL, and waits for the streaming assertion to report success.
 *
 * <p>The pipeline is started without blocking, three records are produced, and the success flag
 * keyed by the pipeline's options id is then polled up to 200 times at 90 ms intervals. The test
 * fails if the flag is never observed.
 *
 * @throws InterruptedException if the thread is interrupted while sleeping between polls
 */
@Test
public void testFakeNested() throws InterruptedException {
    // Skipped when the topic under test is the CSV one.
    Assume.assumeFalse(topic.equals("csv_topic"));
    // run() must return immediately so records can be produced while the pipeline is live.
    pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

    final String ddlTemplate =
        "CREATE EXTERNAL TABLE kafka_table(\n"
            + "headers ARRAY<ROW<key VARCHAR, `values` ARRAY<VARBINARY>>>,"
            + "payload ROW<"
            + "f_long BIGINT NOT NULL, \n"
            + "f_int INTEGER NOT NULL, \n"
            + "f_string VARCHAR NOT NULL \n"
            + ">"
            + ") \n"
            + "TYPE 'kafka' \n"
            + "LOCATION '%s'\n"
            + "TBLPROPERTIES '%s'";
    String ddl =
        String.format(ddlTemplate, buildLocation(), objectsProvider.getKafkaPropertiesString());

    BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new KafkaTableProvider());
    sqlEnv.executeDdl(ddl);

    PCollection<Row> rows =
        BeamSqlRelUtils.toPCollection(
            pipeline,
            sqlEnv.parseQuery(
                "SELECT kafka_table.payload.f_long, kafka_table.payload.f_int, kafka_table.payload.f_string FROM kafka_table"));

    // Branch 1: key the rows and compare them against the three expected generated rows.
    rows.apply(ParDo.of(new FakeKvPair()))
        .setCoder(KvCoder.of(StringUtf8Coder.of(), RowCoder.of(TEST_TABLE_SCHEMA)))
        .apply(
            "waitForSuccess",
            ParDo.of(
                new StreamAssertEqual(
                    ImmutableSet.of(generateRow(0), generateRow(1), generateRow(2)))));
    // Branch 2: log every record.
    rows.apply(logRecords(""));

    pipeline.run();
    // NOTE(review): presumably gives the pipeline time to start consuming before records are
    // produced - confirm.
    TimeUnit.SECONDS.sleep(4);
    produceSomeRecords(3);

    // Poll the success flag: at most 200 checks, 90 ms apart.
    for (int attemptsLeft = 200; attemptsLeft > 0; attemptsLeft--) {
        boolean succeeded = FLAG.getOrDefault(pipeline.getOptions().getOptionsId(), false);
        if (succeeded) {
            return;
        }
        TimeUnit.MILLISECONDS.sleep(90);
    }
    Assert.fail();
}
Also used : BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) Row(org.apache.beam.sdk.values.Row) DirectOptions(org.apache.beam.runners.direct.DirectOptions) Test(org.junit.Test)

Example 12 with TableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider in project beam by apache.

In the class SqlTransform, method expand.

/**
 * Builds a {@link BeamSqlEnv} around the tables derived from {@code input}, executes the
 * configured DDL statements, and converts the parsed query into a {@code PCollection<Row>}.
 *
 * @param input the pipeline input whose contents are exposed as tables under
 *     {@code PCOLLECTION_NAME}
 * @return the rows produced by the configured query
 */
@Override
public PCollection<Row> expand(PInput input) {
    // Expose the input as read-only tables and register them with a fresh in-memory store.
    TableProvider pcollectionTables =
        new ReadOnlyTableProvider(PCOLLECTION_NAME, toTableMap(input));
    InMemoryMetaStore metaStore = new InMemoryMetaStore();
    metaStore.registerProvider(pcollectionTables);

    BeamSqlEnvBuilder envBuilder = BeamSqlEnv.builder(metaStore);
    // TODO: validate duplicate functions.
    registerFunctions(envBuilder);

    // Auto-loaded providers are registered before the user-supplied ones below; presumably so
    // that a user provider reusing the same name surfaces as a conflict - confirm.
    if (autoLoading()) {
        envBuilder.autoLoadUserDefinedFunctions();
        for (TableProvider discovered : ServiceLoader.load(TableProvider.class)) {
            metaStore.registerProvider(discovered);
        }
    }

    tableProviderMap().forEach((schemaName, provider) -> envBuilder.addSchema(schemaName, provider));

    @Nullable final String currentSchema = defaultTableProvider();
    if (currentSchema != null) {
        envBuilder.setCurrentSchema(currentSchema);
    }

    // An explicitly configured planner wins; otherwise use the one named in the pipeline options.
    envBuilder.setQueryPlannerClassName(
        MoreObjects.firstNonNull(
            queryPlannerClassName(),
            input.getPipeline().getOptions().as(BeamSqlPipelineOptions.class).getPlannerName()));
    envBuilder.setPipelineOptions(input.getPipeline().getOptions());

    BeamSqlEnv env = envBuilder.build();
    // DDL runs before the query is parsed.
    ddlStrings().forEach(ddl -> env.executeDdl(ddl));

    return BeamSqlRelUtils.toPCollection(
        input.getPipeline(),
        env.parseQuery(queryString(), queryParameters()),
        errorsTransformer());
}
Also used : BeamSqlPipelineOptions(org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions) ReadOnlyTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.ReadOnlyTableProvider) BeamSqlEnvBuilder(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv.BeamSqlEnvBuilder) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) ReadOnlyTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.ReadOnlyTableProvider) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 13 with TableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider in project beam by apache.

In the class PubsubTableProviderIT, method connect.

/**
 * Opens a JDBC connection backed by an in-memory meta store that holds the given table
 * providers, and attaches the pipeline options to it as a string-to-string argument map.
 *
 * @param options the options handed to {@code JdbcDriver.connect}
 * @param tableProviders the providers to register with the in-memory meta store
 * @return the configured connection
 * @throws IllegalArgumentException if any option value is a non-empty list
 */
@SuppressWarnings("unchecked")
private CalciteConnection connect(PipelineOptions options, TableProvider... tableProviders) {
    // HACK: PipelineOptions should expose a prominent method to do this reliably
    // The actual options are in the "options" field of the converted map
    // NOTE(review): the map is built from pipeline.getOptions(), not from the "options"
    // parameter - confirm that callers always pass the same object.
    Map<String, Object> rawOptions =
        (Map<String, Object>) MAPPER.convertValue(pipeline.getOptions(), Map.class).get("options");

    Map<String, String> argsMap =
        rawOptions.entrySet().stream()
            .filter(
                option -> {
                    if (!(option.getValue() instanceof List)) {
                        return true;
                    }
                    if (!((List<?>) option.getValue()).isEmpty()) {
                        throw new IllegalArgumentException("Cannot encode list arguments");
                    }
                    // We can encode empty lists, just omit them.
                    return false;
                })
            .collect(Collectors.toMap(Map.Entry::getKey, option -> toArg(option.getValue())));

    InMemoryMetaStore metaStore = new InMemoryMetaStore();
    for (TableProvider provider : tableProviders) {
        metaStore.registerProvider(provider);
    }

    JdbcConnection jdbcConnection = JdbcDriver.connect(metaStore, options);
    jdbcConnection.setPipelineOptionsMap(argsMap);
    return jdbcConnection;
}
Also used : Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Future(java.util.concurrent.Future) TestPubsub(org.apache.beam.sdk.io.gcp.pubsub.TestPubsub) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestPubsubSignal(org.apache.beam.sdk.io.gcp.pubsub.TestPubsubSignal) Parameterized(org.junit.runners.Parameterized) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Matchers.allOf(org.hamcrest.Matchers.allOf) Collection(java.util.Collection) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) SchemaIOTableProviderWrapper(org.apache.beam.sdk.extensions.sql.meta.provider.SchemaIOTableProviderWrapper) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) ImmutableSet(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableSet) Serializable(java.io.Serializable) List(java.util.List) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JdbcDriver(org.apache.beam.sdk.extensions.sql.impl.JdbcDriver) ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) JsonMatcher.jsonBytesLike(org.apache.beam.sdk.testing.JsonMatcher.jsonBytesLike) ByteArrayOutputStream(java.io.ByteArrayOutputStream) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) 
Matchers.hasProperty(org.hamcrest.Matchers.hasProperty) BeamSqlRelUtils(org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) AvroUtils(org.apache.beam.sdk.schemas.utils.AvroUtils) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Parameter(org.junit.runners.Parameterized.Parameter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JdbcConnection(org.apache.beam.sdk.extensions.sql.impl.JdbcConnection) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Schema(org.apache.beam.sdk.schemas.Schema) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) PayloadMessages(org.apache.beam.sdk.extensions.protobuf.PayloadMessages) Rule(org.junit.Rule) Ignore(org.junit.Ignore) Matcher(org.hamcrest.Matcher) Instant(org.joda.time.Instant) Statement(java.sql.Statement) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) CalciteConnection(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.jdbc.CalciteConnection) List(java.util.List) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) JdbcConnection(org.apache.beam.sdk.extensions.sql.impl.JdbcConnection) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) Map(java.util.Map) 
ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore)

Aggregations

TableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider)13 Row (org.apache.beam.sdk.values.Row)11 Test (org.junit.Test)11 ReadOnlyTableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.ReadOnlyTableProvider)9 BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv)4 DirectOptions (org.apache.beam.runners.direct.DirectOptions)2 InMemoryMetaStore (org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore)2 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 StandardCharsets (java.nio.charset.StandardCharsets)1 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)1 ResultSet (java.sql.ResultSet)1 Statement (java.sql.Statement)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 List (java.util.List)1