Search in sources :

Example 31 with BeamSqlTable

Use of org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable in the Apache Beam project.

From the class KafkaTableProvider, the method buildBeamSqlTable:

/**
 * Builds the Kafka-backed {@link BeamSqlTable} described by {@code table}.
 *
 * <p>Topics and bootstrap servers are gathered by handing the parsed table location (present only
 * when the table has a non-empty location) together with the {@code "topics"} and
 * {@code "bootstrap_servers"} property arrays to {@code mergeParam}; the resulting servers are
 * joined with commas. The optional {@code "format"} property selects the payload format.
 *
 * <ul>
 *   <li>Nested schema: a {@code NestedPayloadKafkaTable}, with a serializer only if a format was
 *       supplied.
 *   <li>No format, or {@code "csv"}: a {@code BeamKafkaCSVTable}.
 *   <li>Any other format: a {@code PayloadSerializerKafkaTable} using that format's serializer.
 * </ul>
 *
 * @param table the table definition supplying schema, properties and location
 * @return the table implementation matching the schema shape and payload format
 */
@Override
public BeamSqlTable buildBeamSqlTable(Table table) {
    Schema tableSchema = table.getSchema();
    JSONObject props = table.getProperties();

    // A location is optional; only parse it when one was actually provided.
    Optional<ParsedLocation> location =
        Strings.isNullOrEmpty(table.getLocation())
            ? Optional.empty()
            : Optional.of(parseLocation(checkArgumentNotNull(table.getLocation())));

    List<String> topicNames =
        mergeParam(location.map(parsed -> parsed.topic), props.getJSONArray("topics"));
    List<String> servers =
        mergeParam(
            location.map(parsed -> parsed.brokerLocation),
            props.getJSONArray("bootstrap_servers"));
    String joinedServers = String.join(",", servers);

    Optional<String> formatName;
    if (props.containsKey("format")) {
        formatName = Optional.of(props.getString("format"));
    } else {
        formatName = Optional.empty();
    }

    if (Schemas.isNestedSchema(tableSchema)) {
        // The serializer is created lazily: nothing is looked up unless a format was given.
        Optional<PayloadSerializer> nestedSerializer =
            formatName.map(
                fmt -> {
                    Schema payloadSchema =
                        checkArgumentNotNull(
                            tableSchema.getField(PAYLOAD_FIELD).getType().getRowSchema());
                    return PayloadSerializers.getSerializer(
                        fmt, payloadSchema, props.getInnerMap());
                });
        return new NestedPayloadKafkaTable(
            tableSchema, joinedServers, topicNames, nestedSerializer);
    }

    /*
     * CSV gets its own code path because a single message may yield multiple rows, which
     * complicates payload extraction. It stays here, and stays the default, because it is the
     * historical default; it will not be extended to support attaching extended attributes to
     * rows.
     */
    if ("csv".equals(formatName.orElse("csv"))) {
        return new BeamKafkaCSVTable(tableSchema, joinedServers, topicNames);
    }

    // Reaching this point means a format is present and it is not "csv".
    PayloadSerializer flatSerializer =
        PayloadSerializers.getSerializer(formatName.get(), tableSchema, props.getInnerMap());
    return new PayloadSerializerKafkaTable(
        tableSchema, joinedServers, topicNames, flatSerializer);
}
Also used : InMemoryMetaTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.InMemoryMetaTableProvider) Splitter(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter) Schema(org.apache.beam.sdk.schemas.Schema) PayloadSerializer(org.apache.beam.sdk.schemas.io.payloads.PayloadSerializer) PAYLOAD_FIELD(org.apache.beam.sdk.extensions.sql.meta.provider.kafka.Schemas.PAYLOAD_FIELD) Preconditions.checkArgumentNotNull(org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull) Table(org.apache.beam.sdk.extensions.sql.meta.Table) List(java.util.List) PayloadSerializers(org.apache.beam.sdk.schemas.io.payloads.PayloadSerializers) BeamSqlTable(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable) Strings(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings) AutoService(com.google.auto.service.AutoService) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Optional(java.util.Optional) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) JSONObject(com.alibaba.fastjson.JSONObject) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) Nullable(org.checkerframework.checker.nullness.qual.Nullable) PayloadSerializer(org.apache.beam.sdk.schemas.io.payloads.PayloadSerializer) Schema(org.apache.beam.sdk.schemas.Schema) JSONObject(com.alibaba.fastjson.JSONObject)

Aggregations

BeamSqlTable (org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable)31 Test (org.junit.Test)25 Table (org.apache.beam.sdk.extensions.sql.meta.Table)21 Schema (org.apache.beam.sdk.schemas.Schema)5 BeamTableStatistics (org.apache.beam.sdk.extensions.sql.impl.BeamTableStatistics)3 DefaultTableFilter (org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter)2 Row (org.apache.beam.sdk.values.Row)2 TCompactProtocol (org.apache.thrift.protocol.TCompactProtocol)2 JSONObject (com.alibaba.fastjson.JSONObject)1 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)1 TableRow (com.google.api.services.bigquery.model.TableRow)1 TableSchema (com.google.api.services.bigquery.model.TableSchema)1 AutoService (com.google.auto.service.AutoService)1 HashMap (java.util.HashMap)1 LinkedHashSet (java.util.LinkedHashSet)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1 PayloadMessages (org.apache.beam.sdk.extensions.protobuf.PayloadMessages)1 BeamSqlSeekableTable (org.apache.beam.sdk.extensions.sql.BeamSqlSeekableTable)1