Search in sources :

Example 11 with InMemoryMetaStore

use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.

From the class BeamSqlEnvRunner, the method runUsingBeamSqlEnv.

/**
 * This is the alternative method in BeamTpcds.main method. Run job using BeamSqlEnv.parseQuery()
 * method. (Doesn't perform well when running query96).
 *
 * @param args Command line arguments
 * @throws Exception if reading/validating the TPC-DS parameters or query files fails
 */
public static void runUsingBeamSqlEnv(String[] args) throws Exception {
    InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
    inMemoryMetaStore.registerProvider(new TextTableProvider());
    TpcdsOptions tpcdsOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
    String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
    String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
    int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);
    // Using ExecutorService and CompletionService to fulfill multi-threading functionality
    ExecutorService executor = Executors.newFixedThreadPool(nThreads);
    CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);
    // Directly create all tables and register them into inMemoryMetaStore before creating
    // BeamSqlEnv object.
    registerAllTablesByInMemoryMetaStore(inMemoryMetaStore, dataSize);
    BeamSqlPipelineOptions beamSqlPipelineOptions = tpcdsOptions.as(BeamSqlPipelineOptions.class);
    BeamSqlEnv env =
        BeamSqlEnv.builder(inMemoryMetaStore)
            .setPipelineOptions(beamSqlPipelineOptions)
            .setQueryPlannerClassName(beamSqlPipelineOptions.getPlannerName())
            .build();
    // Make an array of pipelines, each pipeline is responsible for running a corresponding query.
    Pipeline[] pipelines = new Pipeline[queryNames.length];
    // Each query's result is written as a .txt file under RESULT_DIRECTORY/<dataSize>/.
    for (int i = 0; i < queryNames.length; i++) {
        // For each query, get a copy of pipelineOptions from command line arguments, cast
        // tpcdsOptions as a DataflowPipelineOptions object to read and set required parameters for
        // pipeline execution.
        TpcdsOptions tpcdsOptionsCopy = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
        DataflowPipelineOptions dataflowPipelineOptionsCopy = tpcdsOptionsCopy.as(DataflowPipelineOptions.class);
        // Set a unique job name using the time stamp so that multiple different pipelines can run
        // together.
        dataflowPipelineOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());
        pipelines[i] = Pipeline.create(dataflowPipelineOptionsCopy);
        String queryString = QueryReader.readQuery(queryNames[i]);
        try {
            // Query execution
            PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipelines[i], env.parseQuery(queryString));
            // Transform the result from PCollection<Row> into PCollection<String>, and write it to the
            // location where results are stored.
            PCollection<String> rowStrings = rows.apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString));
            rowStrings.apply(TextIO.write().to(RESULT_DIRECTORY + "/" + dataSize + "/" + pipelines[i].getOptions().getJobName()).withSuffix(".txt").withNumShards(1));
        } catch (Exception e) {
            // Pass the exception to the logger so the full stack trace is captured by the
            // logging framework instead of leaking to stderr via printStackTrace().
            LOG.error("{} failed to execute", queryNames[i], e);
        }
        completion.submit(new TpcdsRun(pipelines[i]));
    }
    executor.shutdown();
    printExecutionSummary(completion, queryNames.length);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) TextTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Pipeline(org.apache.beam.sdk.Pipeline) BeamSqlPipelineOptions(org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore)

Example 12 with InMemoryMetaStore

use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.

From the class BigQueryIOPushDownIT, the method readUsingDirectReadMethod.

@Test
public void readUsingDirectReadMethod() {
    // Gather every planner rule, then drop the IO push-down rule so this run
    // exercises the direct-read path without push-down optimization.
    List<RelOptRule> rules = new ArrayList<>();
    for (RuleSet ruleSet : getRuleSets()) {
        for (RelOptRule rule : ruleSet) {
            rules.add(rule);
        }
    }
    rules.remove(BeamIOPushDownRule.INSTANCE);
    InMemoryMetaStore metaStore = new InMemoryMetaStore();
    metaStore.registerProvider(new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
    sqlEnv =
        BeamSqlEnv.builder(metaStore)
            .setPipelineOptions(PipelineOptionsFactory.create())
            .setRuleSets(ImmutableList.of(RuleSets.ofList(rules)))
            .build();
    // Create the table with the DIRECT_READ method and run the query under measurement.
    sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));
    BeamRelNode relNode = sqlEnv.parseQuery(SELECT_STATEMENT);
    BeamSqlRelUtils.toPCollection(pipeline, relNode)
        .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));
    PipelineResult pipelineResult = pipeline.run();
    pipelineResult.waitUntilFinish();
    collectAndPublishMetrics(pipelineResult, "_directread");
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) RuleSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.RuleSet) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) RelOptRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptRule) Test(org.junit.Test)

Example 13 with InMemoryMetaStore

use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.

From the class SqlTransform, the method expand.

@Override
public PCollection<Row> expand(PInput input) {
    // Expose the input PCollections as read-only tables through an in-memory metastore.
    TableProvider inputTables = new ReadOnlyTableProvider(PCOLLECTION_NAME, toTableMap(input));
    InMemoryMetaStore metaStore = new InMemoryMetaStore();
    metaStore.registerProvider(inputTables);
    BeamSqlEnvBuilder envBuilder = BeamSqlEnv.builder(metaStore);
    // TODO: validate duplicate functions.
    registerFunctions(envBuilder);
    // Optionally discover user-defined functions and table providers on the classpath.
    if (autoLoading()) {
        envBuilder.autoLoadUserDefinedFunctions();
        ServiceLoader.load(TableProvider.class).forEach(metaStore::registerProvider);
    }
    tableProviderMap().forEach(envBuilder::addSchema);
    // Switch to the configured default schema, if one was provided.
    @Nullable final String defaultSchema = defaultTableProvider();
    if (defaultSchema != null) {
        envBuilder.setCurrentSchema(defaultSchema);
    }
    // Prefer the explicitly configured planner; otherwise fall back to the pipeline option.
    envBuilder.setQueryPlannerClassName(
        MoreObjects.firstNonNull(
            queryPlannerClassName(),
            input.getPipeline().getOptions().as(BeamSqlPipelineOptions.class).getPlannerName()));
    envBuilder.setPipelineOptions(input.getPipeline().getOptions());
    BeamSqlEnv env = envBuilder.build();
    // Run any DDL statements before parsing the main query.
    ddlStrings().forEach(env::executeDdl);
    return BeamSqlRelUtils.toPCollection(
        input.getPipeline(),
        env.parseQuery(queryString(), queryParameters()),
        errorsTransformer());
}
Also used : BeamSqlPipelineOptions(org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions) ReadOnlyTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.ReadOnlyTableProvider) BeamSqlEnvBuilder(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv.BeamSqlEnvBuilder) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) ReadOnlyTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.ReadOnlyTableProvider) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 14 with InMemoryMetaStore

use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.

From the class PubsubTableProviderIT, the method connect.

/**
 * Opens a JDBC {@code CalciteConnection} backed by an {@code InMemoryMetaStore} with the given
 * table providers registered, and forwards the test pipeline's options to the connection as a
 * string map.
 *
 * <p>NOTE(review): relies on Jackson's conversion of PipelineOptions placing the actual values
 * under an "options" key — confirm this shape still holds if the PipelineOptions serialization
 * changes.
 */
@SuppressWarnings("unchecked")
private CalciteConnection connect(PipelineOptions options, TableProvider... tableProviders) {
    // HACK: PipelineOptions should expose a prominent method to do this reliably
    // The actual options are in the "options" field of the converted map
    Map<String, String> argsMap = ((Map<String, Object>) MAPPER.convertValue(pipeline.getOptions(), Map.class).get("options")).entrySet().stream().filter((entry) -> {
        if (entry.getValue() instanceof List) {
            if (!((List) entry.getValue()).isEmpty()) {
                // Non-empty lists have no string encoding here, so fail loudly.
                throw new IllegalArgumentException("Cannot encode list arguments");
            }
            // We can encode empty lists, just omit them.
            return false;
        }
        return true;
    }).collect(Collectors.toMap(Map.Entry::getKey, entry -> toArg(entry.getValue())));
    // Register every supplied provider so its tables are visible to the connection.
    InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
    for (TableProvider tableProvider : tableProviders) {
        inMemoryMetaStore.registerProvider(tableProvider);
    }
    JdbcConnection connection = JdbcDriver.connect(inMemoryMetaStore, options);
    connection.setPipelineOptionsMap(argsMap);
    return connection;
}
Also used : Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Future(java.util.concurrent.Future) TestPubsub(org.apache.beam.sdk.io.gcp.pubsub.TestPubsub) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestPubsubSignal(org.apache.beam.sdk.io.gcp.pubsub.TestPubsubSignal) Parameterized(org.junit.runners.Parameterized) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Matchers.allOf(org.hamcrest.Matchers.allOf) Collection(java.util.Collection) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) SchemaIOTableProviderWrapper(org.apache.beam.sdk.extensions.sql.meta.provider.SchemaIOTableProviderWrapper) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) ImmutableSet(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableSet) Serializable(java.io.Serializable) List(java.util.List) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JdbcDriver(org.apache.beam.sdk.extensions.sql.impl.JdbcDriver) ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) JsonMatcher.jsonBytesLike(org.apache.beam.sdk.testing.JsonMatcher.jsonBytesLike) ByteArrayOutputStream(java.io.ByteArrayOutputStream) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) 
Matchers.hasProperty(org.hamcrest.Matchers.hasProperty) BeamSqlRelUtils(org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) AvroUtils(org.apache.beam.sdk.schemas.utils.AvroUtils) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Parameter(org.junit.runners.Parameterized.Parameter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JdbcConnection(org.apache.beam.sdk.extensions.sql.impl.JdbcConnection) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Schema(org.apache.beam.sdk.schemas.Schema) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) PayloadMessages(org.apache.beam.sdk.extensions.protobuf.PayloadMessages) Rule(org.junit.Rule) Ignore(org.junit.Ignore) Matcher(org.hamcrest.Matcher) Instant(org.joda.time.Instant) Statement(java.sql.Statement) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) CalciteConnection(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.jdbc.CalciteConnection) List(java.util.List) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) JdbcConnection(org.apache.beam.sdk.extensions.sql.impl.JdbcConnection) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) Map(java.util.Map) 
ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore)

Example 15 with InMemoryMetaStore

use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.

From the class BigtableTableWithRowsTest, the method testCreatesSchemaCorrectly.

@Test
public void testCreatesSchemaCorrectly() {
    // Register the Bigtable provider and issue the CREATE TABLE through the SQL CLI.
    InMemoryMetaStore store = new InMemoryMetaStore();
    store.registerProvider(new BigtableTableProvider());
    BeamSqlCli cli = new BeamSqlCli().metaStore(store);
    cli.execute(createFullTableString(TABLE, location()));
    // The DDL should have registered a table named "beamTable" with the expected schema.
    Table created = store.getTables().get("beamTable");
    assertNotNull(created);
    assertEquals(TEST_SCHEMA, created.getSchema());
}
Also used : Table(org.apache.beam.sdk.extensions.sql.meta.Table) BigtableTableTestUtils.createReadTable(org.apache.beam.sdk.extensions.sql.meta.provider.bigtable.BigtableTableTestUtils.createReadTable) BeamSqlCli(org.apache.beam.sdk.extensions.sql.BeamSqlCli) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Test(org.junit.Test)

Aggregations

InMemoryMetaStore (org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore)15 Test (org.junit.Test)12 TextTableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider)9 Table (org.apache.beam.sdk.extensions.sql.meta.Table)8 BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv)3 Row (org.apache.beam.sdk.values.Row)3 ExecutorService (java.util.concurrent.ExecutorService)2 BeamSqlPipelineOptions (org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions)2 TableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider)2 JSONObject (com.alibaba.fastjson.JSONObject)1 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 StandardCharsets (java.nio.charset.StandardCharsets)1 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)1 ResultSet (java.sql.ResultSet)1 Statement (java.sql.Statement)1 ArrayList (java.util.ArrayList)1