Search in sources :

Example 6 with TextTableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in project beam by apache.

the class BeamSqlCliTest method testExecute_createTextTable.

/**
 * Executes a {@code CREATE EXTERNAL TABLE} statement of type {@code 'text'} through the CLI and
 * checks that a table named {@code person} is present in the meta store with the expected
 * three-field schema (id, name, age; each nullable and carrying its column comment as description).
 */
@Test
public void testExecute_createTextTable() throws Exception {
    InMemoryMetaStore store = new InMemoryMetaStore();
    store.registerProvider(new TextTableProvider());
    BeamSqlCli sqlCli = new BeamSqlCli().metaStore(store);

    String createPersonDdl =
        "CREATE EXTERNAL TABLE person (\n"
            + "id int COMMENT 'id', \n"
            + "name varchar COMMENT 'name', \n"
            + "age int COMMENT 'age') \n"
            + "TYPE 'text' \n"
            + "COMMENT '' LOCATION '/home/admin/orders'";
    sqlCli.execute(createPersonDdl);

    Table person = store.getTables().get("person");
    assertNotNull(person);

    // Expected columns, in declaration order.
    Stream<Field> expectedFields =
        Stream.of(
            Field.of("id", INTEGER).withDescription("id").withNullable(true),
            Field.of("name", VARCHAR).withDescription("name").withNullable(true),
            Field.of("age", INTEGER).withDescription("age").withNullable(true));
    assertEquals(expectedFields.collect(toSchema()), person.getSchema());
}
Also used : Table(org.apache.beam.sdk.extensions.sql.meta.Table) TextTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Test(org.junit.Test)

Example 7 with TextTableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in project beam by apache.

the class BeamSqlCliTest method testExecute_dropTable.

/**
 * Creates the {@code person} table through the CLI, verifies it is registered in the meta store,
 * then executes {@code drop table person} and verifies the lookup returns {@code null}.
 */
@Test
public void testExecute_dropTable() throws Exception {
    InMemoryMetaStore store = new InMemoryMetaStore();
    store.registerProvider(new TextTableProvider());
    BeamSqlCli sqlCli = new BeamSqlCli().metaStore(store);

    String createPersonDdl =
        "CREATE EXTERNAL TABLE person (\n"
            + "id int COMMENT 'id', \n"
            + "name varchar COMMENT 'name', \n"
            + "age int COMMENT 'age') \n"
            + "TYPE 'text' \n"
            + "COMMENT '' LOCATION '/home/admin/orders'";
    sqlCli.execute(createPersonDdl);

    // The table must exist before the drop for the second assertion to be meaningful.
    Table beforeDrop = store.getTables().get("person");
    assertNotNull(beforeDrop);

    sqlCli.execute("drop table person");

    Table afterDrop = store.getTables().get("person");
    assertNull(afterDrop);
}
Also used : Table(org.apache.beam.sdk.extensions.sql.meta.Table) TextTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Test(org.junit.Test)

Example 8 with TextTableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in project beam by apache.

the class BeamSqlExplainTest method setUp.

/**
 * Builds a fresh in-memory meta store with the text table provider registered, attaches a CLI to
 * it, and creates the three external text tables ({@code person}, {@code A}, {@code B}) in that
 * order.
 */
@Before
public void setUp() throws SqlParseException, RelConversionException, ValidationException {
    metaStore = new InMemoryMetaStore();
    metaStore.registerProvider(new TextTableProvider());
    cli = new BeamSqlCli().metaStore(metaStore);

    // DDL statements, executed in array order.
    String[] createStatements = {
        "CREATE EXTERNAL TABLE person (\n"
            + "id int COMMENT 'id', \n"
            + "name varchar COMMENT 'name', \n"
            + "age int COMMENT 'age') \n"
            + "TYPE 'text' \n"
            + "COMMENT '' ",
        "CREATE EXTERNAL TABLE A (\n"
            + "c1 int COMMENT 'c1',\n"
            + "c2 int COMMENT 'c2')\n"
            + "TYPE 'text'\n"
            + "COMMENT '' ",
        "CREATE EXTERNAL TABLE B (\n"
            + "c1 int COMMENT 'c1',\n"
            + "c2 int COMMENT 'c2')\n"
            + "TYPE 'text'\n"
            + "COMMENT '' "
    };
    for (String ddl : createStatements) {
        cli.execute(ddl);
    }
}
Also used : TextTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Before(org.junit.Before)

Example 9 with TextTableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in project beam by apache.

the class BeamSqlEnvRunner method runUsingBeamSqlEnv.

/**
 * This is the alternative method in BeamTpcds.main method. Run job using BeamSqlEnv.parseQuery()
 * method. (Doesn't perform well when running query96).
 *
 * <p>One pipeline is created per query name. Each pipeline is handed to a fixed-size thread pool
 * via a {@link CompletionService}, and an execution summary is printed once all pipelines have
 * been submitted.
 *
 * @param args Command line arguments
 * @throws Exception if anything outside the per-query planning step fails (for example reading
 *     the options or reading a query); failures while planning or wiring a single query are
 *     logged and not rethrown
 */
public static void runUsingBeamSqlEnv(String[] args) throws Exception {
    InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
    inMemoryMetaStore.registerProvider(new TextTableProvider());
    TpcdsOptions tpcdsOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
    String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
    String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
    int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);
    // Using ExecutorService and CompletionService to fulfill multi-threading functionality
    ExecutorService executor = Executors.newFixedThreadPool(nThreads);
    CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);
    try {
        // Directly create all tables and register them into inMemoryMetaStore before creating
        // BeamSqlEnv object.
        registerAllTablesByInMemoryMetaStore(inMemoryMetaStore, dataSize);
        BeamSqlPipelineOptions beamSqlPipelineOptions = tpcdsOptions.as(BeamSqlPipelineOptions.class);
        BeamSqlEnv env = BeamSqlEnv.builder(inMemoryMetaStore).setPipelineOptions(beamSqlPipelineOptions).setQueryPlannerClassName(beamSqlPipelineOptions.getPlannerName()).build();
        // Make an array of pipelines, each pipeline is responsible for running a corresponding query.
        Pipeline[] pipelines = new Pipeline[queryNames.length];
        // For every query: build its pipeline, convert the result rows to strings, and write them
        // to a .txt file under RESULT_DIRECTORY.
        for (int i = 0; i < queryNames.length; i++) {
            // For each query, get a copy of pipelineOptions from command line arguments, cast
            // tpcdsOptions as a DataflowPipelineOptions object to read and set required parameters for
            // pipeline execution.
            TpcdsOptions tpcdsOptionsCopy = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
            DataflowPipelineOptions dataflowPipelineOptionsCopy = tpcdsOptionsCopy.as(DataflowPipelineOptions.class);
            // Set a unique job name using the time stamp so that multiple different pipelines can run
            // together.
            dataflowPipelineOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());
            pipelines[i] = Pipeline.create(dataflowPipelineOptionsCopy);
            String queryString = QueryReader.readQuery(queryNames[i]);
            try {
                // Query execution
                PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipelines[i], env.parseQuery(queryString));
                // Transform the result from PCollection<Row> into PCollection<String>, and write it to the
                // location where results are stored.
                PCollection<String> rowStrings = rows.apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString));
                rowStrings.apply(TextIO.write().to(RESULT_DIRECTORY + "/" + dataSize + "/" + pipelines[i].getOptions().getJobName()).withSuffix(".txt").withNumShards(1));
            } catch (Exception e) {
                // Pass the exception to the logger so the cause and stack trace end up in the log
                // output instead of being printed separately to stderr.
                LOG.error("{} failed to execute", queryNames[i], e);
            }
            // The pipeline is submitted even when planning failed above; the summary below is asked
            // for queryNames.length results, so presumably one result per query is expected.
            // NOTE(review): confirm against printExecutionSummary.
            completion.submit(new TpcdsRun(pipelines[i]));
        }
    } finally {
        // Always stop accepting new tasks, even if setup or query reading threw; otherwise worker
        // threads that were already started would stay alive and keep the JVM from exiting.
        // shutdown() lets already-submitted runs finish.
        executor.shutdown();
    }
    printExecutionSummary(completion, queryNames.length);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) TextTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Pipeline(org.apache.beam.sdk.Pipeline) BeamSqlPipelineOptions(org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore)

Example 10 with TextTableProvider

use of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in project beam by apache.

the class InMemoryMetaStoreTest method setUp.

/** Replaces {@code store} with a new in-memory meta store that has the text table provider registered. */
@Before
public void setUp() {
    store = new InMemoryMetaStore();
    TextTableProvider textProvider = new TextTableProvider();
    store.registerProvider(textProvider);
}
Also used : TextTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider) Before(org.junit.Before)

Aggregations

TextTableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider): 10, InMemoryMetaStore (org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore): 9, Test (org.junit.Test): 7, Table (org.apache.beam.sdk.extensions.sql.meta.Table): 5, Before (org.junit.Before): 2, ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService): 1, ExecutorService (java.util.concurrent.ExecutorService): 1, DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 1, Pipeline (org.apache.beam.sdk.Pipeline): 1, BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv): 1, BeamSqlPipelineOptions (org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions): 1, Row (org.apache.beam.sdk.values.Row): 1