Search in sources :

Example 31 with BeamSqlEnv

Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.

From the class BeamComplexTypeTest, method testRowWithArray.

/**
 * Selects one element of an array field nested inside a row column and expects the output to
 * consist of a single row holding the INT64 value 6.
 */
@Test
public void testRowWithArray() {
    final String query = "SELECT rowWithArrayTestTable.col.field3[2] FROM rowWithArrayTestTable";

    BeamSqlEnv env = BeamSqlEnv.inMemory(readOnlyTableProvider);
    PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery(query));

    // The expected output: one row with a single INT64 field named "int64".
    Schema expectedSchema = Schema.builder().addInt64Field("int64").build();
    Row expectedRow = Row.withSchema(expectedSchema).addValue(6L).build();
    PAssert.that(result).containsInAnyOrder(expectedRow);

    // Bound the wait so the test cannot block for more than two minutes.
    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 32 with BeamSqlEnv

Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.

From the class BeamComplexTypeTest, method testBasicRow.

/**
 * Selects a whole row-typed column and expects a single output row whose only field, "col",
 * wraps an inner row built from {@code innerRowSchema} with the values ("innerStr", 1L).
 */
@Test
public void testBasicRow() {
    final String query = "SELECT col FROM basicRowTestTable";

    BeamSqlEnv env = BeamSqlEnv.inMemory(readOnlyTableProvider);
    PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery(query));

    // Build the expected value from the outside in: outer schema first, then the nested row.
    Schema outputSchema = Schema.builder().addRowField("col", innerRowSchema).build();
    Row expectedInner = Row.withSchema(innerRowSchema).addValues("innerStr", 1L).build();
    Row expectedOuter = Row.withSchema(outputSchema).addValues(expectedInner).build();
    PAssert.that(result).containsInAnyOrder(expectedOuter);

    // Bound the wait so the test cannot block for more than two minutes.
    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : Schema(org.apache.beam.sdk.schemas.Schema) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 33 with BeamSqlEnv

Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.

From the class BeamComplexTypeTest, method testSelectInnerRowOfNestedRow.

/**
 * Selects a row-typed field nested inside another row column and expects a single output row
 * with the fields field1 = "inner_str_one" and field2 = 1L.
 *
 * <p>Currently disabled; see the issue linked in the {@code @Ignore} annotation.
 */
@Ignore("https://issues.apache.org/jira/browse/BEAM-5189")
@Test
public void testSelectInnerRowOfNestedRow() {
    final String query = "SELECT nestedRowTestTable.col.RowField FROM nestedRowTestTable";

    BeamSqlEnv env = BeamSqlEnv.inMemory(readOnlyTableProvider);
    PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery(query));

    // The expected output: one row with a string field followed by an INT64 field.
    Schema expectedSchema = Schema.builder().addStringField("field1").addInt64Field("field2").build();
    Row expectedRow = Row.withSchema(expectedSchema).addValues("inner_str_one", 1L).build();
    PAssert.that(result).containsInAnyOrder(expectedRow);

    // Bound the wait so the test cannot block for more than two minutes.
    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 34 with BeamSqlEnv

Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.

From the class BeamComplexTypeTest, method testArrayConstructor.

/**
 * Evaluates an {@code ARRAY[...]} constructor in a table-less SELECT and expects a single output
 * row whose "f_arr" field is an INT32 array containing 1, 2, 3.
 */
@Test
public void testArrayConstructor() {
    final String query = "SELECT ARRAY[1, 2, 3] f_arr";

    BeamSqlEnv env = BeamSqlEnv.inMemory(readOnlyTableProvider);
    PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery(query));

    // The expected output: one row with a single array-of-INT32 field named "f_arr".
    Schema expectedSchema = Schema.builder().addArrayField("f_arr", FieldType.INT32).build();
    Row expectedRow = Row.withSchema(expectedSchema).addValue(Arrays.asList(1, 2, 3)).build();
    PAssert.that(result).containsInAnyOrder(expectedRow);

    // Bound the wait so the test cannot block for more than two minutes.
    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
Also used : BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 35 with BeamSqlEnv

Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.

From the class BeamSqlEnvRunner, method runUsingBeamSqlEnv.

/**
 * This is the alternative method in BeamTpcds.main method. Run job using BeamSqlEnv.parseQuery()
 * method. (Doesn't perform well when running query96).
 *
 * <p>One pipeline is built per requested query name. Each pipeline is handed to a fixed-size
 * thread pool as a {@code TpcdsRun} task, and an execution summary is printed once every query
 * has been submitted.
 *
 * @param args Command line arguments
 * @throws Exception if option parsing, table registration, environment setup or reading a query
 *     fails; failures while translating a single query into its pipeline are logged instead and
 *     do not abort the remaining queries
 */
public static void runUsingBeamSqlEnv(String[] args) throws Exception {
    InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
    inMemoryMetaStore.registerProvider(new TextTableProvider());
    TpcdsOptions tpcdsOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
    String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
    String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
    int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);
    // Using ExecutorService and CompletionService to fulfill multi-threading functionality
    ExecutorService executor = Executors.newFixedThreadPool(nThreads);
    CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);
    try {
        // Directly create all tables and register them into inMemoryMetaStore before creating
        // BeamSqlEnv object.
        registerAllTablesByInMemoryMetaStore(inMemoryMetaStore, dataSize);
        BeamSqlPipelineOptions beamSqlPipelineOptions = tpcdsOptions.as(BeamSqlPipelineOptions.class);
        BeamSqlEnv env = BeamSqlEnv.builder(inMemoryMetaStore).setPipelineOptions(beamSqlPipelineOptions).setQueryPlannerClassName(beamSqlPipelineOptions.getPlannerName()).build();
        // Make an array of pipelines, each pipeline is responsible for running a corresponding query.
        Pipeline[] pipelines = new Pipeline[queryNames.length];
        // Build every query's pipeline: convert each result row into a String and write the
        // strings to a single-shard .txt file under RESULT_DIRECTORY.
        for (int i = 0; i < queryNames.length; i++) {
            // For each query, get a copy of pipelineOptions from command line arguments, cast
            // tpcdsOptions as a DataflowPipelineOptions object to read and set required parameters for
            // pipeline execution.
            TpcdsOptions tpcdsOptionsCopy = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
            DataflowPipelineOptions dataflowPipelineOptionsCopy = tpcdsOptionsCopy.as(DataflowPipelineOptions.class);
            // Set a unique job name using the time stamp so that multiple different pipelines can run
            // together.
            dataflowPipelineOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());
            pipelines[i] = Pipeline.create(dataflowPipelineOptionsCopy);
            String queryString = QueryReader.readQuery(queryNames[i]);
            try {
                // Query execution
                PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipelines[i], env.parseQuery(queryString));
                // Transform the result from PCollection<Row> into PCollection<String>, and write it to the
                // location where results are stored.
                PCollection<String> rowStrings = rows.apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString));
                rowStrings.apply(TextIO.write().to(RESULT_DIRECTORY + "/" + dataSize + "/" + pipelines[i].getOptions().getJobName()).withSuffix(".txt").withNumShards(1));
            } catch (Exception e) {
                // Pass the exception to the logger so the stack trace lands in the configured log
                // output rather than on stderr.
                LOG.error("{} failed to execute", queryNames[i], e);
            }
            // NOTE(review): the pipeline is submitted even when building it failed above; this
            // is preserved from the original behavior - confirm it is intended.
            completion.submit(new TpcdsRun(pipelines[i]));
        }
    } finally {
        // Always stop accepting new tasks, even if setup or submission threw; otherwise the
        // pool's worker threads would stay alive. Already submitted tasks still run to completion.
        executor.shutdown();
    }
    printExecutionSummary(completion, queryNames.length);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) TextTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Pipeline(org.apache.beam.sdk.Pipeline) BeamSqlPipelineOptions(org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore)

Aggregations

BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv): 61; Test (org.junit.Test): 54; Row (org.apache.beam.sdk.values.Row): 36; TestTableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider): 16; PipelineResult (org.apache.beam.sdk.PipelineResult): 10; State (org.apache.beam.sdk.PipelineResult.State): 10; Schema (org.apache.beam.sdk.schemas.Schema): 10; TableProvider (org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider): 8; Arrays (java.util.Arrays): 6; List (java.util.List): 6; Map (java.util.Map): 6; ExecutorService (java.util.concurrent.ExecutorService): 6; Collectors (java.util.stream.Collectors): 6; PayloadMessages (org.apache.beam.sdk.extensions.protobuf.PayloadMessages): 6; JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException): 5; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5; IOException (java.io.IOException): 5; Serializable (java.io.Serializable): 5; StandardCharsets (java.nio.charset.StandardCharsets): 5