Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.
From class BeamComplexTypeTest, method testRowWithArray:
@Test
public void testRowWithArray() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> stream =
      BeamSqlRelUtils.toPCollection(
          pipeline,
          sqlEnv.parseQuery(
              "SELECT rowWithArrayTestTable.col.field3[2] FROM rowWithArrayTestTable"));
  PAssert.that(stream)
      .containsInAnyOrder(
          Row.withSchema(Schema.builder().addInt64Field("int64").build()).addValue(6L).build());
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
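The tests in this class read from a shared readOnlyTableProvider fixture that is not shown on this page. A minimal sketch of how the rowWithArrayTestTable side of it could be wired up with ReadOnlyTableProvider and TestBoundedTable; the field name and row contents are assumptions inferred from the query and the expected result, not the exact fixture:

import java.util.Arrays;
import org.apache.beam.sdk.extensions.sql.meta.provider.ReadOnlyTableProvider;
import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestBoundedTable;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.values.Row;
import com.google.common.collect.ImmutableMap;

// "col" is a ROW whose "field3" is an ARRAY<BIGINT>.
Schema rowWithArraySchema =
    Schema.builder().addArrayField("field3", FieldType.INT64).build();

// Assuming 1-based (Calcite-style) array indexing, field3[2] selects the
// second element, so the values below make the query return 6L as asserted.
ReadOnlyTableProvider readOnlyTableProvider =
    new ReadOnlyTableProvider(
        "test_provider",
        ImmutableMap.of(
            "rowWithArrayTestTable",
            TestBoundedTable.of(FieldType.row(rowWithArraySchema), "col")
                .addRows(
                    Row.withSchema(rowWithArraySchema)
                        .addValue(Arrays.asList(5L, 6L, 7L))
                        .build())));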
Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.
From class BeamComplexTypeTest, method testBasicRow:
@Test
public void testBasicRow() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> stream =
      BeamSqlRelUtils.toPCollection(
          pipeline, sqlEnv.parseQuery("SELECT col FROM basicRowTestTable"));
  Schema outputSchema = Schema.builder().addRowField("col", innerRowSchema).build();
  PAssert.that(stream)
      .containsInAnyOrder(
          Row.withSchema(outputSchema)
              .addValues(Row.withSchema(innerRowSchema).addValues("innerStr", 1L).build())
              .build());
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
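These tests also rely on a pipeline field on the test class, which is where the pipeline reference comes from. The usual Beam testing idiom is a JUnit TestPipeline rule, sketched here:

import org.apache.beam.sdk.testing.TestPipeline;
import org.junit.Rule;

// JUnit rule that creates the pipeline each test builds on and verifies
// it is actually run before the test finishes.
@Rule public final transient TestPipeline pipeline = TestPipeline.create();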
Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.
From class BeamComplexTypeTest, method testSelectInnerRowOfNestedRow:
@Ignore("https://issues.apache.org/jira/browse/BEAM-5189")
@Test
public void testSelectInnerRowOfNestedRow() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> stream =
      BeamSqlRelUtils.toPCollection(
          pipeline,
          sqlEnv.parseQuery("SELECT nestedRowTestTable.col.RowField FROM nestedRowTestTable"));
  PAssert.that(stream)
      .containsInAnyOrder(
          Row.withSchema(
                  Schema.builder().addStringField("field1").addInt64Field("field2").build())
              .addValues("inner_str_one", 1L)
              .build());
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
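For this ignored test, the fixture presumably registers a nestedRowTestTable whose col column contains a RowField nested one level down. A sketch of the assumed shape, inferred only from the query and the expected output row (field names beyond those two are assumptions):

// Inner row selected by nestedRowTestTable.col.RowField; its schema matches
// the expected output (field1 VARCHAR, field2 BIGINT).
Schema innerNestedSchema =
    Schema.builder().addStringField("field1").addInt64Field("field2").build();

// Assumed outer schema of column "col", embedding the nested RowField.
Schema nestedRowSchema =
    Schema.builder()
        .addStringField("field1")
        .addRowField("RowField", innerNestedSchema)
        .build();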
Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.
From class BeamComplexTypeTest, method testArrayConstructor:
@Test
public void testArrayConstructor() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> stream =
      BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery("SELECT ARRAY[1, 2, 3] f_arr"));
  PAssert.that(stream)
      .containsInAnyOrder(
          Row.withSchema(Schema.builder().addArrayField("f_arr", FieldType.INT32).build())
              .addValue(Arrays.asList(1, 2, 3))
              .build());
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
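Note that the integer literals in the ARRAY constructor surface as INT32 elements, which is why the expected schema uses FieldType.INT32 rather than INT64. A standalone sketch of building such an array-valued Row and reading it back:

import java.util.Arrays;
import java.util.Collection;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.values.Row;

// A single-column schema holding an ARRAY<INT32>, matching the query result.
Schema arraySchema = Schema.builder().addArrayField("f_arr", FieldType.INT32).build();
Row arrayRow = Row.withSchema(arraySchema).addValue(Arrays.asList(1, 2, 3)).build();
Collection<Integer> values = arrayRow.getArray("f_arr"); // [1, 2, 3]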
Use of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in project beam by apache.
From class BeamSqlEnvRunner, method runUsingBeamSqlEnv:
/**
 * Alternative to the approach in BeamTpcds.main: runs the jobs using the
 * BeamSqlEnv.parseQuery() method. (Does not perform well when running query96.)
 *
 * @param args command line arguments
 * @throws Exception if reading a query or registering the tables fails
 */
public static void runUsingBeamSqlEnv(String[] args) throws Exception {
  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(new TextTableProvider());

  TpcdsOptions tpcdsOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
  String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
  String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
  int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);

  // Use an ExecutorService and CompletionService to run the queries in parallel.
  ExecutorService executor = Executors.newFixedThreadPool(nThreads);
  CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);

  // Create all tables and register them in inMemoryMetaStore before creating the
  // BeamSqlEnv object.
  registerAllTablesByInMemoryMetaStore(inMemoryMetaStore, dataSize);

  BeamSqlPipelineOptions beamSqlPipelineOptions = tpcdsOptions.as(BeamSqlPipelineOptions.class);
  BeamSqlEnv env =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(beamSqlPipelineOptions)
          .setQueryPlannerClassName(beamSqlPipelineOptions.getPlannerName())
          .build();

  // Make an array of pipelines; each pipeline is responsible for running one query.
  Pipeline[] pipelines = new Pipeline[queryNames.length];

  // Execute all queries, transform each result into a PCollection<String>, and write it to
  // a txt file stored in a GCP directory.
  for (int i = 0; i < queryNames.length; i++) {
    // For each query, get a fresh copy of the pipeline options from the command line
    // arguments and cast it to DataflowPipelineOptions to read and set the parameters
    // required for pipeline execution.
    TpcdsOptions tpcdsOptionsCopy =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
    DataflowPipelineOptions dataflowPipelineOptionsCopy =
        tpcdsOptionsCopy.as(DataflowPipelineOptions.class);

    // Set a unique job name using the timestamp so that multiple pipelines can run together.
    dataflowPipelineOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());

    pipelines[i] = Pipeline.create(dataflowPipelineOptionsCopy);
    String queryString = QueryReader.readQuery(queryNames[i]);

    try {
      // Query execution.
      PCollection<Row> rows =
          BeamSqlRelUtils.toPCollection(pipelines[i], env.parseQuery(queryString));

      // Transform the result from PCollection<Row> into PCollection<String> and write it
      // to the location where results are stored.
      PCollection<String> rowStrings =
          rows.apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString));
      rowStrings.apply(
          TextIO.write()
              .to(RESULT_DIRECTORY + "/" + dataSize + "/" + pipelines[i].getOptions().getJobName())
              .withSuffix(".txt")
              .withNumShards(1));
    } catch (Exception e) {
      LOG.error("{} failed to execute", queryNames[i], e);
    }
    completion.submit(new TpcdsRun(pipelines[i]));
  }

  executor.shutdown();
  printExecutionSummary(completion, queryNames.length);
}
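The TpcdsRun submitted to the completion service above is a Callable that blocks on its pipeline and reports a TpcdsRunResult for printExecutionSummary to aggregate. A minimal sketch of that shape; the TpcdsRunResult constructor arguments here are assumptions, and the real class in org.apache.beam.sdk.tpcds records more run metadata:

import java.util.concurrent.Callable;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;

// Runs one query's pipeline to completion and wraps the outcome with timing info.
class TpcdsRun implements Callable<TpcdsRunResult> {
  private final Pipeline pipeline;

  TpcdsRun(Pipeline pipeline) {
    this.pipeline = pipeline;
  }

  @Override
  public TpcdsRunResult call() {
    long start = System.currentTimeMillis();
    try {
      PipelineResult result = pipeline.run();
      result.waitUntilFinish();
      // Hypothetical constructor shape: success flag, start/end timestamps,
      // the pipeline options, and the PipelineResult.
      return new TpcdsRunResult(
          true, start, System.currentTimeMillis(), pipeline.getOptions(), result);
    } catch (Exception e) {
      return new TpcdsRunResult(
          false, start, System.currentTimeMillis(), pipeline.getOptions(), null);
    }
  }
}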