Use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.
The class BeamSqlEnvRunner, method runUsingBeamSqlEnv.
/**
 * This is the alternative to the BeamTpcds.main method: it runs the job using the
 * BeamSqlEnv.parseQuery() method. (Doesn't perform well when running query96.)
 *
 * @param args command line arguments
 * @throws Exception
 */
public static void runUsingBeamSqlEnv(String[] args) throws Exception {
  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(new TextTableProvider());

  TpcdsOptions tpcdsOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
  String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
  String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
  int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);

  // Use an ExecutorService and CompletionService to run the queries in parallel.
  ExecutorService executor = Executors.newFixedThreadPool(nThreads);
  CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);

  // Directly create all tables and register them into inMemoryMetaStore before creating the
  // BeamSqlEnv object.
  registerAllTablesByInMemoryMetaStore(inMemoryMetaStore, dataSize);

  BeamSqlPipelineOptions beamSqlPipelineOptions = tpcdsOptions.as(BeamSqlPipelineOptions.class);
  BeamSqlEnv env =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(beamSqlPipelineOptions)
          .setQueryPlannerClassName(beamSqlPipelineOptions.getPlannerName())
          .build();

  // Make an array of pipelines; each pipeline is responsible for running a corresponding query.
  Pipeline[] pipelines = new Pipeline[queryNames.length];

  // Execute all queries, transform each result into a PCollection<String>, write it to
  // a txt file, and store it in a GCP directory.
  for (int i = 0; i < queryNames.length; i++) {
    // For each query, get a copy of pipelineOptions from the command line arguments, and cast
    // tpcdsOptions as a DataflowPipelineOptions object to read and set required parameters for
    // pipeline execution.
    TpcdsOptions tpcdsOptionsCopy =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
    DataflowPipelineOptions dataflowPipelineOptionsCopy =
        tpcdsOptionsCopy.as(DataflowPipelineOptions.class);

    // Set a unique job name using the time stamp so that multiple different pipelines can run
    // together.
    dataflowPipelineOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());

    pipelines[i] = Pipeline.create(dataflowPipelineOptionsCopy);
    String queryString = QueryReader.readQuery(queryNames[i]);

    try {
      // Query execution
      PCollection<Row> rows =
          BeamSqlRelUtils.toPCollection(pipelines[i], env.parseQuery(queryString));

      // Transform the result from PCollection<Row> into PCollection<String>, and write it to the
      // location where results are stored.
      PCollection<String> rowStrings =
          rows.apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString));
      rowStrings.apply(
          TextIO.write()
              .to(RESULT_DIRECTORY + "/" + dataSize + "/" + pipelines[i].getOptions().getJobName())
              .withSuffix(".txt")
              .withNumShards(1));
    } catch (Exception e) {
      LOG.error("{} failed to execute", queryNames[i]);
      e.printStackTrace();
    }

    completion.submit(new TpcdsRun(pipelines[i]));
  }

  executor.shutdown();
  printExecutionSummary(completion, queryNames.length);
}
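Stripped of the TPC-DS machinery, the core pattern above is: register a TableProvider with an InMemoryMetaStore, build a BeamSqlEnv over it, and turn a parsed query into a PCollection<Row>. A minimal sketch of that pattern; the table name, columns, and gs:// path below are hypothetical placeholders, not part of the runner:

// Minimal sketch of the InMemoryMetaStore + BeamSqlEnv pattern.
// Table name, columns, and location are hypothetical placeholders.
InMemoryMetaStore store = new InMemoryMetaStore();
store.registerProvider(new TextTableProvider());

BeamSqlEnv env =
    BeamSqlEnv.builder(store)
        .setPipelineOptions(PipelineOptionsFactory.create())
        .build();

// Register a text-backed table via DDL, then plan a query against it.
env.executeDdl(
    "CREATE EXTERNAL TABLE item (i_item_sk BIGINT, i_item_id VARCHAR) "
        + "TYPE text LOCATION 'gs://my-bucket/item.dat'");

Pipeline pipeline = Pipeline.create();
PCollection<Row> rows =
    BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery("SELECT i_item_id FROM item"));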
Use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.
The class BigQueryIOPushDownIT, method readUsingDirectReadMethod.
@Test
public void readUsingDirectReadMethod() {
  List<RelOptRule> ruleList = new ArrayList<>();
  for (RuleSet x : getRuleSets()) {
    x.iterator().forEachRemaining(ruleList::add);
  }
  // Remove push-down rule
  ruleList.remove(BeamIOPushDownRule.INSTANCE);

  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(
      new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
  sqlEnv =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(PipelineOptionsFactory.create())
          .setRuleSets(ImmutableList.of(RuleSets.ofList(ruleList)))
          .build();

  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
      .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread");
}
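For comparison, the push-down path is the same flow with the default rule sets left intact (no setRuleSets call), so BeamIOPushDownRule stays enabled. A sketch of that variant, assuming the same test fixtures (CREATE_TABLE_STATEMENT, SELECT_STATEMENT, NAMESPACE, and so on):

// Sketch of the push-down counterpart: identical setup, but the default
// rule sets are kept, so BeamIOPushDownRule can push the projection into BigQuery.
InMemoryMetaStore metaStore = new InMemoryMetaStore();
metaStore.registerProvider(new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
sqlEnv =
    BeamSqlEnv.builder(metaStore)
        .setPipelineOptions(PipelineOptionsFactory.create())
        .build();
sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ));
BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(SELECT_STATEMENT))
    .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));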
Use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.
The class SqlTransform, method expand.
@Override
public PCollection<Row> expand(PInput input) {
  TableProvider inputTableProvider =
      new ReadOnlyTableProvider(PCOLLECTION_NAME, toTableMap(input));
  InMemoryMetaStore metaTableProvider = new InMemoryMetaStore();
  metaTableProvider.registerProvider(inputTableProvider);

  BeamSqlEnvBuilder sqlEnvBuilder = BeamSqlEnv.builder(metaTableProvider);

  // TODO: validate duplicate functions.
  registerFunctions(sqlEnvBuilder);
  // Load automatic table providers before user ones so the user ones will cause a conflict if
  // the same names are reused.
  if (autoLoading()) {
    sqlEnvBuilder.autoLoadUserDefinedFunctions();
    ServiceLoader.load(TableProvider.class).forEach(metaTableProvider::registerProvider);
  }

  tableProviderMap().forEach(sqlEnvBuilder::addSchema);

  @Nullable final String defaultTableProvider = defaultTableProvider();
  if (defaultTableProvider != null) {
    sqlEnvBuilder.setCurrentSchema(defaultTableProvider);
  }

  sqlEnvBuilder.setQueryPlannerClassName(
      MoreObjects.firstNonNull(
          queryPlannerClassName(),
          input.getPipeline().getOptions().as(BeamSqlPipelineOptions.class).getPlannerName()));
  sqlEnvBuilder.setPipelineOptions(input.getPipeline().getOptions());

  BeamSqlEnv sqlEnv = sqlEnvBuilder.build();
  ddlStrings().forEach(sqlEnv::executeDdl);

  return BeamSqlRelUtils.toPCollection(
      input.getPipeline(),
      sqlEnv.parseQuery(queryString(), queryParameters()),
      errorsTransformer());
}
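From the user's side, this expand() is reached by applying SqlTransform to a schema-aware PCollection; a single input is registered under the built-in PCOLLECTION table name. A minimal sketch, where the schema, field names, and data are illustrative:

// Illustrative caller of SqlTransform; the schema, field names, and data are made up.
Pipeline p = Pipeline.create();
Schema schema = Schema.builder().addStringField("name").addInt32Field("score").build();

PCollection<Row> input =
    p.apply(Create.of(Row.withSchema(schema).addValues("a", 1).build()))
        .setRowSchema(schema);

// The single input PCollection is queryable as the PCOLLECTION table.
PCollection<Row> filtered =
    input.apply(SqlTransform.query("SELECT name, score FROM PCOLLECTION WHERE score > 0"));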
Use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.
The class PubsubTableProviderIT, method connect.
@SuppressWarnings("unchecked")
private CalciteConnection connect(PipelineOptions options, TableProvider... tableProviders) {
  // HACK: PipelineOptions should expose a prominent method to do this reliably.
  // The actual options are in the "options" field of the converted map.
  Map<String, String> argsMap =
      ((Map<String, Object>) MAPPER.convertValue(pipeline.getOptions(), Map.class).get("options"))
          .entrySet().stream()
              .filter(
                  (entry) -> {
                    if (entry.getValue() instanceof List) {
                      if (!((List) entry.getValue()).isEmpty()) {
                        throw new IllegalArgumentException("Cannot encode list arguments");
                      }
                      // We can encode empty lists, just omit them.
                      return false;
                    }
                    return true;
                  })
              .collect(Collectors.toMap(Map.Entry::getKey, entry -> toArg(entry.getValue())));

  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  for (TableProvider tableProvider : tableProviders) {
    inMemoryMetaStore.registerProvider(tableProvider);
  }

  JdbcConnection connection = JdbcDriver.connect(inMemoryMetaStore, options);
  connection.setPipelineOptionsMap(argsMap);
  return connection;
}
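The returned CalciteConnection behaves like any JDBC connection, so a test can run SQL against the registered providers with plain java.sql calls. A hypothetical usage sketch, assuming options and tableProvider are in scope; "message" is a placeholder for whatever table the provider exposes:

// Hypothetical use of connect(); "message" stands in for a table exposed by
// one of the registered providers.
try (CalciteConnection connection = connect(options, tableProvider);
    Statement statement = connection.createStatement();
    ResultSet resultSet = statement.executeQuery("SELECT event_timestamp FROM message")) {
  while (resultSet.next()) {
    System.out.println(resultSet.getTimestamp(1));
  }
}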
Use of org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore in project beam by apache.
The class BigtableTableWithRowsTest, method testCreatesSchemaCorrectly.
@Test
public void testCreatesSchemaCorrectly() {
  InMemoryMetaStore metaStore = new InMemoryMetaStore();
  metaStore.registerProvider(new BigtableTableProvider());

  BeamSqlCli cli = new BeamSqlCli().metaStore(metaStore);
  cli.execute(createFullTableString(TABLE, location()));

  Table table = metaStore.getTables().get("beamTable");
  assertNotNull(table);
  assertEquals(TEST_SCHEMA, table.getSchema());
}
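createFullTableString is a test helper; the DDL it assembles for the Bigtable provider has roughly the shape below. The columns here are illustrative only (the real ones come from TEST_SCHEMA), and the project, instance, and table names are placeholders:

// Illustrative DDL only; the actual column list in the test is derived from TEST_SCHEMA.
String ddl =
    "CREATE EXTERNAL TABLE beamTable(\n"
        + "  key VARCHAR NOT NULL,\n"
        + "  familyTest ROW<boolColumn BOOLEAN NOT NULL>\n"
        + ")\n"
        + "TYPE 'bigtable'\n"
        + "LOCATION 'googleapis.com/bigtable/projects/my-project/instances/my-instance/tables/beamTable'";
cli.execute(ddl);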