Usage of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in the Apache Beam project: class BeamSqlCliTest, method testExecute_createTextTable.
@Test
public void testExecute_createTextTable() throws Exception {
  // Wire a metastore with the text table provider behind a fresh CLI.
  InMemoryMetaStore metaStore = new InMemoryMetaStore();
  metaStore.registerProvider(new TextTableProvider());
  BeamSqlCli cli = new BeamSqlCli().metaStore(metaStore);

  // Create a three-column text table via DDL.
  String createTableDdl =
      "CREATE EXTERNAL TABLE person (\n"
          + "id int COMMENT 'id', \n"
          + "name varchar COMMENT 'name', \n"
          + "age int COMMENT 'age') \n"
          + "TYPE 'text' \n"
          + "COMMENT '' LOCATION '/home/admin/orders'";
  cli.execute(createTableDdl);

  // The table must be registered and carry the declared nullable schema.
  Table table = metaStore.getTables().get("person");
  assertNotNull(table);
  assertEquals(
      Stream.of(
              Field.of("id", INTEGER).withDescription("id").withNullable(true),
              Field.of("name", VARCHAR).withDescription("name").withNullable(true),
              Field.of("age", INTEGER).withDescription("age").withNullable(true))
          .collect(toSchema()),
      table.getSchema());
}
Usage of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in the Apache Beam project: class BeamSqlCliTest, method testExecute_dropTable.
@Test
public void testExecute_dropTable() throws Exception {
  // Fresh metastore + CLI with the text provider registered.
  InMemoryMetaStore metaStore = new InMemoryMetaStore();
  metaStore.registerProvider(new TextTableProvider());
  BeamSqlCli cli = new BeamSqlCli().metaStore(metaStore);

  // Create the table first, so there is something to drop.
  cli.execute(
      "CREATE EXTERNAL TABLE person (\n"
          + "id int COMMENT 'id', \n"
          + "name varchar COMMENT 'name', \n"
          + "age int COMMENT 'age') \n"
          + "TYPE 'text' \n"
          + "COMMENT '' LOCATION '/home/admin/orders'");
  assertNotNull(metaStore.getTables().get("person"));

  // Dropping it must remove it from the metastore.
  cli.execute("drop table person");
  Table table = metaStore.getTables().get("person");
  assertNull(table);
}
Usage of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in the Apache Beam project: class BeamSqlExplainTest, method setUp.
@Before
public void setUp() throws SqlParseException, RelConversionException, ValidationException {
  // Shared fixtures: a metastore backed by the text provider, driven through the CLI.
  metaStore = new InMemoryMetaStore();
  metaStore.registerProvider(new TextTableProvider());
  cli = new BeamSqlCli().metaStore(metaStore);

  // "person": three-column table used by the EXPLAIN tests.
  String personDdl =
      "CREATE EXTERNAL TABLE person (\n"
          + "id int COMMENT 'id', \n"
          + "name varchar COMMENT 'name', \n"
          + "age int COMMENT 'age') \n"
          + "TYPE 'text' \n"
          + "COMMENT '' ";
  // "A" and "B": identical two-column tables for join-shaped plans.
  String tableADdl =
      "CREATE EXTERNAL TABLE A (\n"
          + "c1 int COMMENT 'c1',\n"
          + "c2 int COMMENT 'c2')\n"
          + "TYPE 'text'\n"
          + "COMMENT '' ";
  String tableBDdl =
      "CREATE EXTERNAL TABLE B (\n"
          + "c1 int COMMENT 'c1',\n"
          + "c2 int COMMENT 'c2')\n"
          + "TYPE 'text'\n"
          + "COMMENT '' ";

  cli.execute(personDdl);
  cli.execute(tableADdl);
  cli.execute(tableBDdl);
}
Usage of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in the Apache Beam project: class BeamSqlEnvRunner, method runUsingBeamSqlEnv.
/**
 * This is the alternative method in BeamTpcds.main method. Run job using BeamSqlEnv.parseQuery()
 * method. (Doesn't perform well when running query96).
 *
 * @param args Command line arguments
 * @throws Exception if parameter validation, query reading, or pipeline setup fails
 */
public static void runUsingBeamSqlEnv(String[] args) throws Exception {
  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(new TextTableProvider());
  TpcdsOptions tpcdsOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
  String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
  String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
  int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);
  // Using ExecutorService and CompletionService to fulfill multi-threading functionality
  ExecutorService executor = Executors.newFixedThreadPool(nThreads);
  CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);
  // Directly create all tables and register them into inMemoryMetaStore before creating
  // BeamSqlEnv object.
  registerAllTablesByInMemoryMetaStore(inMemoryMetaStore, dataSize);
  BeamSqlPipelineOptions beamSqlPipelineOptions = tpcdsOptions.as(BeamSqlPipelineOptions.class);
  BeamSqlEnv env = BeamSqlEnv.builder(inMemoryMetaStore).setPipelineOptions(beamSqlPipelineOptions).setQueryPlannerClassName(beamSqlPipelineOptions.getPlannerName()).build();
  // Make an array of pipelines, each pipeline is responsible for running a corresponding query.
  Pipeline[] pipelines = new Pipeline[queryNames.length];
  // Query results are written as txt files under RESULT_DIRECTORY (a GCP directory).
  for (int i = 0; i < queryNames.length; i++) {
    // For each query, get a copy of pipelineOptions from command line arguments, cast
    // tpcdsOptions as a DataflowPipelineOptions object to read and set required parameters for
    // pipeline execution.
    TpcdsOptions tpcdsOptionsCopy = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
    DataflowPipelineOptions dataflowPipelineOptionsCopy = tpcdsOptionsCopy.as(DataflowPipelineOptions.class);
    // Set a unique job name using the time stamp so that multiple different pipelines can run
    // together.
    dataflowPipelineOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());
    pipelines[i] = Pipeline.create(dataflowPipelineOptionsCopy);
    String queryString = QueryReader.readQuery(queryNames[i]);
    try {
      // Query execution
      PCollection<Row> rows = BeamSqlRelUtils.toPCollection(pipelines[i], env.parseQuery(queryString));
      // Transform the result from PCollection<Row> into PCollection<String>, and write it to the
      // location where results are stored.
      PCollection<String> rowStrings = rows.apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString));
      rowStrings.apply(TextIO.write().to(RESULT_DIRECTORY + "/" + dataSize + "/" + pipelines[i].getOptions().getJobName()).withSuffix(".txt").withNumShards(1));
    } catch (Exception e) {
      // Log the failure with the full stack trace (pass the throwable to SLF4J rather than
      // calling printStackTrace, which bypasses the logging configuration).
      LOG.error("{} failed to execute", queryNames[i], e);
    }
    // Submit the pipeline even if query construction failed: printExecutionSummary expects
    // exactly queryNames.length completions, and a failed pipeline runs empty and is reported.
    completion.submit(new TpcdsRun(pipelines[i]));
  }
  executor.shutdown();
  printExecutionSummary(completion, queryNames.length);
}
Usage of org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider in the Apache Beam project: class InMemoryMetaStoreTest, method setUp.
@Before
public void setUp() {
  // Build the store locally, register the text provider, then publish it to the field.
  InMemoryMetaStore metaStore = new InMemoryMetaStore();
  metaStore.registerProvider(new TextTableProvider());
  store = metaStore;
}
Aggregations