Example usage of org.apache.beam.sdk.values.Row in the Apache Beam project: class JdbcIOTest, method testWriteWithoutPsWithNonNullableTableField.
@Test
public void testWriteWithoutPsWithNonNullableTableField() throws Exception {
  final int rowsToAdd = 10;

  // Schema of the rows we attempt to write. It omits the table's NOT NULL
  // "column_int" field, so the write is expected to fail at run time.
  Schema.Builder schemaBuilder = Schema.builder();
  schemaBuilder.addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN));
  schemaBuilder.addField(Schema.Field.of("column_string", Schema.FieldType.STRING));
  Schema schema = schemaBuilder.build();

  // Create a table with a non-nullable column the row schema does not provide.
  String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE");
  StringBuilder stmt = new StringBuilder("CREATE TABLE ");
  stmt.append(tableName);
  stmt.append(" (");
  stmt.append("column_boolean BOOLEAN,");
  stmt.append("column_int INTEGER NOT NULL");
  stmt.append(" )");
  DatabaseTestHelper.createTableWithStatement(DATA_SOURCE, stmt.toString());
  try {
    ArrayList<Row> data = getRowsToWrite(rowsToAdd, schema);
    pipeline
        .apply(Create.of(data))
        .setRowSchema(schema)
        .apply(
            JdbcIO.<Row>write()
                .withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION)
                .withBatchSize(10L)
                .withTable(tableName));
    // Declare the expected failure BEFORE running the pipeline. The original
    // set this in the finally block, after the throwing call, which only
    // worked by accident of the ExpectedException rule's evaluation order.
    thrown.expect(RuntimeException.class);
    pipeline.run();
  } finally {
    // Always drop the test table, even when the run (expectedly) fails.
    DatabaseTestHelper.deleteTable(DATA_SOURCE, tableName);
  }
}
Example usage of org.apache.beam.sdk.values.Row in the Apache Beam project: class JdbcIOTest, method testReadRowsWithDataSourceConfiguration.
@Test
public void testReadRowsWithDataSourceConfiguration() {
  // Read rows via a parameterized query selecting a single seeded row by name.
  String query = String.format("select name,id from %s where name = ?", READ_TABLE_NAME);
  PCollection<Row> rows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION)
              .withQuery(query)
              .withStatementPreparator(
                  preparedStatement ->
                      preparedStatement.setString(1, TestRow.getNameForSeed(1))));

  // The schema inferred from the JDBC metadata: nullable VARCHAR(500) NAME and nullable INT32 ID.
  Schema.Field nameField =
      Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
          .withNullable(true);
  Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, idField);
  assertEquals(expectedSchema, rows.getSchema());

  // Selecting both fields should yield exactly the one matching row.
  PCollection<Row> output = rows.apply(Select.fieldNames("NAME", "ID"));
  Row expectedRow = Row.withSchema(expectedSchema).addValues("Testval1", 1).build();
  PAssert.that(output).containsInAnyOrder(ImmutableList.of(expectedRow));
  pipeline.run();
}
Example usage of org.apache.beam.sdk.values.Row in the Apache Beam project: class SqlBoundedSideInputJoin, method expand.
@Override
public PCollection<Bid> expand(PCollection<Event> events) {
  // Keep only the bid events and project them into Rows.
  PCollection<Row> bidRows =
      events
          .apply(Filter.by(NexmarkQueryUtil.IS_BID))
          .apply(getName() + ".SelectEvent", new SelectEvent(Event.Type.BID));

  checkState(getSideInput() != null, "Configuration error: side input is null");

  TupleTag<Row> sideTag = new TupleTag<Row>("side") {};
  TupleTag<Row> bidTag = new TupleTag<Row>("bid") {};

  // Adapt the KV<Long, String> side input into Rows of (id, extra).
  Schema sideSchema =
      Schema.of(
          Schema.Field.of("id", Schema.FieldType.INT64),
          Schema.Field.of("extra", Schema.FieldType.STRING));
  PCollection<Row> sideRows =
      getSideInput()
          .setSchema(
              sideSchema,
              TypeDescriptors.kvs(TypeDescriptors.longs(), TypeDescriptors.strings()),
              kv -> Row.withSchema(sideSchema).addValues(kv.getKey(), kv.getValue()).build(),
              row -> KV.of(row.getInt64("id"), row.getString("extra")))
          .apply("SideToRows", Convert.toRows());

  // Join the bids against the side input via the configured SQL query,
  // then convert the result Rows back into Bid objects.
  return PCollectionTuple.of(bidTag, bidRows)
      .and(sideTag, sideRows)
      .apply(
          SqlTransform.query(String.format(query, configuration.sideInputRowCount))
              .withQueryPlannerClass(plannerClass))
      .apply("ResultToBid", Convert.fromRows(Bid.class));
}
Example usage of org.apache.beam.sdk.values.Row in the Apache Beam project: class SqlTransformRunner, method runUsingSqlTransform.
/**
 * This is the default method in the BeamTpcds.main method. Runs each TPC-DS query as its own
 * pipeline using SqlTransform.query(), writing every query result as a text file under the
 * configured results directory.
 *
 * @param args Command line arguments
 * @throws Exception if options parsing or reading a query file fails
 */
public static void runUsingSqlTransform(String[] args) throws Exception {
  TpcdsOptions tpcdsOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
  String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
  String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
  int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);

  // Using ExecutorService and CompletionService to fulfill multi-threading functionality
  ExecutorService executor = Executors.newFixedThreadPool(nThreads);
  CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);

  // Make an array of pipelines, each pipeline is responsible for running a corresponding query.
  Pipeline[] pipelines = new Pipeline[queryNames.length];
  CSVFormat csvFormat = CSVFormat.MYSQL.withDelimiter('|').withNullString("");

  // For each query, build a pipeline that runs it and writes the result to
  // a txt file stored in the results (GCP) directory.
  for (int i = 0; i < queryNames.length; i++) {
    // For each query, get a copy of pipelineOptions from command line arguments.
    TpcdsOptions tpcdsOptionsCopy =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);

    // Set a unique job name using the time stamp so that multiple different pipelines can run
    // together.
    tpcdsOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());

    pipelines[i] = Pipeline.create(tpcdsOptionsCopy);
    String queryString = QueryReader.readQuery(queryNames[i]);
    PCollectionTuple tables = getTables(pipelines[i], csvFormat, queryNames[i]);
    try {
      tables
          .apply(SqlTransform.query(queryString))
          .apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString))
          .apply(
              TextIO.write()
                  .to(
                      tpcdsOptions.getResultsDirectory()
                          + "/"
                          + dataSize
                          + "/"
                          + pipelines[i].getOptions().getJobName())
                  .withSuffix(".txt")
                  .withNumShards(1));
    } catch (Exception e) {
      // Log with the full stack trace (instead of printStackTrace()) and keep
      // going so the remaining queries still get submitted.
      LOG.error("{} failed to execute", queryNames[i], e);
    }
    // NOTE(review): the pipeline is submitted even when applying the transforms above
    // failed, matching the original behavior; the run result reflects the failure.
    completion.submit(new TpcdsRun(pipelines[i]));
  }
  executor.shutdown();
  printExecutionSummary(completion, queryNames.length);
}
Example usage of org.apache.beam.sdk.values.Row in the Apache Beam project: class ProjectionPushdownOptimizerTest, method testBranchedProjectionPushdown.
@Test
public void testBranchedProjectionPushdown() {
  Pipeline p = Pipeline.create();

  // The source initially declares that it reads all three fields.
  FieldAccessDescriptor allFields = FieldAccessDescriptor.withFieldNames("foo", "bar", "baz");
  SimpleSourceWithPushdown originalSource = new SimpleSourceWithPushdown(allFields);
  PCollection<Row> input = p.apply(originalSource);

  // Two branches, each consuming a different single field.
  input.apply(new FieldAccessTransform(FieldAccessDescriptor.withFieldNames("foo")));
  input.apply(new FieldAccessTransform(FieldAccessDescriptor.withFieldNames("bar")));

  // After optimization the source should read only the union of the consumed fields.
  SimpleSourceWithPushdown expectedSource =
      new SimpleSourceWithPushdown(FieldAccessDescriptor.withFieldNames("foo", "bar"));

  ProjectionPushdownOptimizer.optimize(p);

  Assert.assertTrue(pipelineHasTransform(p, expectedSource));
  Assert.assertFalse(pipelineHasTransform(p, originalSource));
}
Aggregations — end of the collected usage examples for org.apache.beam.sdk.values.Row.