Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class BigQueryIOTest, method testWriteWithDynamicTables.
public void testWriteWithDynamicTables(boolean streaming) throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("defaultproject");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
  FakeDatasetService datasetService = new FakeDatasetService();
  datasetService.createDataset("project-id", "dataset-id", "", "");
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withDatasetService(datasetService)
          .withJobService(new FakeJobService());
  List<Integer> inserts = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    inserts.add(i);
  }
  // Create a windowing strategy that puts the input into five different windows depending on
  // record value.
  WindowFn<Integer, PartitionedGlobalWindow> windowFn =
      new PartitionedGlobalWindows(
          new SerializableFunction<Integer, String>() {
            @Override
            public String apply(Integer i) {
              return Integer.toString(i % 5);
            }
          });
  final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
  Map<String, String> schemas = Maps.newHashMap();
  for (int i = 0; i < 5; i++) {
    TableDestination destination =
        new TableDestination("project-id:dataset-id" + ".table-id-" + i, "");
    targetTables.put(i, destination);
    // Make sure each target table has its own custom schema.
    schemas.put(
        destination.getTableSpec(),
        BigQueryHelpers.toJsonString(
            new TableSchema()
                .setFields(
                    ImmutableList.of(
                        new TableFieldSchema().setName("name").setType("STRING"),
                        new TableFieldSchema().setName("number").setType("INTEGER"),
                        new TableFieldSchema().setName("custom_" + i).setType("STRING")))));
  }
  SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction =
      new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {
        @Override
        public TableDestination apply(ValueInSingleWindow<Integer> input) {
          PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
          // Check that we can access the element as well here and that it matches the window.
          checkArgument(
              window.value.equals(Integer.toString(input.getValue() % 5)), "Incorrect element");
          return targetTables.get(input.getValue() % 5);
        }
      };
  Pipeline p = TestPipeline.create(bqOptions);
  PCollection<Integer> input = p.apply("CreateSource", Create.of(inserts));
  if (streaming) {
    input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
  }
  PCollectionView<Map<String, String>> schemasView =
      p.apply("CreateSchemaMap", Create.of(schemas))
          .apply("ViewSchemaAsMap", View.<String, String>asMap());
  input
      .apply(Window.<Integer>into(windowFn))
      .apply(
          BigQueryIO.<Integer>write()
              .to(tableFunction)
              .withFormatFunction(
                  new SerializableFunction<Integer, TableRow>() {
                    @Override
                    public TableRow apply(Integer i) {
                      return new TableRow().set("name", "number" + i).set("number", i);
                    }
                  })
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withSchemaFromView(schemasView)
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
  for (int i = 0; i < 5; ++i) {
    String tableId = String.format("table-id-%d", i);
    String tableSpec = String.format("project-id:dataset-id.%s", tableId);
    // Verify that the table was created with the correct schema.
    assertThat(
        BigQueryHelpers.toJsonString(
            datasetService
                .getTable(
                    new TableReference()
                        .setProjectId("project-id")
                        .setDatasetId("dataset-id")
                        .setTableId(tableId))
                .getSchema()),
        equalTo(schemas.get(tableSpec)));
    // Verify that the table has the expected contents.
    assertThat(
        datasetService.getAllRows("project-id", "dataset-id", tableId),
        containsInAnyOrder(
            new TableRow().set("name", String.format("number%d", i)).set("number", i),
            new TableRow().set("name", String.format("number%d", i + 5)).set("number", i + 5)));
  }
}
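Because SerializableFunction is a single-method interface that extends Serializable, the anonymous classes above can also be written as lambdas. A minimal sketch of lambda equivalents for the two functions in this test (an illustrative rewrite, not part of the original source):

  // Lambda form of the format function passed to withFormatFunction(...).
  SerializableFunction<Integer, TableRow> formatFn =
      i -> new TableRow().set("name", "number" + i).set("number", i);
  // Lambda form of the dynamic-destination function passed to to(...),
  // omitting the extra window/element consistency check done in the test.
  SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFn =
      input -> targetTables.get(input.getValue() % 5);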
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class BigQueryIOTest, method testTransformingSource.
@Test
public void testTransformingSource() throws Exception {
  int numElements = 10000;
  @SuppressWarnings("deprecation")
  BoundedSource<Long> longSource = CountingSource.upTo(numElements);
  SerializableFunction<Long, String> toStringFn =
      new SerializableFunction<Long, String>() {
        @Override
        public String apply(Long input) {
          return input.toString();
        }
      };
  BoundedSource<String> stringSource =
      new TransformingSource<>(longSource, toStringFn, StringUtf8Coder.of());
  List<String> expected = Lists.newArrayList();
  for (int i = 0; i < numElements; i++) {
    expected.add(String.valueOf(i));
  }
  PipelineOptions options = PipelineOptionsFactory.create();
  Assert.assertThat(
      SourceTestUtils.readFromSource(stringSource, options), CoreMatchers.is(expected));
  SourceTestUtils.assertSplitAtFractionBehavior(
      stringSource, 100, 0.3, ExpectedSplitOutcome.MUST_SUCCEED_AND_BE_CONSISTENT, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(
      stringSource, stringSource.split(100, options), options);
}
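The TransformingSource wrapper applies toStringFn to every element read from the underlying BoundedSource. At the PCollection level the same Long-to-String conversion is usually expressed with MapElements; a hedged sketch, assuming a Pipeline p is available and using GenerateSequence in place of CountingSource:

  // Generate 0..numElements-1 and map each Long to its decimal String form.
  PCollection<String> strings =
      p.apply(GenerateSequence.from(0).to(numElements))
          .apply(MapElements.into(TypeDescriptors.strings()).via((Long x) -> Long.toString(x)));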
Use of org.apache.beam.sdk.transforms.SerializableFunction in project java-docs-samples by GoogleCloudPlatform.
From the class SpannerReadAll, method main.
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);
  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());
  // [START spanner_dataflow_readall]
  PCollection<Struct> allRecords =
      p.apply(SpannerIO.read()
              .withSpannerConfig(spannerConfig)
              .withQuery("SELECT t.table_name FROM information_schema.tables AS t"
                  + " WHERE t.table_catalog = '' AND t.table_schema = ''"))
          .apply(MapElements.into(TypeDescriptor.of(ReadOperation.class))
              .via((SerializableFunction<Struct, ReadOperation>) input -> {
                String tableName = input.getString(0);
                return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
              }))
          .apply(SpannerIO.readAll().withSpannerConfig(spannerConfig));
  // [END spanner_dataflow_readall]
  PCollection<Long> dbEstimatedSize =
      allRecords.apply(EstimateSize.create()).apply(Sum.longsGlobally());
  dbEstimatedSize
      .apply(ToString.elements())
      .apply(TextIO.write().to(options.getOutput()).withoutSharding());
  p.run().waitUntilFinish();
}
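Here the lambda is cast to SerializableFunction to give via(...) an explicit target type. An alternative sketch (hypothetical class name, not part of the sample) uses a SimpleFunction subclass, which also carries the output type so the into(...) hint can be dropped:

  // Hypothetical named equivalent of the cast lambda above.
  static class StructToReadOperationFn extends SimpleFunction<Struct, ReadOperation> {
    @Override
    public ReadOperation apply(Struct input) {
      String tableName = input.getString(0);
      return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
    }
  }
  // Usage: .apply(MapElements.via(new StructToReadOperationFn()))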
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class JdbcIOTest, method testWriteWithoutPreparedStatementWithReadRows.
@Test
public void testWriteWithoutPreparedStatementWithReadRows() throws Exception {
  SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
  PCollection<Row> rows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME))
              .withStatementPreparator(
                  preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
  String writeTableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS_WITH_READ_ROWS");
  DatabaseTestHelper.createTable(DATA_SOURCE, writeTableName);
  try {
    rows.apply(
        JdbcIO.<Row>write()
            .withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION)
            .withBatchSize(10L)
            .withTable(writeTableName));
    pipeline.run();
  } finally {
    DatabaseTestHelper.deleteTable(DATA_SOURCE, writeTableName);
  }
}
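The provider function above simply returns the pre-built DATA_SOURCE, but since withDataSourceProviderFn takes a SerializableFunction<Void, DataSource>, the DataSource can also be constructed lazily inside the function, which helps when the DataSource itself is not serializable. A minimal sketch; the driver class and JDBC URL are placeholders, not values from the test:

  SerializableFunction<Void, DataSource> lazyProvider =
      ignored -> {
        // Built on demand on each worker (placeholder in-memory Derby settings).
        org.apache.commons.dbcp2.BasicDataSource ds = new org.apache.commons.dbcp2.BasicDataSource();
        ds.setDriverClassName("org.apache.derby.jdbc.EmbeddedDriver");
        ds.setUrl("jdbc:derby:memory:testDB;create=true");
        return ds;
      };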
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class JdbcIOTest, method testReadWithSchema.
@Test
public void testReadWithSchema() {
  SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
  JdbcIO.RowMapper<RowWithSchema> rowMapper =
      rs -> new RowWithSchema(rs.getString("NAME"), rs.getInt("ID"));
  pipeline.getSchemaRegistry().registerJavaBean(RowWithSchema.class);
  PCollection<RowWithSchema> rows =
      pipeline.apply(
          JdbcIO.<RowWithSchema>read()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME))
              .withRowMapper(rowMapper)
              .withCoder(SerializableCoder.of(RowWithSchema.class))
              .withStatementPreparator(
                  preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
  Schema expectedSchema =
      Schema.of(Schema.Field.of("name", Schema.FieldType.STRING), Schema.Field.of("id", Schema.FieldType.INT32));
  assertEquals(expectedSchema, rows.getSchema());
  PCollection<Row> output = rows.apply(Select.fieldNames("name", "id"));
  PAssert.that(output)
      .containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues("Testval1", 1).build()));
  pipeline.run();
}
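For comparison, JdbcIO.readRows() (used in the previous example) infers a Beam schema from the JDBC result-set metadata, so the RowMapper, coder, and bean registration can be skipped when generic Row elements are acceptable. A hedged sketch reusing dataSourceProvider and READ_TABLE_NAME from this test:

  // Schema is inferred from the query's result-set metadata; beamRows.getSchema() is set automatically.
  PCollection<Row> beamRows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(String.format("select name,id from %s", READ_TABLE_NAME)));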