Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
The class BigQueryIOWriteTest, method testWriteWithDynamicTables.
@Test
public void testWriteWithDynamicTables() throws Exception {
  List<Integer> inserts = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    inserts.add(i);
  }
  // Create a windowing strategy that puts the input into five different windows depending on
  // record value.
  WindowFn<Integer, PartitionedGlobalWindow> windowFn =
      new PartitionedGlobalWindows<>(i -> Integer.toString(i % 5));
  final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
  Map<String, String> schemas = Maps.newHashMap();
  for (int i = 0; i < 5; i++) {
    TableDestination destination =
        new TableDestination("project-id:dataset-id" + ".table-id-" + i, "");
    targetTables.put(i, destination);
    // Make sure each target table has its own custom schema.
    schemas.put(
        destination.getTableSpec(),
        toJsonString(
            new TableSchema()
                .setFields(
                    ImmutableList.of(
                        new TableFieldSchema().setName("name").setType("STRING"),
                        new TableFieldSchema().setName("number").setType("INTEGER"),
                        new TableFieldSchema().setName("custom_" + i).setType("STRING")))));
  }
  SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction =
      input -> {
        PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
        // Check that we can access the element as well here and that it matches the window.
        checkArgument(
            window.value.equals(Integer.toString(input.getValue() % 5)), "Incorrect element");
        return targetTables.get(input.getValue() % 5);
      };
  PCollection<Integer> input = p.apply("CreateSource", Create.of(inserts));
  if (useStreaming) {
    input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
  }
  PCollectionView<Map<String, String>> schemasView =
      p.apply("CreateSchemaMap", Create.of(schemas)).apply("ViewSchemaAsMap", View.asMap());
  input
      .apply(Window.into(windowFn))
      .apply(
          BigQueryIO.<Integer>write()
              .to(tableFunction)
              .withFormatFunction(
                  i -> new TableRow().set("name", "number" + i).set("number", Integer.toString(i)))
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchemaFromView(schemasView)
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
  for (int i = 0; i < 5; ++i) {
    String tableId = String.format("table-id-%d", i);
    String tableSpec = String.format("project-id:dataset-id.%s", tableId);
    // Verify that the table was created with the correct schema.
    assertThat(
        toJsonString(
            fakeDatasetService
                .getTable(
                    new TableReference()
                        .setProjectId("project-id")
                        .setDatasetId("dataset-id")
                        .setTableId(tableId))
                .getSchema()),
        equalTo(schemas.get(tableSpec)));
    // Verify that the table has the expected contents.
    assertThat(
        fakeDatasetService.getAllRows("project-id", "dataset-id", tableId),
        containsInAnyOrder(
            new TableRow()
                .set("name", String.format("number%d", i))
                .set("number", Integer.toString(i)),
            new TableRow()
                .set("name", String.format("number%d", i + 5))
                .set("number", Integer.toString(i + 5))));
  }
}
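The test above depends on two helpers defined elsewhere in BigQueryIOWriteTest: PartitionedGlobalWindow, a BoundedWindow carrying a partition string (the table function reads window.value), and PartitionedGlobalWindows, a WindowFn that routes each element by an extraction function. Below is a minimal sketch of how such helpers can be written; the upstream definitions differ in details, notably the window coder, which is omitted here.

// Sketch of the windowing helpers used above, assuming imports from
// org.apache.beam.sdk.transforms.windowing, org.apache.beam.sdk.coders,
// org.joda.time, and java.util. The real test also defines a window coder.
static class PartitionedGlobalWindow extends BoundedWindow {
  final String value;

  PartitionedGlobalWindow(String value) {
    this.value = value;
  }

  @Override
  public Instant maxTimestamp() {
    // Every partition shares the global window's maximum timestamp.
    return GlobalWindow.INSTANCE.maxTimestamp();
  }

  @Override
  public boolean equals(Object other) {
    return other instanceof PartitionedGlobalWindow
        && ((PartitionedGlobalWindow) other).value.equals(value);
  }

  @Override
  public int hashCode() {
    return value.hashCode();
  }
}

static class PartitionedGlobalWindows<T> extends NonMergingWindowFn<T, PartitionedGlobalWindow> {
  private final SerializableFunction<T, String> extractPartition;

  PartitionedGlobalWindows(SerializableFunction<T, String> extractPartition) {
    this.extractPartition = extractPartition;
  }

  @Override
  public Collection<PartitionedGlobalWindow> assignWindows(AssignContext c) {
    // Route each element to a window keyed by its extracted partition string.
    return Collections.singletonList(
        new PartitionedGlobalWindow(extractPartition.apply(c.element())));
  }

  @Override
  public boolean isCompatible(WindowFn<?, ?> other) {
    return other instanceof PartitionedGlobalWindows;
  }

  @Override
  public Coder<PartitionedGlobalWindow> windowCoder() {
    throw new UnsupportedOperationException("coder omitted in this sketch");
  }

  @Override
  public WindowMappingFn<PartitionedGlobalWindow> getDefaultWindowMappingFn() {
    throw new UnsupportedOperationException("side inputs not supported in this sketch");
  }
}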
Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
The class BigQueryIOWriteTest, method testFailuresNoRetryPolicy.
@Test
public void testFailuresNoRetryPolicy() throws Exception {
  if (useStorageApi || !useStreaming) {
    return;
  }
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");
  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError)));
  p.apply(Create.of(row1, row2, row3))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
}
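For contrast, BigQueryIO.Write also accepts an explicit policy via withFailedInsertRetryPolicy. A hedged sketch, not part of this test, that retries only transient errors and captures permanently failed rows:

// Sketch only: attach an explicit retry policy and collect failed inserts.
// InsertRetryPolicy.retryTransientErrors() retries reasons like "timeout";
// rows whose errors persist are emitted on the failed-inserts output.
WriteResult result =
    p.apply(Create.of(row1, row2, row3))
        .apply(
            BigQueryIO.writeTableRows()
                .to("project-id:dataset-id.table-id")
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                .withSchema(
                    new TableSchema()
                        .setFields(
                            ImmutableList.of(
                                new TableFieldSchema().setName("name").setType("STRING"),
                                new TableFieldSchema().setName("number").setType("INTEGER"))))
                .withTestServices(fakeBqServices)
                .withoutValidation());
// Failed rows can be dead-lettered instead of failing the pipeline.
PCollection<TableRow> failedRows = result.getFailedInserts();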
Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
The class BigQueryIOWriteTest, method testWriteWithSuccessfulBatchInsertsAndWriteRename.
@Test
public void testWriteWithSuccessfulBatchInsertsAndWriteRename() throws Exception {
  if (useStreaming || useStorageApi) {
    return;
  }
  WriteResult result =
      p.apply(
              Create.of(
                      new TableRow().set("name", "a").set("number", 1),
                      new TableRow().set("name", "b").set("number", 2),
                      new TableRow().set("name", "c").set("number", 3))
                  .withCoder(TableRowJsonCoder.of()))
          .apply(
              BigQueryIO.writeTableRows()
                  .to("dataset-id.table-id")
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withMaxFileSize(1)
                  .withMaxFilesPerPartition(1)
                  .withTestServices(fakeBqServices)
                  .withoutValidation());
  PAssert.that(result.getSuccessfulTableLoads())
      .containsInAnyOrder(new TableDestination("project-id:dataset-id.table-id", null));
  p.run();
}
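getSuccessfulTableLoads emits one TableDestination per completed load, including loads finished through the temp-table write-rename path that the tiny file-size limits above force. Downstream stages can react to it; a small illustrative sketch, applied before p.run(), with an invented transform name:

// Sketch: react to each completed load downstream (apply before p.run()).
result
    .getSuccessfulTableLoads()
    .apply(
        "FormatLoadedTables", // hypothetical transform name
        MapElements.into(TypeDescriptors.strings())
            .via(destination -> "loaded: " + destination.getTableSpec()));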
Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
The class BigQueryIOWriteTest, method testWriteEmptyPCollection.
@Test
public void testWriteEmptyPCollection() throws Exception {
  if (useStreaming || useStorageApi) {
    return;
  }
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withTestServices(fakeBqServices)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(schema)
              .withoutValidation());
  p.run();
  checkNotNull(
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id")));
}
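The final assertion leans on BigQueryHelpers.parseTableSpec, which splits a "project:dataset.table" string into a TableReference; a minimal illustration:

// parseTableSpec("project-id:dataset-id.table-id") yields a TableReference
// whose three components can be read back individually:
TableReference ref = BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id");
checkArgument("project-id".equals(ref.getProjectId()));
checkArgument("dataset-id".equals(ref.getDatasetId()));
checkArgument("table-id".equals(ref.getTableId()));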
Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
The class BigQueryIOWriteTest, method testWriteWithoutInsertId.
@Test
public void testWriteWithoutInsertId() throws Exception {
  if (useStorageApi || !useStreaming) {
    return;
  }
  TableRow row1 = new TableRow().set("name", "a").set("number", 1);
  TableRow row2 = new TableRow().set("name", "b").set("number", 2);
  TableRow row3 = new TableRow().set("name", "c").set("number", 3);
  p.apply(Create.of(row1, row2, row3).withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .ignoreInsertIds()
              .withoutValidation());
  p.run();
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
  // Verify no insert id is added (containsInAnyOrder with no arguments matches empty).
  assertThat(
      fakeDatasetService.getAllIds("project-id", "dataset-id", "table-id"),
      containsInAnyOrder());
}
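Without ignoreInsertIds(), the streaming sink attaches a unique insert id to every row so BigQuery can deduplicate retried inserts on a best-effort basis. A hedged contrast sketch using the same fixtures, a hypothetical second table name, and org.hamcrest.Matchers.hasSize:

// Sketch: the default path, which should record one insert id per row.
// "table-id-2" is a hypothetical destination, not part of the original test.
p.apply(Create.of(row1, row2, row3).withCoder(TableRowJsonCoder.of()))
    .apply(
        BigQueryIO.writeTableRows()
            .to("project-id:dataset-id.table-id-2")
            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
            .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
            .withSchema(
                new TableSchema()
                    .setFields(
                        ImmutableList.of(
                            new TableFieldSchema().setName("name").setType("STRING"),
                            new TableFieldSchema().setName("number").setType("INTEGER"))))
            .withTestServices(fakeBqServices)
            .withoutValidation());
p.run();
// Expect one insert id per row written.
assertThat(fakeDatasetService.getAllIds("project-id", "dataset-id", "table-id-2"), hasSize(3));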