Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
In class BigQueryIOWriteTest, method testClusteringTableFunction:
@Test
public void testClusteringTableFunction() throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");
  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  // withMethod overrides the pipeline option, so we need to explicitly request
  // STORAGE_API_WRITES.
  BigQueryIO.Write.Method method =
      useStorageApi
          ? (useStorageApiApproximate
              ? Method.STORAGE_API_AT_LEAST_ONCE
              : Method.STORAGE_WRITE_API)
          : Method.FILE_LOADS;
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to(
                  (ValueInSingleWindow<TableRow> vsw) -> {
                    String tableSpec =
                        "project-id:dataset-id.table-" + vsw.getValue().get("number");
                    return new TableDestination(
                        tableSpec,
                        null,
                        new TimePartitioning().setType("DAY").setField("date"),
                        new Clustering().setFields(ImmutableList.of("date")));
                  })
              .withTestServices(fakeBqServices)
              .withMethod(method)
              .withSchema(schema)
              .withClustering()
              .withoutValidation());
  p.run();
  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-1"));
  assertEquals(schema, table.getSchema());
  assertEquals(timePartitioning, table.getTimePartitioning());
  assertEquals(clustering, table.getClustering());
}
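When every element goes to a single, fixed table rather than a per-element destination, the same partitioning and clustering can be passed directly on the write instead of through a table function. The sketch below is an illustration under assumptions, not part of the test above: it assumes an input PCollection<TableRow> named rows, a schema variable, and a hypothetical table spec.

// A minimal sketch, assuming `rows` (PCollection<TableRow>) and `schema` are defined elsewhere.
// The table spec "project-id:dataset-id.clustered-table" is a placeholder for illustration.
rows.apply(
    BigQueryIO.writeTableRows()
        .to("project-id:dataset-id.clustered-table")
        .withSchema(schema)
        // Partitioning and clustering specs are attached directly to the write
        // because the destination is fixed rather than computed per element.
        .withTimePartitioning(new TimePartitioning().setType("DAY").setField("date"))
        .withClustering(new Clustering().setFields(ImmutableList.of("date")))
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));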
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
In class BigQueryIOWriteTest, method testTriggeredFileLoads:
@Test
public void testTriggeredFileLoads() throws Exception {
  if (useStorageApi || !useStreaming) {
    return;
  }
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }
  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(
              elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();
  BigQueryIO.Write.Method method = Method.FILE_LOADS;
  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMethod(method)
              .withoutValidation());
  p.run();
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
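With unbounded input, FILE_LOADS needs a triggering frequency so loads are issued periodically, and the number of file shards can either be fixed with withNumFileShards (as in the test above) or left to the runner. The sketch below is an illustrative alternative under assumptions, not part of the test: it assumes an unbounded PCollection<TableRow> named rows and a schema variable defined elsewhere.

// A minimal sketch, assuming `rows` is an unbounded PCollection<TableRow> and `schema` exists.
rows.apply(
    BigQueryIO.writeTableRows()
        .to("project-id:dataset-id.table-id")
        .withSchema(schema)
        .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
        // Batch loads are triggered periodically on the stream.
        .withTriggeringFrequency(Duration.standardMinutes(5))
        // Let the runner pick the shard count instead of fixing it with withNumFileShards.
        .withAutoSharding());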
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
In class BigQueryIOWriteTest, method testSchemaWriteLoads:
@Test
public void testSchemaWriteLoads() throws Exception {
  // withMethod overrides the pipeline option, so we need to explicitly request
  // STORAGE_API_WRITES.
  BigQueryIO.Write.Method method =
      useStorageApi
          ? (useStorageApiApproximate
              ? Method.STORAGE_API_AT_LEAST_ONCE
              : Method.STORAGE_WRITE_API)
          : Method.FILE_LOADS;
  p.apply(
          Create.of(
              new SchemaPojo("a", 1),
              new SchemaPojo("b", 2),
              new SchemaPojo("c", 3),
              new SchemaPojo("d", 4)))
      .apply(
          BigQueryIO.<SchemaPojo>write()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(method)
              .useBeamSchema()
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
  System.err.println(
      "Wrote: " + fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"));
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4")));
}
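useBeamSchema() derives the BigQuery table schema from the element type's registered Beam schema, so no explicit TableSchema is needed. The class below is a hypothetical sketch of how a POJO like SchemaPojo could be declared with Beam's schema annotations; the test's actual SchemaPojo is defined elsewhere in BigQueryIOWriteTest and may differ.

// A minimal sketch of a schema-annotated POJO; this exact definition is an assumption,
// not the test's source.
@DefaultSchema(JavaFieldSchema.class)
static class SchemaPojo {
  public final String name;
  public final int number;

  @SchemaCreate
  SchemaPojo(String name, int number) {
    this.name = name;
    this.number = number;
  }
}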