Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
From the class BigQueryIOWriteTest, method testWriteToTableDecorator:
@Test
public void testWriteToTableDecorator() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  // withMethod overrides the pipeline option, so we need to explicitly request
  // STORAGE_API_WRITES.
  BigQueryIO.Write.Method method =
      useStorageApi
          ? (useStorageApiApproximate
              ? Method.STORAGE_API_AT_LEAST_ONCE
              : Method.STORAGE_WRITE_API)
          : Method.STREAMING_INSERTS;
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id$20171127")
              .withTestServices(fakeBqServices)
              .withMethod(method)
              .withSchema(schema)
              .withoutValidation());
  p.run();
}
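The $20171127 suffix on the table spec is a BigQuery partition decorator: the write lands in the daily partition for 2017-11-27 rather than appending to the whole table. Below is a minimal sketch of the same pattern against a real table, with withTestServices/withoutValidation dropped (they only make sense with the fake services) and with placeholder project, dataset, and table names:

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class PartitionDecoratorWrite {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    TableSchema schema =
        new TableSchema()
            .setFields(
                Arrays.asList(
                    new TableFieldSchema().setName("name").setType("STRING"),
                    new TableFieldSchema().setName("number").setType("INTEGER")));
    p.apply(
            Create.of(new TableRow().set("name", "a").set("number", "1"))
                .withCoder(TableRowJsonCoder.of()))
        // The $20171127 decorator routes the rows into that day's partition.
        .apply(
            BigQueryIO.writeTableRows()
                .to("my-project:my_dataset.my_table$20171127")
                .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                .withSchema(schema));
    p.run().waitUntilFinish();
  }
}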
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
From the class BigQueryIOWriteTest, method testTimePartitioning:
@Test
public void testTimePartitioning() throws Exception {
  BigQueryIO.Write.Method method;
  if (useStorageApi) {
    method =
        useStorageApiApproximate ? Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API;
  } else if (useStreaming) {
    method = Method.STREAMING_INSERTS;
  } else {
    method = Method.FILE_LOADS;
  }
  testTimePartitioning(method);
}
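The one-argument testTimePartitioning(Method) overload it dispatches to is not part of this listing; the sink feature it exercises is BigQueryIO.Write.withTimePartitioning. A minimal sketch of that API, assuming placeholder table names and an illustrative 90-day partition expiration:

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.bigquery.model.TimePartitioning;
import java.util.Arrays;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.WriteResult;
import org.apache.beam.sdk.values.PCollection;

class TimePartitionedWrite {
  // Attaches day-based time partitioning to the sink; the expiration value is
  // illustrative, and project/dataset/table names are placeholders.
  static WriteResult write(PCollection<TableRow> rows, BigQueryIO.Write.Method method) {
    return rows.apply(
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.partitioned_table")
            .withSchema(
                new TableSchema()
                    .setFields(
                        Arrays.asList(
                            new TableFieldSchema().setName("number").setType("INTEGER"))))
            .withTimePartitioning(
                new TimePartitioning().setType("DAY").setExpirationMs(90L * 24 * 3600 * 1000))
            .withMethod(method));
  }
}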
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
From the class BigQueryIOWriteTest, method testTriggeredFileLoadsWithTempTablesAndDataset:
@Test
public void testTriggeredFileLoadsWithTempTablesAndDataset() throws Exception {
  String tableRef = "bigquery-project-id:dataset-id.table-id";
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }
  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();
  BigQueryIO.Write.Method method = Method.FILE_LOADS;
  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to(tableRef)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMaxBytesPerPartition(1)
              .withMaxFilesPerPartition(1)
              .withMethod(method)
              .withoutValidation()
              .withWriteTempDataset("temp-dataset-id"));
  p.run();
  final int projectIdSplitter = tableRef.indexOf(':');
  final String projectId =
      projectIdSplitter == -1 ? "project-id" : tableRef.substring(0, projectIdSplitter);
  assertThat(
      fakeDatasetService.getAllRows(projectId, "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
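Streaming FILE_LOADS only works with an explicit triggering frequency, and withNumFileShards bounds how many files each trigger pane produces; the one-byte/one-file partition limits above exist purely to force the multi-partition code path that stages temp tables, which withWriteTempDataset redirects into "temp-dataset-id". A hedged sketch of the same configuration outside the test harness (all names are placeholders):

import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.WriteResult;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

class TriggeredFileLoads {
  // On an unbounded input, FILE_LOADS batches rows into files and issues a
  // load job per trigger pane; the shard count caps files per pane.
  static WriteResult write(PCollection<TableRow> rows, TableSchema schema) {
    return rows.apply(
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.events")
            .withSchema(schema)
            .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
            .withTriggeringFrequency(Duration.standardMinutes(5))
            .withNumFileShards(10)
            // Stage intermediate temp tables in a dedicated dataset instead
            // of the destination dataset.
            .withWriteTempDataset("my_temp_dataset"));
  }
}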
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
From the class BigQueryIOWriteTest, helper method testTriggeredFileLoadsWithTempTables:
public void testTriggeredFileLoadsWithTempTables(String tableRef) throws Exception {
  // Helper invoked with a concrete table spec; the guard makes it a no-op
  // except for streaming runs that do not use the Storage Write API.
  if (useStorageApi || !useStreaming) {
    return;
  }
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }
  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();
  BigQueryIO.Write.Method method = Method.FILE_LOADS;
  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to(tableRef)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMaxBytesPerPartition(1)
              .withMaxFilesPerPartition(1)
              .withMethod(method)
              .withoutValidation());
  p.run();
  final int projectIdSplitter = tableRef.indexOf(':');
  final String projectId =
      projectIdSplitter == -1 ? "project-id" : tableRef.substring(0, projectIdSplitter);
  assertThat(
      fakeDatasetService.getAllRows(projectId, "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
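The indexOf(':') split at the end of these two tests recovers only the project id. When the full reference is needed, BigQueryHelpers.parseTableSpec (used in testCreateNever below) already handles both the project-id:dataset-id.table-id and dataset-id.table-id forms. A small sketch with the same "fall back to a default project" behavior; the helper name and default are hypothetical:

import com.google.api.services.bigquery.model.TableReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;

class TableSpecParsing {
  // Resolves a table spec to (project, dataset, table), substituting a default
  // project when the spec omits one, as the tests above do with "project-id".
  static TableReference resolve(String tableSpec, String defaultProject) {
    TableReference ref = BigQueryHelpers.parseTableSpec(tableSpec);
    if (ref.getProjectId() == null) {
      ref.setProjectId(defaultProject);
    }
    return ref;
  }
}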
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.
From the class BigQueryIOWriteTest, method testCreateNever:
@Test
public void testCreateNever() throws Exception {
  BigQueryIO.Write.Method method =
      useStreaming
          ? (useStorageApi
              ? (useStorageApiApproximate
                  ? Method.STORAGE_API_AT_LEAST_ONCE
                  : Method.STORAGE_WRITE_API)
              : Method.STREAMING_INSERTS)
          : useStorageApi ? Method.STORAGE_WRITE_API : Method.FILE_LOADS;
  p.enableAbandonedNodeEnforcement(false);
  TableReference tableRef = BigQueryHelpers.parseTableSpec("project-id:dataset-id.table");
  TableSchema tableSchema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  fakeDatasetService.createTable(new Table().setTableReference(tableRef).setSchema(tableSchema));
  PCollection<TableRow> tableRows =
      p.apply(GenerateSequence.from(0).to(10))
          .apply(
              MapElements.via(
                  new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                      return new TableRow().set("name", "name " + input).set("number", input);
                    }
                  }))
          .setCoder(TableRowJsonCoder.of());
  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withMethod(method)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withTestServices(fakeBqServices)
          .withoutValidation());
  p.run();
}
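CREATE_NEVER requires the destination table to exist before the pipeline runs, which is why the test pre-creates it through fakeDatasetService, and it does not require a schema on the sink. A sketch contrasting the two create dispositions, with placeholder table names:

import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.WriteResult;
import org.apache.beam.sdk.values.PCollection;

class CreateDispositions {
  // CREATE_NEVER: the table must already exist, so no schema is needed on the
  // sink; the write fails at runtime if the table is missing.
  static WriteResult writeToExistingTable(PCollection<TableRow> rows) {
    return rows.apply(
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.existing_table")
            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER));
  }

  // CREATE_IF_NEEDED (the default): the table is created on first write, so a
  // schema must be supplied.
  static WriteResult writeCreatingTable(PCollection<TableRow> rows, TableSchema schema) {
    return rows.apply(
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.new_table")
            .withSchema(schema)
            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));
  }
}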