Usage of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO in the Apache Beam project.
Class BigQueryIOIT, method testAvroWrite.
/**
 * Runs {@code testWrite} with a {@link BigQueryIO.Write} whose Avro format function turns each
 * {@code byte[]} element into a {@link GenericRecord} carrying the bytes in its {@code "data"}
 * field.
 *
 * <p>The write is configured with {@code WRITE_TRUNCATE}, and
 * {@code AVRO_WRITE_TIME_METRIC_NAME} is handed to {@code testWrite} alongside it.
 */
private void testAvroWrite() {
  BigQueryIO.Write<byte[]> avroWrite =
      BigQueryIO.<byte[]>write()
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
          .withAvroFormatFunction(
              request -> {
                byte[] payload = request.getElement();
                // The record is built against the schema supplied with the write request.
                GenericRecord avroRecord = new GenericData.Record(request.getSchema());
                // The raw bytes are wrapped in a ByteBuffer before being stored in the record.
                avroRecord.put("data", ByteBuffer.wrap(payload));
                return avroRecord;
              });
  testWrite(avroWrite, AVRO_WRITE_TIME_METRIC_NAME);
}
Usage of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO in the Apache Beam project.
Class BigQueryIOIT, method testJsonWrite.
/**
 * Runs {@code testWrite} with a {@link BigQueryIO.Write} whose format function places each
 * {@code byte[]} element into the {@code "data"} field of a new {@link TableRow}.
 *
 * <p>{@code WRITE_TIME_METRIC_NAME} is handed to {@code testWrite} alongside the write.
 */
private void testJsonWrite() {
  BigQueryIO.Write<byte[]> jsonWrite =
      BigQueryIO.<byte[]>write()
          .withFormatFunction(payload -> new TableRow().set("data", payload));
  testWrite(jsonWrite, WRITE_TIME_METRIC_NAME);
}
Usage of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO in the Apache Beam project.
Class BigQueryHllSketchCompatibilityIT, method writeSketchToBigQuery.
/**
 * Builds an HLL sketch from {@code testData} in a pipeline, writes it to the BigQuery sketch
 * table with {@code WRITE_TRUNCATE}, and then asserts that the result of an
 * {@code HLL_COUNT.EXTRACT} query over that table has the expected checksum.
 *
 * @param testData strings fed to {@code HllCount.Init.forStrings().globally()}
 * @param expectedChecksum checksum the query result is expected to have
 */
private void writeSketchToBigQuery(List<String> testData, String expectedChecksum) {
  String tableSpec = String.format("%s.%s", DATASET_ID, SKETCH_TABLE_ID);

  // Single-column schema holding the sketch.
  TableFieldSchema sketchField =
      new TableFieldSchema().setName(SKETCH_FIELD_NAME).setType(SKETCH_FIELD_TYPE);
  TableSchema tableSchema = new TableSchema().setFields(Collections.singletonList(sketchField));

  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  // A zero-length sketch is written as a null field value; any other sketch is written as-is.
  // The nullness suppression stays until we have a stub class for BigQuery TableRow.
  @SuppressWarnings("nullness")
  SerializableFunction<byte[], TableRow> formatFn =
      sketch -> new TableRow().set(SKETCH_FIELD_NAME, sketch.length > 0 ? sketch : null);

  BigQueryIO.Write<byte[]> sketchWrite =
      BigQueryIO.<byte[]>write()
          .to(tableSpec)
          .withSchema(tableSchema)
          .withFormatFunction(formatFn)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE);

  p.apply(Create.of(testData).withType(TypeDescriptor.of(String.class)))
      .apply(HllCount.Init.forStrings().globally())
      .apply(sketchWrite);
  p.run().waitUntilFinish();

  // BigqueryMatcher will send this query to retrieve the estimated count and verify its
  // correctness using the checksum.
  String query =
      String.format("SELECT HLL_COUNT.EXTRACT(%s) FROM %s", SKETCH_FIELD_NAME, tableSpec);
  assertThat(
      createQueryUsingStandardSql(APP_NAME, PROJECT_ID, query),
      queryResultHasChecksum(expectedChecksum));
}
Aggregations