Use of org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord in the Apache Beam project.
From the class BigQueryHllSketchCompatibilityIT, method readSketchFromBigQuery.
/**
 * Builds an HLL sketch in BigQuery via {@code HLL_COUNT.INIT}, reads it back through
 * {@link BigQueryIO}, and asserts that the count extracted from the sketch matches
 * {@code expectedCount}.
 *
 * @param tableId id of the table inside {@code DATASET_ID} to query
 * @param expectedCount value the single extracted count is asserted to equal
 */
private void readSketchFromBigQuery(String tableId, Long expectedCount) {
  String qualifiedTable = String.format("%s.%s", DATASET_ID, tableId);
  String sketchQuery =
      String.format(
          "SELECT HLL_COUNT.INIT(%s) AS %s FROM %s",
          DATA_FIELD_NAME, QUERY_RESULT_FIELD_NAME, qualifiedTable);

  // Pulls the sketch column out of each result record; the value is cast to a ByteBuffer
  // before being handed to HllCount for conversion to byte[].
  SerializableFunction<SchemaAndRecord, byte[]> toSketchBytes =
      row -> {
        ByteBuffer rawSketch = (ByteBuffer) row.getRecord().get(QUERY_RESULT_FIELD_NAME);
        return HllCount.getSketchFromByteBuffer(rawSketch);
      };

  TestPipelineOptions pipelineOptions =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  PCollection<byte[]> sketches =
      pipeline.apply(
          BigQueryIO.read(toSketchBytes)
              .withFormat(DataFormat.AVRO)
              .fromQuery(sketchQuery)
              .usingStandardSql()
              .withMethod(Method.DIRECT_READ)
              .withCoder(ByteArrayCoder.of()));

  // no-op, only for testing MergePartial
  PCollection<byte[]> mergedSketches = sketches.apply(HllCount.MergePartial.globally());
  PCollection<Long> extractedCount = mergedSketches.apply(HllCount.Extract.globally());

  PAssert.thatSingleton(extractedCount).isEqualTo(expectedCount);
  pipeline.run().waitUntilFinish();
}
Aggregations