use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryIOReadTest method checkSetsProject.
/**
 * Creates and populates a fake table under {@code projectId}, then reads it back through a
 * {@link TableReference} that names only the dataset and table, and asserts that all six
 * inserted rows are produced as (name, number) pairs.
 *
 * @param projectId project id used when creating the fake dataset and table
 * @throws Exception propagated unchanged from the setup calls
 */
private void checkSetsProject(String projectId) throws Exception {
  final String datasetId = "dataset-id";
  final String tableId = "sometable";

  fakeDatasetService.createDataset(projectId, datasetId, "", "", null);

  // Fully qualified reference, used to create and populate the table.
  TableReference qualifiedRef =
      new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  fakeDatasetService.createTable(new Table().setTableReference(qualifiedRef).setSchema(schema));

  FakeBigQueryServices services =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(fakeDatasetService);

  List<TableRow> rows =
      ImmutableList.of(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L),
          new TableRow().set("name", "d").set("number", 4L),
          new TableRow().set("name", "e").set("number", 5L),
          new TableRow().set("name", "f").set("number", 6L));
  fakeDatasetService.insertAll(qualifiedRef, rows, null);

  // No project id is set on the reference handed to the read.
  TableReference unqualifiedRef = new TableReference().setDatasetId(datasetId).setTableId(tableId);

  PCollection<TableRow> readRows =
      p.apply(BigQueryIO.read().from(unqualifiedRef).withTestServices(services));
  PCollection<KV<String, Long>> output =
      readRows.apply(
          ParDo.of(
              new DoFn<TableRow, KV<String, Long>>() {
                @ProcessElement
                public void processElement(ProcessContext context) throws Exception {
                  TableRow row = context.element();
                  String name = (String) row.get("name");
                  // The "number" value is cast to String and parsed, so it is expected to
                  // arrive as text rather than as a Long.
                  Long number = Long.valueOf((String) row.get("number"));
                  context.output(KV.of(name, number));
                }
              }));

  PAssert.that(output)
      .containsInAnyOrder(
          ImmutableList.of(
              KV.of("a", 1L),
              KV.of("b", 2L),
              KV.of("c", 3L),
              KV.of("d", 4L),
              KV.of("e", 5L),
              KV.of("f", 6L)));
  p.run();
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryIOReadTest method testReadTableWithSchema.
/**
 * Reads a three-row fake table with {@code BigQueryIO.readTableRowsWithSchema()} and checks both
 * the schema attached to the resulting collection and the rows selected from it.
 */
@Test
public void testReadTableWithSchema() throws IOException, InterruptedException {
  final String project = "non-executing-project";
  final String dataset = "schema_dataset";

  // Describe the table: a STRING "name" column and an INTEGER "number" column.
  TableSchema tableSchema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  TableReference tableRef =
      new TableReference().setProjectId(project).setDatasetId(dataset).setTableId("schema_table");
  Table someTable = new Table();
  someTable.setSchema(tableSchema);
  someTable.setTableReference(tableRef);
  someTable.setNumBytes(1024L * 1024L);

  // Register the table with a fresh fake dataset service and load its rows.
  FakeDatasetService fakeDatasetService = new FakeDatasetService();
  fakeDatasetService.createDataset(project, dataset, "", "", null);
  fakeDatasetService.createTable(someTable);
  List<TableRow> records =
      Lists.newArrayList(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L));
  fakeDatasetService.insertAll(someTable.getTableReference(), records, null);

  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(fakeDatasetService);

  // Run the schema-aware read against the fake services.
  BigQueryIO.TypedRead<TableRow> read =
      BigQueryIO.readTableRowsWithSchema()
          .from("non-executing-project:schema_dataset.schema_table")
          .withTestServices(fakeBqServices)
          .withoutValidation();
  PCollection<TableRow> bqRows = p.apply(read);

  // Both fields are expected to be nullable in the derived schema.
  Schema.Field nameField = Schema.Field.of("name", Schema.FieldType.STRING).withNullable(true);
  Schema.Field numberField = Schema.Field.of("number", Schema.FieldType.INT64).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, numberField);
  assertEquals(expectedSchema, bqRows.getSchema());

  PCollection<Row> output = bqRows.apply(Select.fieldNames("name", "number"));
  Row rowA = Row.withSchema(expectedSchema).addValues("a", 1L).build();
  Row rowB = Row.withSchema(expectedSchema).addValues("b", 2L).build();
  Row rowC = Row.withSchema(expectedSchema).addValues("c", 3L).build();
  PAssert.that(output).containsInAnyOrder(ImmutableList.of(rowA, rowB, rowC));
  p.run();
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryIOReadTest method testReadFromTable.
/**
 * Reads a three-row fake table through one of the two table-row read entry points and asserts
 * that every row is produced as a (name, number) pair.
 *
 * @param useTemplateCompatibility when {@code true}, {@code withTemplateCompatibility()} is
 *     applied to the read transform
 * @param useReadTableRows when {@code true}, the read is built with
 *     {@code BigQueryIO.readTableRows()}; when {@code false}, with {@code BigQueryIO.read()}
 */
private void testReadFromTable(boolean useTemplateCompatibility, boolean useReadTableRows)
    throws IOException, InterruptedException {
  final String tableSpec = "non-executing-project:somedataset.sometable";

  Table sometable = new Table();
  sometable.setSchema(
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER"))));
  sometable.setTableReference(
      new TableReference()
          .setProjectId("non-executing-project")
          .setDatasetId("somedataset")
          .setTableId("sometable"));
  sometable.setNumBytes(1024L * 1024L);

  FakeDatasetService fakeDatasetService = new FakeDatasetService();
  fakeDatasetService.createDataset("non-executing-project", "somedataset", "", "", null);
  fakeDatasetService.createTable(sometable);

  List<TableRow> records =
      Lists.newArrayList(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L));
  fakeDatasetService.insertAll(sometable.getTableReference(), records, null);

  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(fakeDatasetService);

  // The flag selects the API it is named after: readTableRows() when set, read() otherwise.
  // (The two branches were previously swapped.)
  PTransform<PBegin, PCollection<TableRow>> readTransform;
  if (useReadTableRows) {
    BigQueryIO.TypedRead<TableRow> read =
        BigQueryIO.readTableRows()
            .from(tableSpec)
            .withTestServices(fakeBqServices)
            .withoutValidation();
    readTransform = useTemplateCompatibility ? read.withTemplateCompatibility() : read;
  } else {
    BigQueryIO.Read read =
        BigQueryIO.read().from(tableSpec).withTestServices(fakeBqServices).withoutValidation();
    readTransform = useTemplateCompatibility ? read.withTemplateCompatibility() : read;
  }

  PCollection<KV<String, Long>> output =
      p.apply(readTransform)
          .apply(
              ParDo.of(
                  new DoFn<TableRow, KV<String, Long>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      // The "number" value is cast to String and parsed, so it is expected
                      // to arrive as text rather than as a Long.
                      c.output(
                          KV.of(
                              (String) c.element().get("name"),
                              Long.valueOf((String) c.element().get("number"))));
                    }
                  }));

  PAssert.that(output)
      .containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
  p.run();
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryHllSketchCompatibilityIT method writeSketchToBigQuery.
/**
 * Builds an HLL sketch over {@code testData}, writes it to the sketch table with
 * {@code WRITE_TRUNCATE}, then queries the table and checks the result checksum.
 *
 * @param testData strings fed into {@code HllCount.Init.forStrings().globally()}
 * @param expectedChecksum checksum the query result is matched against
 */
private void writeSketchToBigQuery(List<String> testData, String expectedChecksum) {
  String tableSpec = String.format("%s.%s", DATASET_ID, SKETCH_TABLE_ID);

  TableFieldSchema sketchField =
      new TableFieldSchema().setName(SKETCH_FIELD_NAME).setType(SKETCH_FIELD_TYPE);
  TableSchema tableSchema = new TableSchema().setFields(Collections.singletonList(sketchField));

  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  // An empty sketch is written as a null cell. Nullness warnings are suppressed here
  // until we have a stub class for BigQuery TableRow.
  @SuppressWarnings("nullness")
  SerializableFunction<byte[], TableRow> formatFn =
      sketch -> new TableRow().set(SKETCH_FIELD_NAME, sketch.length == 0 ? null : sketch);

  BigQueryIO.Write<byte[]> sketchWrite =
      BigQueryIO.<byte[]>write()
          .to(tableSpec)
          .withSchema(tableSchema)
          .withFormatFunction(formatFn)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE);

  p.apply(Create.of(testData).withType(TypeDescriptor.of(String.class)))
      .apply(HllCount.Init.forStrings().globally())
      .apply(sketchWrite);
  p.run().waitUntilFinish();

  // The matcher sends this query to retrieve the estimated count and verifies the result
  // using its checksum.
  String query =
      String.format("SELECT HLL_COUNT.EXTRACT(%s) FROM %s", SKETCH_FIELD_NAME, tableSpec);
  assertThat(
      createQueryUsingStandardSql(APP_NAME, PROJECT_ID, query),
      queryResultHasChecksum(expectedChecksum));
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryTornadoes method applyBigQueryTornadoes.
/**
 * Wires the tornado-counting pipeline onto {@code p}: a BigQuery read configured from
 * {@code options}, the {@link CountTornadoes} transform, and a BigQuery write of its output.
 *
 * @param p pipeline the transforms are applied to
 * @param options supplies the input table or query, read/write methods, output table and
 *     create/write dispositions
 */
public static void applyBigQueryTornadoes(Pipeline p, Options options) {
  // Schema of the output table: two INTEGER columns.
  List<TableFieldSchema> outputFields = new ArrayList<>();
  outputFields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
  outputFields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER"));
  TableSchema outputSchema = new TableSchema().setFields(outputFields);

  TypedRead<TableRow> source;
  if (options.getInputQuery().isEmpty()) {
    // No query supplied: read the input table itself.
    source =
        BigQueryIO.readTableRows().from(options.getInput()).withMethod(options.getReadMethod());
    // Field selection is requested only for DIRECT_READ, and only on this table-read path.
    if (options.getReadMethod() == TypedRead.Method.DIRECT_READ) {
      source = source.withSelectedFields(Lists.newArrayList("month", "tornado"));
    }
  } else {
    // A query was supplied: run it with standard SQL.
    source =
        BigQueryIO.readTableRows()
            .fromQuery(options.getInputQuery())
            .usingStandardSql()
            .withMethod(options.getReadMethod());
  }

  PCollection<TableRow> rowsFromBigQuery = p.apply(source);
  PCollection<TableRow> tornadoCounts = rowsFromBigQuery.apply(new CountTornadoes());
  tornadoCounts.apply(
      BigQueryIO.writeTableRows()
          .to(options.getOutput())
          .withSchema(outputSchema)
          .withCreateDisposition(options.getCreateDisposition())
          .withWriteDisposition(options.getWriteDisposition())
          .withMethod(options.getWriteMethod()));
}
Aggregations