Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
In class FilterExamples, method buildWeatherSchemaProjection.
/**
 * Helper method to build the table schema for the output table.
 */
private static TableSchema buildWeatherSchemaProjection() {
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("year").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("day").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("mean_temp").setType("FLOAT"));
  TableSchema schema = new TableSchema().setFields(fields);
  return schema;
}
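A schema built this way is typically handed straight to a BigQueryIO write transform. A minimal sketch of that hookup follows; the pipeline options, table spec, and example row are hypothetical and do not appear in the snippet above.

// Hypothetical usage sketch: "options" is an assumed PipelineOptions instance, and the
// table spec "my-project:my_dataset.weather_projection" is an assumption.
Pipeline p = Pipeline.create(options);
p.apply(
        Create.of(
                new TableRow()
                    .set("year", 2017)
                    .set("month", 5)
                    .set("day", 1)
                    .set("mean_temp", 14.2))
            .withCoder(TableRowJsonCoder.of()))
    .apply(
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.weather_projection")
            .withSchema(buildWeatherSchemaProjection())
            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
            .withWriteDisposition(WriteDisposition.WRITE_TRUNCATE));
p.run();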
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
In class BigQueryIOTest, method testReadFromTable.
@Test
public void testReadFromTable() throws IOException, InterruptedException {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("defaultproject");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

  Job job = new Job();
  JobStatus status = new JobStatus();
  job.setStatus(status);
  JobStatistics jobStats = new JobStatistics();
  job.setStatistics(jobStats);
  JobStatistics4 extract = new JobStatistics4();
  jobStats.setExtract(extract);
  extract.setDestinationUriFileCounts(ImmutableList.of(1L));

  Table sometable = new Table();
  sometable.setSchema(
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER"))));
  sometable.setTableReference(
      new TableReference()
          .setProjectId("non-executing-project")
          .setDatasetId("somedataset")
          .setTableId("sometable"));
  sometable.setNumBytes(1024L * 1024L);

  FakeDatasetService fakeDatasetService = new FakeDatasetService();
  fakeDatasetService.createDataset("non-executing-project", "somedataset", "", "");
  fakeDatasetService.createTable(sometable);

  List<TableRow> records =
      Lists.newArrayList(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L));
  fakeDatasetService.insertAll(sometable.getTableReference(), records, null);

  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(fakeDatasetService);

  Pipeline p = TestPipeline.create(bqOptions);
  PCollection<KV<String, Long>> output =
      p.apply(
              BigQueryIO.read()
                  .from("non-executing-project:somedataset.sometable")
                  .withTestServices(fakeBqServices)
                  .withoutValidation())
          .apply(
              ParDo.of(
                  new DoFn<TableRow, KV<String, Long>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.output(
                          KV.of(
                              (String) c.element().get("name"),
                              Long.valueOf((String) c.element().get("number"))));
                    }
                  }));

  PAssert.that(output)
      .containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
  p.run();
}
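One detail worth noting: the INTEGER column arrives as a String inside each TableRow (BigQuery's JSON representation of 64-bit integers), which is why the DoFn parses it with Long.valueOf. Outside a test, the same read pattern reduces to the sketch below, where the helper name and table spec are hypothetical:

// A minimal non-test sketch; "my-project:my_dataset.my_table" is a hypothetical spec.
static PCollection<KV<String, Long>> readNameNumber(Pipeline p) {
  return p.apply(BigQueryIO.read().from("my-project:my_dataset.my_table"))
      .apply(
          MapElements.via(
              new SimpleFunction<TableRow, KV<String, Long>>() {
                @Override
                public KV<String, Long> apply(TableRow row) {
                  // INTEGER values arrive as Strings; parse them explicitly.
                  return KV.of(
                      (String) row.get("name"),
                      Long.valueOf((String) row.get("number")));
                }
              }));
}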
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
In class BigQueryIOTest, method writeDynamicDestinations.
public void writeDynamicDestinations(boolean streaming) throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("project-id");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);
  datasetService.createDataset("project-id", "dataset-id", "", "");

  final Pattern userPattern = Pattern.compile("([a-z]+)([0-9]+)");
  Pipeline p = TestPipeline.create(bqOptions);

  final PCollectionView<List<String>> sideInput1 =
      p.apply("Create SideInput 1", Create.of("a", "b", "c").withCoder(StringUtf8Coder.of()))
          .apply("asList", View.<String>asList());
  final PCollectionView<Map<String, String>> sideInput2 =
      p.apply("Create SideInput2", Create.of(KV.of("a", "a"), KV.of("b", "b"), KV.of("c", "c")))
          .apply("AsMap", View.<String, String>asMap());

  final List<String> allUsernames = ImmutableList.of("bill", "bob", "randolph");
  List<String> userList = Lists.newArrayList();
  // Generate enough users to force spilling into WriteGroupedRecordsToFiles.
  for (int i = 0; i < BatchLoads.DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE * 10; ++i) {
    // Each user gets a single randomly chosen nickname.
    for (int j = 0; j < 1; ++j) {
      String nickname =
          allUsernames.get(ThreadLocalRandom.current().nextInt(allUsernames.size()));
      userList.add(nickname + i);
    }
  }
  PCollection<String> users =
      p.apply("CreateUsers", Create.of(userList))
          .apply(
              Window.into(
                  new PartitionedGlobalWindows<>(
                      new SerializableFunction<String, String>() {
                        @Override
                        public String apply(String arg) {
                          return arg;
                        }
                      })));
  if (streaming) {
    users = users.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
  }
  users.apply(
      "WriteBigQuery",
      BigQueryIO.<String>write()
          .withTestServices(fakeBqServices)
          .withMaxFilesPerBundle(5)
          .withMaxFileSize(10)
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withFormatFunction(
              new SerializableFunction<String, TableRow>() {
                @Override
                public TableRow apply(String user) {
                  Matcher matcher = userPattern.matcher(user);
                  if (matcher.matches()) {
                    return new TableRow()
                        .set("name", matcher.group(1))
                        .set("id", Integer.valueOf(matcher.group(2)));
                  }
                  throw new RuntimeException("Unmatching element " + user);
                }
              })
          .to(
              new StringIntegerDestinations() {
                @Override
                public Integer getDestination(ValueInSingleWindow<String> element) {
                  assertThat(
                      element.getWindow(), Matchers.instanceOf(PartitionedGlobalWindow.class));
                  Matcher matcher = userPattern.matcher(element.getValue());
                  if (matcher.matches()) {
                    // Tables are named by userid, so an Integer suffices to identify
                    // a table.
                    return Integer.valueOf(matcher.group(2));
                  }
                  throw new RuntimeException("Unmatching destination " + element.getValue());
                }

                @Override
                public TableDestination getTable(Integer userId) {
                  verifySideInputs();
                  // Each user in its own table.
                  return new TableDestination(
                      "dataset-id.userid-" + userId, "table for userid " + userId);
                }

                @Override
                public TableSchema getSchema(Integer userId) {
                  verifySideInputs();
                  return new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("id").setType("INTEGER")));
                }

                @Override
                public List<PCollectionView<?>> getSideInputs() {
                  return ImmutableList.of(sideInput1, sideInput2);
                }

                private void verifySideInputs() {
                  assertThat(sideInput(sideInput1), containsInAnyOrder("a", "b", "c"));
                  Map<String, String> mapSideInput = sideInput(sideInput2);
                  assertEquals(3, mapSideInput.size());
                  assertThat(
                      mapSideInput,
                      allOf(hasEntry("a", "a"), hasEntry("b", "b"), hasEntry("c", "c")));
                }
              })
          .withoutValidation());
  p.run();

  File tempDir = new File(bqOptions.getTempLocation());
  testNumFiles(tempDir, 0);

  Map<Integer, List<TableRow>> expectedTableRows = Maps.newHashMap();
  for (int i = 0; i < userList.size(); ++i) {
    Matcher matcher = userPattern.matcher(userList.get(i));
    checkState(matcher.matches());
    String nickname = matcher.group(1);
    int userid = Integer.valueOf(matcher.group(2));
    List<TableRow> expected = expectedTableRows.get(userid);
    if (expected == null) {
      expected = Lists.newArrayList();
      expectedTableRows.put(userid, expected);
    }
    expected.add(new TableRow().set("name", nickname).set("id", userid));
  }

  for (Map.Entry<Integer, List<TableRow>> entry : expectedTableRows.entrySet()) {
    assertThat(
        datasetService.getAllRows("project-id", "dataset-id", "userid-" + entry.getKey()),
        containsInAnyOrder(Iterables.toArray(entry.getValue(), TableRow.class)));
  }
}
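StringIntegerDestinations above is a test-local subclass of DynamicDestinations<String, Integer>. Stripped of the side-input and assertion machinery, the same pattern reduces to the following sketch; the routing scheme, class name, table names, and schema here are hypothetical:

// A minimal sketch of the DynamicDestinations pattern, under assumed names.
// DynamicDestinations<ElementT, DestinationT> routes each element to a table.
class UserDestinations extends DynamicDestinations<String, String> {
  @Override
  public String getDestination(ValueInSingleWindow<String> element) {
    // Route by the first letter of the element (hypothetical scheme).
    return element.getValue().substring(0, 1);
  }

  @Override
  public TableDestination getTable(String letter) {
    return new TableDestination(
        "my-project:my_dataset.users_" + letter, "users starting with " + letter);
  }

  @Override
  public TableSchema getSchema(String letter) {
    return new TableSchema()
        .setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING")));
  }
}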
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
In class BigQueryIOTest, method testBuildWriteDisplayData.
@Test
public void testBuildWriteDisplayData() {
  String tableSpec = "project:dataset.table";
  TableSchema schema = new TableSchema().set("col1", "type1").set("col2", "type2");
  final String tblDescription = "foo bar table";
  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to(tableSpec)
          .withSchema(schema)
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_APPEND)
          .withTableDescription(tblDescription)
          .withoutValidation();

  DisplayData displayData = DisplayData.from(write);
  assertThat(displayData, hasDisplayItem("table"));
  assertThat(displayData, hasDisplayItem("schema"));
  assertThat(
      displayData,
      hasDisplayItem("createDisposition", CreateDisposition.CREATE_IF_NEEDED.toString()));
  assertThat(
      displayData,
      hasDisplayItem("writeDisposition", WriteDisposition.WRITE_APPEND.toString()));
  assertThat(displayData, hasDisplayItem("tableDescription", tblDescription));
  assertThat(displayData, hasDisplayItem("validation", false));
}
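The hasDisplayItem assertions rely on Beam's Hamcrest matchers for display data; the static imports below are what such a test presumably pulls in (assumed from Beam's test conventions, not shown in the snippet):

// Assumed static imports backing the assertions above.
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.hamcrest.MatcherAssert.assertThat;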
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
In class BigQueryIOTest, method testStreamingWrite.
@Test
public void testStreamingWrite() throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("defaultproject");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

  FakeDatasetService datasetService = new FakeDatasetService();
  datasetService.createDataset("project-id", "dataset-id", "", "");
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices().withDatasetService(datasetService);

  Pipeline p = TestPipeline.create(bqOptions);
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3),
                  new TableRow().set("name", "d").set("number", 4))
              .withCoder(TableRowJsonCoder.of()))
      .setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      datasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", 1),
          new TableRow().set("name", "b").set("number", 2),
          new TableRow().set("name", "c").set("number", 3),
          new TableRow().set("name", "d").set("number", 4)));
}
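Outside the test harness, the same streaming write drops the fake services and the internal boundedness override. The helper below is a hypothetical sketch, where rows is assumed to be an unbounded PCollection<TableRow> produced elsewhere in the pipeline (e.g. from a streaming source):

// A sketch under assumptions: "writeEvents" and the table spec are hypothetical;
// the schema mirrors the one used in the test above.
static void writeEvents(PCollection<TableRow> rows) {
  rows.apply(
      BigQueryIO.writeTableRows()
          .to("my-project:my_dataset.events")
          .withSchema(
              new TableSchema()
                  .setFields(
                      ImmutableList.of(
                          new TableFieldSchema().setName("name").setType("STRING"),
                          new TableFieldSchema().setName("number").setType("INTEGER"))))
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_APPEND));
}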