use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
the class BigQueryIOTest method testBigQueryNoTableQuerySourceInitSplit.
@Test
public void testBigQueryNoTableQuerySourceInitSplit() throws Exception {
TableReference dryRunTable = new TableReference();
Job queryJob = new Job();
JobStatistics queryJobStats = new JobStatistics();
JobStatistics2 queryStats = new JobStatistics2();
queryStats.setReferencedTables(ImmutableList.of(dryRunTable));
queryJobStats.setQuery(queryStats);
queryJob.setStatus(new JobStatus()).setStatistics(queryJobStats);
Job extractJob = new Job();
JobStatistics extractJobStats = new JobStatistics();
JobStatistics4 extractStats = new JobStatistics4();
extractStats.setDestinationUriFileCounts(ImmutableList.of(1L));
extractJobStats.setExtract(extractStats);
extractJob.setStatus(new JobStatus()).setStatistics(extractJobStats);
FakeDatasetService datasetService = new FakeDatasetService();
FakeJobService jobService = new FakeJobService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(jobService).withDatasetService(datasetService);
PipelineOptions options = PipelineOptionsFactory.create();
BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
bqOptions.setProject("project");
String stepUuid = "testStepUuid";
TableReference tempTableReference = createTempTableReference(bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));
List<TableRow> expected = ImmutableList.of(new TableRow().set("name", "a").set("number", 1L), new TableRow().set("name", "b").set("number", 2L), new TableRow().set("name", "c").set("number", 3L), new TableRow().set("name", "d").set("number", 4L), new TableRow().set("name", "e").set("number", 5L), new TableRow().set("name", "f").set("number", 6L));
datasetService.createDataset(tempTableReference.getProjectId(), tempTableReference.getDatasetId(), "", "");
Table table = new Table().setTableReference(tempTableReference).setSchema(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER"))));
datasetService.createTable(table);
String query = FakeBigQueryServices.encodeQuery(expected);
jobService.expectDryRunQuery("project", query, new JobStatistics().setQuery(new JobStatistics2().setTotalBytesProcessed(100L).setReferencedTables(ImmutableList.of(table.getTableReference()))));
Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryNoTableQuerySourceInitSplit");
BoundedSource<TableRow> bqSource = BigQueryQuerySource.create(stepUuid, StaticValueProvider.of(query), true, /* flattenResults */
true, /* useLegacySql */
fakeBqServices);
options.setTempLocation(baseDir.toString());
List<TableRow> read = convertBigDecimaslToLong(SourceTestUtils.readFromSource(bqSource, options));
assertThat(read, containsInAnyOrder(Iterables.toArray(expected, TableRow.class)));
SourceTestUtils.assertSplitAtFractionBehavior(bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
List<? extends BoundedSource<TableRow>> sources = bqSource.split(100, options);
assertEquals(2, sources.size());
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
}
use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
the class BigQueryIOTest method testValidateReadSetsDefaultProject.
@Test
public void testValidateReadSetsDefaultProject() throws Exception {
String projectId = "someproject";
String datasetId = "somedataset";
String tableId = "sometable";
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
bqOptions.setProject(projectId);
Path baseDir = Files.createTempDirectory(tempFolder, "testValidateReadSetsDefaultProject");
bqOptions.setTempLocation(baseDir.toString());
FakeDatasetService fakeDatasetService = new FakeDatasetService();
fakeDatasetService.createDataset(projectId, datasetId, "", "");
TableReference tableReference = new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
fakeDatasetService.createTable(new Table().setTableReference(tableReference).setSchema(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER")))));
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(fakeDatasetService);
List<TableRow> expected = ImmutableList.of(new TableRow().set("name", "a").set("number", 1L), new TableRow().set("name", "b").set("number", 2L), new TableRow().set("name", "c").set("number", 3L), new TableRow().set("name", "d").set("number", 4L), new TableRow().set("name", "e").set("number", 5L), new TableRow().set("name", "f").set("number", 6L));
fakeDatasetService.insertAll(tableReference, expected, null);
Pipeline p = TestPipeline.create(bqOptions);
TableReference tableRef = new TableReference();
tableRef.setDatasetId(datasetId);
tableRef.setTableId(tableId);
PCollection<KV<String, Long>> output = p.apply(BigQueryIO.read().from(tableRef).withTestServices(fakeBqServices)).apply(ParDo.of(new DoFn<TableRow, KV<String, Long>>() {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
c.output(KV.of((String) c.element().get("name"), Long.valueOf((String) c.element().get("number"))));
}
}));
PAssert.that(output).containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L), KV.of("d", 4L), KV.of("e", 5L), KV.of("f", 6L)));
p.run();
}
use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
the class BigQueryIOTest method testBuildSourceWithTableReference.
@Test
public void testBuildSourceWithTableReference() {
TableReference table = new TableReference().setProjectId("foo.com:project").setDatasetId("somedataset").setTableId("sometable");
BigQueryIO.Read read = BigQueryIO.read().from(table);
checkReadTableObject(read, "foo.com:project", "somedataset", "sometable");
}
use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
the class WriteRename method processElement.
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
Map<TableDestination, Iterable<String>> tempTablesMap = Maps.newHashMap(c.sideInput(tempTablesView));
// Process each destination table.
for (Map.Entry<TableDestination, Iterable<String>> entry : tempTablesMap.entrySet()) {
TableDestination finalTableDestination = entry.getKey();
List<String> tempTablesJson = Lists.newArrayList(entry.getValue());
// Do not copy if no temp tables are provided
if (tempTablesJson.size() == 0) {
return;
}
List<TableReference> tempTables = Lists.newArrayList();
for (String table : tempTablesJson) {
tempTables.add(BigQueryHelpers.fromJsonString(table, TableReference.class));
}
// Make sure each destination table gets a unique job id.
String jobIdPrefix = BigQueryHelpers.createJobId(c.sideInput(jobIdToken), finalTableDestination, -1);
copy(bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)), bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)), jobIdPrefix, finalTableDestination.getTableReference(), tempTables, writeDisposition, createDisposition, finalTableDestination.getTableDescription());
DatasetService tableService = bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class));
removeTemporaryTables(tableService, tempTables);
}
}
use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
the class BigQueryIOTest method testWriteWithDynamicTables.
public void testWriteWithDynamicTables(boolean streaming) throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
FakeDatasetService datasetService = new FakeDatasetService();
datasetService.createDataset("project-id", "dataset-id", "", "");
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withDatasetService(datasetService).withJobService(new FakeJobService());
List<Integer> inserts = new ArrayList<>();
for (int i = 0; i < 10; i++) {
inserts.add(i);
}
// Create a windowing strategy that puts the input into five different windows depending on
// record value.
WindowFn<Integer, PartitionedGlobalWindow> windowFn = new PartitionedGlobalWindows(new SerializableFunction<Integer, String>() {
@Override
public String apply(Integer i) {
return Integer.toString(i % 5);
}
});
final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
Map<String, String> schemas = Maps.newHashMap();
for (int i = 0; i < 5; i++) {
TableDestination destination = new TableDestination("project-id:dataset-id" + ".table-id-" + i, "");
targetTables.put(i, destination);
// Make sure each target table has its own custom table.
schemas.put(destination.getTableSpec(), BigQueryHelpers.toJsonString(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER"), new TableFieldSchema().setName("custom_" + i).setType("STRING")))));
}
SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction = new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {
@Override
public TableDestination apply(ValueInSingleWindow<Integer> input) {
PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
// Check that we can access the element as well here and that it matches the window.
checkArgument(window.value.equals(Integer.toString(input.getValue() % 5)), "Incorrect element");
return targetTables.get(input.getValue() % 5);
}
};
Pipeline p = TestPipeline.create(bqOptions);
PCollection<Integer> input = p.apply("CreateSource", Create.of(inserts));
if (streaming) {
input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
}
PCollectionView<Map<String, String>> schemasView = p.apply("CreateSchemaMap", Create.of(schemas)).apply("ViewSchemaAsMap", View.<String, String>asMap());
input.apply(Window.<Integer>into(windowFn)).apply(BigQueryIO.<Integer>write().to(tableFunction).withFormatFunction(new SerializableFunction<Integer, TableRow>() {
@Override
public TableRow apply(Integer i) {
return new TableRow().set("name", "number" + i).set("number", i);
}
}).withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED).withSchemaFromView(schemasView).withTestServices(fakeBqServices).withoutValidation());
p.run();
for (int i = 0; i < 5; ++i) {
String tableId = String.format("table-id-%d", i);
String tableSpec = String.format("project-id:dataset-id.%s", tableId);
// Verify that table was created with the correct schema.
assertThat(BigQueryHelpers.toJsonString(datasetService.getTable(new TableReference().setProjectId("project-id").setDatasetId("dataset-id").setTableId(tableId)).getSchema()), equalTo(schemas.get(tableSpec)));
// Verify that the table has the expected contents.
assertThat(datasetService.getAllRows("project-id", "dataset-id", tableId), containsInAnyOrder(new TableRow().set("name", String.format("number%d", i)).set("number", i), new TableRow().set("name", String.format("number%d", i + 5)).set("number", i + 5)));
}
}
Aggregations