use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
the class BigQueryIOTest method testWriteRename.
@Test
public void testWriteRename() throws Exception {
  p.enableAbandonedNodeEnforcement(false);
  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
      .withJobService(new FakeJobService())
      .withDatasetService(datasetService);
  datasetService.createDataset("project-id", "dataset-id", "", "");
  final int numFinalTables = 3;
  final int numTempTablesPerFinalTable = 3;
  final int numRecordsPerTempTable = 10;
  Map<TableDestination, List<TableRow>> expectedRowsPerTable = Maps.newHashMap();
  String jobIdToken = "jobIdToken";
  Map<TableDestination, Iterable<String>> tempTables = Maps.newHashMap();
  for (int i = 0; i < numFinalTables; ++i) {
    String tableName = "project-id:dataset-id.table_" + i;
    TableDestination tableDestination = new TableDestination(tableName, "table_" + i + "_desc");
    List<String> tables = Lists.newArrayList();
    tempTables.put(tableDestination, tables);
    List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
    if (expectedRows == null) {
      expectedRows = Lists.newArrayList();
      expectedRowsPerTable.put(tableDestination, expectedRows);
    }
    for (int j = 0; j < numTempTablesPerFinalTable; ++j) {
      TableReference tempTable = new TableReference()
          .setProjectId("project-id")
          .setDatasetId("dataset-id")
          .setTableId(String.format("%s_%05d_%05d", jobIdToken, i, j));
      datasetService.createTable(new Table().setTableReference(tempTable));
      List<TableRow> rows = Lists.newArrayList();
      for (int k = 0; k < numRecordsPerTempTable; ++k) {
        rows.add(new TableRow().set("number", j * numTempTablesPerFinalTable + k));
      }
      datasetService.insertAll(tempTable, rows, null);
      expectedRows.addAll(rows);
      tables.add(BigQueryHelpers.toJsonString(tempTable));
    }
  }
  PCollection<KV<TableDestination, String>> tempTablesPCollection = p
      .apply(Create.of(tempTables)
          .withCoder(KvCoder.of(
              TableDestinationCoder.of(),
              IterableCoder.of(StringUtf8Coder.of()))))
      .apply(ParDo.of(
          new DoFn<KV<TableDestination, Iterable<String>>, KV<TableDestination, String>>() {
            @ProcessElement
            public void processElement(ProcessContext c) {
              TableDestination tableDestination = c.element().getKey();
              for (String tempTable : c.element().getValue()) {
                c.output(KV.of(tableDestination, tempTable));
              }
            }
          }));
  PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
      PCollectionViews.multimapView(
          tempTablesPCollection,
          WindowingStrategy.globalDefault(),
          KvCoder.of(TableDestinationCoder.of(), StringUtf8Coder.of()));
  PCollectionView<String> jobIdTokenView = p
      .apply("CreateJobId", Create.of("jobId"))
      .apply(View.<String>asSingleton());
  WriteRename writeRename = new WriteRename(
      fakeBqServices,
      jobIdTokenView,
      WriteDisposition.WRITE_EMPTY,
      CreateDisposition.CREATE_IF_NEEDED,
      tempTablesView);
  DoFnTester<Void, Void> tester = DoFnTester.of(writeRename);
  tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
  tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
  tester.processElement(null);
  for (Map.Entry<TableDestination, Iterable<String>> entry : tempTables.entrySet()) {
    TableDestination tableDestination = entry.getKey();
    TableReference tableReference = tableDestination.getTableReference();
    Table table = checkNotNull(datasetService.getTable(tableReference));
    assertEquals(tableReference.getTableId() + "_desc", tableDestination.getTableDescription());
    List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
    assertThat(
        datasetService.getAllRows(
            tableReference.getProjectId(),
            tableReference.getDatasetId(),
            tableReference.getTableId()),
        containsInAnyOrder(Iterables.toArray(expectedRows, TableRow.class)));
    // Temp tables should be deleted.
    for (String tempTableJson : entry.getValue()) {
      TableReference tempTable =
          BigQueryHelpers.fromJsonString(tempTableJson, TableReference.class);
      assertEquals(null, datasetService.getTable(tempTable));
    }
  }
}
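Note on the temp-table naming above: each per-destination temp table is named "<jobIdToken>_<i>_<j>" with zero-padded indices, and WriteRename is expected to copy every temp table into its final destination and then delete it. The helper below is a minimal sketch of that naming scheme for illustration only; the class TempTableNames is hypothetical and not part of Beam.

// Hypothetical helper (not in Beam): reproduces the temp-table ID format
// used by the test above, e.g. "jobIdToken_00001_00002".
class TempTableNames {
  static String tempTableId(String jobIdToken, int finalTableIndex, int tempTableIndex) {
    // Matches String.format("%s_%05d_%05d", ...) in the test.
    return String.format("%s_%05d_%05d", jobIdToken, finalTableIndex, tempTableIndex);
  }
}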
use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
the class BigQueryIOTest method testBuildWriteWithWriteWithTableDescription.
@Test
public void testBuildWriteWithWriteWithTableDescription() {
  final String tblDescription = "foo bar table";
  BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows()
      .to("foo.com:project:somedataset.sometable")
      .withTableDescription(tblDescription);
  checkWriteObject(
      write,
      "foo.com:project",
      "somedataset",
      "sometable",
      null,
      CreateDisposition.CREATE_IF_NEEDED,
      WriteDisposition.WRITE_EMPTY,
      tblDescription);
}
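For comparison, here is a minimal sketch of applying an equivalently configured write inside a pipeline, rather than only inspecting the transform object as the test does. It assumes an existing PCollection<TableRow> named rows; the dispositions shown are the defaults the test asserts.

// Sketch: same configuration applied to an assumed PCollection<TableRow> "rows".
rows.apply(BigQueryIO.writeTableRows()
    .to("foo.com:project:somedataset.sometable")
    .withTableDescription("foo bar table")
    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
    .withWriteDisposition(WriteDisposition.WRITE_EMPTY));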
use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
the class BigQueryIOTest method testStreamingWrite.
@Test
public void testStreamingWrite() throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("defaultproject");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
  FakeDatasetService datasetService = new FakeDatasetService();
  datasetService.createDataset("project-id", "dataset-id", "", "");
  FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
      .withDatasetService(datasetService);
  Pipeline p = TestPipeline.create(bqOptions);
  p.apply(Create.of(
          new TableRow().set("name", "a").set("number", 1),
          new TableRow().set("name", "b").set("number", 2),
          new TableRow().set("name", "c").set("number", 3),
          new TableRow().set("name", "d").set("number", 4))
      .withCoder(TableRowJsonCoder.of()))
      .setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED)
      .apply(BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withSchema(new TableSchema().setFields(ImmutableList.of(
              new TableFieldSchema().setName("name").setType("STRING"),
              new TableFieldSchema().setName("number").setType("INTEGER"))))
          .withTestServices(fakeBqServices)
          .withoutValidation());
  p.run();
  assertThat(
      datasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", 1),
          new TableRow().set("name", "b").set("number", 2),
          new TableRow().set("name", "c").set("number", 3),
          new TableRow().set("name", "d").set("number", 4)));
}
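Because the input PCollection is marked UNBOUNDED, this write goes through BigQuery's streaming-insert path rather than batch load jobs. Outside the test harness, the same write might look like the sketch below; it assumes an unbounded PCollection<TableRow> named events and reuses the schema from the test.

// Sketch (assumes an unbounded PCollection<TableRow> named "events"):
TableSchema schema = new TableSchema().setFields(ImmutableList.of(
    new TableFieldSchema().setName("name").setType("STRING"),
    new TableFieldSchema().setName("number").setType("INTEGER")));
events.apply(BigQueryIO.writeTableRows()
    .to("project-id:dataset-id.table-id")
    .withSchema(schema)
    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED));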
use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
the class BigQueryIOTest method testBigQueryTableSourceThroughJsonAPI.
@Test
public void testBigQueryTableSourceThroughJsonAPI() throws Exception {
  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
      .withJobService(new FakeJobService())
      .withDatasetService(datasetService);
  List<TableRow> expected = ImmutableList.of(
      new TableRow().set("name", "a").set("number", "1"),
      new TableRow().set("name", "b").set("number", "2"),
      new TableRow().set("name", "c").set("number", "3"),
      new TableRow().set("name", "d").set("number", "4"),
      new TableRow().set("name", "e").set("number", "5"),
      new TableRow().set("name", "f").set("number", "6"));
  TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
  datasetService.createDataset(table.getProjectId(), table.getDatasetId(), "", "");
  datasetService.createTable(new Table().setTableReference(table));
  datasetService.insertAll(table, expected, null);
  Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryTableSourceThroughJsonAPI");
  String stepUuid = "testStepUuid";
  BoundedSource<TableRow> bqSource =
      BigQueryTableSource.create(stepUuid, StaticValueProvider.of(table), fakeBqServices);
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setTempLocation(baseDir.toString());
  Assert.assertThat(SourceTestUtils.readFromSource(bqSource, options), CoreMatchers.is(expected));
  SourceTestUtils.assertSplitAtFractionBehavior(
      bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
}
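The test drives BigQueryTableSource directly; in user code the equivalent read is usually expressed through BigQueryIO itself. A minimal sketch, assuming a Pipeline p:

// Sketch: user-facing equivalent of the source under test (Beam 2.x API).
PCollection<TableRow> rows = p.apply(
    BigQueryIO.read().from("project:data_set.table_name"));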
use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
the class BigQueryServicesImplTest method testInsertRetryPolicy.
/**
 * Tests that {@link DatasetServiceImpl#insertAll} uses the supplied {@link InsertRetryPolicy},
 * and returns the list of rows not retried.
 */
@Test
public void testInsertRetryPolicy() throws InterruptedException, IOException {
  TableReference ref = new TableReference()
      .setProjectId("project").setDatasetId("dataset").setTableId("table");
  List<ValueInSingleWindow<TableRow>> rows =
      ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
  // First time row0 fails with a retryable error, and row1 fails with a persistent error.
  final TableDataInsertAllResponse firstFailure = new TableDataInsertAllResponse()
      .setInsertErrors(ImmutableList.of(
          new InsertErrors().setIndex(0L)
              .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout"))),
          new InsertErrors().setIndex(1L)
              .setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")))));
  // Second time there is only one row, which fails with a retryable error.
  final TableDataInsertAllResponse secondFailure = new TableDataInsertAllResponse()
      .setInsertErrors(ImmutableList.of(
          new InsertErrors().setIndex(0L)
              .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")))));
  // On the final attempt, no failures are returned.
  final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  // Always return 200; the per-row insert errors are reported in the response body.
  when(response.getStatusCode()).thenReturn(200);
  // Return the three responses in order: first failure, second failure, then success.
  when(response.getContent())
      .thenReturn(toStream(firstFailure))
      .thenReturn(toStream(secondFailure))
      .thenReturn(toStream(allRowsSucceeded));
  DatasetServiceImpl dataService =
      new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
  dataService.insertAll(
      ref, rows, null,
      BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
      new MockSleeper(),
      InsertRetryPolicy.retryTransientErrors(),
      failedInserts);
  assertEquals(1, failedInserts.size());
  expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
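In a user pipeline, the same retry policy is normally attached to the write transform instead of being passed to DatasetServiceImpl directly. A hedged sketch, assuming a PCollection<TableRow> named rows and a Beam version that exposes withFailedInsertRetryPolicy on BigQueryIO.Write:

// Sketch (availability of withFailedInsertRetryPolicy in this Beam version is an assumption):
rows.apply(BigQueryIO.writeTableRows()
    .to("project-id:dataset-id.table-id")
    .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()));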