Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl in project beam by apache.
The class BigQueryServicesImplTest, method testInsertRetrySelectRows.
/**
* Tests that {@link DatasetServiceImpl#insertAll} retries selected rows on failure.
*/
@Test
public void testInsertRetrySelectRows() throws Exception {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow().set("row", "a")), wrapTableRow(new TableRow().set("row", "b")));
List<String> insertIds = ImmutableList.of("a", "b");
final TableDataInsertAllResponse bFailed = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto()))));
final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
when(response.getStatusCode()).thenReturn(200).thenReturn(200);
when(response.getContent()).thenReturn(toStream(bFailed)).thenReturn(toStream(allRowsSucceeded));
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
dataService.insertAll(ref, rows, insertIds, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
verify(response, times(2)).getContentType();
}
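The test above relies on two small helpers, wrapTableRow and toStream, that live elsewhere in BigQueryServicesImplTest and are not shown on this page. A minimal sketch of what they plausibly look like, inferred from how they are called here; the bodies below are assumptions, not necessarily the project's exact code:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import com.google.api.client.json.GenericJson;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.values.ValueInSingleWindow;

// Wraps a bare TableRow into the windowed value type that insertAll expects.
private static ValueInSingleWindow<TableRow> wrapTableRow(TableRow row) {
  return ValueInSingleWindow.of(
      row, GlobalWindow.TIMESTAMP_MAX_VALUE, GlobalWindow.INSTANCE, PaneInfo.ON_TIME_AND_ONLY_FIRING);
}

// Serializes an API model object into the InputStream handed back by the mocked HTTP response.
private static InputStream toStream(GenericJson content) throws IOException {
  return new ByteArrayInputStream(JacksonFactory.getDefaultInstance().toByteArray(content));
}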
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl in project beam by apache.
The class BigQueryServicesImplTest, method testInsertRetry.
/**
* Tests that {@link DatasetServiceImpl#insertAll} retries quota rate limited attempts.
*/
@Test
public void testInsertRetry() throws Exception {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
rows.add(wrapTableRow(new TableRow()));
// First response is 403 rate limited, second response has valid payload.
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
when(response.getStatusCode()).thenReturn(403).thenReturn(200);
when(response.getContent()).thenReturn(toStream(errorWithReasonAndStatus("rateLimitExceeded", 403))).thenReturn(toStream(new TableDataInsertAllResponse()));
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
verify(response, times(2)).getContentType();
expectedLogs.verifyInfo("BigQuery insertAll exceeded rate limit, retrying");
}
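Two further fixtures are assumed by this test: TEST_BACKOFF, the bounded backoff that the retry loop runs under, and errorWithReasonAndStatus, which fabricates the JSON error payload for the 403. Hedged sketches of both follow; the real definitions in the test class may differ in detail:

import com.google.api.client.googleapis.json.GoogleJsonError;
import com.google.api.client.googleapis.json.GoogleJsonErrorContainer;
import com.google.common.collect.ImmutableList;
import org.apache.beam.sdk.util.FluentBackoff;
import org.joda.time.Duration;

// A small, strictly bounded backoff so retrying tests terminate quickly.
private static final FluentBackoff TEST_BACKOFF =
    FluentBackoff.DEFAULT.withInitialBackoff(Duration.millis(1)).withMaxRetries(5);

// Builds the error body BigQuery would return, e.g. for a rateLimitExceeded 403.
private static GoogleJsonErrorContainer errorWithReasonAndStatus(String reason, int status) {
  GoogleJsonError.ErrorInfo info = new GoogleJsonError.ErrorInfo();
  info.setReason(reason);
  GoogleJsonError error = new GoogleJsonError();
  error.setErrors(ImmutableList.of(info));
  error.setCode(status);
  GoogleJsonErrorContainer container = new GoogleJsonErrorContainer();
  container.setError(error);
  return container;
}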
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl in project beam by apache.
The class BigQueryServicesImplTest, method testInsertFailsGracefully.
/**
* Tests that {@link DatasetServiceImpl#insertAll} fails gracefully when there are persistent issues.
*/
@Test
public void testInsertFailsGracefully() throws Exception {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
final TableDataInsertAllResponse row1Failed = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(1L)));
final TableDataInsertAllResponse row0Failed = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(0L)));
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
// Always return 200.
when(response.getStatusCode()).thenReturn(200);
// Return row 1 failing, then we retry row 1 as row 0, and row 0 persistently fails.
when(response.getContent()).thenReturn(toStream(row1Failed)).thenAnswer(new Answer<InputStream>() {
@Override
public InputStream answer(InvocationOnMock invocation) throws Throwable {
return toStream(row0Failed);
}
});
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
// Expect it to fail.
try {
dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
fail();
} catch (IOException e) {
assertThat(e, instanceOf(IOException.class));
assertThat(e.getMessage(), containsString("Insert failed:"));
assertThat(e.getMessage(), containsString("[{\"index\":0}]"));
}
// Verify the exact number of retries as well as log messages.
verify(response, times(4)).getStatusCode();
verify(response, times(4)).getContent();
verify(response, times(4)).getContentType();
expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
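The verify(response, times(4)) calls only add up because every HTTP request the generated Bigquery client sends is funneled through a single mocked LowLevelHttpResponse. A rough sketch of the wiring the test class presumably performs in its @Before method; field names, the null request initializer, and other details below are guesses based on how response and bigquery are used in these tests:

import com.google.api.client.http.LowLevelHttpResponse;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.client.testing.http.MockHttpTransport;
import com.google.api.services.bigquery.Bigquery;
import org.junit.Before;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;

@Mock private LowLevelHttpResponse response; // stubbed per test with status codes and payloads
private Bigquery bigquery;

@Before
public void setUp() throws Exception {
  MockitoAnnotations.initMocks(this);
  // Every request receives the one mocked response, so the chained thenReturn(...)
  // stubs in each test drive the retry loop deterministically.
  MockHttpTransport transport =
      new MockHttpTransport.Builder().setLowLevelHttpResponse(response).build();
  bigquery =
      new Bigquery.Builder(transport, JacksonFactory.getDefaultInstance(), null).build();
}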
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl in project beam by apache.
The class BigQueryUtilTest, method testInsertAll.
@Test
public void testInsertAll() throws Exception {
// Build up a list of indices to fail on each invocation. This should result in
// 5 calls to insertAll.
List<List<Long>> errorsIndices = new ArrayList<>();
errorsIndices.add(Arrays.asList(0L, 5L, 10L, 15L, 20L));
errorsIndices.add(Arrays.asList(0L, 2L, 4L));
errorsIndices.add(Arrays.asList(0L, 2L));
errorsIndices.add(new ArrayList<Long>());
onInsertAll(errorsIndices);
TableReference ref = BigQueryHelpers.parseTableSpec("project:dataset.table");
DatasetServiceImpl datasetService = new DatasetServiceImpl(mockClient, options, 5);
List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
List<String> ids = new ArrayList<>();
for (int i = 0; i < 25; ++i) {
rows.add(ValueInSingleWindow.of(rawRow("foo", 1234), GlobalWindow.TIMESTAMP_MAX_VALUE, GlobalWindow.INSTANCE, PaneInfo.ON_TIME_AND_ONLY_FIRING));
ids.add(new String());
}
long totalBytes = 0;
try {
totalBytes = datasetService.insertAll(ref, rows, ids, InsertRetryPolicy.alwaysRetry(), null);
} finally {
verifyInsertAll(5);
// Each of the 25 rows is 23 bytes: "{f=[{v=foo}, {v=1234}]}"
assertEquals("Incorrect byte count", 25L * 23L, totalBytes);
}
}
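rawRow, onInsertAll, and verifyInsertAll are fixtures of BigQueryUtilTest that are not shown on this page. The byte-count assertion relies on each row's string form being "{f=[{v=foo}, {v=1234}]}" (23 characters), which suggests rawRow builds a TableRow out of raw f/v cells roughly as sketched below; the real helper may differ:

import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableRow;
import com.google.common.collect.ImmutableList;

// Builds a TableRow in raw f/v cell form; rawRow("foo", 1234) prints as
// {f=[{v=foo}, {v=1234}]}, i.e. 23 characters per row, 25 * 23 = 575 bytes in total.
private static TableRow rawRow(Object... args) {
  ImmutableList.Builder<TableCell> cells = ImmutableList.builder();
  for (Object value : args) {
    cells.add(new TableCell().setV(value));
  }
  return new TableRow().setF(cells.build());
}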
Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl in project beam by apache.
The class BigQueryServicesImplTest, method testInsertRetryPolicy.
/**
* Tests that {@link DatasetServiceImpl#insertAll} uses the supplied {@link InsertRetryPolicy},
* and returns the list of rows not retried.
*/
@Test
public void testInsertRetryPolicy() throws InterruptedException, IOException {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
// First time row0 fails with a retryable error, and row1 fails with a persistent error.
final TableDataInsertAllResponse firstFailure = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout"))), new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")))));
// Second time there is only one row, which fails with a retryable error.
final TableDataInsertAllResponse secondFailure = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")))));
// On the final attempt, no failures are returned.
final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
// Always return 200.
when(response.getStatusCode()).thenReturn(200);
// Respond with the two partial failures, then with the final success.
when(response.getContent()).thenReturn(toStream(firstFailure)).thenReturn(toStream(secondFailure)).thenReturn(toStream(allRowsSucceeded));
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.retryTransientErrors(), failedInserts);
assertEquals(1, failedInserts.size());
expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
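Here the supplied policy decides per row: the "timeout" error is retried while the "invalid" error is not, which is why exactly one element ends up in failedInserts. Beyond the built-in alwaysRetry() and retryTransientErrors() used in these tests, a caller can supply its own policy. A hedged sketch of a custom policy, assuming InsertRetryPolicy exposes shouldRetry(Context) and Context.getInsertErrors() the way the built-in policies use them:

import com.google.api.services.bigquery.model.ErrorProto;
import org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy;

// Retries a row only when every reported error reason looks transient.
InsertRetryPolicy retryQuotaAndTimeoutsOnly = new InsertRetryPolicy() {
  @Override
  public boolean shouldRetry(Context context) {
    if (context.getInsertErrors() == null || context.getInsertErrors().getErrors() == null) {
      return true;
    }
    for (ErrorProto error : context.getInsertErrors().getErrors()) {
      String reason = error.getReason();
      if (!"timeout".equals(reason) && !"rateLimitExceeded".equals(reason)) {
        return false; // a persistent reason such as "invalid": give up and report the row
      }
    }
    return true;
  }
};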