Use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
Class BigQueryServicesImplTest, method testInsertWithinRequestByteSizeLimitsErrorsOut.
/**
 * Tests that {@link DatasetServiceImpl#insertAll} fails with a "this row is too large." error
 * when the batch contains an oversized row.
 */
@Test
public void testInsertWithinRequestByteSizeLimitsErrorsOut() throws Exception {
  TableReference ref =
      new TableReference()
          .setProjectId("project")
          .setDatasetId("dataset")
          .setTableId("table");

  // The first row carries a 9-character string repeated 1024 * 1025 times (about 9.4 million
  // characters); the two remaining rows are tiny.
  FailsafeValueInSingleWindow<TableRow, TableRow> oversizedRow =
      wrapValue(new TableRow().set("row", Strings.repeat("abcdefghi", 1024 * 1025)));
  FailsafeValueInSingleWindow<TableRow, TableRow> smallRowA =
      wrapValue(new TableRow().set("row", "a"));
  FailsafeValueInSingleWindow<TableRow, TableRow> smallRowB =
      wrapValue(new TableRow().set("row", "b"));
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> rows =
      ImmutableList.of(oversizedRow, smallRowA, smallRowB);
  List<String> insertIds = ImmutableList.of("a", "b", "c");

  // Two HTTP 200 responses are queued, each serving an empty TableDataInsertAllResponse.
  final TableDataInsertAllResponse emptyInsertResponse = new TableDataInsertAllResponse();
  setupMockResponses(
      first -> {
        when(first.getStatusCode()).thenReturn(200);
        when(first.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(first.getContent()).thenReturn(toStream(emptyInsertResponse));
      },
      second -> {
        when(second.getStatusCode()).thenReturn(200);
        when(second.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(second.getContent()).thenReturn(toStream(emptyInsertResponse));
      });

  DatasetServiceImpl datasetService =
      new DatasetServiceImpl(
          bigquery, null, PipelineOptionsFactory.fromArgs("--maxStreamingBatchSize=15").create());
  List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
  List<ValueInSingleWindow<TableRow>> successfulRows = Lists.newArrayList();

  RuntimeException tooLarge =
      assertThrows(
          RuntimeException.class,
          () ->
              datasetService.<TableRow>insertAll(
                  ref,
                  rows,
                  insertIds,
                  BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
                  TEST_BACKOFF,
                  new MockSleeper(),
                  InsertRetryPolicy.alwaysRetry(),
                  failedInserts,
                  ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
                  false,
                  false,
                  false,
                  successfulRows));
  assertThat(tooLarge.getMessage(), containsString("this row is too large."));
}
Use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
Class BigQueryServicesImplTest, method testFailInsertOtherRetry.
/**
 * Tests that {@link DatasetServiceImpl#insertAll} does not retry after a 403 whose reason is
 * neither rate-limited nor quota-exceeded: only the first mocked response may be consumed.
 */
@Test
public void testFailInsertOtherRetry() throws Exception {
  TableReference tableRef =
      new TableReference()
          .setProjectId("project")
          .setDatasetId("dataset")
          .setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> singleRow = new ArrayList<>();
  singleRow.add(wrapValue(new TableRow()));

  // Response #0: a 403 with reason "actually forbidden" (not rate-limited, not quota-exceeded).
  // Response #1: a valid 200 payload that must never be requested.
  setupMockResponses(
      forbidden -> {
        when(forbidden.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(forbidden.getStatusCode()).thenReturn(403);
        when(forbidden.getContent())
            .thenReturn(toStream(errorWithReasonAndStatus("actually forbidden", 403)));
      },
      unused -> {
        when(unused.getStatusCode()).thenReturn(200);
        when(unused.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(unused.getContent()).thenReturn(toStream(new TableDataInsertAllResponse()));
      });

  DatasetServiceImpl datasetService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("actually forbidden");
  try {
    datasetService.insertAll(
        tableRef,
        singleRow,
        null,
        BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
        TEST_BACKOFF,
        new MockSleeper(),
        InsertRetryPolicy.alwaysRetry(),
        null,
        null,
        false,
        false,
        false,
        null);
  } finally {
    // Runs even though insertAll throws: the first response must have been read...
    verify(responses[0], atLeastOnce()).getStatusCode();
    verify(responses[0]).getContent();
    verify(responses[0]).getContentType();
    // ...and the second response must not have been touched at all.
    verify(responses[1], never()).getStatusCode();
    verify(responses[1], never()).getContent();
    verify(responses[1], never()).getContentType();
  }

  // NOTE(review): assuming `thrown` is a JUnit ExpectedException rule, this line is only reached
  // when insertAll returns normally, in which case the rule fails the test anyway. As written it
  // never verifies anything on a passing run - confirm the expected metric label and consider
  // moving it into the finally block.
  verifyWriteMetricWasSet("project", "dataset", "table", "actually forbidden", 1);
}
Use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
Class BigQueryUtilTest, method testInsertAll.
/**
 * Inserts 25 identical rows through {@link DatasetServiceImpl#insertAll} while the mocked client
 * reports insert errors at the configured indices, then checks the number of insertAll calls and
 * the total byte count returned.
 */
@Test
public void testInsertAll() throws Exception {
  final int rowCount = 25;

  // Indices to fail on each invocation; this should result in 5 calls to insertAll.
  List<List<Long>> failingIndicesPerCall = new ArrayList<>();
  failingIndicesPerCall.add(Arrays.asList(0L, 5L, 10L, 15L, 20L));
  failingIndicesPerCall.add(Arrays.asList(0L, 2L, 4L));
  failingIndicesPerCall.add(Arrays.asList(0L, 2L));
  failingIndicesPerCall.add(new ArrayList<>());
  onInsertAll(failingIndicesPerCall);

  TableReference tableRef = BigQueryHelpers.parseTableSpec("project:dataset.table");
  // NOTE(review): the trailing 5 is presumably a per-request row cap - confirm against the
  // DatasetServiceImpl constructor.
  DatasetServiceImpl service = new DatasetServiceImpl(mockClient, null, options, 5);

  List<FailsafeValueInSingleWindow<TableRow, TableRow>> rowsToInsert = new ArrayList<>();
  List<String> insertIds = new ArrayList<>();
  for (int added = 0; added < rowCount; added++) {
    rowsToInsert.add(
        FailsafeValueInSingleWindow.of(
            rawRow("foo", 1234),
            GlobalWindow.TIMESTAMP_MAX_VALUE,
            GlobalWindow.INSTANCE,
            PaneInfo.ON_TIME_AND_ONLY_FIRING,
            rawRow("foo", 1234)));
    insertIds.add("");
  }

  long totalBytes =
      service.insertAll(
          tableRef,
          rowsToInsert,
          insertIds,
          InsertRetryPolicy.alwaysRetry(),
          null,
          null,
          false,
          false,
          false,
          null);

  verifyInsertAll(5);
  // Each of the 25 rows has 1 byte for length and 30 bytes: '{"f":[{"v":"foo"},{"v":1234}]}'
  assertEquals("Incorrect byte count", rowCount * 31L, totalBytes);
}
Use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
Class BigQueryServicesImplTest, method testInsertRetryPolicy.
/**
 * Tests that {@link DatasetServiceImpl#insertAll} honours the supplied {@link InsertRetryPolicy}
 * and collects the rows that were not retried into the failed-inserts list.
 */
@Test
public void testInsertRetryPolicy() throws InterruptedException, IOException {
  TableReference tableRef =
      new TableReference()
          .setProjectId("project")
          .setDatasetId("dataset")
          .setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> twoRows =
      ImmutableList.of(wrapValue(new TableRow()), wrapValue(new TableRow()));

  // Attempt 1: row 0 fails with a retryable error ("timeout"), row 1 with a persistent one
  // ("invalid").
  InsertErrors row0Timeout =
      new InsertErrors()
          .setIndex(0L)
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  InsertErrors row1Invalid =
      new InsertErrors()
          .setIndex(1L)
          .setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")));
  final TableDataInsertAllResponse firstFailure =
      new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(row0Timeout, row1Invalid));

  // Attempt 2: only one row is sent, and it fails again with a retryable error.
  InsertErrors retriedRowTimeout =
      new InsertErrors()
          .setIndex(0L)
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  final TableDataInsertAllResponse secondFailure =
      new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(retriedRowTimeout));

  // Attempt 3: no failures are returned.
  final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();

  // Every mocked HTTP response reports status 200; only the payload differs.
  setupMockResponses(
      attempt1 -> {
        when(attempt1.getStatusCode()).thenReturn(200);
        when(attempt1.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(attempt1.getContent()).thenReturn(toStream(firstFailure));
      },
      attempt2 -> {
        when(attempt2.getStatusCode()).thenReturn(200);
        when(attempt2.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(attempt2.getContent()).thenReturn(toStream(secondFailure));
      },
      attempt3 -> {
        when(attempt3.getStatusCode()).thenReturn(200);
        when(attempt3.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(attempt3.getContent()).thenReturn(toStream(allRowsSucceeded));
      });

  DatasetServiceImpl datasetService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());
  List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
  datasetService.insertAll(
      tableRef,
      twoRows,
      null,
      BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
      TEST_BACKOFF,
      new MockSleeper(),
      InsertRetryPolicy.retryTransientErrors(),
      failedInserts,
      ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
      false,
      false,
      false,
      null);

  // Exactly one row ends up in the failed-inserts list; the "timeout" metric is counted twice.
  assertEquals(1, failedInserts.size());
  expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
  verifyWriteMetricWasSet("project", "dataset", "table", "timeout", 2);
}
Use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
Class BigQueryServicesImplTest, method testInsertStoppedRetry.
/**
 * Tests that {@link DatasetServiceImpl#insertAll} stops retrying quotaExceeded failures and
 * surfaces a {@link RuntimeException} mentioning "quotaExceeded".
 */
@Test
public void testInsertStoppedRetry() throws Exception {
  TableReference tableRef =
      new TableReference()
          .setProjectId("project")
          .setDatasetId("dataset")
          .setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> singleRow = new ArrayList<>();
  singleRow.add(wrapValue(new TableRow()));

  MockSetupFunction quotaExceeded =
      resp -> {
        when(resp.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(resp.getStatusCode()).thenReturn(403);
        when(resp.getContent())
            .thenReturn(toStream(errorWithReasonAndStatus("quotaExceeded", 403)));
      };

  // Respond 403 four times, then valid payload.
  setupMockResponses(
      quotaExceeded,
      quotaExceeded,
      quotaExceeded,
      quotaExceeded,
      success -> {
        when(success.getStatusCode()).thenReturn(200);
        when(success.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(success.getContent()).thenReturn(toStream(new TableDataInsertAllResponse()));
      });

  thrown.expect(RuntimeException.class);
  // Google-http-client 1.39.1 and higher does not read the content of the response with error
  // status code. How can we ensure appropriate exception is thrown?
  thrown.expectMessage("quotaExceeded");

  DatasetServiceImpl datasetService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());
  datasetService.insertAll(
      tableRef,
      singleRow,
      null,
      BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
      TEST_BACKOFF,
      new MockSleeper(),
      InsertRetryPolicy.alwaysRetry(),
      null,
      null,
      false,
      false,
      false,
      null);

  // NOTE(review): assuming `thrown` is a JUnit ExpectedException rule, the two verifications
  // below are only reached when insertAll returns normally, in which case the rule fails the
  // test anyway. They never verify anything on a passing run - confirm whether they are meant
  // to hold before making them reachable.
  verifyAllResponsesAreRead();
  verifyWriteMetricWasSet("project", "dataset", "table", "quotaexceeded", 1);
}
Aggregations