use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
the class BigQueryServicesImplTest method testInsertRetrySelectRows.
/** Tests that {@link DatasetServiceImpl#insertAll} retries selected rows on failure. */
@Test
public void testInsertRetrySelectRows() throws Exception {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> rows =
      ImmutableList.of(
          wrapValue(new TableRow().set("row", "a")), wrapValue(new TableRow().set("row", "b")));
  List<String> insertIds = ImmutableList.of("a", "b");
  final TableDataInsertAllResponse bFailed =
      new TableDataInsertAllResponse()
          .setInsertErrors(ImmutableList.of(
              new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto()))));
  final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
  setupMockResponses(
      response -> {
        when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(response.getStatusCode()).thenReturn(200);
        when(response.getContent()).thenReturn(toStream(bFailed));
      },
      response -> {
        when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(response.getStatusCode()).thenReturn(200);
        when(response.getContent()).thenReturn(toStream(allRowsSucceeded));
      });
  DatasetServiceImpl dataService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());
  dataService.insertAll(
      ref, rows, insertIds,
      BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
      TEST_BACKOFF,
      new MockSleeper(),
      InsertRetryPolicy.alwaysRetry(),
      null, null, false, false, false, null);
  verifyAllResponsesAreRead();
  verifyWriteMetricWasSet("project", "dataset", "table", "unknown", 1);
  verifyWriteMetricWasSet("project", "dataset", "table", "ok", 1);
}
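
The wrapValue helper used above is defined elsewhere in BigQueryServicesImplTest and is not shown on this page. A minimal sketch of what such a helper could look like, assuming the FailsafeValueInSingleWindow.of factory and the global-window constants from the Beam SDK, with the row reused as its own failsafe value:

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.values.FailsafeValueInSingleWindow;

// Hypothetical sketch of the helper: wrap a TableRow in the global window and
// use the row itself as the failsafe (fallback) value.
static FailsafeValueInSingleWindow<TableRow, TableRow> wrapValue(TableRow row) {
  return FailsafeValueInSingleWindow.of(
      row,                                // value
      BoundedWindow.TIMESTAMP_MAX_VALUE,  // element timestamp
      GlobalWindow.INSTANCE,              // window
      PaneInfo.ON_TIME_AND_ONLY_FIRING,   // pane info
      row);                               // failsafe value
}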
use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
the class BigQueryServicesImplTest method testSkipInvalidRowsIgnoreUnknownIgnoreInsertIdsValuesStreaming.
/**
 * Tests that {@link DatasetServiceImpl#insertAll} respects the skipInvalidRows,
 * ignoreUnknownValues and ignoreInsertIds parameters.
 */
@Test
public void testSkipInvalidRowsIgnoreUnknownIgnoreInsertIdsValuesStreaming()
    throws InterruptedException, IOException {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> rows =
      ImmutableList.of(wrapValue(new TableRow()), wrapValue(new TableRow()));
  final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
  // Return a 200 response each time
  MockSetupFunction allRowsSucceededResponseFunction =
      response -> {
        when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(response.getStatusCode()).thenReturn(200);
        when(response.getContent()).thenReturn(toStream(allRowsSucceeded));
      };
  setupMockResponses(allRowsSucceededResponseFunction, allRowsSucceededResponseFunction);
  DatasetServiceImpl dataService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());
  // First, test with all flags disabled
  dataService.insertAll(
      ref, rows, null,
      BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
      TEST_BACKOFF,
      new MockSleeper(),
      InsertRetryPolicy.neverRetry(),
      Lists.newArrayList(),
      ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
      false, false, false, null);
  TableDataInsertAllRequest parsedRequest =
      fromString(request.getContentAsString(), TableDataInsertAllRequest.class);
  assertFalse(parsedRequest.getSkipInvalidRows());
  assertFalse(parsedRequest.getIgnoreUnknownValues());
  // Then with all enabled
  dataService.insertAll(
      ref, rows, null,
      BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
      TEST_BACKOFF,
      new MockSleeper(),
      InsertRetryPolicy.neverRetry(),
      Lists.newArrayList(),
      ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
      true, true, true, null);
  parsedRequest = fromString(request.getContentAsString(), TableDataInsertAllRequest.class);
  assertTrue(parsedRequest.getSkipInvalidRows());
  assertTrue(parsedRequest.getIgnoreUnknownValues());
  assertNull(parsedRequest.getRows().get(0).getInsertId());
  assertNull(parsedRequest.getRows().get(1).getInsertId());
  verifyWriteMetricWasSet("project", "dataset", "table", "ok", 2);
}
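
For context, the three boolean arguments passed to insertAll above surface directly on the streaming-insert request body, which the test parses back with fromString. A purely illustrative snippet (the field name and value are made up) showing the corresponding Google BigQuery API model calls:

import com.google.api.services.bigquery.model.TableDataInsertAllRequest;
import com.google.api.services.bigquery.model.TableRow;
import com.google.common.collect.ImmutableList;

// Illustrative request shape: skipInvalidRows and ignoreUnknownValues become
// request-level fields, while ignoreInsertIds means no insertId is set per row.
TableRow row = new TableRow().set("example_field", "example_value"); // made-up payload
TableDataInsertAllRequest.Rows entry =
    new TableDataInsertAllRequest.Rows().setJson(row); // no setInsertId(...) call
TableDataInsertAllRequest insertRequest =
    new TableDataInsertAllRequest()
        .setRows(ImmutableList.of(entry))
        .setSkipInvalidRows(true)
        .setIgnoreUnknownValues(true);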
use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
the class BigQueryServicesImplTest method testExtendedErrorRetrieval.
/** Tests that {@link DatasetServiceImpl#insertAll} uses the supplied {@link ErrorContainer}. */
@Test
public void testExtendedErrorRetrieval() throws InterruptedException, IOException {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> rows =
      ImmutableList.of(wrapValue(new TableRow().set("a", 1)), wrapValue(new TableRow().set("b", 2)));
  final TableDataInsertAllResponse failures =
      new TableDataInsertAllResponse()
          .setInsertErrors(ImmutableList.of(
              new InsertErrors().setIndex(0L)
                  .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout"))),
              new InsertErrors().setIndex(1L)
                  .setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")))));
  final List<ValueInSingleWindow<BigQueryInsertError>> expected =
      ImmutableList.of(
          wrapErrorValue(new BigQueryInsertError(
              rows.get(0).getValue(), failures.getInsertErrors().get(0), ref)),
          wrapErrorValue(new BigQueryInsertError(
              rows.get(1).getValue(), failures.getInsertErrors().get(1), ref)));
  setupMockResponses(
      response -> {
        when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(response.getStatusCode()).thenReturn(200);
        when(response.getContent()).thenReturn(toStream(failures));
      });
  DatasetServiceImpl dataService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());
  List<ValueInSingleWindow<BigQueryInsertError>> failedInserts = Lists.newArrayList();
  dataService.insertAll(
      ref, rows, null,
      BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
      TEST_BACKOFF,
      new MockSleeper(),
      InsertRetryPolicy.neverRetry(),
      failedInserts,
      ErrorContainer.BIG_QUERY_INSERT_ERROR_ERROR_CONTAINER,
      false, false, false, null);
  assertThat(failedInserts, is(expected));
}
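
As with wrapValue, the wrapErrorValue helper is defined elsewhere in the test class. A plausible sketch, assuming it simply windows the BigQueryInsertError the same way the input rows are windowed:

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryInsertError;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.values.ValueInSingleWindow;

// Hypothetical sketch: put the expected error into the global window so it can be
// compared directly against the entries insertAll adds to failedInserts.
static ValueInSingleWindow<BigQueryInsertError> wrapErrorValue(BigQueryInsertError error) {
  return ValueInSingleWindow.of(
      error,
      BoundedWindow.TIMESTAMP_MAX_VALUE,
      GlobalWindow.INSTANCE,
      PaneInfo.ON_TIME_AND_ONLY_FIRING);
}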
use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
the class BigQueryServicesImplTest method testInsertTimeoutLog.
/** Tests that {@link DatasetServiceImpl#insertAll} logs a suggested remedy for insert timeouts. */
@Test
public void testInsertTimeoutLog() throws Exception {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> rows = new ArrayList<>();
  rows.add(wrapValue(new TableRow()));
  setupMockResponses(
      response -> {
        when(response.getStatusCode()).thenReturn(400);
        when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
        when(response.getContent())
            .thenReturn(
                toStream(errorWithReasonAndStatus(" No rows present in the request. ", 400)));
      });
  DatasetServiceImpl dataService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());
  RuntimeException e =
      assertThrows(
          RuntimeException.class,
          () ->
              dataService.insertAll(
                  ref, rows, null,
                  BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
                  TEST_BACKOFF,
                  new MockSleeper(),
                  InsertRetryPolicy.alwaysRetry(),
                  null, null, false, false, false, null));
  assertThat(e.getCause().getMessage(), containsString("No rows present in the request."));
  verifyAllResponsesAreRead();
  expectedLogs.verifyError("No rows present in the request error likely caused by");
  verifyWriteMetricWasSet("project", "dataset", "table", " no rows present in the request. ", 1);
}
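
The errorWithReasonAndStatus helper is also not shown on this page. A hedged sketch of how such a payload could be built with the Google API client's error classes; the exact helper in BigQueryServicesImplTest may differ:

import com.google.api.client.googleapis.json.GoogleJsonError;
import com.google.api.client.googleapis.json.GoogleJsonError.ErrorInfo;
import com.google.api.client.googleapis.json.GoogleJsonErrorContainer;
import com.google.common.collect.ImmutableList;

// Sketch: wrap a reason string and HTTP status code in the standard JSON error
// envelope that the mocked low-level HTTP response serves back to the client.
static GoogleJsonErrorContainer errorWithReasonAndStatus(String reason, int status) {
  ErrorInfo info = new ErrorInfo();
  info.setReason(reason);
  info.setDomain("global");
  GoogleJsonError error = new GoogleJsonError();
  error.setErrors(ImmutableList.of(info));
  error.setCode(status);
  error.setMessage(reason); // the test asserts on the message via e.getCause().getMessage()
  GoogleJsonErrorContainer container = new GoogleJsonErrorContainer();
  container.setError(error);
  return container;
}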
use of org.apache.beam.sdk.values.FailsafeValueInSingleWindow in project beam by apache.
the class BigQueryServicesImplTest method testInsertFailsGracefully.
/**
 * Tests that {@link DatasetServiceImpl#insertAll} fails gracefully when persistent issues occur.
 */
@Test
public void testInsertFailsGracefully() throws Exception {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  List<FailsafeValueInSingleWindow<TableRow, TableRow>> rows =
      ImmutableList.of(wrapValue(new TableRow()), wrapValue(new TableRow()));
  ErrorProto errorProto = new ErrorProto().setReason("schemaMismatch");
  final TableDataInsertAllResponse row1Failed =
      new TableDataInsertAllResponse()
          .setInsertErrors(ImmutableList.of(
              new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(errorProto))));
  final TableDataInsertAllResponse row0Failed =
      new TableDataInsertAllResponse()
          .setInsertErrors(ImmutableList.of(
              new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(errorProto))));
  MockSetupFunction row0FailureResponseFunction =
      response -> {
        when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
        // Always return 200.
        when(response.getStatusCode()).thenReturn(200);
        when(response.getContent()).thenAnswer(invocation -> toStream(row0Failed));
      };
  setupMockResponses(
      response -> {
        when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
        // Always return 200.
        when(response.getStatusCode()).thenReturn(200);
        // Return row 1 failing, then we retry row 1 as row 0, and row 0 persistently fails.
        when(response.getContent()).thenReturn(toStream(row1Failed));
      },
      // 3 failures
      row0FailureResponseFunction,
      row0FailureResponseFunction,
      row0FailureResponseFunction);
  DatasetServiceImpl dataService =
      new DatasetServiceImpl(bigquery, null, PipelineOptionsFactory.create());
  // Expect it to fail.
  try {
    dataService.insertAll(
        ref, rows, null,
        BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
        TEST_BACKOFF,
        new MockSleeper(),
        InsertRetryPolicy.alwaysRetry(),
        null, null, false, false, false, null);
    fail();
  } catch (IOException e) {
    assertThat(e, instanceOf(IOException.class));
    assertThat(e.getMessage(), containsString("Insert failed:"));
    assertThat(e.getMessage(), containsString("[{\"errors\":[{\"reason\":\"schemaMismatch\"}]"));
  }
  // Verify the exact number of retries as well as log messages.
  verifyAllResponsesAreRead();
  expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
  verifyWriteMetricWasSet("project", "dataset", "table", "schemamismatch", 4);
}
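
The test forces InsertRetryPolicy.alwaysRetry() so the persistent schemaMismatch error keeps being retried until the back-off is exhausted. For contrast, a hedged sketch of the same call with the retryTransientErrors() policy, which stops retrying once an error is classified as persistent and hands the row to the error container instead; everything other than the policy, the failed-inserts list, and the error container is unchanged from the call above:

// Sketch only: swap in a policy that gives up on persistent errors.
List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
dataService.insertAll(
    ref, rows, null,
    BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
    TEST_BACKOFF,
    new MockSleeper(),
    InsertRetryPolicy.retryTransientErrors(),  // instead of alwaysRetry()
    failedInserts,                             // rows the policy gives up on land here
    ErrorContainer.TABLE_ROW_ERROR_CONTAINER,
    false, false, false, null);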