use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
the class StreamingWriteFn method finishBundle.
/** Writes the accumulated rows into BigQuery with streaming API. */
@FinishBundle
public void finishBundle(FinishBundleContext context) throws Exception {
List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
for (Map.Entry<String, List<ValueInSingleWindow<TableRow>>> entry : tableRows.entrySet()) {
TableReference tableReference = BigQueryHelpers.parseTableSpec(entry.getKey());
flushRows(tableReference, entry.getValue(), uniqueIdsForTableRows.get(entry.getKey()), options, failedInserts);
}
tableRows.clear();
uniqueIdsForTableRows.clear();
for (ValueInSingleWindow<TableRow> row : failedInserts) {
context.output(failedOutputTag, row.getValue(), row.getTimestamp(), row.getWindow());
}
}
use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
the class GatherAllPanesTest method multiplePanesMultipleReifiedPane.
@Test
@Category(NeedsRunner.class)
public void multiplePanesMultipleReifiedPane() {
PCollection<Long> someElems = p.apply("someLongs", GenerateSequence.from(0).to(20000));
PCollection<Long> otherElems = p.apply("otherLongs", GenerateSequence.from(0).to(20000));
PCollection<Iterable<ValueInSingleWindow<Iterable<Long>>>> accumulatedPanes = PCollectionList.of(someElems).and(otherElems).apply(Flatten.<Long>pCollections()).apply(WithTimestamps.of(new SerializableFunction<Long, Instant>() {
@Override
public Instant apply(Long input) {
return new Instant(input * 10);
}
})).apply(Window.<Long>into(FixedWindows.of(Duration.standardMinutes(1))).triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(1))).withAllowedLateness(Duration.ZERO).discardingFiredPanes()).apply(WithKeys.<Void, Long>of((Void) null).withKeyType(new TypeDescriptor<Void>() {
})).apply(GroupByKey.<Void, Long>create()).apply(Values.<Iterable<Long>>create()).apply(GatherAllPanes.<Iterable<Long>>globally());
PAssert.that(accumulatedPanes).satisfies(new SerializableFunction<Iterable<Iterable<ValueInSingleWindow<Iterable<Long>>>>, Void>() {
@Override
public Void apply(Iterable<Iterable<ValueInSingleWindow<Iterable<Long>>>> input) {
for (Iterable<ValueInSingleWindow<Iterable<Long>>> windowedInput : input) {
if (Iterables.size(windowedInput) > 1) {
return null;
}
}
fail("Expected at least one window to have multiple panes");
return null;
}
});
p.run();
}
use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
the class BigQueryUtilTest method testInsertAll.
@Test
public void testInsertAll() throws Exception, IOException {
// Build up a list of indices to fail on each invocation. This should result in
// 5 calls to insertAll.
List<List<Long>> errorsIndices = new ArrayList<>();
errorsIndices.add(Arrays.asList(0L, 5L, 10L, 15L, 20L));
errorsIndices.add(Arrays.asList(0L, 2L, 4L));
errorsIndices.add(Arrays.asList(0L, 2L));
errorsIndices.add(new ArrayList<Long>());
onInsertAll(errorsIndices);
TableReference ref = BigQueryHelpers.parseTableSpec("project:dataset.table");
DatasetServiceImpl datasetService = new DatasetServiceImpl(mockClient, options, 5);
List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
List<String> ids = new ArrayList<>();
for (int i = 0; i < 25; ++i) {
rows.add(ValueInSingleWindow.of(rawRow("foo", 1234), GlobalWindow.TIMESTAMP_MAX_VALUE, GlobalWindow.INSTANCE, PaneInfo.ON_TIME_AND_ONLY_FIRING));
ids.add(new String());
}
long totalBytes = 0;
try {
totalBytes = datasetService.insertAll(ref, rows, ids, InsertRetryPolicy.alwaysRetry(), null);
} finally {
verifyInsertAll(5);
// Each of the 25 rows is 23 bytes: "{f=[{v=foo}, {v=1234}]}"
assertEquals("Incorrect byte count", 25L * 23L, totalBytes);
}
}
use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
the class BigQueryServicesImplTest method testInsertRetryPolicy.
/**
* Tests that {@link DatasetServiceImpl#insertAll} uses the supplied {@link InsertRetryPolicy},
* and returns the list of rows not retried.
*/
@Test
public void testInsertRetryPolicy() throws InterruptedException, IOException {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
// First time row0 fails with a retryable error, and row1 fails with a persistent error.
final TableDataInsertAllResponse firstFailure = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout"))), new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")))));
// Second time there is only one row, which fails with a retryable error.
final TableDataInsertAllResponse secondFialure = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")))));
// On the final attempt, no failures are returned.
final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
// Always return 200.
when(response.getStatusCode()).thenReturn(200);
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
when(response.getStatusCode()).thenReturn(200).thenReturn(200);
// First fail
when(response.getContent()).thenReturn(toStream(firstFailure)).thenReturn(toStream(secondFialure)).thenReturn(toStream(allRowsSucceeded));
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.retryTransientErrors(), failedInserts);
assertEquals(1, failedInserts.size());
expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
the class BigQueryServicesImplTest method testInsertDoesNotRetry.
/**
* Tests that {@link DatasetServiceImpl#insertAll} does not retry non-rate-limited attempts.
*/
@Test
public void testInsertDoesNotRetry() throws Throwable {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
rows.add(wrapTableRow(new TableRow()));
// First response is 403 not-rate-limited, second response has valid payload but should not
// be invoked.
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
when(response.getStatusCode()).thenReturn(403).thenReturn(200);
when(response.getContent()).thenReturn(toStream(errorWithReasonAndStatus("actually forbidden", 403))).thenReturn(toStream(new TableDataInsertAllResponse()));
thrown.expect(GoogleJsonResponseException.class);
thrown.expectMessage("actually forbidden");
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
try {
dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
fail();
} catch (RuntimeException e) {
verify(response, times(1)).getStatusCode();
verify(response, times(1)).getContent();
verify(response, times(1)).getContentType();
throw e.getCause();
}
}
Aggregations