Use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
The class GatherAllPanesTest, method singlePaneSingleReifiedPane.
@Test
@Category(NeedsRunner.class)
public void singlePaneSingleReifiedPane() {
PCollection<Iterable<ValueInSingleWindow<Iterable<Long>>>> accumulatedPanes =
    p.apply(GenerateSequence.from(0).to(20000))
        .apply(WithTimestamps.of(new SerializableFunction<Long, Instant>() {
          @Override
          public Instant apply(Long input) {
            return new Instant(input * 10);
          }
        }))
        .apply(Window.<Long>into(FixedWindows.of(Duration.standardMinutes(1)))
            .triggering(AfterWatermark.pastEndOfWindow())
            .withAllowedLateness(Duration.ZERO)
            .discardingFiredPanes())
        .apply(WithKeys.<Void, Long>of((Void) null).withKeyType(new TypeDescriptor<Void>() {}))
        .apply(GroupByKey.<Void, Long>create())
        .apply(Values.<Iterable<Long>>create())
        .apply(GatherAllPanes.<Iterable<Long>>globally());
PAssert.that(accumulatedPanes).satisfies(new SerializableFunction<Iterable<Iterable<ValueInSingleWindow<Iterable<Long>>>>, Void>() {
@Override
public Void apply(Iterable<Iterable<ValueInSingleWindow<Iterable<Long>>>> input) {
for (Iterable<ValueInSingleWindow<Iterable<Long>>> windowedInput : input) {
if (Iterables.size(windowedInput) > 1) {
fail("Expected all windows to have exactly one pane, got " + windowedInput);
return null;
}
}
return null;
}
});
p.run();
}
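For reference, the ValueInSingleWindow instances that GatherAllPanes emits can also be built directly through the class's static factory. A minimal sketch (the value, timestamp, and window below are arbitrary placeholders, not taken from the test):
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.values.ValueInSingleWindow;
import org.joda.time.Instant;

// Build a single windowed value by hand: element, event-time timestamp, window, and pane metadata.
ValueInSingleWindow<Long> element =
    ValueInSingleWindow.of(42L, new Instant(0), GlobalWindow.INSTANCE, PaneInfo.ON_TIME_AND_ONLY_FIRING);

// The accessors expose the same pieces the assertion above iterates over.
Long value = element.getValue();
Instant timestamp = element.getTimestamp();
PaneInfo pane = element.getPane();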
Use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
The class BigQueryServicesImplTest, method testInsertRetrySelectRows.
/**
* Tests that {@link DatasetServiceImpl#insertAll} retries only the rows that failed when a request partially fails.
*/
@Test
public void testInsertRetrySelectRows() throws Exception {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(
    wrapTableRow(new TableRow().set("row", "a")), wrapTableRow(new TableRow().set("row", "b")));
List<String> insertIds = ImmutableList.of("a", "b");
final TableDataInsertAllResponse bFailed =
    new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(
        new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto()))));
final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
when(response.getStatusCode()).thenReturn(200).thenReturn(200);
when(response.getContent()).thenReturn(toStream(bFailed)).thenReturn(toStream(allRowsSucceeded));
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
dataService.insertAll(ref, rows, insertIds, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
    new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
verify(response, times(2)).getContentType();
}
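The wrapTableRow helper used above is a test utility whose body is not shown on this page. A plausible equivalent, assuming it simply places each row in the global window (the timestamp choice is an assumption), would be:
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.values.ValueInSingleWindow;

// Hypothetical stand-in for wrapTableRow: wrap the row so that insertAll receives a
// ValueInSingleWindow<TableRow>, as in the tests above.
private static ValueInSingleWindow<TableRow> wrapTableRow(TableRow row) {
  return ValueInSingleWindow.of(
      row, GlobalWindow.INSTANCE.maxTimestamp(), GlobalWindow.INSTANCE, PaneInfo.ON_TIME_AND_ONLY_FIRING);
}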
Use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
The class BigQueryServicesImplTest, method testInsertRetry.
/**
* Tests that {@link DatasetServiceImpl#insertAll} retries attempts that fail with a quota rate-limit error.
*/
@Test
public void testInsertRetry() throws Exception {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
rows.add(wrapTableRow(new TableRow()));
// First response is 403 rate limited, second response has valid payload.
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
when(response.getStatusCode()).thenReturn(403).thenReturn(200);
when(response.getContent())
    .thenReturn(toStream(errorWithReasonAndStatus("rateLimitExceeded", 403)))
    .thenReturn(toStream(new TableDataInsertAllResponse()));
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
    new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
verify(response, times(2)).getStatusCode();
verify(response, times(2)).getContent();
verify(response, times(2)).getContentType();
expectedLogs.verifyInfo("BigQuery insertAll exceeded rate limit, retrying");
}
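The tests hand InsertRetryPolicy.alwaysRetry() straight to DatasetServiceImpl. In a user pipeline the same choice is normally made on the sink itself; a sketch, assuming a PCollection<TableRow> named rows and a placeholder table spec (availability of the method depends on the Beam version):
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy;
import org.apache.beam.sdk.values.PCollection;

// Select the retry behaviour for failed streaming inserts when configuring the write.
static void writeWithRetryPolicy(PCollection<TableRow> rows) {
  rows.apply(
      BigQueryIO.writeTableRows()
          .to("project-id:dataset_id.table_id")
          .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()));
}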
Use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
The class BigQueryServicesImplTest, method testInsertFailsGracefully.
/**
* Tests that {@link DatasetServiceImpl#insertAll} fails gracefully when rows persistently fail to insert.
*/
@Test
public void testInsertFailsGracefully() throws Exception {
TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
final TableDataInsertAllResponse row1Failed =
    new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(1L)));
final TableDataInsertAllResponse row0Failed =
    new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(new InsertErrors().setIndex(0L)));
when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
// Always return 200.
when(response.getStatusCode()).thenReturn(200);
// Report row 1 as failed; on retry that row is resubmitted as row 0 of the new request, and it then fails persistently.
when(response.getContent()).thenReturn(toStream(row1Failed)).thenAnswer(new Answer<InputStream>() {
@Override
public InputStream answer(InvocationOnMock invocation) throws Throwable {
return toStream(row0Failed);
}
});
DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
// Expect it to fail.
try {
dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
    new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
fail();
} catch (IOException e) {
assertThat(e, instanceOf(IOException.class));
assertThat(e.getMessage(), containsString("Insert failed:"));
assertThat(e.getMessage(), containsString("[{\"index\":0}]"));
}
// Verify the exact number of retries as well as log messages.
verify(response, times(4)).getStatusCode();
verify(response, times(4)).getContent();
verify(response, times(4)).getContentType();
expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
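TEST_BACKOFF, which drives the retry loop in these tests, is defined elsewhere in BigQueryServicesImplTest. A plausible shape, assuming Beam's FluentBackoff utility with a small retry budget so the test terminates quickly:
import org.apache.beam.sdk.util.FluentBackoff;
import org.joda.time.Duration;

// Bounded backoff: a short initial delay and only a handful of retries.
private static final FluentBackoff TEST_BACKOFF =
    FluentBackoff.DEFAULT.withInitialBackoff(Duration.millis(1)).withMaxRetries(3);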
Use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.
The class BigQueryIOTest, method testWriteWithDynamicTables.
public void testWriteWithDynamicTables(boolean streaming) throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
FakeDatasetService datasetService = new FakeDatasetService();
datasetService.createDataset("project-id", "dataset-id", "", "");
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withDatasetService(datasetService).withJobService(new FakeJobService());
List<Integer> inserts = new ArrayList<>();
for (int i = 0; i < 10; i++) {
inserts.add(i);
}
// Create a windowing strategy that puts the input into five different windows depending on
// record value.
WindowFn<Integer, PartitionedGlobalWindow> windowFn =
    new PartitionedGlobalWindows(new SerializableFunction<Integer, String>() {
@Override
public String apply(Integer i) {
return Integer.toString(i % 5);
}
});
final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
Map<String, String> schemas = Maps.newHashMap();
for (int i = 0; i < 5; i++) {
TableDestination destination = new TableDestination("project-id:dataset-id" + ".table-id-" + i, "");
targetTables.put(i, destination);
// Make sure each target table has its own custom schema.
schemas.put(
    destination.getTableSpec(),
    BigQueryHelpers.toJsonString(
        new TableSchema().setFields(ImmutableList.of(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("number").setType("INTEGER"),
            new TableFieldSchema().setName("custom_" + i).setType("STRING")))));
}
SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction =
    new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {
@Override
public TableDestination apply(ValueInSingleWindow<Integer> input) {
PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
// Check that we can access the element as well here and that it matches the window.
checkArgument(window.value.equals(Integer.toString(input.getValue() % 5)), "Incorrect element");
return targetTables.get(input.getValue() % 5);
}
};
Pipeline p = TestPipeline.create(bqOptions);
PCollection<Integer> input = p.apply("CreateSource", Create.of(inserts));
if (streaming) {
input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
}
PCollectionView<Map<String, String>> schemasView =
    p.apply("CreateSchemaMap", Create.of(schemas))
        .apply("ViewSchemaAsMap", View.<String, String>asMap());
input.apply(Window.<Integer>into(windowFn))
    .apply(BigQueryIO.<Integer>write()
        .to(tableFunction)
        .withFormatFunction(new SerializableFunction<Integer, TableRow>() {
          @Override
          public TableRow apply(Integer i) {
            return new TableRow().set("name", "number" + i).set("number", i);
          }
        })
        .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
        .withSchemaFromView(schemasView)
        .withTestServices(fakeBqServices)
        .withoutValidation());
p.run();
for (int i = 0; i < 5; ++i) {
String tableId = String.format("table-id-%d", i);
String tableSpec = String.format("project-id:dataset-id.%s", tableId);
// Verify that table was created with the correct schema.
assertThat(
    BigQueryHelpers.toJsonString(
        datasetService.getTable(new TableReference().setProjectId("project-id")
            .setDatasetId("dataset-id").setTableId(tableId)).getSchema()),
    equalTo(schemas.get(tableSpec)));
// Verify that the table has the expected contents.
assertThat(
    datasetService.getAllRows("project-id", "dataset-id", tableId),
    containsInAnyOrder(
        new TableRow().set("name", String.format("number%d", i)).set("number", i),
        new TableRow().set("name", String.format("number%d", i + 5)).set("number", i + 5)));
}
}
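PartitionedGlobalWindows is a test-only WindowFn that is not reproduced on this page. The same per-element routing idea works with standard windowing as well; a sketch using fixed windows, where the destination is derived from the element's window via ValueInSingleWindow.getWindow() (table names are placeholders):
import org.apache.beam.sdk.io.gcp.bigquery.TableDestination;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.values.ValueInSingleWindow;

// Route each element to a table named after the start of its fixed window.
SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> byWindowStart =
    new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {
      @Override
      public TableDestination apply(ValueInSingleWindow<Integer> input) {
        IntervalWindow window = (IntervalWindow) input.getWindow();
        return new TableDestination(
            "project-id:dataset_id.events_" + window.start().getMillis(),
            "rows for the window starting at " + window.start());
      }
    };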