Example use of com.google.api.services.dataflow.model.SourceSplitResponse in the Apache Beam project.
From the class WorkerCustomSourcesTest, method testLargeSerializedSizeResplits:
@Test
public void testLargeSerializedSizeResplits() throws Exception {
  final long apiSizeLimitForTest = 5 * 1024;
  // Determine how many CountingSource splits would exceed the API size limit, with an
  // extra factor of 2 so that we are safely over it.
  BoundedSource<Long> sizingSource = CountingSource.upTo(1000000L);
  long bytesPerSplit =
      DataflowApiUtils.computeSerializedSizeBytes(translateIOToCloudSource(sizingSource, options));
  long splitsToExceedLimit = 2 * apiSizeLimitForTest / bytesPerSplit;
  checkState(
      splitsToExceedLimit < WorkerCustomSources.DEFAULT_NUM_BUNDLES_LIMIT,
      "This test expects the number of splits to be less than %s "
          + "to avoid using SplittableOnlyBoundedSource",
      WorkerCustomSources.DEFAULT_NUM_BUNDLES_LIMIT);
  // Build a CountingSource and request one element (8 bytes, 1 long) per split, which
  // triggers the re-split into larger bundles once the response exceeds the size limit.
  com.google.api.services.dataflow.model.Source source =
      translateIOToCloudSource(CountingSource.upTo(splitsToExceedLimit), options);
  SourceSplitResponse response =
      performSplit(source, options, 8L, null /* numBundles limit */, apiSizeLimitForTest);
  logged.verifyWarn("too large for the Google Cloud Dataflow API");
  logged.verifyWarn(String.format("%d bundles", splitsToExceedLimit));
  assertThat((long) response.getBundles().size(), lessThan(splitsToExceedLimit));
}
Aggregations