Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong in project beam by apache.
From the class StreamingDataflowWorkerTest, method testUnboundedSources.
@Test
public void testUnboundedSources() throws Exception {
List<Integer> finalizeTracker = Lists.newArrayList();
TestCountingSource.setFinalizeTracker(finalizeTracker);
FakeWindmillServer server = new FakeWindmillServer(errorCollector);
StreamingDataflowWorker worker = makeWorker(makeUnboundedSourcePipeline(), createTestingPipelineOptions(server), false);
worker.start();
// Test new key.
server.addWorkToOffer(
    buildInput(
        "work {"
            + " computation_id: \"computation\""
            + " input_data_watermark: 0"
            + " work {"
            + " key: \"0000000000000001\""
            + " sharding_key: 1"
            + " work_token: 1"
            + " cache_token: 1"
            + " }"
            + "}",
        null));
Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
Iterable<CounterUpdate> counters = worker.buildCounters();
Windmill.WorkItemCommitRequest commit = result.get(1L);
UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
assertThat(
    commit,
    equalTo(
        setMessagesMetadata(
                PaneInfo.NO_FIRING,
                CoderUtils.encodeToByteArray(
                    CollectionCoder.of(GlobalWindow.Coder.INSTANCE),
                    Arrays.asList(GlobalWindow.INSTANCE)),
                parseCommitRequest(
                    "key: \"0000000000000001\" "
                        + "sharding_key: 1 "
                        + "work_token: 1 "
                        + "cache_token: 1 "
                        + "source_backlog_bytes: 7 "
                        + "output_messages {"
                        + " destination_stream_id: \"out\""
                        + " bundles {"
                        + " key: \"0000000000000001\""
                        + " messages {"
                        + " timestamp: 0"
                        + " data: \"0:0\""
                        + " }"
                        + " messages_ids: \"\""
                        + " }"
                        + "} "
                        + "source_state_updates {"
                        + " state: \"\000\""
                        + " finalize_ids: "
                        + finalizeId
                        + "} "
                        + "source_watermark: 1000"))
            .build()));
assertEquals(18L, splitIntToLong(getCounter(counters, "dataflow_input_size-computation").getInteger()));
// Test same key continuing. The counter is done.
server.addWorkToOffer(
    buildInput(
        "work {"
            + " computation_id: \"computation\""
            + " input_data_watermark: 0"
            + " work {"
            + " key: \"0000000000000001\""
            + " sharding_key: 1"
            + " work_token: 2"
            + " cache_token: 1"
            + " source_state {"
            + " state: \"\001\""
            + " finalize_ids: "
            + finalizeId
            + " } "
            + " }"
            + "}",
        null));
result = server.waitForAndGetCommits(1);
counters = worker.buildCounters();
commit = result.get(2L);
finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
assertThat(
    commit,
    equalTo(
        parseCommitRequest(
                "key: \"0000000000000001\" "
                    + "sharding_key: 1 "
                    + "work_token: 2 "
                    + "cache_token: 1 "
                    + "source_backlog_bytes: 7 "
                    + "source_state_updates {"
                    + " state: \"\000\""
                    + " finalize_ids: "
                    + finalizeId
                    + "} "
                    + "source_watermark: 1000")
            .build()));
assertThat(finalizeTracker, contains(0));
assertNull(getCounter(counters, "dataflow_input_size-computation"));
// Test recovery (on a new key so fresh reader state). Counter is done.
server.addWorkToOffer(
    buildInput(
        "work {"
            + " computation_id: \"computation\""
            + " input_data_watermark: 0"
            + " work {"
            + " key: \"0000000000000002\""
            + " sharding_key: 2"
            + " work_token: 3"
            + " cache_token: 2"
            + " source_state {"
            + " state: \"\000\""
            + " } "
            + " }"
            + "}",
        null));
result = server.waitForAndGetCommits(1);
counters = worker.buildCounters();
commit = result.get(3L);
finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
assertThat(
    commit,
    equalTo(
        parseCommitRequest(
                "key: \"0000000000000002\" "
                    + "sharding_key: 2 "
                    + "work_token: 3 "
                    + "cache_token: 2 "
                    + "source_backlog_bytes: 7 "
                    + "source_state_updates {"
                    + " state: \"\000\""
                    + " finalize_ids: "
                    + finalizeId
                    + "} "
                    + "source_watermark: 1000")
            .build()));
assertNull(getCounter(counters, "dataflow_input_size-computation"));
}
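A note on the UnsignedLong usage above: Windmill declares finalize_ids as a 64-bit unsigned field, which protobuf surfaces in Java as a signed long, so an id with the top bit set would print as a negative number and corrupt the expected text-format request. UnsignedLong.fromLongBits reinterprets the bit pattern losslessly. A minimal standalone sketch (the class name is illustrative, and the plain Guava import stands in for Beam's vendored copy):

import com.google.common.primitives.UnsignedLong;

public class UnsignedLongRoundTrip {
  public static void main(String[] args) {
    // A finalize id with the top bit set looks negative as a signed long.
    long rawBits = 0xFFFFFFFFFFFFFFFFL;
    System.out.println(rawBits); // -1
    // fromLongBits reinterprets the same 64 bits as an unsigned value, which
    // is the form the expected text-format commit request needs to contain.
    UnsignedLong id = UnsignedLong.fromLongBits(rawBits);
    System.out.println(id); // 18446744073709551615
    // The conversion is lossless: longValue() returns the original bits.
    assert id.longValue() == rawBits;
  }
}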
Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong in project beam by apache.
From the class StreamingDataflowWorkerTest, method testUnboundedSourcesDrain.
@Test
public void testUnboundedSourcesDrain() throws Exception {
List<Integer> finalizeTracker = Lists.newArrayList();
TestCountingSource.setFinalizeTracker(finalizeTracker);
FakeWindmillServer server = new FakeWindmillServer(errorCollector);
StreamingDataflowWorker worker = makeWorker(makeUnboundedSourcePipeline(), createTestingPipelineOptions(server), true);
worker.start();
// Test new key.
server.addWorkToOffer(
    buildInput(
        "work {"
            + " computation_id: \"computation\""
            + " input_data_watermark: 0"
            + " work {"
            + " key: \"0000000000000001\""
            + " sharding_key: 1"
            + " work_token: 2"
            + " cache_token: 3"
            + " }"
            + "}",
        null));
Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
Windmill.WorkItemCommitRequest commit = result.get(2L);
UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
assertThat(
    commit,
    equalTo(
        setMessagesMetadata(
                PaneInfo.NO_FIRING,
                CoderUtils.encodeToByteArray(
                    CollectionCoder.of(GlobalWindow.Coder.INSTANCE),
                    Arrays.asList(GlobalWindow.INSTANCE)),
                parseCommitRequest(
                    "key: \"0000000000000001\" "
                        + "sharding_key: 1 "
                        + "work_token: 2 "
                        + "cache_token: 3 "
                        + "source_backlog_bytes: 7 "
                        + "output_messages {"
                        + " destination_stream_id: \"out\""
                        + " bundles {"
                        + " key: \"0000000000000001\""
                        + " messages {"
                        + " timestamp: 0"
                        + " data: \"0:0\""
                        + " }"
                        + " messages_ids: \"\""
                        + " }"
                        + "} "
                        + "source_state_updates {"
                        + " state: \"\000\""
                        + " finalize_ids: "
                        + finalizeId
                        + "} "
                        + "source_watermark: 1000"))
            .build()));
// Test drain work item.
server.addWorkToOffer(
    buildInput(
        "work {"
            + " computation_id: \"computation\""
            + " input_data_watermark: 0"
            + " work {"
            + " key: \"0000000000000001\""
            + " sharding_key: 1"
            + " work_token: 3"
            + " cache_token: 3"
            + " source_state {"
            + " only_finalize: true"
            + " finalize_ids: "
            + finalizeId
            + " }"
            + " }"
            + "}",
        null));
result = server.waitForAndGetCommits(1);
commit = result.get(3L);
assertThat(
    commit,
    equalTo(
        parseCommitRequest(
                "key: \"0000000000000001\" "
                    + "sharding_key: 1 "
                    + "work_token: 3 "
                    + "cache_token: 3 "
                    + "source_state_updates {"
                    + " only_finalize: true"
                    + "} ")
            .build()));
assertThat(finalizeTracker, contains(0));
}
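The contains(0) assertions in these tests work because TestCountingSource reports each finalized checkpoint into the shared finalizeTracker list. A hedged sketch of that pattern, with hypothetical names, built on Beam's UnboundedSource.CheckpointMark contract (the runner invokes finalizeCheckpoint once the commit carrying the checkpoint is durable):

import java.io.IOException;
import java.util.List;
import org.apache.beam.sdk.io.UnboundedSource;

// Hypothetical checkpoint mark that reports finalization to a test-visible
// list, mirroring how TestCountingSource feeds finalizeTracker.
class TrackingCheckpointMark implements UnboundedSource.CheckpointMark {
  private final int lastEmitted;
  private final List<Integer> tracker;

  TrackingCheckpointMark(int lastEmitted, List<Integer> tracker) {
    this.lastEmitted = lastEmitted;
    this.tracker = tracker;
  }

  @Override
  public void finalizeCheckpoint() throws IOException {
    // Invoked after the commit succeeds; this is what makes
    // assertThat(finalizeTracker, contains(0)) pass above.
    tracker.add(lastEmitted);
  }
}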
Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong in project beam by apache.
From the class StreamingDataflowWorkerTest, method testExceptionInvalidatesCache.
@Test
public void testExceptionInvalidatesCache() throws Exception {
// We'll need to force the system to limit bundles to one message at a time.
// Sequence is as follows:
// 01. GetWork[0] (token 0)
// 02. Create counter reader
// 03. Counter yields 0
// 04. GetData[0] (state as null)
// 05. Read state as null
// 06. Set state as 42
// 07. THROW on taking counter reader checkpoint
// 08. Create counter reader
// 09. Counter yields 0
// 10. GetData[1] (state as null)
// 11. Read state as null (*** not 42 ***)
// 12. Take counter reader checkpoint as 0
// 13. CommitWork[0] (message 0:0, state 42, checkpoint 0)
// 14. GetWork[1] (token 1, checkpoint as 0)
// 15. Counter yields 1
// 16. Read (cached) state as 42
// 17. Take counter reader checkpoint 1
// 18. CommitWork[1] (message 0:1, checkpoint 1)
// 19. GetWork[2] (token 2, checkpoint as 1)
// 20. Counter yields 2
// 21. THROW on processElement
// 22. Recreate reader from checkpoint 1
// 23. Counter yields 2 (*** not eof ***)
// 24. GetData[2] (state as 42)
// 25. Read state as 42
// 26. Take counter reader checkpoint 2
// 27. CommitWork[2] (message 0:2, checkpoint 2)
FakeWindmillServer server = new FakeWindmillServer(errorCollector);
server.setExpectedExceptionCount(2);
DataflowPipelineOptions options = createTestingPipelineOptions(server);
options.setNumWorkers(1);
DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
debugOptions.setUnboundedReaderMaxElements(1);
CloudObject codec =
    CloudObjects.asCloudObject(
        WindowedValue.getFullCoder(
            ValueWithRecordId.ValueWithRecordIdCoder.of(
                KvCoder.of(VarIntCoder.of(), VarIntCoder.of())),
            GlobalWindow.Coder.INSTANCE),
        /* sdkComponents= */ null);
TestCountingSource counter = new TestCountingSource(3).withThrowOnFirstSnapshot(true);
List<ParallelInstruction> instructions =
    Arrays.asList(
        new ParallelInstruction()
            .setOriginalName("OriginalReadName")
            .setSystemName("Read")
            .setName(DEFAULT_PARDO_USER_NAME)
            .setRead(
                new ReadInstruction()
                    .setSource(
                        CustomSources.serializeToCloudSource(counter, options).setCodec(codec)))
            .setOutputs(
                Arrays.asList(
                    new InstructionOutput()
                        .setName("read_output")
                        .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
                        .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
                        .setCodec(codec))),
        makeDoFnInstruction(
            new TestExceptionInvalidatesCacheFn(),
            0,
            StringUtf8Coder.of(),
            WindowingStrategy.globalDefault()),
        makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE));
StreamingDataflowWorker worker = makeWorker(instructions, options.as(StreamingDataflowWorkerOptions.class), true);
worker.setRetryLocallyDelayMs(100);
worker.start();
// Three GetData requests
for (int i = 0; i < 3; i++) {
ByteString state;
if (i == 0 || i == 1) {
state = ByteString.EMPTY;
} else {
state = ByteString.copyFrom(new byte[] { 42 });
}
Windmill.GetDataResponse.Builder dataResponse = Windmill.GetDataResponse.newBuilder();
dataResponse
    .addDataBuilder()
    .setComputationId(DEFAULT_COMPUTATION_ID)
    .addDataBuilder()
    .setKey(ByteString.copyFromUtf8("0000000000000001"))
    .setShardingKey(1)
    .addValuesBuilder()
    .setTag(ByteString.copyFromUtf8("//+uint"))
    .setStateFamily(DEFAULT_PARDO_STATE_FAMILY)
    .getValueBuilder()
    .setTimestamp(0)
    .setData(state);
server.addDataToOffer(dataResponse.build());
}
// Three GetWork requests and commits
for (int i = 0; i < 3; i++) {
StringBuilder sb = new StringBuilder();
sb.append("work {\n");
sb.append(" computation_id: \"computation\"\n");
sb.append(" input_data_watermark: 0\n");
sb.append(" work {\n");
sb.append(" key: \"0000000000000001\"\n");
sb.append(" sharding_key: 1\n");
sb.append(" work_token: ");
sb.append(i);
sb.append(" cache_token: 1");
sb.append("\n");
if (i > 0) {
int previousCheckpoint = i - 1;
sb.append(" source_state {\n");
sb.append(" state: \"");
sb.append((char) previousCheckpoint);
sb.append("\"\n");
// We'll elide the finalize ids since it's not necessary to trigger the finalizer
// for this test.
sb.append(" }\n");
}
sb.append(" }\n");
sb.append("}\n");
server.addWorkToOffer(buildInput(sb.toString(), null));
Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
Windmill.WorkItemCommitRequest commit = result.get((long) i);
UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
sb = new StringBuilder();
sb.append("key: \"0000000000000001\"\n");
sb.append("sharding_key: 1\n");
sb.append("work_token: ");
sb.append(i);
sb.append("\n");
sb.append("cache_token: 1\n");
sb.append("output_messages {\n");
sb.append(" destination_stream_id: \"out\"\n");
sb.append(" bundles {\n");
sb.append(" key: \"0000000000000001\"\n");
int messageNum = i;
sb.append(" messages {\n");
sb.append(" timestamp: ");
sb.append(messageNum * 1000);
sb.append("\n");
sb.append(" data: \"0:");
sb.append(messageNum);
sb.append("\"\n");
sb.append(" }\n");
sb.append(" messages_ids: \"\"\n");
sb.append(" }\n");
sb.append("}\n");
if (i == 0) {
sb.append("value_updates {\n");
sb.append(" tag: \"//+uint\"\n");
sb.append(" value {\n");
sb.append(" timestamp: 0\n");
sb.append(" data: \"");
sb.append((char) 42);
sb.append("\"\n");
sb.append(" }\n");
sb.append(" state_family: \"parDoStateFamily\"\n");
sb.append("}\n");
}
int sourceState = i;
sb.append("source_state_updates {\n");
sb.append(" state: \"");
sb.append((char) sourceState);
sb.append("\"\n");
sb.append(" finalize_ids: ");
sb.append(finalizeId);
sb.append("}\n");
sb.append("source_watermark: ");
sb.append((sourceState + 1) * 1000);
sb.append("\n");
sb.append("source_backlog_bytes: 7\n");
assertThat(
    // The commit will include a timer to clean up state - this timer is irrelevant
    // for the current test.
    setValuesTimestamps(commit.toBuilder().clearOutputTimers()).build(),
    equalTo(
        setMessagesMetadata(
                PaneInfo.NO_FIRING,
                CoderUtils.encodeToByteArray(
                    CollectionCoder.of(GlobalWindow.Coder.INSTANCE),
                    ImmutableList.of(GlobalWindow.INSTANCE)),
                parseCommitRequest(sb.toString()))
            .build()));
}
}
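The StringBuilder above assembles a protobuf text-format WorkItemCommitRequest, and parseCommitRequest turns it into a builder for comparison. The helper itself is not shown in these excerpts; assuming the Windmill messages are ordinary generated protos, it is presumably a thin wrapper over protobuf's TextFormat, along these lines:

import com.google.protobuf.TextFormat;
import org.apache.beam.runners.dataflow.worker.windmill.Windmill;

// Plausible reconstruction of the parseCommitRequest helper used above:
// merge a text-format proto into a fresh WorkItemCommitRequest builder.
private Windmill.WorkItemCommitRequest.Builder parseCommitRequest(String output)
    throws Exception {
  Windmill.WorkItemCommitRequest.Builder builder =
      Windmill.WorkItemCommitRequest.newBuilder();
  TextFormat.merge(output, builder); // rejects unknown fields or bad syntax
  return builder;
}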
Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong in project beam by apache.
From the class StreamingDataflowWorkerTest, method testUnboundedSourceWorkRetry.
// Regression test to ensure that a reader is not used from the cache
// on work item retry.
@Test
public void testUnboundedSourceWorkRetry() throws Exception {
List<Integer> finalizeTracker = Lists.newArrayList();
TestCountingSource.setFinalizeTracker(finalizeTracker);
FakeWindmillServer server = new FakeWindmillServer(errorCollector);
StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
// Disable state cache so it doesn't detect retry.
options.setWorkerCacheMb(0);
StreamingDataflowWorker worker = makeWorker(makeUnboundedSourcePipeline(), options, false);
worker.start();
// Test new key.
Windmill.GetWorkResponse work =
    buildInput(
        "work {"
            + " computation_id: \"computation\""
            + " input_data_watermark: 0"
            + " work {"
            + " key: \"0000000000000001\""
            + " sharding_key: 1"
            + " work_token: 1"
            + " cache_token: 1"
            + " }"
            + "}",
        null);
server.addWorkToOffer(work);
Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
Iterable<CounterUpdate> counters = worker.buildCounters();
Windmill.WorkItemCommitRequest commit = result.get(1L);
UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
Windmill.WorkItemCommitRequest expectedCommit =
    setMessagesMetadata(
            PaneInfo.NO_FIRING,
            CoderUtils.encodeToByteArray(
                CollectionCoder.of(GlobalWindow.Coder.INSTANCE),
                Arrays.asList(GlobalWindow.INSTANCE)),
            parseCommitRequest(
                "key: \"0000000000000001\" "
                    + "sharding_key: 1 "
                    + "work_token: 1 "
                    + "cache_token: 1 "
                    + "source_backlog_bytes: 7 "
                    + "output_messages {"
                    + " destination_stream_id: \"out\""
                    + " bundles {"
                    + " key: \"0000000000000001\""
                    + " messages {"
                    + " timestamp: 0"
                    + " data: \"0:0\""
                    + " }"
                    + " messages_ids: \"\""
                    + " }"
                    + "} "
                    + "source_state_updates {"
                    + " state: \"\000\""
                    + " finalize_ids: "
                    + finalizeId
                    + "} "
                    + "source_watermark: 1000"))
        .build();
assertThat(commit, equalTo(expectedCommit));
assertEquals(18L, splitIntToLong(getCounter(counters, "dataflow_input_size-computation").getInteger()));
// Test retry of work item, it should return the same result and not start the reader from the
// position it was left at.
server.clearCommitsReceived();
server.addWorkToOffer(work);
result = server.waitForAndGetCommits(1);
commit = result.get(1L);
finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
Windmill.WorkItemCommitRequest.Builder commitBuilder = expectedCommit.toBuilder();
commitBuilder.getSourceStateUpdatesBuilder().setFinalizeIds(0, commit.getSourceStateUpdates().getFinalizeIds(0));
expectedCommit = commitBuilder.build();
assertThat(commit, equalTo(expectedCommit));
// Continue with processing.
server.addWorkToOffer(
    buildInput(
        "work {"
            + " computation_id: \"computation\""
            + " input_data_watermark: 0"
            + " work {"
            + " key: \"0000000000000001\""
            + " sharding_key: 1"
            + " work_token: 2"
            + " cache_token: 1"
            + " source_state {"
            + " state: \"\001\""
            + " finalize_ids: "
            + finalizeId
            + " } "
            + " }"
            + "}",
        null));
result = server.waitForAndGetCommits(1);
commit = result.get(2L);
finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
assertThat(
    commit,
    equalTo(
        parseCommitRequest(
                "key: \"0000000000000001\" "
                    + "sharding_key: 1 "
                    + "work_token: 2 "
                    + "cache_token: 1 "
                    + "source_backlog_bytes: 7 "
                    + "source_state_updates {"
                    + " state: \"\000\""
                    + " finalize_ids: "
                    + finalizeId
                    + "} "
                    + "source_watermark: 1000")
            .build()));
assertThat(finalizeTracker, contains(0));
}
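One subtlety in the retry sequence above: finalize ids are generated fresh on each processing attempt, so the retried commit can never match the first attempt's expected proto verbatim. The test therefore copies the one nondeterministic field from the actual commit into the expectation before comparing everything else. A condensed recap of that idiom, using the names from the test above:

// finalize_ids is the only field allowed to differ between attempts; patch
// it into the expected proto, then require every other field to be identical.
Windmill.WorkItemCommitRequest.Builder patched = expectedCommit.toBuilder();
patched
    .getSourceStateUpdatesBuilder()
    .setFinalizeIds(0, commit.getSourceStateUpdates().getFinalizeIds(0));
assertThat(commit, equalTo(patched.build()));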