Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
From the class RemoveFlattenInstructionsFunctionTest, the method testRemoveFlattenOnMultiOutputInstruction.
@Test
public void testRemoveFlattenOnMultiOutputInstruction() {
  Node a =
      ParallelInstructionNode.create(
          new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
  Node aOut1PCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("A.out1"), PCOLLECTION_ID);
  Node aOut2PCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("A.out2"), PCOLLECTION_ID);
  Node aOut3PCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("A.out3"), PCOLLECTION_ID);
  Edge aOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
  Edge aOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
  Edge aOut3 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out3"));
  Edge aOut1PCollectionEdge = DefaultEdge.create();
  Node b =
      ParallelInstructionNode.create(
          new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
  Node bOut1PCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
  Node bOut2PCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("B.out2"), PCOLLECTION_ID);
  Edge bOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
  Edge bOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
  Edge bOut1PCollectionEdge = DefaultEdge.create();
  Node flatten =
      ParallelInstructionNode.create(
          new ParallelInstruction().setName("Flatten").setFlatten(new FlattenInstruction()),
          Nodes.ExecutionLocation.UNKNOWN);
  Node flattenPCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("Flatten.out"), PCOLLECTION_ID);
  Node c =
      ParallelInstructionNode.create(
          new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
  Edge cOutput = DefaultEdge.create();
  Node cPCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
  Node d =
      ParallelInstructionNode.create(
          new ParallelInstruction().setName("D"), Nodes.ExecutionLocation.UNKNOWN);
  Edge dOutput = DefaultEdge.create();
  Node dPCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("D.out"), PCOLLECTION_ID);
  Node e =
      ParallelInstructionNode.create(
          new ParallelInstruction().setName("E"), Nodes.ExecutionLocation.UNKNOWN);
  Edge eOutput = DefaultEdge.create();
  Node ePCollection =
      InstructionOutputNode.create(new InstructionOutput().setName("E.out"), PCOLLECTION_ID);
  // Initial network:
  //   /-out1-> C
  // A -out2-\
  //   \-out3--> Flatten --> D
  // B -out2-/
  //   \-out1-> E
  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(a);
  network.addNode(aOut1PCollection);
  network.addNode(aOut2PCollection);
  network.addNode(aOut3PCollection);
  network.addNode(b);
  network.addNode(bOut1PCollection);
  network.addNode(bOut2PCollection);
  network.addNode(flatten);
  network.addNode(flattenPCollection);
  network.addNode(c);
  network.addNode(cPCollection);
  network.addNode(d);
  network.addNode(dPCollection);
  network.addNode(e);
  network.addNode(ePCollection);
  network.addEdge(a, aOut1PCollection, aOut1);
  network.addEdge(a, aOut2PCollection, aOut2);
  network.addEdge(a, aOut3PCollection, aOut3);
  network.addEdge(aOut1PCollection, c, aOut1PCollectionEdge);
  network.addEdge(aOut2PCollection, flatten, DefaultEdge.create());
  network.addEdge(aOut3PCollection, flatten, DefaultEdge.create());
  network.addEdge(b, bOut1PCollection, bOut1);
  network.addEdge(b, bOut2PCollection, bOut2);
  network.addEdge(bOut1PCollection, e, bOut1PCollectionEdge);
  network.addEdge(bOut2PCollection, flatten, DefaultEdge.create());
  network.addEdge(flatten, flattenPCollection, DefaultEdge.create());
  network.addEdge(flattenPCollection, d, DefaultEdge.create());
  network.addEdge(c, cPCollection, cOutput);
  network.addEdge(d, dPCollection, dOutput);
  network.addEdge(e, ePCollection, eOutput);
  // Expected network after Flatten removal:
  //   /-out1-> C
  // A -out2-\
  //   \-out3--> D
  // B -out2-/
  //   \-out1-> E
  assertThatFlattenIsProperlyRemoved(network);
}
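The helper assertThatFlattenIsProperlyRemoved is defined elsewhere in the test class and is not shown on this page. Below is a minimal sketch of the kind of check it could perform; the method body, the Function-style apply call, and the getParallelInstruction/getFlatten accessors are assumptions for illustration, not the actual helper.

// Hypothetical sketch of the helper; names and accessors are assumptions.
private static void assertThatFlattenIsProperlyRemoved(MutableNetwork<Node, Edge> network) {
  MutableNetwork<Node, Edge> result = new RemoveFlattenInstructionsFunction().apply(network);
  for (Node node : result.nodes()) {
    if (node instanceof ParallelInstructionNode) {
      // After the rewrite, no surviving instruction should still carry a FlattenInstruction.
      assertNull(((ParallelInstructionNode) node).getParallelInstruction().getFlatten());
    }
  }
}

A fuller check would also assert that the Flatten node's predecessors were rewired directly to its successors, matching the expected diagram above.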
Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
From the class StreamingDataflowWorkerTest, the method testIgnoreRetriedKeys.
@Test
public void testIgnoreRetriedKeys() throws Exception {
  final int numIters = 4;
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          makeDoFnInstruction(blockingFn, 0, StringUtf8Coder.of()),
          makeSinkInstruction(StringUtf8Coder.of(), 0));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(
        makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
    // Also add work for a different shard of the same key.
    server.addWorkToOffer(
        makeInput(
            i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1));
  }
  // Wait for the keys to be scheduled. They will be blocked.
  BlockingFn.counter.acquire(numIters * 2);
  // Re-add the work; it should be ignored because the keys are still active.
  for (int i = 0; i < numIters; ++i) {
    // Same work token.
    server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
    server.addWorkToOffer(
        makeInput(
            i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1));
  }
  // Give all added calls a chance to run.
  server.waitForEmptyWorkQueue();
  for (int i = 0; i < numIters; ++i) {
    // Different work token, same keys.
    server.addWorkToOffer(
        makeInput(
            i + numIters, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
  }
  // Give all added calls a chance to run.
  server.waitForEmptyWorkQueue();
  // Release the blocked calls.
  BlockingFn.blocker.countDown();
  // Verify the output.
  Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters * 3);
  for (int i = 0; i < numIters; ++i) {
    assertTrue(result.containsKey((long) i));
    assertEquals(
        makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
    assertTrue(result.containsKey((long) i + 1000));
    assertEquals(
        makeExpectedOutput(
                i + 1000,
                TimeUnit.MILLISECONDS.toMicros(i),
                keyStringForIndex(i),
                DEFAULT_SHARDING_KEY + 1,
                keyStringForIndex(i))
            .build(),
        result.get((long) i + 1000));
    assertTrue(result.containsKey((long) i + numIters));
    assertEquals(
        makeExpectedOutput(
                i + numIters,
                TimeUnit.MILLISECONDS.toMicros(i),
                keyStringForIndex(i),
                DEFAULT_SHARDING_KEY,
                keyStringForIndex(i))
            .build(),
        result.get((long) i + numIters));
  }
  // Re-add the work; it should now process since the keys are no longer active.
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(
        makeInput(
            i + numIters * 2, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
  }
  result = server.waitForAndGetCommits(numIters);
  worker.stop();
  for (int i = 0; i < numIters; ++i) {
    assertTrue(result.containsKey((long) i + numIters * 2));
    assertEquals(
        makeExpectedOutput(
                i + numIters * 2,
                TimeUnit.MILLISECONDS.toMicros(i),
                keyStringForIndex(i),
                DEFAULT_SHARDING_KEY,
                keyStringForIndex(i))
            .build(),
        result.get((long) i + numIters * 2));
  }
}
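BlockingFn is a test DoFn defined elsewhere in StreamingDataflowWorkerTest; the calls to BlockingFn.counter.acquire(...) and BlockingFn.blocker.countDown() above imply roughly the following shape. This is a sketch inferred from that usage, not the actual class.

// Sketch inferred from usage in the test; the exact fields and body are assumptions.
static class BlockingFn extends DoFn<String, String> {
  // The test acquires one permit per scheduled element to know work has started.
  public static Semaphore counter = new Semaphore(0);
  // Every element blocks here until the test counts the latch down.
  public static CountDownLatch blocker = new CountDownLatch(1);

  @ProcessElement
  public void processElement(ProcessContext c) throws InterruptedException {
    counter.release();
    blocker.await();
    c.output(c.element());
  }
}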
Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
From the class StreamingDataflowWorkerTest, the method testBasicHarness.
@Test
public void testBasicHarness() throws Exception {
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();
  final int numIters = 2000;
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
  }
  Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
  worker.stop();
  for (int i = 0; i < numIters; ++i) {
    assertTrue(result.containsKey((long) i));
    assertEquals(
        makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
  }
  verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
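The makeSourceInstruction and makeSinkInstruction helpers construct the ParallelInstruction objects this page is about. A hedged sketch of such builders using the Dataflow model classes follows; the system names, output names, and elided source/sink specs are illustrative assumptions, not the real helpers.

// Illustrative sketch only; field values and elided specs are assumptions.
static ParallelInstruction makeSourceInstruction(Coder<?> coder) {
  return new ParallelInstruction()
      .setSystemName("source")
      .setRead(new ReadInstruction().setSource(new Source())) // real source spec elided
      .setOutputs(ImmutableList.of(new InstructionOutput().setName("source.out")));
}

static ParallelInstruction makeSinkInstruction(Coder<?> coder, int producerIndex) {
  return new ParallelInstruction()
      .setSystemName("sink")
      .setWrite(
          new WriteInstruction()
              .setInput(new InstructionInput().setProducerInstructionIndex(producerIndex)));
}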
Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
From the class StreamingDataflowWorkerTest, the method testExceptions.
@Test(timeout = 30000)
public void testExceptions() throws Exception {
  if (streamingEngine) {
    // TODO: This test needs to be adapted to work with streamingEngine=true.
    return;
  }
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          makeDoFnInstruction(new TestExceptionFn(), 0, StringUtf8Coder.of()),
          makeSinkInstruction(StringUtf8Coder.of(), 1));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  server.setExpectedExceptionCount(1);
  String keyString = keyStringForIndex(0);
  server.addWorkToOffer(
      buildInput(
          "work {"
              + " computation_id: \"" + DEFAULT_COMPUTATION_ID + "\""
              + " input_data_watermark: 0"
              + " work {"
              + " key: \"" + keyString + "\""
              + " sharding_key: 1"
              + " work_token: 0"
              + " cache_token: 1"
              + " message_bundles {"
              + " source_computation_id: \"" + DEFAULT_SOURCE_COMPUTATION_ID + "\""
              + " messages {"
              + " timestamp: 0"
              + " data: \"0\""
              + " }"
              + " }"
              + " }"
              + "}",
          CoderUtils.encodeToByteArray(
              CollectionCoder.of(IntervalWindow.getCoder()), Arrays.asList(DEFAULT_WINDOW))));
  StreamingDataflowWorker worker =
      makeWorker(instructions, createTestingPipelineOptions(server), true);
  worker.start();
  server.waitForEmptyWorkQueue();
  // Wait until the worker has given up.
  int maxTries = 10;
  while (maxTries-- > 0 && !worker.workExecutorIsEmpty()) {
    Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
  }
  assertTrue(worker.workExecutorIsEmpty());
  // Spam worker updates a few times.
  maxTries = 10;
  while (maxTries-- > 0) {
    worker.reportPeriodicWorkerUpdates();
    Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
  }
  // We should see the update containing our expected exceptions exactly once.
  ArgumentCaptor<WorkItemStatus> workItemStatusCaptor =
      ArgumentCaptor.forClass(WorkItemStatus.class);
  verify(mockWorkUnitClient, atLeast(1)).reportWorkItemStatus(workItemStatusCaptor.capture());
  List<WorkItemStatus> capturedStatuses = workItemStatusCaptor.getAllValues();
  boolean foundErrors = false;
  int updatesWithoutErrors = 0;
  int updatesWithErrors = 0;
  for (WorkItemStatus status : capturedStatuses) {
    if (status.getErrors().isEmpty()) {
      updatesWithoutErrors++;
      continue;
    }
    updatesWithErrors++;
    assertFalse(foundErrors);
    foundErrors = true;
    String stacktrace = status.getErrors().get(0).getMessage();
    assertThat(stacktrace, Matchers.containsString("Exception!"));
    assertThat(stacktrace, Matchers.containsString("Another exception!"));
    assertThat(stacktrace, Matchers.containsString("processElement"));
  }
  assertTrue(foundErrors);
  // Updates after the single error report should be error-free, which indicates the work item
  // was retried.
  assertTrue(updatesWithoutErrors > updatesWithErrors);
  // Confirm we've received the expected stats. There is no guarantee stats will be reported
  // only once.
  assertThat(server.getStatsReceived().size(), Matchers.greaterThanOrEqualTo(1));
  Windmill.ReportStatsRequest stats = server.getStatsReceived().get(0);
  assertEquals(DEFAULT_COMPUTATION_ID, stats.getComputationId());
  assertEquals(keyString, stats.getKey().toStringUtf8());
  assertEquals(0, stats.getWorkToken());
  assertEquals(1, stats.getShardingKey());
}
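TestExceptionFn is defined elsewhere in the test class. The assertions above pin down its observable behavior: the reported stack trace must contain "Exception!", "Another exception!", and processElement. A sketch consistent with those assertions follows; the exact chaining is an assumption.

// Sketch consistent with the assertions above; the chaining is an assumption.
static class TestExceptionFn extends DoFn<String, String> {
  @ProcessElement
  public void processElement(ProcessContext c) throws Exception {
    try {
      throw new Exception("Exception!");
    } catch (Exception e) {
      // Wrapping puts both messages and processElement into the reported stack trace.
      throw new Exception("Another exception!", e);
    }
  }
}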
Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
From the class StreamingDataflowWorkerTest, the method testBasic.
@Test
public void testBasic() throws Exception {
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  server.setIsReady(false);
  StreamingConfigTask streamingConfig = new StreamingConfigTask();
  streamingConfig.setStreamingComputationConfigs(
      ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
  streamingConfig.setWindmillServiceEndpoint("foo");
  WorkItem workItem = new WorkItem();
  workItem.setStreamingConfigTask(streamingConfig);
  when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();
  final int numIters = 2000;
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
  }
  Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
  worker.stop();
  for (int i = 0; i < numIters; ++i) {
    assertTrue(result.containsKey((long) i));
    assertEquals(
        makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
  }
  verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
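The makeDefaultStreamingComputationConfig helper packages the instruction list into the streaming config consumed above. A minimal sketch using the Dataflow model class follows; the stage and system names are assumptions.

// Minimal sketch; stage/system names are assumptions, not the real helper's values.
static StreamingComputationConfig makeDefaultStreamingComputationConfig(
    List<ParallelInstruction> instructions) {
  return new StreamingComputationConfig()
      .setComputationId(DEFAULT_COMPUTATION_ID)
      .setSystemName("computation")
      .setStageName("computation")
      .setInstructions(instructions);
}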