use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class StreamingDataflowWorkerTest method testAssignWindows.
/**
 * Runs a source -> AssignWindows -> sink pipeline configured with one-second fixed windows and
 * checks that the commit for each of the two offered work items carries the expected window
 * metadata ({@code WINDOW_AT_ZERO} and {@code WINDOW_AT_ONE_SECOND} respectively).
 */
@Test
public void testAssignWindows() throws Exception {
  // User fn spec: an "AssignWindowsDoFn" whose serialized payload is the windowing strategy proto.
  Duration windowSize = Duration.standardSeconds(1);
  CloudObject assignWindowsSpec = CloudObject.forClassName("AssignWindowsDoFn");
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
  byte[] strategyBytes =
      WindowingStrategyTranslation.toMessageProto(
              WindowingStrategy.of(FixedWindows.of(windowSize)), components)
          .toByteArray();
  addString(
      assignWindowsSpec,
      PropertyNames.SERIALIZED_FN,
      StringUtils.byteArrayToJsonString(strategyBytes));

  // The ParDo consumes output 0 of instruction 0 (the source) and produces a single output.
  InstructionInput fromSource =
      new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0);
  ParDoInstruction assignWindowsParDo =
      new ParDoInstruction().setInput(fromSource).setNumOutputs(1).setUserFn(assignWindowsSpec);
  CloudObject windowedStringCodec =
      CloudObjects.asCloudObject(
          WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()),
          /* sdkComponents= */ null);
  InstructionOutput windowedOutput =
      new InstructionOutput()
          .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
          .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
          .setName("output")
          .setCodec(windowedStringCodec);
  ParallelInstruction addWindowsInstruction =
      new ParallelInstruction()
          .setSystemName("AssignWindows")
          .setName("AssignWindows")
          .setOriginalName("AssignWindowsOriginal")
          .setParDo(assignWindowsParDo)
          .setOutputs(Arrays.asList(windowedOutput));

  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          addWindowsInstruction,
          makeSinkInstruction(StringUtf8Coder.of(), 1));

  // Offer two work items; the same value is used for both arguments of makeInput.
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  int firstTimestamp = 0;
  int secondTimestamp = 1000000;
  server.addWorkToOffer(makeInput(firstTimestamp, firstTimestamp));
  server.addWorkToOffer(makeInput(secondTimestamp, secondTimestamp));

  StreamingDataflowWorker worker =
      makeWorker(instructions, createTestingPipelineOptions(server), false);
  worker.start();

  // Wait for both commits, then compare each against the expected windowed output.
  Map<Long, Windmill.WorkItemCommitRequest> commits = server.waitForAndGetCommits(2);
  assertThat(
      commits.get((long) firstTimestamp),
      equalTo(
          setMessagesMetadata(
                  PaneInfo.NO_FIRING,
                  intervalWindowBytes(WINDOW_AT_ZERO),
                  makeExpectedOutput(firstTimestamp, firstTimestamp))
              .build()));
  assertThat(
      commits.get((long) secondTimestamp),
      equalTo(
          setMessagesMetadata(
                  PaneInfo.NO_FIRING,
                  intervalWindowBytes(WINDOW_AT_ONE_SECOND),
                  makeExpectedOutput(secondTimestamp, secondTimestamp))
              .build()));
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class StreamingDataflowWorkerTest method testNumberOfWorkerHarnessThreadsIsHonored.
/**
 * Checks that the worker processes exactly the configured number of work items concurrently:
 * twice as many work items as threads are offered, and the test expects exactly
 * {@code expectedNumberOfThreads} permits to become available on {@code BlockingFn.counter}.
 *
 * <p>NOTE(review): {@code BlockingFn} is defined outside this snippet; presumably each running
 * invocation releases one permit on {@code counter} and then waits on {@code blocker} — confirm.
 */
@Test(timeout = 10000)
public void testNumberOfWorkerHarnessThreadsIsHonored() throws Exception {
  int expectedNumberOfThreads = 5;
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          makeDoFnInstruction(blockingFn, 0, StringUtf8Coder.of()),
          makeSinkInstruction(StringUtf8Coder.of(), 0));

  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  options.setNumberOfWorkerHarnessThreads(expectedNumberOfThreads);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();

  // Offer twice as much work as there are threads so surplus concurrency would be observable.
  for (int i = 0; i < expectedNumberOfThreads * 2; ++i) {
    server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
  }

  try {
    // This will fail to complete if the number of threads is less than the amount of work,
    // forcing this test to time out.
    BlockingFn.counter.acquire(expectedNumberOfThreads);

    // Attempt to acquire one additional permit; if that succeeds, too many items were being
    // processed concurrently.
    if (BlockingFn.counter.tryAcquire(500, TimeUnit.MILLISECONDS)) {
      fail("Expected number of threads " + expectedNumberOfThreads + " does not match actual " + "number of work items processed concurrently " + BlockingFn.callCounter.get() + ".");
    }
  } finally {
    // Always release the latch, including when the assertion fails or the timeout interrupts
    // this thread, so that work items waiting on it are not left blocked.
    BlockingFn.blocker.countDown();
  }
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class StreamingDataflowWorkerTest method makeSinkInstruction.
/**
 * Builds a write instruction whose sink spec names {@link WindmillSink} and the given stream id.
 *
 * @param streamId value stored under the {@code "stream_id"} property of the sink spec
 * @param coder element coder, wrapped together with {@code windowCoder} in a full windowed-value
 *     coder for the sink's codec
 * @param producerIndex index of the instruction whose output 0 feeds this sink
 * @param windowCoder coder for the windows of the written values
 * @return the assembled instruction, named with the default sink system/original names
 */
private ParallelInstruction makeSinkInstruction(String streamId, Coder<?> coder, int producerIndex, Coder<? extends BoundedWindow> windowCoder) {
  CloudObject sinkSpec = CloudObject.forClass(WindmillSink.class);
  addString(sinkSpec, "stream_id", streamId);

  InstructionInput upstream =
      new InstructionInput().setProducerInstructionIndex(producerIndex).setOutputNum(0);
  CloudObject sinkCodec =
      CloudObjects.asCloudObject(
          WindowedValue.getFullCoder(coder, windowCoder), /* sdkComponents= */ null);
  Sink windmillSink = new Sink().setSpec(sinkSpec).setCodec(sinkCodec);
  WriteInstruction write = new WriteInstruction().setInput(upstream).setSink(windmillSink);

  return new ParallelInstruction()
      .setSystemName(DEFAULT_SINK_SYSTEM_NAME)
      .setOriginalName(DEFAULT_SINK_ORIGINAL_NAME)
      .setWrite(write);
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class StreamingDataflowWorkerTest method makeUnboundedSourcePipeline.
/**
 * Builds a three-stage instruction list: a read from a {@code TestCountingSource}, a ParDo
 * running {@code doFn} on the read output, and a sink consuming the ParDo output.
 *
 * @param numMessagesPerShard total number of messages in each split of the unbounded source
 * @param doFn the function applied to the values produced by the read stage
 * @return the read, ParDo and sink instructions, in that order
 */
private List<ParallelInstruction> makeUnboundedSourcePipeline(int numMessagesPerShard, DoFn<ValueWithRecordId<KV<Integer, Integer>>, String> doFn) throws Exception {
  DataflowPipelineOptions options =
      PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
  options.setNumWorkers(1);

  // The same codec is attached to both the serialized source and the read stage's output.
  CloudObject codec =
      CloudObjects.asCloudObject(
          WindowedValue.getFullCoder(
              ValueWithRecordId.ValueWithRecordIdCoder.of(
                  KvCoder.of(VarIntCoder.of(), VarIntCoder.of())),
              GlobalWindow.Coder.INSTANCE),
          /* sdkComponents= */ null);

  ReadInstruction readSpec =
      new ReadInstruction()
          .setSource(
              CustomSources.serializeToCloudSource(
                      new TestCountingSource(numMessagesPerShard), options)
                  .setCodec(codec));
  InstructionOutput readOutput =
      new InstructionOutput()
          .setName("read_output")
          .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
          .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
          .setCodec(codec);
  ParallelInstruction readStage =
      new ParallelInstruction()
          .setSystemName("Read")
          .setOriginalName("OriginalReadName")
          .setRead(readSpec)
          .setOutputs(Arrays.asList(readOutput));

  // Stage 1 consumes instruction 0 (the read); stage 2 consumes instruction 1 (the ParDo).
  ParallelInstruction doFnStage =
      makeDoFnInstruction(doFn, 0, StringUtf8Coder.of(), WindowingStrategy.globalDefault());
  ParallelInstruction sinkStage =
      makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE);

  return Arrays.asList(readStage, doFnStage, sinkStage);
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class StreamingDataflowWorkerTest method makeWindowingSourceInstruction.
/**
 * Builds a read instruction whose source spec names {@link WindowingWindmillReader}.
 *
 * <p>The codec is assembled by hand as a {@code "kind:windowed_value"} wrapper with two
 * components: a timer-or-element coder (referenced by class name, wrapping {@code coder}) and
 * the interval window coder. The same codec object is set on both the source and its output.
 *
 * @param coder coder for the elements, nested inside the timer-or-element coder
 * @return the assembled instruction, named with the default source system/original names
 */
private ParallelInstruction makeWindowingSourceInstruction(Coder<?> coder) {
  // Inner coder: TimerOrElementCoder with the element coder as its only component.
  CloudObject timerOrElementCodec =
      CloudObject.forClassName(
          "com.google.cloud.dataflow.sdk.util.TimerOrElement$TimerOrElementCoder");
  List<CloudObject> elementComponents =
      Collections.singletonList(CloudObjects.asCloudObject(coder, /* sdkComponents= */ null));
  Structs.addList(timerOrElementCodec, PropertyNames.COMPONENT_ENCODINGS, elementComponents);

  // Outer coder: windowed-value wrapper over (timer-or-element, interval window).
  CloudObject windowedCodec = CloudObject.forClassName("kind:windowed_value");
  Structs.addBoolean(windowedCodec, PropertyNames.IS_WRAPPER, true);
  CloudObject intervalWindowCodec =
      CloudObjects.asCloudObject(IntervalWindowCoder.of(), /* sdkComponents= */ null);
  Structs.addList(
      windowedCodec,
      PropertyNames.COMPONENT_ENCODINGS,
      ImmutableList.of(timerOrElementCodec, intervalWindowCodec));

  Source readerSource =
      new Source()
          .setSpec(CloudObject.forClass(WindowingWindmillReader.class))
          .setCodec(windowedCodec);
  ReadInstruction read = new ReadInstruction().setSource(readerSource);

  // The output name is taken from the current value of idGenerator.
  InstructionOutput output =
      new InstructionOutput()
          .setName(Long.toString(idGenerator.get()))
          .setCodec(windowedCodec)
          .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
          .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME);

  return new ParallelInstruction()
      .setSystemName(DEFAULT_SOURCE_SYSTEM_NAME)
      .setOriginalName(DEFAULT_SOURCE_ORIGINAL_NAME)
      .setRead(read)
      .setOutputs(Arrays.asList(output));
}
Aggregations