
Example 51 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method testAssignWindows.

@Test
public void testAssignWindows() throws Exception {
    Duration gapDuration = Duration.standardSeconds(1);
    CloudObject spec = CloudObject.forClassName("AssignWindowsDoFn");
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    addString(
        spec,
        PropertyNames.SERIALIZED_FN,
        StringUtils.byteArrayToJsonString(
            WindowingStrategyTranslation.toMessageProto(
                WindowingStrategy.of(FixedWindows.of(gapDuration)), sdkComponents)
                .toByteArray()));
    ParallelInstruction addWindowsInstruction =
        new ParallelInstruction()
            .setSystemName("AssignWindows")
            .setName("AssignWindows")
            .setOriginalName("AssignWindowsOriginal")
            .setParDo(new ParDoInstruction()
                .setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0))
                .setNumOutputs(1)
                .setUserFn(spec))
            .setOutputs(Arrays.asList(new InstructionOutput()
                .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
                .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
                .setName("output")
                .setCodec(CloudObjects.asCloudObject(
                    WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()),
                    /* sdkComponents= */ null))));
    List<ParallelInstruction> instructions =
        Arrays.asList(
            makeSourceInstruction(StringUtf8Coder.of()),
            addWindowsInstruction,
            makeSinkInstruction(StringUtf8Coder.of(), 1));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    int timestamp1 = 0;
    int timestamp2 = 1000000;
    server.addWorkToOffer(makeInput(timestamp1, timestamp1));
    server.addWorkToOffer(makeInput(timestamp2, timestamp2));
    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), false);
    worker.start();
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(2);
    assertThat(
        result.get((long) timestamp1),
        equalTo(setMessagesMetadata(PaneInfo.NO_FIRING, intervalWindowBytes(WINDOW_AT_ZERO),
            makeExpectedOutput(timestamp1, timestamp1)).build()));
    assertThat(
        result.get((long) timestamp2),
        equalTo(setMessagesMetadata(PaneInfo.NO_FIRING, intervalWindowBytes(WINDOW_AT_ONE_SECOND),
            makeExpectedOutput(timestamp2, timestamp2)).build()));
}
Also used : InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Duration(org.joda.time.Duration) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) InstructionInput(com.google.api.services.dataflow.model.InstructionInput) Test(org.junit.Test)
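
For readability, the serialized-fn setup in this test can be factored into a small helper. The sketch below is not part of StreamingDataflowWorkerTest; buildAssignWindowsSpec is a hypothetical name, and the body only reuses the calls already shown in the example above (SdkComponents, WindowingStrategyTranslation, Structs.addString, StringUtils).

// Hypothetical helper, not in the Beam source: builds the CloudObject spec for an
// AssignWindowsDoFn using a fixed-windows strategy of the given size.
private static CloudObject buildAssignWindowsSpec(Duration windowSize) throws Exception {
    CloudObject spec = CloudObject.forClassName("AssignWindowsDoFn");
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    // Serialize the windowing strategy to its portable proto form and store it on the spec.
    addString(
        spec,
        PropertyNames.SERIALIZED_FN,
        StringUtils.byteArrayToJsonString(
            WindowingStrategyTranslation.toMessageProto(
                WindowingStrategy.of(FixedWindows.of(windowSize)), sdkComponents)
                .toByteArray()));
    return spec;
}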

Example 52 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method testNumberOfWorkerHarnessThreadsIsHonored.

@Test(timeout = 10000)
public void testNumberOfWorkerHarnessThreadsIsHonored() throws Exception {
    int expectedNumberOfThreads = 5;
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeDoFnInstruction(blockingFn, 0, StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    options.setNumberOfWorkerHarnessThreads(expectedNumberOfThreads);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    for (int i = 0; i < expectedNumberOfThreads * 2; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
    }
    // This will fail to complete, forcing the test to time out, if the number of
    // worker harness threads is less than the amount of work offered.
    BlockingFn.counter.acquire(expectedNumberOfThreads);
    // If an additional permit can be acquired, more work items than expected were
    // being processed concurrently.
    if (BlockingFn.counter.tryAcquire(500, TimeUnit.MILLISECONDS)) {
        fail("Expected number of threads " + expectedNumberOfThreads
            + " does not match actual number of work items processed concurrently "
            + BlockingFn.callCounter.get() + ".");
    }
    BlockingFn.blocker.countDown();
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)
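
The test above relies on a BlockingFn helper defined elsewhere in the test class and not shown in this excerpt. A minimal sketch of what such a DoFn could look like, assuming the static blocker, counter, and callCounter fields the test references (uses java.util.concurrent.Semaphore, CountDownLatch, and AtomicInteger); the real class may differ:

// Sketch only: each element registers itself, signals the test via the semaphore,
// then blocks on the latch until the test releases it.
static class BlockingFn extends DoFn<String, String> {
    // Released once per element so the test can count elements in flight.
    static Semaphore counter = new Semaphore(0);
    // Held until the test calls countDown(), keeping every in-flight element blocked.
    static CountDownLatch blocker = new CountDownLatch(1);
    // Total number of processElement invocations observed.
    static AtomicInteger callCounter = new AtomicInteger();

    @ProcessElement
    public void processElement(ProcessContext c) throws InterruptedException {
        callCounter.incrementAndGet();
        counter.release();
        blocker.await();
        c.output(c.element());
    }
}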

Example 53 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method makeSinkInstruction.

private ParallelInstruction makeSinkInstruction(String streamId, Coder<?> coder, int producerIndex, Coder<? extends BoundedWindow> windowCoder) {
    CloudObject spec = CloudObject.forClass(WindmillSink.class);
    addString(spec, "stream_id", streamId);
    return new ParallelInstruction()
        .setSystemName(DEFAULT_SINK_SYSTEM_NAME)
        .setOriginalName(DEFAULT_SINK_ORIGINAL_NAME)
        .setWrite(new WriteInstruction()
            .setInput(new InstructionInput().setProducerInstructionIndex(producerIndex).setOutputNum(0))
            .setSink(new Sink()
                .setSpec(spec)
                .setCodec(CloudObjects.asCloudObject(
                    WindowedValue.getFullCoder(coder, windowCoder), /* sdkComponents= */ null))));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Sink(com.google.api.services.dataflow.model.Sink) WriteInstruction(com.google.api.services.dataflow.model.WriteInstruction) InstructionInput(com.google.api.services.dataflow.model.InstructionInput)
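
The other examples in this set call shorter makeSinkInstruction overloads that are not shown in this excerpt. A plausible delegation is sketched below; the default window coder and the "out" stream id are assumptions for illustration, not taken from the Beam source:

// Hypothetical overloads delegating to the four-argument helper above.
private ParallelInstruction makeSinkInstruction(Coder<?> coder, int producerIndex) {
    return makeSinkInstruction(coder, producerIndex, IntervalWindow.getCoder());
}

private ParallelInstruction makeSinkInstruction(
        Coder<?> coder, int producerIndex, Coder<? extends BoundedWindow> windowCoder) {
    // "out" is a placeholder stream id; the real test class uses its own default.
    return makeSinkInstruction("out", coder, producerIndex, windowCoder);
}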

Example 54 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method makeUnboundedSourcePipeline.

private List<ParallelInstruction> makeUnboundedSourcePipeline(
        // Total number of messages in each split of the unbounded source.
        int numMessagesPerShard,
        DoFn<ValueWithRecordId<KV<Integer, Integer>>, String> doFn)
        throws Exception {
    DataflowPipelineOptions options = PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
    options.setNumWorkers(1);
    CloudObject codec =
        CloudObjects.asCloudObject(
            WindowedValue.getFullCoder(
                ValueWithRecordId.ValueWithRecordIdCoder.of(KvCoder.of(VarIntCoder.of(), VarIntCoder.of())),
                GlobalWindow.Coder.INSTANCE),
            /* sdkComponents= */ null);
    return Arrays.asList(
        new ParallelInstruction()
            .setSystemName("Read")
            .setOriginalName("OriginalReadName")
            .setRead(new ReadInstruction()
                .setSource(CustomSources.serializeToCloudSource(
                    new TestCountingSource(numMessagesPerShard), options)
                    .setCodec(codec)))
            .setOutputs(Arrays.asList(new InstructionOutput()
                .setName("read_output")
                .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
                .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
                .setCodec(codec))),
        makeDoFnInstruction(doFn, 0, StringUtf8Coder.of(), WindowingStrategy.globalDefault()),
        makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) TestCountingSource(org.apache.beam.runners.dataflow.worker.testing.TestCountingSource) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction)
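
A typical way to use this helper, mirroring the worker setup in the tests above; PrintingDoFn is a hypothetical DoFn<ValueWithRecordId<KV<Integer, Integer>>, String> and 10 is an arbitrary shard size, neither taken from a specific test:

// Illustrative only: build an unbounded-source pipeline and run it on a test worker.
List<ParallelInstruction> instructions = makeUnboundedSourcePipeline(10, new PrintingDoFn());
FakeWindmillServer server = new FakeWindmillServer(errorCollector);
StreamingDataflowWorker worker =
    makeWorker(instructions, createTestingPipelineOptions(server), false);
worker.start();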

Example 55 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method makeWindowingSourceInstruction.

private ParallelInstruction makeWindowingSourceInstruction(Coder<?> coder) {
    CloudObject timerCloudObject = CloudObject.forClassName("com.google.cloud.dataflow.sdk.util.TimerOrElement$TimerOrElementCoder");
    List<CloudObject> component =
        Collections.singletonList(CloudObjects.asCloudObject(coder, /* sdkComponents= */ null));
    Structs.addList(timerCloudObject, PropertyNames.COMPONENT_ENCODINGS, component);
    CloudObject encodedCoder = CloudObject.forClassName("kind:windowed_value");
    Structs.addBoolean(encodedCoder, PropertyNames.IS_WRAPPER, true);
    Structs.addList(
        encodedCoder,
        PropertyNames.COMPONENT_ENCODINGS,
        ImmutableList.of(
            timerCloudObject,
            CloudObjects.asCloudObject(IntervalWindowCoder.of(), /* sdkComponents= */ null)));
    return new ParallelInstruction()
        .setSystemName(DEFAULT_SOURCE_SYSTEM_NAME)
        .setOriginalName(DEFAULT_SOURCE_ORIGINAL_NAME)
        .setRead(new ReadInstruction()
            .setSource(new Source()
                .setSpec(CloudObject.forClass(WindowingWindmillReader.class))
                .setCodec(encodedCoder)))
        .setOutputs(Arrays.asList(new InstructionOutput()
            .setName(Long.toString(idGenerator.get()))
            .setCodec(encodedCoder)
            .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
            .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction) TestCountingSource(org.apache.beam.runners.dataflow.worker.testing.TestCountingSource) Source(com.google.api.services.dataflow.model.Source)
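
As with the other source helpers, this instruction typically sits at the head of a short instruction list. A sketch of such a composition, reusing only helpers referenced elsewhere in this test class; the DoFn instance is a placeholder:

// Sketch: a windowed test pipeline of source -> user DoFn -> sink.
List<ParallelInstruction> instructions =
    Arrays.asList(
        makeWindowingSourceInstruction(StringUtf8Coder.of()),
        makeDoFnInstruction(someWindowingDoFn, 0, StringUtf8Coder.of()),
        makeSinkInstruction(StringUtf8Coder.of(), 1, IntervalWindow.getCoder()));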

Aggregations

ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 73
Test (org.junit.Test): 39
InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput): 27
ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode): 26
CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 24
Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node): 22
InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode): 21
Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge): 20
ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 18
ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction): 17
DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge): 17
MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge): 16
Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 12
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 12
InstructionInput (com.google.api.services.dataflow.model.InstructionInput): 11
MapTask (com.google.api.services.dataflow.model.MapTask): 11
AtomicLong (java.util.concurrent.atomic.AtomicLong): 11
DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong): 11
WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 11
UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong): 11