Search in sources :

Example 56 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method testActiveWorkRefresh.

/**
 * Runs one work item through a source -> SlowDoFn -> sink graph with a 100 ms active-work refresh
 * period and checks that the fake Windmill server received at least one GetData request.
 */
@Test
public void testActiveWorkRefresh() throws Exception {
    // TODO: This test needs to be adapted to work with streamingEngine=true.
    if (streamingEngine) {
        return;
    }

    // Three-stage map task: read strings, run them through SlowDoFn, write them out.
    ParallelInstruction source = makeSourceInstruction(StringUtf8Coder.of());
    ParallelInstruction slowParDo = makeDoFnInstruction(new SlowDoFn(), 0, StringUtf8Coder.of());
    ParallelInstruction sink = makeSinkInstruction(StringUtf8Coder.of(), 0);
    List<ParallelInstruction> instructions = Arrays.asList(source, slowParDo, sink);

    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    options.setActiveWorkRefreshPeriodMillis(100);

    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();

    // Offer a single work item and block until its commit arrives before stopping the worker.
    server.addWorkToOffer(makeInput(0, TimeUnit.MILLISECONDS.toMicros(0)));
    server.waitForAndGetCommits(1);
    worker.stop();

    // This graph will not normally produce any GetData calls, so every such call observed here
    // comes from an active work refresh.
    assertThat(server.numGetDataRequests(), greaterThan(0));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)

Example 57 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method testMergeWindowsCaching.

/**
 * Runs a merging-windows test verifying stored state, holds and timers, with state caching in
 * play because the first work item is offered with {@code is_new_key} set.
 *
 * <p>The test processes two work items for the same key and cache token: the first delivers one
 * message, the second fires the watermark timer. It then checks the commits, the Windmill byte
 * counters and the state cache hit/miss statistics. The expected tags, byte values and cache
 * counts are change-detector values.
 */
@Test
public // Caching applies because of the first processing having is_new_key set.
void testMergeWindowsCaching() throws Exception {
    // Coders for the ungrouped KV input and the grouped (key -> list of values) output, each also
    // in a windowed form using the IntervalWindow coder.
    Coder<KV<String, String>> kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
    Coder<WindowedValue<KV<String, String>>> windowedKvCoder = FullWindowedValueCoder.of(kvCoder, IntervalWindow.getCoder());
    KvCoder<String, List<String>> groupedCoder = KvCoder.of(StringUtf8Coder.of(), ListCoder.of(StringUtf8Coder.of()));
    Coder<WindowedValue<KV<String, List<String>>>> windowedGroupedCoder = FullWindowedValueCoder.of(groupedCoder, IntervalWindow.getCoder());
    // Spec for the MergeWindowsDoFn: one-second fixed windows with the EARLIEST timestamp combiner,
    // serialized as a windowing-strategy proto, plus the windowed input coder.
    CloudObject spec = CloudObject.forClassName("MergeWindowsDoFn");
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    addString(spec, PropertyNames.SERIALIZED_FN, StringUtils.byteArrayToJsonString(WindowingStrategyTranslation.toMessageProto(WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1))).withTimestampCombiner(TimestampCombiner.EARLIEST), sdkComponents).toByteArray()));
    addObject(spec, WorkerPropertyNames.INPUT_CODER, CloudObjects.asCloudObject(windowedKvCoder, /*sdkComponents=*/
    null));
    // The merge-windows step reads output 0 of instruction 0 (the windowing source).
    ParallelInstruction mergeWindowsInstruction = new ParallelInstruction().setSystemName("MergeWindows-System").setName("MergeWindowsStep").setOriginalName("MergeWindowsOriginal").setParDo(new ParDoInstruction().setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0)).setNumOutputs(1).setUserFn(spec)).setOutputs(Arrays.asList(new InstructionOutput().setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME).setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME).setName("output").setCodec(CloudObjects.asCloudObject(windowedGroupedCoder, /*sdkComponents=*/
    null))));
    List<ParallelInstruction> instructions = Arrays.asList(makeWindowingSourceInstruction(kvCoder), mergeWindowsInstruction, // Use multiple stages in the maptask to test caching with multiple stages.
    makeDoFnInstruction(new PassthroughDoFn(), 1, groupedCoder), makeSinkInstruction(groupedCoder, 2));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), false);
    // Register a mapping from the step name "MergeWindowsStep" to "MergeWindows"; presumably this
    // is what makes "MergeWindows" the state family expected in the assertions below.
    Map<String, String> nameMap = new HashMap<>();
    nameMap.put("MergeWindowsStep", "MergeWindows");
    worker.addStateNameMappings(nameMap);
    worker.start();
    // First work item (work_token 1, cache_token 1, is_new_key set): a single message at
    // timestamp 0 in WINDOW_AT_ZERO.
    server.addWorkToOffer(buildInput("work {" + "  computation_id: \"" + DEFAULT_COMPUTATION_ID + "\"" + "  input_data_watermark: 0" + "  work {" + "    key: \"" + DEFAULT_KEY_STRING + "\"" + "    sharding_key: " + DEFAULT_SHARDING_KEY + "    cache_token: 1" + "    work_token: 1" + "    is_new_key: 1" + "    message_bundles {" + "      source_computation_id: \"" + DEFAULT_SOURCE_COMPUTATION_ID + "\"" + "      messages {" + "        timestamp: 0" + "        data: \"" + dataStringForIndex(0) + "\"" + "      }" + "    }" + "  }" + "}", intervalWindowBytes(WINDOW_AT_ZERO)));
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
    Iterable<CounterUpdate> counters = worker.buildCounters();
    // These tags and data are opaque strings and this is a change detector test.
    // The "/u" indicates the user's namespace, versus "/s" for system namespace
    String window = "/gAAAAAAAA-joBw/";
    String timerTagPrefix = "/s" + window + "+0";
    ByteString bufferTag = ByteString.copyFromUtf8(window + "+ubuf");
    ByteString paneInfoTag = ByteString.copyFromUtf8(window + "+upane");
    String watermarkDataHoldTag = window + "+uhold";
    String watermarkExtraHoldTag = window + "+uextra";
    String stateFamily = "MergeWindows";
    ByteString bufferData = ByteString.copyFromUtf8("data0");
    // Encoded form for Iterable<String>: -1, true, 'data0', false
    ByteString outputData = ByteString.copyFrom(new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, 0x01, 0x05, 0x64, 0x61, 0x74, 0x61, 0x30, 0x00 });
    // These values are not essential to the change detector test
    long timerTimestamp = 999000L;
    // Commit produced for the first work item (keyed by work token 1).
    WorkItemCommitRequest actualOutput = result.get(1L);
    // Set timer
    verifyTimers(actualOutput, buildWatermarkTimer(timerTagPrefix, 999));
    // The input element was appended to the buffer bag and a data hold was set at timestamp 0.
    assertThat(actualOutput.getBagUpdatesList(), Matchers.contains(Matchers.equalTo(Windmill.TagBag.newBuilder().setTag(bufferTag).setStateFamily(stateFamily).addValues(bufferData).build())));
    verifyHolds(actualOutput, buildHold(watermarkDataHoldTag, 0, false));
    // No state reads
    assertEquals(0L, splitIntToLong(getCounter(counters, "WindmillStateBytesRead").getInteger()));
    // Timer + buffer + watermark hold
    assertEquals(Windmill.WorkItemCommitRequest.newBuilder(actualOutput).clearCounterUpdates().clearOutputMessages().build().getSerializedSize(), splitIntToLong(getCounter(counters, "WindmillStateBytesWritten").getInteger()));
    // Input messages
    assertEquals(VarInt.getLength(0L) + dataStringForIndex(0).length() + addPaneTag(PaneInfo.NO_FIRING, intervalWindowBytes(WINDOW_AT_ZERO)).size() + // proto overhead
    5L, splitIntToLong(getCounter(counters, "WindmillShuffleBytesRead").getInteger()));
    // Second work item (work_token 2, same cache_token 1): delivers the watermark timer, with the
    // input data watermark set 1000 past the timer timestamp.
    Windmill.GetWorkResponse.Builder getWorkResponse = Windmill.GetWorkResponse.newBuilder();
    getWorkResponse.addWorkBuilder().setComputationId(DEFAULT_COMPUTATION_ID).setInputDataWatermark(timerTimestamp + 1000).addWorkBuilder().setKey(ByteString.copyFromUtf8(DEFAULT_KEY_STRING)).setShardingKey(DEFAULT_SHARDING_KEY).setWorkToken(2).setCacheToken(1).getTimersBuilder().addTimers(buildWatermarkTimer(timerTagPrefix, timerTimestamp));
    server.addWorkToOffer(getWorkResponse.build());
    long expectedBytesRead = 0L;
    // State data the fake server will offer while the second work item is processed.
    Windmill.GetDataResponse.Builder dataResponse = Windmill.GetDataResponse.newBuilder();
    Windmill.KeyedGetDataResponse.Builder dataBuilder = dataResponse.addDataBuilder().setComputationId(DEFAULT_COMPUTATION_ID).addDataBuilder().setKey(ByteString.copyFromUtf8(DEFAULT_KEY_STRING)).setShardingKey(DEFAULT_SHARDING_KEY);
    // These reads are skipped due to being cached from accesses in the first work item processing.
    // dataBuilder
    // .addBagsBuilder()
    // .setTag(bufferTag)
    // .setStateFamily(stateFamily)
    // .addValues(bufferData);
    // dataBuilder
    // .addWatermarkHoldsBuilder()
    // .setTag(ByteString.copyFromUtf8(watermarkDataHoldTag))
    // .setStateFamily(stateFamily)
    // .addTimestamps(0);
    // Only the extra hold and the pane info are offered by the server here.
    dataBuilder.addWatermarkHoldsBuilder().setTag(ByteString.copyFromUtf8(watermarkExtraHoldTag)).setStateFamily(stateFamily).addTimestamps(0);
    dataBuilder.addValuesBuilder().setTag(paneInfoTag).setStateFamily(stateFamily).getValueBuilder().setTimestamp(0).setData(ByteString.EMPTY);
    server.addDataToOffer(dataResponse.build());
    // The expected state-bytes-read counter is the serialized size of the keyed response above.
    expectedBytesRead += dataBuilder.build().getSerializedSize();
    result = server.waitForAndGetCommits(1);
    counters = worker.buildCounters();
    // Commit produced for the second work item (keyed by work token 2).
    actualOutput = result.get(2L);
    // Exactly one grouped output message is emitted, for the default key at timestamp 0.
    assertEquals(1, actualOutput.getOutputMessagesCount());
    assertEquals(DEFAULT_DESTINATION_STREAM_ID, actualOutput.getOutputMessages(0).getDestinationStreamId());
    assertEquals(DEFAULT_KEY_STRING, actualOutput.getOutputMessages(0).getBundles(0).getKey().toStringUtf8());
    assertEquals(0, actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getTimestamp());
    assertEquals(outputData, actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getData());
    // The message metadata is decoded as a pane info followed by the window collection.
    ByteString metadata = actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getMetadata();
    InputStream inStream = metadata.newInput();
    assertEquals(PaneInfo.createPane(true, true, Timing.ON_TIME), PaneInfoCoder.INSTANCE.decode(inStream));
    assertEquals(Arrays.asList(WINDOW_AT_ZERO), DEFAULT_WINDOW_COLLECTION_CODER.decode(inStream, Coder.Context.OUTER));
    // Data was deleted
    assertThat("" + actualOutput.getValueUpdatesList(), actualOutput.getValueUpdatesList(), Matchers.contains(Matchers.equalTo(Windmill.TagValue.newBuilder().setTag(paneInfoTag).setStateFamily(stateFamily).setValue(Windmill.Value.newBuilder().setTimestamp(Long.MAX_VALUE).setData(ByteString.EMPTY)).build())));
    assertThat("" + actualOutput.getBagUpdatesList(), actualOutput.getBagUpdatesList(), Matchers.contains(Matchers.equalTo(Windmill.TagBag.newBuilder().setTag(bufferTag).setStateFamily(stateFamily).setDeleteAll(true).build())));
    verifyHolds(actualOutput, buildHold(watermarkDataHoldTag, -1, true), buildHold(watermarkExtraHoldTag, -1, true));
    // State reads for windowing
    assertEquals(expectedBytesRead, splitIntToLong(getCounter(counters, "WindmillStateBytesRead").getInteger()));
    // State updates to clear state
    assertEquals(Windmill.WorkItemCommitRequest.newBuilder(actualOutput).clearCounterUpdates().clearOutputMessages().build().getSerializedSize(), splitIntToLong(getCounter(counters, "WindmillStateBytesWritten").getInteger()));
    // No input messages
    assertEquals(0L, splitIntToLong(getCounter(counters, "WindmillShuffleBytesRead").getInteger()));
    // Change-detector check of the state cache statistics after both work items.
    CacheStats stats = worker.stateCache.getCacheStats();
    LOG.info("cache stats {}", stats);
    assertEquals(1, stats.hitCount());
    assertEquals(4, stats.missCount());
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) InputStream(java.io.InputStream) GetWorkResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkResponse) KV(org.apache.beam.sdk.values.KV) KeyedGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataResponse) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) ComputationGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.ComputationGetDataResponse) KeyedGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataResponse) GetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetDataResponse) 
CacheStats(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.cache.CacheStats) InstructionInput(com.google.api.services.dataflow.model.InstructionInput) Test(org.junit.Test)

Example 58 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method makeDoFnInstruction.

/**
 * Builds a ParDo {@link ParallelInstruction} wrapping {@code doFn}.
 *
 * <p>The instruction reads output 0 of the instruction at {@code producerIndex} and declares a
 * single output tagged {@code PropertyNames.OUTPUT}. That output's codec is a full windowed-value
 * coder built from {@code outputCoder} and the window coder of {@code windowingStrategy}.
 *
 * @param doFn the function to serialize into the instruction's user-fn spec
 * @param producerIndex index of the instruction whose output 0 feeds this ParDo
 * @param outputCoder coder for the element values of the single output
 * @param windowingStrategy windowing strategy stored with the DoFn and used for the window coder
 * @return the assembled instruction, named with the default ParDo system/user/original names
 */
private ParallelInstruction makeDoFnInstruction(DoFn<?, ?> doFn, int producerIndex, Coder<?> outputCoder, WindowingStrategy<?, ?> windowingStrategy) {
    // User-fn spec: the DoFnInfo is Java-serialized and stored as a JSON-safe string.
    CloudObject spec = CloudObject.forClassName("DoFn");
    byte[] serializedDoFnInfo = SerializableUtils.serializeToByteArray(DoFnInfo.forFn(doFn, windowingStrategy, null /* side input views */, null /* input coder */, new TupleTag<>(PropertyNames.OUTPUT) /* main output id */, DoFnSchemaInformation.create(), Collections.emptyMap()));
    addString(spec, PropertyNames.SERIALIZED_FN, StringUtils.byteArrayToJsonString(serializedDoFnInfo));

    // ParDo description: one input wired to the producer, one tagged output.
    InstructionInput input = new InstructionInput().setProducerInstructionIndex(producerIndex).setOutputNum(0);
    MultiOutputInfo mainOutputInfo = new MultiOutputInfo().setTag(PropertyNames.OUTPUT);
    ParDoInstruction parDo = new ParDoInstruction().setInput(input).setNumOutputs(1).setUserFn(spec).setMultiOutputInfos(Arrays.asList(mainOutputInfo));

    // The single output carries windowed values encoded with the strategy's window coder.
    InstructionOutput output = new InstructionOutput().setName(PropertyNames.OUTPUT).setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME).setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME).setCodec(CloudObjects.asCloudObject(WindowedValue.getFullCoder(outputCoder, windowingStrategy.getWindowFn().windowCoder()), /*sdkComponents=*/
    null));

    return new ParallelInstruction().setSystemName(DEFAULT_PARDO_SYSTEM_NAME).setName(DEFAULT_PARDO_USER_NAME).setOriginalName(DEFAULT_PARDO_ORIGINAL_NAME).setParDo(parDo).setOutputs(Arrays.asList(output));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) MultiOutputInfo(com.google.api.services.dataflow.model.MultiOutputInfo) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) TupleTag(org.apache.beam.sdk.values.TupleTag) InstructionInput(com.google.api.services.dataflow.model.InstructionInput)

Example 59 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method testMergeWindows.

/**
 * Runs a merging-windows test verifying stored state, holds and timers.
 *
 * <p>The test processes two work items for the same key: the first delivers one message, the
 * second fires the watermark timer while the fake server offers the buffered data, both holds and
 * the pane info. It then checks the commits and the Windmill byte counters. The expected tags and
 * byte values are change-detector values.
 */
@Test
public // Runs a merging windows test verifying stored state, holds and timers.
void testMergeWindows() throws Exception {
    // Coders for the ungrouped KV input and the grouped (key -> list of values) output, each also
    // in a windowed form using the IntervalWindow coder.
    Coder<KV<String, String>> kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
    Coder<WindowedValue<KV<String, String>>> windowedKvCoder = FullWindowedValueCoder.of(kvCoder, IntervalWindow.getCoder());
    KvCoder<String, List<String>> groupedCoder = KvCoder.of(StringUtf8Coder.of(), ListCoder.of(StringUtf8Coder.of()));
    Coder<WindowedValue<KV<String, List<String>>>> windowedGroupedCoder = FullWindowedValueCoder.of(groupedCoder, IntervalWindow.getCoder());
    // Spec for the MergeWindowsDoFn: one-second fixed windows with the EARLIEST timestamp combiner,
    // serialized as a windowing-strategy proto, plus the windowed input coder.
    CloudObject spec = CloudObject.forClassName("MergeWindowsDoFn");
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    addString(spec, PropertyNames.SERIALIZED_FN, StringUtils.byteArrayToJsonString(WindowingStrategyTranslation.toMessageProto(WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1))).withTimestampCombiner(TimestampCombiner.EARLIEST), sdkComponents).toByteArray()));
    addObject(spec, WorkerPropertyNames.INPUT_CODER, CloudObjects.asCloudObject(windowedKvCoder, /*sdkComponents=*/
    null));
    // The merge-windows step reads output 0 of instruction 0 (the windowing source).
    ParallelInstruction mergeWindowsInstruction = new ParallelInstruction().setSystemName("MergeWindows-System").setName("MergeWindowsStep").setOriginalName("MergeWindowsOriginal").setParDo(new ParDoInstruction().setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0)).setNumOutputs(1).setUserFn(spec)).setOutputs(Arrays.asList(new InstructionOutput().setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME).setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME).setName("output").setCodec(CloudObjects.asCloudObject(windowedGroupedCoder, /*sdkComponents=*/
    null))));
    // Map task: windowing source -> merge windows -> sink reading from instruction 1.
    List<ParallelInstruction> instructions = Arrays.asList(makeWindowingSourceInstruction(kvCoder), mergeWindowsInstruction, makeSinkInstruction(groupedCoder, 1));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), false);
    // Register a mapping from the step name "MergeWindowsStep" to "MergeWindows"; presumably this
    // is what makes "MergeWindows" the state family expected in the assertions below.
    Map<String, String> nameMap = new HashMap<>();
    nameMap.put("MergeWindowsStep", "MergeWindows");
    worker.addStateNameMappings(nameMap);
    worker.start();
    // First work item (work_token 1, cache_token 1): a single message at timestamp 0 in
    // WINDOW_AT_ZERO.
    server.addWorkToOffer(buildInput("work {" + "  computation_id: \"" + DEFAULT_COMPUTATION_ID + "\"" + "  input_data_watermark: 0" + "  work {" + "    key: \"" + DEFAULT_KEY_STRING + "\"" + "    sharding_key: " + DEFAULT_SHARDING_KEY + "    cache_token: 1" + "    work_token: 1" + "    message_bundles {" + "      source_computation_id: \"" + DEFAULT_SOURCE_COMPUTATION_ID + "\"" + "      messages {" + "        timestamp: 0" + "        data: \"" + dataStringForIndex(0) + "\"" + "      }" + "    }" + "  }" + "}", intervalWindowBytes(WINDOW_AT_ZERO)));
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
    Iterable<CounterUpdate> counters = worker.buildCounters();
    // These tags and data are opaque strings and this is a change detector test.
    // The "/u" indicates the user's namespace, versus "/s" for system namespace
    String window = "/gAAAAAAAA-joBw/";
    String timerTagPrefix = "/s" + window + "+0";
    ByteString bufferTag = ByteString.copyFromUtf8(window + "+ubuf");
    ByteString paneInfoTag = ByteString.copyFromUtf8(window + "+upane");
    String watermarkDataHoldTag = window + "+uhold";
    String watermarkExtraHoldTag = window + "+uextra";
    String stateFamily = "MergeWindows";
    ByteString bufferData = ByteString.copyFromUtf8("data0");
    // Encoded form for Iterable<String>: -1, true, 'data0', false
    ByteString outputData = ByteString.copyFrom(new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, 0x01, 0x05, 0x64, 0x61, 0x74, 0x61, 0x30, 0x00 });
    // These values are not essential to the change detector test
    long timerTimestamp = 999000L;
    // Commit produced for the first work item (keyed by work token 1).
    WorkItemCommitRequest actualOutput = result.get(1L);
    // Set timer
    verifyTimers(actualOutput, buildWatermarkTimer(timerTagPrefix, 999));
    // The input element was appended to the buffer bag and a data hold was set at timestamp 0.
    assertThat(actualOutput.getBagUpdatesList(), Matchers.contains(Matchers.equalTo(Windmill.TagBag.newBuilder().setTag(bufferTag).setStateFamily(stateFamily).addValues(bufferData).build())));
    verifyHolds(actualOutput, buildHold(watermarkDataHoldTag, 0, false));
    // No state reads
    assertEquals(0L, splitIntToLong(getCounter(counters, "WindmillStateBytesRead").getInteger()));
    // Timer + buffer + watermark hold
    assertEquals(Windmill.WorkItemCommitRequest.newBuilder(actualOutput).clearCounterUpdates().clearOutputMessages().build().getSerializedSize(), splitIntToLong(getCounter(counters, "WindmillStateBytesWritten").getInteger()));
    // Input messages
    assertEquals(VarInt.getLength(0L) + dataStringForIndex(0).length() + addPaneTag(PaneInfo.NO_FIRING, intervalWindowBytes(WINDOW_AT_ZERO)).size() + // proto overhead
    5L, splitIntToLong(getCounter(counters, "WindmillShuffleBytesRead").getInteger()));
    // Second work item (work_token 2, same cache_token 1): delivers the watermark timer, with the
    // input data watermark set 1000 past the timer timestamp.
    Windmill.GetWorkResponse.Builder getWorkResponse = Windmill.GetWorkResponse.newBuilder();
    getWorkResponse.addWorkBuilder().setComputationId(DEFAULT_COMPUTATION_ID).setInputDataWatermark(timerTimestamp + 1000).addWorkBuilder().setKey(ByteString.copyFromUtf8(DEFAULT_KEY_STRING)).setShardingKey(DEFAULT_SHARDING_KEY).setWorkToken(2).setCacheToken(1).getTimersBuilder().addTimers(buildWatermarkTimer(timerTagPrefix, timerTimestamp));
    server.addWorkToOffer(getWorkResponse.build());
    long expectedBytesRead = 0L;
    // State data the fake server will offer while the second work item is processed: the buffered
    // element, the data hold, the extra hold and the pane info.
    Windmill.GetDataResponse.Builder dataResponse = Windmill.GetDataResponse.newBuilder();
    Windmill.KeyedGetDataResponse.Builder dataBuilder = dataResponse.addDataBuilder().setComputationId(DEFAULT_COMPUTATION_ID).addDataBuilder().setKey(ByteString.copyFromUtf8(DEFAULT_KEY_STRING)).setShardingKey(DEFAULT_SHARDING_KEY);
    dataBuilder.addBagsBuilder().setTag(bufferTag).setStateFamily(stateFamily).addValues(bufferData);
    dataBuilder.addWatermarkHoldsBuilder().setTag(ByteString.copyFromUtf8(watermarkDataHoldTag)).setStateFamily(stateFamily).addTimestamps(0);
    dataBuilder.addWatermarkHoldsBuilder().setTag(ByteString.copyFromUtf8(watermarkExtraHoldTag)).setStateFamily(stateFamily).addTimestamps(0);
    dataBuilder.addValuesBuilder().setTag(paneInfoTag).setStateFamily(stateFamily).getValueBuilder().setTimestamp(0).setData(ByteString.EMPTY);
    server.addDataToOffer(dataResponse.build());
    // The expected state-bytes-read counter is the serialized size of the keyed response above.
    expectedBytesRead += dataBuilder.build().getSerializedSize();
    result = server.waitForAndGetCommits(1);
    counters = worker.buildCounters();
    // Commit produced for the second work item (keyed by work token 2).
    actualOutput = result.get(2L);
    // Exactly one grouped output message is emitted, for the default key at timestamp 0.
    assertEquals(1, actualOutput.getOutputMessagesCount());
    assertEquals(DEFAULT_DESTINATION_STREAM_ID, actualOutput.getOutputMessages(0).getDestinationStreamId());
    assertEquals(DEFAULT_KEY_STRING, actualOutput.getOutputMessages(0).getBundles(0).getKey().toStringUtf8());
    assertEquals(0, actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getTimestamp());
    assertEquals(outputData, actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getData());
    // The message metadata is decoded as a pane info followed by the window collection.
    ByteString metadata = actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getMetadata();
    InputStream inStream = metadata.newInput();
    assertEquals(PaneInfo.createPane(true, true, Timing.ON_TIME), PaneInfoCoder.INSTANCE.decode(inStream));
    assertEquals(Arrays.asList(WINDOW_AT_ZERO), DEFAULT_WINDOW_COLLECTION_CODER.decode(inStream, Coder.Context.OUTER));
    // Data was deleted
    assertThat("" + actualOutput.getValueUpdatesList(), actualOutput.getValueUpdatesList(), Matchers.contains(Matchers.equalTo(Windmill.TagValue.newBuilder().setTag(paneInfoTag).setStateFamily(stateFamily).setValue(Windmill.Value.newBuilder().setTimestamp(Long.MAX_VALUE).setData(ByteString.EMPTY)).build())));
    assertThat("" + actualOutput.getBagUpdatesList(), actualOutput.getBagUpdatesList(), Matchers.contains(Matchers.equalTo(Windmill.TagBag.newBuilder().setTag(bufferTag).setStateFamily(stateFamily).setDeleteAll(true).build())));
    verifyHolds(actualOutput, buildHold(watermarkDataHoldTag, -1, true), buildHold(watermarkExtraHoldTag, -1, true));
    // State reads for windowing
    assertEquals(expectedBytesRead, splitIntToLong(getCounter(counters, "WindmillStateBytesRead").getInteger()));
    // State updates to clear state
    assertEquals(Windmill.WorkItemCommitRequest.newBuilder(actualOutput).clearCounterUpdates().clearOutputMessages().build().getSerializedSize(), splitIntToLong(getCounter(counters, "WindmillStateBytesWritten").getInteger()));
    // No input messages
    assertEquals(0L, splitIntToLong(getCounter(counters, "WindmillShuffleBytesRead").getInteger()));
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) InputStream(java.io.InputStream) GetWorkResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkResponse) KV(org.apache.beam.sdk.values.KV) KeyedGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataResponse) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) ComputationGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.ComputationGetDataResponse) KeyedGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataResponse) GetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetDataResponse) InstructionInput(com.google.api.services.dataflow.model.InstructionInput) 
Test(org.junit.Test)

Example 60 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class StreamingDataflowWorkerTest method testKeyCommitTooLargeException.

/**
 * Offers two work items to a worker whose maximum commit size is capped at 1000 bytes and checks
 * that the oversized one ("large_key") is committed as a truncation request while the small one
 * ("key") commits normally. It then checks that exactly one reported work item status carries
 * errors and that its first error message mentions {@code KeyCommitTooLargeException}.
 */
@Test
public void testKeyCommitTooLargeException() throws Exception {
    KvCoder<String, String> coder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
    ParallelInstruction source = makeSourceInstruction(coder);
    ParallelInstruction largeCommitParDo = makeDoFnInstruction(new LargeCommitFn(), 0, coder);
    ParallelInstruction sink = makeSinkInstruction(coder, 1);
    List<ParallelInstruction> instructions = Arrays.asList(source, largeCommitParDo, sink);

    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setExpectedExceptionCount(1);

    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), true);
    worker.setMaxWorkItemCommitBytes(1000);
    worker.start();

    // Work token 1 uses "large_key", work token 2 uses "key".
    server.addWorkToOffer(makeInput(1, 0, "large_key", DEFAULT_SHARDING_KEY));
    server.addWorkToOffer(makeInput(2, 0, "key", DEFAULT_SHARDING_KEY));
    server.waitForEmptyWorkQueue();

    Map<Long, Windmill.WorkItemCommitRequest> commits = server.waitForAndGetCommits(1);
    assertEquals(2, commits.size());

    // The small key commits its regular output.
    assertEquals(makeExpectedOutput(2, 0, "key", DEFAULT_SHARDING_KEY, "key").build(), commits.get(2L));

    // The large key commits a truncation request instead.
    assertTrue(commits.containsKey(1L));
    WorkItemCommitRequest oversizedCommit = commits.get(1L);
    assertEquals("large_key", oversizedCommit.getKey().toStringUtf8());
    assertEquals(makeExpectedTruncationRequestOutput(1, "large_key", DEFAULT_SHARDING_KEY, oversizedCommit.getEstimatedWorkItemCommitBytes()).build(), oversizedCommit);
    // Check this explicitly since the estimated commit bytes weren't actually
    // checked against an expected value in the previous step
    assertTrue(oversizedCommit.getEstimatedWorkItemCommitBytes() > 1000);

    // Spam worker updates a few times: nine reports, one second apart.
    for (int remainingReports = 9; remainingReports > 0; remainingReports--) {
        worker.reportPeriodicWorkerUpdates();
        Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
    }

    // We should see an exception reported for the large commit but not the small one.
    ArgumentCaptor<WorkItemStatus> workItemStatusCaptor = ArgumentCaptor.forClass(WorkItemStatus.class);
    verify(mockWorkUnitClient, atLeast(2)).reportWorkItemStatus(workItemStatusCaptor.capture());

    boolean sawErrorStatus = false;
    for (WorkItemStatus reported : workItemStatusCaptor.getAllValues()) {
        if (reported.getErrors().isEmpty()) {
            continue;
        }
        // Only a single status is allowed to carry errors.
        assertFalse(sawErrorStatus);
        sawErrorStatus = true;
        assertThat(reported.getErrors().get(0).getMessage(), Matchers.containsString("KeyCommitTooLargeException"));
    }
    assertTrue(sawErrorStatus);
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) Test(org.junit.Test)

Aggregations

ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 73; Test (org.junit.Test): 39; InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput): 27; ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode): 26; CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 24; Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node): 22; InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode): 21; Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge): 20; ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 18; ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction): 17; DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge): 17; MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge): 16; Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 12; ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 12; InstructionInput (com.google.api.services.dataflow.model.InstructionInput): 11; MapTask (com.google.api.services.dataflow.model.MapTask): 11; AtomicLong (java.util.concurrent.atomic.AtomicLong): 11; DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong): 11; WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 11; UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong): 11