use of org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions in project beam by apache.
the class DataflowRunnerHarness method main.
/**
* Fetches and processes work units from the Dataflow service.
*/
public static void main(String[] unusedArgs) throws Exception {
RunnerApi.@Nullable Pipeline pipeline = DataflowWorkerHarnessHelper.getPipelineFromEnv();
// This descriptor is used for all services except logging. They are isolated to keep
// critical traffic protected from best effort traffic.
ApiServiceDescriptor controlApiService = DataflowWorkerHarnessHelper.getControlDescriptor();
ApiServiceDescriptor loggingApiService = DataflowWorkerHarnessHelper.getLoggingDescriptor();
ApiServiceDescriptor statusApiService = DataflowWorkerHarnessHelper.getStatusDescriptor();
LOG.info("{} started, using port {} for control, {} for logging.", DataflowRunnerHarness.class, controlApiService, loggingApiService);
DataflowWorkerHarnessHelper.initializeLogging(DataflowRunnerHarness.class);
DataflowWorkerHarnessOptions pipelineOptions = DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions(DataflowRunnerHarness.class);
DataflowWorkerHarnessHelper.configureLogging(pipelineOptions);
// Initialize registered file systems.
FileSystems.setDefaultPipelineOptions(pipelineOptions);
DataflowPipelineDebugOptions dataflowOptions = pipelineOptions.as(DataflowPipelineDebugOptions.class);
ServerFactory serverFactory;
if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll_domain_socket")) {
serverFactory = ServerFactory.createEpollDomainSocket();
} else if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll")) {
serverFactory = ServerFactory.createEpollSocket();
} else {
serverFactory = ServerFactory.createDefault();
}
ServerStreamObserverFactory streamObserverFactory = ServerStreamObserverFactory.fromOptions(pipelineOptions);
Server servicesServer = null;
Server loggingServer = null;
Server statusServer = null;
try (BeamFnLoggingService beamFnLoggingService = new BeamFnLoggingService(
        loggingApiService, DataflowWorkerLoggingInitializer.getSdkLoggingHandler()::publish,
        streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
    BeamFnControlService beamFnControlService = new BeamFnControlService(
        controlApiService, streamObserverFactory::from,
        GrpcContextHeaderAccessorProvider.getHeaderAccessor());
    BeamFnDataGrpcService beamFnDataService = new BeamFnDataGrpcService(
        pipelineOptions, controlApiService, streamObserverFactory::from,
        GrpcContextHeaderAccessorProvider.getHeaderAccessor());
    BeamWorkerStatusGrpcService beamWorkerStatusGrpcService = statusApiService == null
        ? null
        : BeamWorkerStatusGrpcService.create(
            statusApiService, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
    GrpcStateService beamFnStateService = GrpcStateService.create()) {
servicesServer = serverFactory.create(ImmutableList.of(beamFnControlService, beamFnDataService, beamFnStateService), controlApiService);
loggingServer = serverFactory.create(ImmutableList.of(beamFnLoggingService), loggingApiService);
// gRPC server for obtaining SDK harness runtime status information.
if (beamWorkerStatusGrpcService != null) {
statusServer = serverFactory.create(ImmutableList.of(beamWorkerStatusGrpcService), statusApiService);
}
start(pipeline, pipelineOptions, beamFnControlService, beamFnDataService, controlApiService, beamFnStateService, beamWorkerStatusGrpcService);
if (statusServer != null) {
statusServer.shutdown();
}
servicesServer.shutdown();
loggingServer.shutdown();
// Wait 30 seconds for outstanding requests to finish.
if (statusServer != null) {
statusServer.awaitTermination(30, TimeUnit.SECONDS);
}
servicesServer.awaitTermination(30, TimeUnit.SECONDS);
loggingServer.awaitTermination(30, TimeUnit.SECONDS);
} finally {
if (statusServer != null && !statusServer.isTerminated()) {
statusServer.shutdownNow();
}
if (servicesServer != null && !servicesServer.isTerminated()) {
servicesServer.shutdownNow();
}
if (loggingServer != null && !loggingServer.isTerminated()) {
loggingServer.shutdownNow();
}
}
}
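The ServerFactory selection above is driven purely by experiment strings carried in the worker's pipeline options. A minimal sketch of that plumbing, assuming the standard PipelineOptionsFactory command-line parsing (the flag value is illustrative; on a real worker the Dataflow service injects the experiments):
DataflowPipelineDebugOptions debugOptions =
    PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api_epoll")
        .as(DataflowPipelineDebugOptions.class);
// Mirrors the check in main(): true here, so the epoll-based ServerFactory branch would be taken.
boolean useEpoll = DataflowRunner.hasExperiment(debugOptions, "beam_fn_api_epoll");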
use of org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions in project beam by apache.
the class StreamingDataflowWorkerTest method testExceptionInvalidatesCache.
@Test
public void testExceptionInvalidatesCache() throws Exception {
// We'll need to force the system to limit bundles to one message at a time.
// Sequence is as follows:
// 01. GetWork[0] (token 0)
// 02. Create counter reader
// 03. Counter yields 0
// 04. GetData[0] (state as null)
// 05. Read state as null
// 06. Set state as 42
// 07. THROW on taking counter reader checkpoint
// 08. Create counter reader
// 09. Counter yields 0
// 10. GetData[1] (state as null)
// 11. Read state as null (*** not 42 ***)
// 12. Take counter reader checkpoint as 0
// 13. CommitWork[0] (message 0:0, state 42, checkpoint 0)
// 14. GetWork[1] (token 1, checkpoint as 0)
// 15. Counter yields 1
// 16. Read (cached) state as 42
// 17. Take counter reader checkpoint 1
// 18. CommitWork[1] (message 0:1, checkpoint 1)
// 19. GetWork[2] (token 2, checkpoint as 1)
// 20. Counter yields 2
// 21. THROW on processElement
// 22. Recreate reader from checkpoint 1
// 23. Counter yields 2 (*** not eof ***)
// 24. GetData[2] (state as 42)
// 25. Read state as 42
// 26. Take counter reader checkpoint 2
// 27. CommitWork[2] (message 0:2, checkpoint 2)
FakeWindmillServer server = new FakeWindmillServer(errorCollector);
server.setExpectedExceptionCount(2);
DataflowPipelineOptions options = createTestingPipelineOptions(server);
options.setNumWorkers(1);
DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
debugOptions.setUnboundedReaderMaxElements(1);
CloudObject codec = CloudObjects.asCloudObject(
    WindowedValue.getFullCoder(
        ValueWithRecordId.ValueWithRecordIdCoder.of(KvCoder.of(VarIntCoder.of(), VarIntCoder.of())),
        GlobalWindow.Coder.INSTANCE),
    /* sdkComponents= */ null);
TestCountingSource counter = new TestCountingSource(3).withThrowOnFirstSnapshot(true);
List<ParallelInstruction> instructions = Arrays.asList(
    new ParallelInstruction()
        .setOriginalName("OriginalReadName")
        .setSystemName("Read")
        .setName(DEFAULT_PARDO_USER_NAME)
        .setRead(new ReadInstruction().setSource(
            CustomSources.serializeToCloudSource(counter, options).setCodec(codec)))
        .setOutputs(Arrays.asList(new InstructionOutput()
            .setName("read_output")
            .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
            .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
            .setCodec(codec))),
    makeDoFnInstruction(
        new TestExceptionInvalidatesCacheFn(), 0, StringUtf8Coder.of(), WindowingStrategy.globalDefault()),
    makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE));
StreamingDataflowWorker worker = makeWorker(instructions, options.as(StreamingDataflowWorkerOptions.class), true);
worker.setRetryLocallyDelayMs(100);
worker.start();
// Three GetData requests
for (int i = 0; i < 3; i++) {
ByteString state;
if (i == 0 || i == 1) {
state = ByteString.EMPTY;
} else {
state = ByteString.copyFrom(new byte[] { 42 });
}
Windmill.GetDataResponse.Builder dataResponse = Windmill.GetDataResponse.newBuilder();
dataResponse.addDataBuilder()
    .setComputationId(DEFAULT_COMPUTATION_ID)
    .addDataBuilder()
    .setKey(ByteString.copyFromUtf8("0000000000000001"))
    .setShardingKey(1)
    .addValuesBuilder()
    .setTag(ByteString.copyFromUtf8("//+uint"))
    .setStateFamily(DEFAULT_PARDO_STATE_FAMILY)
    .getValueBuilder()
    .setTimestamp(0)
    .setData(state);
server.addDataToOffer(dataResponse.build());
}
// Three GetWork requests and commits
for (int i = 0; i < 3; i++) {
StringBuilder sb = new StringBuilder();
sb.append("work {\n");
sb.append(" computation_id: \"computation\"\n");
sb.append(" input_data_watermark: 0\n");
sb.append(" work {\n");
sb.append(" key: \"0000000000000001\"\n");
sb.append(" sharding_key: 1\n");
sb.append(" work_token: ");
sb.append(i);
sb.append(" cache_token: 1");
sb.append("\n");
if (i > 0) {
int previousCheckpoint = i - 1;
sb.append(" source_state {\n");
sb.append(" state: \"");
sb.append((char) previousCheckpoint);
sb.append("\"\n");
// We'll elide the finalize ids since it's not necessary to trigger the finalizer
// for this test.
sb.append(" }\n");
}
sb.append(" }\n");
sb.append("}\n");
server.addWorkToOffer(buildInput(sb.toString(), null));
Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
Windmill.WorkItemCommitRequest commit = result.get((long) i);
UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
sb = new StringBuilder();
sb.append("key: \"0000000000000001\"\n");
sb.append("sharding_key: 1\n");
sb.append("work_token: ");
sb.append(i);
sb.append("\n");
sb.append("cache_token: 1\n");
sb.append("output_messages {\n");
sb.append(" destination_stream_id: \"out\"\n");
sb.append(" bundles {\n");
sb.append(" key: \"0000000000000001\"\n");
int messageNum = i;
sb.append(" messages {\n");
sb.append(" timestamp: ");
sb.append(messageNum * 1000);
sb.append("\n");
sb.append(" data: \"0:");
sb.append(messageNum);
sb.append("\"\n");
sb.append(" }\n");
sb.append(" messages_ids: \"\"\n");
sb.append(" }\n");
sb.append("}\n");
if (i == 0) {
sb.append("value_updates {\n");
sb.append(" tag: \"//+uint\"\n");
sb.append(" value {\n");
sb.append(" timestamp: 0\n");
sb.append(" data: \"");
sb.append((char) 42);
sb.append("\"\n");
sb.append(" }\n");
sb.append(" state_family: \"parDoStateFamily\"\n");
sb.append("}\n");
}
int sourceState = i;
sb.append("source_state_updates {\n");
sb.append(" state: \"");
sb.append((char) sourceState);
sb.append("\"\n");
sb.append(" finalize_ids: ");
sb.append(finalizeId);
sb.append("}\n");
sb.append("source_watermark: ");
sb.append((sourceState + 1) * 1000);
sb.append("\n");
sb.append("source_backlog_bytes: 7\n");
// The commit also carries output timers that are irrelevant for the current test,
// so they are cleared before comparing.
assertThat(
    setValuesTimestamps(commit.toBuilder().clearOutputTimers()).build(),
    equalTo(setMessagesMetadata(
        PaneInfo.NO_FIRING,
        CoderUtils.encodeToByteArray(
            CollectionCoder.of(GlobalWindow.Coder.INSTANCE), ImmutableList.of(GlobalWindow.INSTANCE)),
        parseCommitRequest(sb.toString()))
        .build()));
}
}
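A note on the options.as(DataflowPipelineDebugOptions.class) call used above: PipelineOptions interfaces are views over a single shared options object, so the setUnboundedReaderMaxElements(1) set through the debug view is also seen by the worker built from options.as(StreamingDataflowWorkerOptions.class). A minimal sketch of that behavior, with illustrative values:
DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
DataflowPipelineDebugOptions debug = options.as(DataflowPipelineDebugOptions.class);
debug.setUnboundedReaderMaxElements(1);
// Both views share the same backing store, so the limit round-trips through either one.
assertEquals(1, options.as(DataflowPipelineDebugOptions.class).getUnboundedReaderMaxElements());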
use of org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions in project beam by apache.
the class SimpleParDoFnTest method testOutputsPerElementCounterDisabledViaExperiment.
// TODO: Remove once Distributions has shipped.
@Test
public void testOutputsPerElementCounterDisabledViaExperiment() throws Exception {
DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
List<String> experiments = debugOptions.getExperiments();
experiments.remove(SimpleParDoFn.OUTPUTS_PER_ELEMENT_EXPERIMENT);
debugOptions.setExperiments(experiments);
List<CounterUpdate> counterUpdates = executeParDoFnCounterTest(0);
CounterName expectedName = CounterName.named("per-element-output-count").withOriginalName(stepContext.getNameContext());
assertThat(counterUpdates, not(contains(hasStructuredName(expectedName, "DISTRIBUTION"))));
}
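The test mutates the list returned by getExperiments() in place, which assumes it is non-null and mutable. A defensive variant of the same toggle (the helper name is hypothetical, not part of the Beam API):
// Hypothetical helper: drop one experiment from a DataflowPipelineDebugOptions view,
// tolerating a null or unmodifiable experiments list.
static void removeExperiment(DataflowPipelineDebugOptions options, String experiment) {
  List<String> experiments =
      options.getExperiments() == null
          ? new ArrayList<>()
          : new ArrayList<>(options.getExperiments());
  experiments.remove(experiment);
  options.setExperiments(experiments);
}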
use of org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions in project beam by apache.
the class WorkerCustomSourcesTest method testReadUnboundedReader.
@Test
public void testReadUnboundedReader() throws Exception {
CounterSet counterSet = new CounterSet();
StreamingModeExecutionStateRegistry executionStateRegistry = new StreamingModeExecutionStateRegistry(null);
ReaderCache readerCache = new ReaderCache(Duration.standardMinutes(1), Runnable::run);
StreamingModeExecutionContext context = new StreamingModeExecutionContext(
    counterSet,
    "computationId",
    readerCache,
    /* stateNameMap= */ ImmutableMap.of(),
    /* stateCache= */ null,
    StreamingStepMetricsContainer.createRegistry(),
    new DataflowExecutionStateTracker(
        ExecutionStateSampler.newForTest(),
        executionStateRegistry.getState(
            NameContext.forStage("stageName"), "other", null, NoopProfileScope.NOOP),
        counterSet, PipelineOptionsFactory.create(), "test-work-item-id"),
    executionStateRegistry,
    Long.MAX_VALUE);
options.setNumWorkers(5);
int maxElements = 10;
DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
debugOptions.setUnboundedReaderMaxElements(maxElements);
ByteString state = ByteString.EMPTY;
for (int i = 0; i < 10 * maxElements; /* incremented in inner loop */ ) {
// Initialize streaming context with state from previous iteration.
context.start("key", Windmill.WorkItem.newBuilder().setKey(// key is zero-padded index.
ByteString.copyFromUtf8("0000000000000001")).setWorkToken(// Must be increasing across activations for cache to be used.
i).setCacheToken(1).setSourceState(// Source state.
Windmill.SourceState.newBuilder().setState(state).build()).build(), // input watermark
new Instant(0), // output watermark
null, // synchronized processing time
null, // StateReader
null, // StateFetcher
null, Windmill.WorkItemCommitRequest.newBuilder());
@SuppressWarnings({ "unchecked", "rawtypes" }) NativeReader<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>> reader = (NativeReader) WorkerCustomSources.create((CloudObject) serializeToCloudSource(new TestCountingSource(Integer.MAX_VALUE), options).getSpec(), options, context);
// Verify data.
Instant beforeReading = Instant.now();
int numReadOnThisIteration = 0;
for (WindowedValue<ValueWithRecordId<KV<Integer, Integer>>> value : ReaderUtils.readAllFromReader(reader)) {
assertEquals(KV.of(0, i), value.getValue().getValue());
assertArrayEquals(encodeToByteArray(KvCoder.of(VarIntCoder.of(), VarIntCoder.of()), KV.of(0, i)), value.getValue().getId());
assertThat(value.getWindows(), contains((BoundedWindow) GlobalWindow.INSTANCE));
assertEquals(i, value.getTimestamp().getMillis());
i++;
numReadOnThisIteration++;
}
Instant afterReading = Instant.now();
long maxReadSec = debugOptions.getUnboundedReaderMaxReadTimeSec();
assertThat(new Duration(beforeReading, afterReading).getStandardSeconds(), lessThanOrEqualTo(maxReadSec + 1));
assertThat(numReadOnThisIteration, lessThanOrEqualTo(debugOptions.getUnboundedReaderMaxElements()));
// Extract and verify state modifications.
context.flushState();
state = context.getOutputBuilder().getSourceStateUpdates().getState();
// CountingSource's watermark is the last record + 1. i is now one past the last record,
// so the expected watermark is i millis.
assertEquals(TimeUnit.MILLISECONDS.toMicros(i), context.getOutputBuilder().getSourceWatermark());
assertEquals(1, context.getOutputBuilder().getSourceStateUpdates().getFinalizeIdsList().size());
assertNotNull(readerCache.acquireReader(context.getComputationKey(), context.getWork().getCacheToken(), context.getWorkToken() + 1));
assertEquals(7L, context.getBacklogBytes());
}
}
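The per-bundle read limits exercised above can also be supplied as command-line flags; assuming the Dataflow options are registered on the classpath (as in these tests), flag names follow Beam's getter-to-flag convention, so the sketch below (values illustrative) is equivalent to the setter calls in the test:
DataflowPipelineDebugOptions debug =
    PipelineOptionsFactory.fromArgs(
            "--unboundedReaderMaxElements=10", "--unboundedReaderMaxReadTimeSec=1")
        .as(DataflowPipelineDebugOptions.class);
// Streaming workers consult these limits when deciding how much to pull from an
// UnboundedSource before checkpointing and committing a bundle.
int maxElements = debug.getUnboundedReaderMaxElements();        // 10
long maxReadTimeSec = debug.getUnboundedReaderMaxReadTimeSec(); // 1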
use of org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions in project beam by apache.
the class DataflowRunnerTest method testHasExperiment.
@Test
public void testHasExperiment() {
DataflowPipelineDebugOptions options = PipelineOptionsFactory.as(DataflowPipelineDebugOptions.class);
options.setExperiments(null);
assertFalse(DataflowRunner.hasExperiment(options, "foo"));
options.setExperiments(ImmutableList.of("foo", "bar"));
assertTrue(DataflowRunner.hasExperiment(options, "foo"));
assertTrue(DataflowRunner.hasExperiment(options, "bar"));
assertFalse(DataflowRunner.hasExperiment(options, "baz"));
assertFalse(DataflowRunner.hasExperiment(options, "ba"));
assertFalse(DataflowRunner.hasExperiment(options, "BAR"));
}
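For reference, the behavior pinned down by these assertions amounts to exact, case-sensitive membership in the experiments list, with a null list treated as empty. A rough equivalent (a sketch only; DataflowRunner.hasExperiment is the real API):
static boolean hasExperimentSketch(DataflowPipelineDebugOptions options, String experiment) {
  List<String> experiments = options.getExperiments();
  return experiments != null && experiments.contains(experiment);
}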