use of org.apache.beam.sdk.transforms.DoFn in project components by Talend.
the class TCompBoundedSourceSinkAdapterTest method testSource.
@Test
public void testSource() {
Pipeline pipeline = TestPipeline.create();
FixedFlowProperties fixedFlowProperties = new FixedFlowProperties("fixedFlowProperties");
fixedFlowProperties.init();
fixedFlowProperties.data.setValue("a;b;c");
fixedFlowProperties.rowDelimited.setValue(";");
FixedFlowSource fixedFlowSource = new FixedFlowSource();
fixedFlowSource.initialize(null, fixedFlowProperties);
TCompBoundedSourceAdapter source = new TCompBoundedSourceAdapter(fixedFlowSource);
PCollection<String> result = pipeline.apply(Read.from(source)).apply(ParDo.of(new DoFn<IndexedRecord, String>() {
@DoFn.ProcessElement
public void processElement(ProcessContext c) throws Exception {
c.output(c.element().get(0).toString());
}
}));
PAssert.that(result).containsInAnyOrder(Arrays.asList("a", "b", "c"));
pipeline.run();
}
use of org.apache.beam.sdk.transforms.DoFn in project component-runtime by Talend.
the class InMemoryQueueIOTest method input.
@Test(timeout = 60000)
public void input() {
INPUT_OUTPUTS.clear();
final PipelineResult result;
try (final LoopState state = LoopState.newTracker(null)) {
IntStream.range(0, 2).forEach(i -> state.push(new RowStruct(i)));
pipeline.apply(InMemoryQueueIO.from(state)).apply(ParDo.of(new DoFn<JsonObject, Void>() {
@ProcessElement
public void onElement(final ProcessContext context) {
INPUT_OUTPUTS.add(context.element());
}
}));
result = pipeline.run();
IntStream.range(2, 5).forEach(i -> state.push(new RowStruct(i)));
// for inputs it is key to notify beam we are done
state.end();
final long end = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(2);
while (INPUT_OUTPUTS.size() < 5 && end - System.currentTimeMillis() >= 0) {
try {
sleep(150);
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
}
}
}
result.waitUntilFinish();
assertEquals(5, INPUT_OUTPUTS.size());
assertEquals(IntStream.range(0, 5).boxed().collect(toSet()), INPUT_OUTPUTS.stream().mapToInt(o -> o.getInt("id")).boxed().collect(toSet()));
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class SimpleParDoFnTest method testStateTracking.
@Test
public void testStateTracking() throws Exception {
ExecutionStateTracker tracker = ExecutionStateTracker.newForTest();
TestOperationContext operationContext = TestOperationContext.create(new CounterSet(), NameContextsForTests.nameContextForTest(), new MetricsContainerImpl(NameContextsForTests.ORIGINAL_NAME), tracker);
class StateTestingDoFn extends DoFn<Integer, String> {
private boolean startCalled = false;
@StartBundle
public void startBundle() throws Exception {
startCalled = true;
assertThat(tracker.getCurrentState(), equalTo(operationContext.getStartState()));
}
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
assertThat(startCalled, equalTo(true));
assertThat(tracker.getCurrentState(), equalTo(operationContext.getProcessState()));
}
}
StateTestingDoFn fn = new StateTestingDoFn();
DoFnInfo<?, ?> fnInfo = DoFnInfo.forFn(fn, WindowingStrategy.globalDefault(), null, /* side input views */
null, /* input coder */
MAIN_OUTPUT, DoFnSchemaInformation.create(), Collections.emptyMap());
ParDoFn userParDoFn = new SimpleParDoFn<>(options, DoFnInstanceManagers.singleInstance(fnInfo), NullSideInputReader.empty(), MAIN_OUTPUT, ImmutableMap.of(MAIN_OUTPUT, 0, new TupleTag<>("declared"), 1), BatchModeExecutionContext.forTesting(options, operationContext.counterFactory(), "testStage").getStepContext(operationContext), operationContext, DoFnSchemaInformation.create(), Collections.emptyMap(), SimpleDoFnRunnerFactory.INSTANCE);
// This test ensures proper behavior of the state sampling even with lazy initialization.
try (Closeable trackerCloser = tracker.activate()) {
try (Closeable processCloser = operationContext.enterProcess()) {
userParDoFn.processElement(WindowedValue.valueInGlobalWindow(5));
}
}
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class ProcessBundleDescriptorsTest method testLengthPrefixingOfKeyCoderInStatefulExecutableStage.
/**
* Tests that a stateful stage will wrap the key coder of a stateful transform in a
* LengthPrefixCoder.
*/
@Test
public void testLengthPrefixingOfKeyCoderInStatefulExecutableStage() throws Exception {
// Add another stateful stage with a non-standard key coder
Pipeline p = Pipeline.create();
Coder<Void> keycoder = VoidCoder.of();
assertThat(ModelCoderRegistrar.isKnownCoder(keycoder), is(false));
p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], KV<Void, String>>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
})).setCoder(KvCoder.of(keycoder, StringUtf8Coder.of())).apply("userState", ParDo.of(new DoFn<KV<Void, String>, KV<Void, String>>() {
@StateId("stateId")
private final StateSpec<BagState<String>> bufferState = StateSpecs.bag(StringUtf8Coder.of());
@TimerId("timerId")
private final TimerSpec timerSpec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(@Element KV<Void, String> element, @StateId("stateId") BagState<String> state, @TimerId("timerId") Timer timer, OutputReceiver<KV<Void, String>> r) {
}
@OnTimer("timerId")
public void onTimer() {
}
})).apply("gbk", GroupByKey.create());
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> stage.getUserStates().stream().anyMatch(spec -> spec.localName().equals("stateId")));
checkState(optionalStage.isPresent(), "Expected a stage with user state.");
ExecutableStage stage = optionalStage.get();
PipelineNode.PCollectionNode inputPCollection = stage.getInputPCollection();
// Ensure original key coder is not a LengthPrefixCoder
Map<String, RunnerApi.Coder> stageCoderMap = stage.getComponents().getCodersMap();
RunnerApi.Coder originalMainInputCoder = stageCoderMap.get(inputPCollection.getPCollection().getCoderId());
String originalKeyCoderId = ModelCoders.getKvCoderComponents(originalMainInputCoder).keyCoderId();
RunnerApi.Coder originalKeyCoder = stageCoderMap.get(originalKeyCoderId);
assertThat(originalKeyCoder.getSpec().getUrn(), is(CoderTranslation.JAVA_SERIALIZED_CODER_URN));
// Now create ProcessBundleDescriptor and check for the LengthPrefixCoder around the key coder
BeamFnApi.ProcessBundleDescriptor pbd = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, Endpoints.ApiServiceDescriptor.getDefaultInstance()).getProcessBundleDescriptor();
Map<String, RunnerApi.Coder> pbsCoderMap = pbd.getCodersMap();
RunnerApi.Coder pbsMainInputCoder = pbsCoderMap.get(pbd.getPcollectionsOrThrow(inputPCollection.getId()).getCoderId());
String keyCoderId = ModelCoders.getKvCoderComponents(pbsMainInputCoder).keyCoderId();
RunnerApi.Coder keyCoder = pbsCoderMap.get(keyCoderId);
ensureLengthPrefixed(keyCoder, originalKeyCoder, pbsCoderMap);
TimerReference timerRef = Iterables.getOnlyElement(stage.getTimers());
String timerTransformId = timerRef.transform().getId();
RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(pbd.getTransformsOrThrow(timerTransformId).getSpec().getPayload());
RunnerApi.TimerFamilySpec timerSpec = parDoPayload.getTimerFamilySpecsOrThrow(timerRef.localName());
RunnerApi.Coder timerCoder = pbsCoderMap.get(timerSpec.getTimerFamilyCoderId());
String timerKeyCoderId = timerCoder.getComponentCoderIds(0);
RunnerApi.Coder timerKeyCoder = pbsCoderMap.get(timerKeyCoderId);
ensureLengthPrefixed(timerKeyCoder, originalKeyCoder, pbsCoderMap);
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class RemoteExecutionTest method testExecution.
@Test
public void testExecution() throws Exception {
launchSdkHarness(PipelineOptionsFactory.create());
Pipeline p = Pipeline.create();
p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], String>() {
@ProcessElement
public void process(ProcessContext ctxt) {
ctxt.output("zero");
ctxt.output("one");
ctxt.output("two");
}
})).apply("len", ParDo.of(new DoFn<String, Long>() {
@ProcessElement
public void process(ProcessContext ctxt) {
ctxt.output((long) ctxt.element().length());
}
})).apply("addKeys", WithKeys.of("foo")).setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of())).apply("gbk", GroupByKey.create());
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
checkState(fused.getFusedStages().size() == 1, "Expected exactly one fused stage");
ExecutableStage stage = fused.getFusedStages().iterator().next();
ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage("my_stage", stage, dataServer.getApiServiceDescriptor());
BundleProcessor processor = controlClient.getProcessor(descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations());
Map<String, ? super Coder<WindowedValue<?>>> remoteOutputCoders = descriptor.getRemoteOutputCoders();
Map<String, Collection<? super WindowedValue<?>>> outputValues = new HashMap<>();
Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
for (Entry<String, ? super Coder<WindowedValue<?>>> remoteOutputCoder : remoteOutputCoders.entrySet()) {
List<? super WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
outputValues.put(remoteOutputCoder.getKey(), outputContents);
outputReceivers.put(remoteOutputCoder.getKey(), RemoteOutputReceiver.of((Coder) remoteOutputCoder.getValue(), (FnDataReceiver<? super WindowedValue<?>>) outputContents::add));
}
try (RemoteBundle bundle = processor.newBundle(outputReceivers, BundleProgressHandler.ignored())) {
Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow(new byte[0]));
}
for (Collection<? super WindowedValue<?>> windowedValues : outputValues.values()) {
assertThat(windowedValues, containsInAnyOrder(valueInGlobalWindow(byteValueOf("foo", 4)), valueInGlobalWindow(byteValueOf("foo", 3)), valueInGlobalWindow(byteValueOf("foo", 3))));
}
}
Aggregations