Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class DoFnOperator, method earlyBindStateIfNeeded.
/**
 * Binds, ahead of element processing, every state cell this operator may use.
 *
 * <p>Nothing happens unless both {@code keyCoder} and {@code doFn} are set. Otherwise each
 * {@link StateSpec} declared on the {@code DoFn} is read reflectively from its declaring field and
 * bound through a {@link FlinkStateInternals.EarlyBinder} built on the keyed state backend. When
 * the runner is a {@link StatefulDoFnRunner}, its system state tags are bound with the same binder.
 *
 * @throws IllegalArgumentException if a declared state field cannot be read from {@code doFn}
 * @throws IllegalAccessException if a declared state field is not accessible
 */
private void earlyBindStateIfNeeded() throws IllegalArgumentException, IllegalAccessException {
  // Early binding only applies when there is both a key coder and a user DoFn.
  if (keyCoder == null || doFn == null) {
    return;
  }
  DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  FlinkStateInternals.EarlyBinder earlyBinder = new FlinkStateInternals.EarlyBinder(getKeyedStateBackend(), serializedOptions);
  for (DoFnSignature.StateDeclaration declaration : signature.stateDeclarations().values()) {
    // The declaration being iterated already exposes its field; a second lookup by id is redundant.
    StateSpec<?> spec = (StateSpec<?>) declaration.field().get(doFn);
    spec.bind(declaration.id(), earlyBinder);
  }
  if (doFnRunner instanceof StatefulDoFnRunner) {
    ((StatefulDoFnRunner<InputT, OutputT, BoundedWindow>) doFnRunner).getSystemStateTags().forEach(tag -> tag.getSpec().bind(tag.getId(), earlyBinder));
  }
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class PortableTimersExecutionTest, method testTimerExecution.
/**
 * Runs a two-stage portable pipeline on a local Flink master with the embedded environment and
 * checks the output of a stateful {@code DoFn} that sets an event-time timer per element.
 *
 * <p>For every key the expected output is each input value plus {@code offset}, and exactly one
 * {@code timerOutput} element produced when the timer fires.
 *
 * @throws Exception if building, translating, or running the job fails
 */
@Test(timeout = 120_000)
public void testTimerExecution() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").as(FlinkPipelineOptions.class);
  options.setRunner(CrashingRunner.class);
  options.setFlinkMaster("[local]");
  options.setStreaming(isStreaming);
  options.setParallelism(2);
  options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  final String timerId = "foo";
  final String stateId = "sizzle";
  final int offset = 5000;
  final int timerOutput = 4093;
  // Enough keys that we exercise interesting code paths
  int numKeys = 50;
  int numDuplicateTimers = 15;
  List<KV<String, Integer>> input = new ArrayList<>();
  List<KV<String, Integer>> expectedOutput = new ArrayList<>();
  // A primitive counter avoids boxing on every comparison/increment; the key text is built once.
  for (int key = 0; key < numKeys; ++key) {
    final String keyName = String.valueOf(key);
    // Each key should have just one final output at GC time
    expectedOutput.add(KV.of(keyName, timerOutput));
    for (int i = 0; i < numDuplicateTimers; ++i) {
      // Each input should be output with the offset added
      input.add(KV.of(keyName, i));
      expectedOutput.add(KV.of(keyName, i + offset));
    }
  }
  Collections.shuffle(input);
  // Emits the whole prepared input list for each element it receives (here, the single impulse).
  DoFn<byte[], KV<String, Integer>> inputFn = new DoFn<byte[], KV<String, Integer>>() {
    @ProcessElement
    public void processElement(ProcessContext context) {
      for (KV<String, Integer> stringIntegerKV : input) {
        context.output(stringIntegerKV);
      }
    }
  };
  DoFn<KV<String, Integer>, KV<String, Integer>> testFn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {
    @TimerId(timerId)
    private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
    @StateId(stateId)
    private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());
    @ProcessElement
    public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
      // Every element (re)sets the same timer to the window's max timestamp and records its key.
      timer.set(window.maxTimestamp());
      state.write(context.element().getKey());
      context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
    }
    @OnTimer(timerId)
    public void onTimer(@StateId(stateId) ValueState<String> state, OutputReceiver<KV<String, Integer>> r) {
      // The key written in processElement must still be readable when the timer fires.
      String read = Objects.requireNonNull(state.read(), "State must not be null");
      KV<String, Integer> of = KV.of(read, timerOutput);
      r.output(of);
    }
  };
  final Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, Integer>> output = pipeline.apply("Impulse", Impulse.create()).apply("Input", ParDo.of(inputFn)).apply("Timers", ParDo.of(testFn));
  PAssert.that(output).containsInAnyOrder(expectedOutput);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  JobInvocation jobInvocation = FlinkJobInvoker.create(null).createJobInvocation("id", "none", flinkJobExecutor, pipelineProto, options, new FlinkPipelineRunner(options, null, Collections.emptyList()));
  jobInvocation.start();
  // Polls until DONE; any other end state keeps looping until the @Test timeout aborts the test.
  while (jobInvocation.getState() != JobState.Enum.DONE) {
    Thread.sleep(1000);
  }
  assertThat(jobInvocation.getState(), is(JobState.Enum.DONE));
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class SimpleParDoFn, method processSystemTimer.
/**
 * Handles a system timer. Only the cleanup timer owned by this class is acted upon: after
 * validating it, the window-expiration callback is invoked and every declared user state cell is
 * cleared in the timer's namespace. Any other timer id is ignored.
 *
 * @param timer the system timer that fired
 * @throws Exception if window expiration handling or state cleanup fails
 */
private void processSystemTimer(TimerData timer) throws Exception {
  // Anything other than this class's own cleanup timer is not handled here.
  if (!timer.getTimerId().equals(CLEANUP_TIMER_ID)) {
    return;
  }

  boolean isEventTime = timer.getDomain().equals(TimeDomain.EVENT_TIME);
  checkState(isEventTime, "%s received cleanup timer with domain not EVENT_TIME: %s", this, timer);
  boolean isWindowNamespace = timer.getNamespace() instanceof WindowNamespace;
  checkState(isWindowNamespace, "%s received cleanup timer not for a %s: %s", this, WindowNamespace.class.getSimpleName(), timer);

  BoundedWindow expiredWindow = ((WindowNamespace) timer.getNamespace()).getWindow();
  Instant earliestCleanup = earliestAllowableCleanupTime(expiredWindow, fnInfo.getWindowingStrategy());
  // The timer must not fire earlier than the earliest allowable cleanup time for its window.
  boolean firedInTime = !earliestCleanup.isAfter(timer.getTimestamp());
  checkState(firedInTime, "%s received state cleanup timer for window %s " + " that is before the appropriate cleanup time %s", this, expiredWindow, earliestCleanup);

  fnRunner.onWindowExpiration(expiredWindow, timer.getOutputTimestamp(), this.stepContext.stateInternals().getKey());

  // This is for a timer for a window that is expired, so clean it up.
  for (StateDeclaration declaration : fnSignature.stateDeclarations().values()) {
    StateTag<?> cleanupTag = cleanupStateTagFor(declaration);
    StateInternals userState = userStepContext.stateInternals();
    org.apache.beam.sdk.state.State cell = userState.state(timer.getNamespace(), cleanupTag);
    cell.clear();
  }
}

/** Builds the state tag for one declared state cell by reading its spec field from the DoFn. */
private StateTag<?> cleanupStateTagFor(StateDeclaration declaration) {
  try {
    return StateTags.tagForSpec(declaration.id(), (StateSpec) declaration.field().get(fnInfo.getDoFn()));
  } catch (IllegalAccessException e) {
    throw new RuntimeException(String.format("Error accessing %s for %s", StateSpec.class.getName(), fnInfo.getDoFn().getClass().getName()), e);
  }
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class SamzaStoreStateInternalsTest, method testIteratorClosed.
/**
 * Runs a stateful {@code DoFn} that opens set-state iterators (one fully consumed, one only
 * partially consumed) and verifies that every iterator recorded by {@code TestStore} is closed.
 */
@Test
public void testIteratorClosed() {
  final String stateId = "foo";
  DoFn<KV<String, Integer>, Set<Integer>> iteratingFn = new DoFn<KV<String, Integer>, Set<Integer>>() {
    @StateId(stateId)
    private final StateSpec<SetState<Integer>> setState = StateSpecs.set(VarIntCoder.of());

    @ProcessElement
    public void processElement(ProcessContext c, @StateId(stateId) SetState<Integer> setState) {
      SamzaSetState<Integer> samzaState = (SamzaSetState<Integer>) setState;
      samzaState.add(c.element().getValue());
      // the iterator for size needs to be closed
      int elementCount = Iterators.size(samzaState.readIterator().read());
      if (elementCount <= 1) {
        return;
      }
      final Iterator<Integer> partiallyRead = samzaState.readIterator().read();
      assertTrue(partiallyRead.hasNext());
      // this iterator should be closed too
      partiallyRead.next();
    }
  };

  pipeline
      .apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)))
      .apply(ParDo.of(iteratingFn));

  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setRunner(TestSamzaRunner.class);

  // Back the "foo" store with the test storage engine.
  Map<String, String> storeOverrides = new HashMap<>(ConfigBuilder.localRunConfig());
  storeOverrides.put("stores.foo.factory", TestStorageEngine.class.getName());
  SamzaPipelineOptions pipelineOptions = pipeline.getOptions().as(SamzaPipelineOptions.class);
  pipelineOptions.setConfigOverride(storeOverrides);
  pipeline.run();

  // The test code creates 7 underlying iterators, and 1 more is created during state.clear()
  // Verify all of them are closed
  assertEquals(8, TestStore.iterators.size());
  TestStore.iterators.forEach(opened -> assertTrue(opened.closed));
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class ConfigGeneratorTest, method testUserStoreConfig.
/**
 * Checks the store configuration generated for a user-declared state cell: factory and serde
 * entries are present with no changelog by default, and a changelog entry appears once durable
 * state is enabled on the options.
 */
@Test
public void testUserStoreConfig() {
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestStoreConfig");
  options.setRunner(SamzaRunner.class);

  // A no-op DoFn whose only purpose is to declare a single value state named "testState".
  DoFn<KV<String, String>, Void> statefulFn = new DoFn<KV<String, String>, Void>() {
    private static final String testState = "testState";

    @StateId(testState)
    private final StateSpec<ValueState<Integer>> state = StateSpecs.value();

    @ProcessElement
    public void processElement(ProcessContext context, @StateId(testState) ValueState<Integer> state) {
    }
  };

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.empty(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings())))
      .apply(ParDo.of(statefulFn));

  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);

  // First pass: default options, so no changelog entry is expected.
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config defaultConfig = configBuilder.build();
  assertEquals(RocksDbKeyValueStorageEngineFactory.class.getName(), defaultConfig.get("stores.testState.factory"));
  assertEquals("byteArraySerde", defaultConfig.get("stores.testState.key.serde"));
  assertEquals("stateValueSerde", defaultConfig.get("stores.testState.msg.serde"));
  assertNull(defaultConfig.get("stores.testState.changelog"));

  // Second pass: same builder, with durable state switched on.
  options.setStateDurable(true);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config durableConfig = configBuilder.build();
  assertEquals("TestStoreConfig-1-testState-changelog", durableConfig.get("stores.testState.changelog"));
}
Aggregations