Search in sources :

Example 41 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

The method earlyBindStateIfNeeded of the class DoFnOperator.

/**
 * Eagerly binds the state used by this operator through a {@link FlinkStateInternals.EarlyBinder}.
 *
 * <p>Does nothing when either {@code keyCoder} or {@code doFn} is {@code null}. Otherwise every
 * state declaration on the DoFn's signature is bound under its declared id, and, when the runner
 * is a {@link StatefulDoFnRunner}, each of its system state tags is bound as well.
 *
 * @throws IllegalArgumentException propagated from the reflective read of a declared state field
 * @throws IllegalAccessException propagated from the reflective read of a declared state field
 */
private void earlyBindStateIfNeeded() throws IllegalArgumentException, IllegalAccessException {
    if (keyCoder == null || doFn == null) {
        return;
    }
    DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
    FlinkStateInternals.EarlyBinder earlyBinder = new FlinkStateInternals.EarlyBinder(getKeyedStateBackend(), serializedOptions);
    for (DoFnSignature.StateDeclaration declaration : signature.stateDeclarations().values()) {
        // The iterated declaration already exposes its field; re-fetching it from the map by its
        // own id would return the same declaration.
        StateSpec<?> spec = (StateSpec<?>) declaration.field().get(doFn);
        spec.bind(declaration.id(), earlyBinder);
    }
    if (doFnRunner instanceof StatefulDoFnRunner) {
        ((StatefulDoFnRunner<InputT, OutputT, BoundedWindow>) doFnRunner).getSystemStateTags().forEach(tag -> tag.getSpec().bind(tag.getId(), earlyBinder));
    }
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) FlinkStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 42 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

The method testTimerExecution of the class PortableTimersExecutionTest.

/**
 * Builds a pipeline whose stateful DoFn sets an event-time timer per element, submits it as a
 * job invocation and polls until the job reports {@code DONE}.
 *
 * <p>The expected output holds, for every key, each input value shifted by {@code offset} plus one
 * extra element carrying {@code timerOutput} that is emitted from the timer callback.
 */
@Test(timeout = 120_000)
public void testTimerExecution() throws Exception {
    FlinkPipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").as(FlinkPipelineOptions.class);
    options.setRunner(CrashingRunner.class);
    options.setFlinkMaster("[local]");
    options.setStreaming(isStreaming);
    options.setParallelism(2);
    PortablePipelineOptions portableOptions = options.as(PortablePipelineOptions.class);
    portableOptions.setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);

    // These must stay compile-time constants: the ids are used as annotation values below.
    final String timerId = "foo";
    final String stateId = "sizzle";
    final int offset = 5000;
    final int timerOutput = 4093;

    // Use enough keys to exercise interesting code paths.
    int numKeys = 50;
    int numDuplicateTimers = 15;
    List<KV<String, Integer>> input = new ArrayList<>();
    List<KV<String, Integer>> expectedOutput = new ArrayList<>();
    for (int key = 0; key < numKeys; ++key) {
        String keyName = Integer.toString(key);
        // One final output per key, produced when the timer fires.
        expectedOutput.add(KV.of(keyName, timerOutput));
        for (int value = 0; value < numDuplicateTimers; ++value) {
            // Every input element is expected back with the offset added.
            input.add(KV.of(keyName, value));
            expectedOutput.add(KV.of(keyName, value + offset));
        }
    }
    Collections.shuffle(input);

    // Emits the whole shuffled input list for each element it receives.
    DoFn<byte[], KV<String, Integer>> inputFn = new DoFn<byte[], KV<String, Integer>>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            input.forEach(kv -> context.output(kv));
        }
    };

    // Records the key in state, (re)sets the timer to the window's max timestamp and forwards
    // the element with the offset applied; the timer callback emits the stored key.
    DoFn<KV<String, Integer>, KV<String, Integer>> testFn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            KV<String, Integer> element = context.element();
            timer.set(window.maxTimestamp());
            state.write(element.getKey());
            context.output(KV.of(element.getKey(), element.getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(@StateId(stateId) ValueState<String> state, OutputReceiver<KV<String, Integer>> r) {
            String storedKey = Objects.requireNonNull(state.read(), "State must not be null");
            r.output(KV.of(storedKey, timerOutput));
        }
    };

    final Pipeline pipeline = Pipeline.create(options);
    PCollection<KV<String, Integer>> output = pipeline
            .apply("Impulse", Impulse.create())
            .apply("Input", ParDo.of(inputFn))
            .apply("Timers", ParDo.of(testFn));
    PAssert.that(output).containsInAnyOrder(expectedOutput);

    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
    FlinkPipelineRunner pipelineRunner = new FlinkPipelineRunner(options, null, Collections.emptyList());
    JobInvocation jobInvocation = FlinkJobInvoker.create(null).createJobInvocation("id", "none", flinkJobExecutor, pipelineProto, options, pipelineRunner);
    jobInvocation.start();

    // Poll once per second until DONE; the @Test timeout bounds the wait.
    while (jobInvocation.getState() != JobState.Enum.DONE) {
        Thread.sleep(1000);
    }
    assertThat(jobInvocation.getState(), is(JobState.Enum.DONE));
}
Also used : ArrayList(java.util.ArrayList) StateSpec(org.apache.beam.sdk.state.StateSpec) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) Timer(org.apache.beam.sdk.state.Timer) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) Test(org.junit.Test)

Example 43 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

The method processSystemTimer of the class SimpleParDoFn.

/**
 * Handles the cleanup timer owned by this class; any other timer id is ignored.
 *
 * <p>After validating the timer's domain, namespace and timestamp, notifies the runner that the
 * window expired and then clears every state cell declared on the DoFn for the timer's namespace.
 *
 * @param timer the system timer that fired
 * @throws Exception if window-expiration handling or state access fails
 */
private void processSystemTimer(TimerData timer) throws Exception {
    if (!timer.getTimerId().equals(CLEANUP_TIMER_ID)) {
        return;
    }

    // Sanity checks: the cleanup timer must be an event-time timer scoped to a window.
    checkState(timer.getDomain().equals(TimeDomain.EVENT_TIME), "%s received cleanup timer with domain not EVENT_TIME: %s", this, timer);
    checkState(timer.getNamespace() instanceof WindowNamespace, "%s received cleanup timer not for a %s: %s", this, WindowNamespace.class.getSimpleName(), timer);

    WindowNamespace windowNamespace = (WindowNamespace) timer.getNamespace();
    BoundedWindow window = windowNamespace.getWindow();
    Instant targetTime = earliestAllowableCleanupTime(window, fnInfo.getWindowingStrategy());
    // The timer must not fire earlier than the earliest allowable cleanup time for the window.
    checkState(!targetTime.isAfter(timer.getTimestamp()), "%s received state cleanup timer for window %s  that is before the appropriate cleanup time %s", this, window, targetTime);

    fnRunner.onWindowExpiration(window, timer.getOutputTimestamp(), this.stepContext.stateInternals().getKey());

    // The window is expired: clear each declared state cell in the timer's namespace.
    for (StateDeclaration declaration : fnSignature.stateDeclarations().values()) {
        StateTag<?> stateTag;
        try {
            Object declaredSpec = declaration.field().get(fnInfo.getDoFn());
            stateTag = StateTags.tagForSpec(declaration.id(), (StateSpec) declaredSpec);
        } catch (IllegalAccessException e) {
            String message = String.format("Error accessing %s for %s", StateSpec.class.getName(), fnInfo.getDoFn().getClass().getName());
            throw new RuntimeException(message, e);
        }
        StateInternals userState = userStepContext.stateInternals();
        org.apache.beam.sdk.state.State cell = userState.state(timer.getNamespace(), stateTag);
        cell.clear();
    }
}
Also used : Instant(org.joda.time.Instant) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) StateInternals(org.apache.beam.runners.core.StateInternals) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration)

Example 44 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

The method testIteratorClosed of the class SamzaStoreStateInternalsTest.

/**
 * Runs a small keyed pipeline whose DoFn reads set state through iterators, then checks that
 * every iterator recorded by {@code TestStore} has been closed.
 */
@Test
public void testIteratorClosed() {
    final String stateId = "foo";
    DoFn<KV<String, Integer>, Set<Integer>> fn = new DoFn<KV<String, Integer>, Set<Integer>>() {

        @StateId(stateId)
        private final StateSpec<SetState<Integer>> setState = StateSpecs.set(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) SetState<Integer> setState) {
            SamzaSetState<Integer> samzaState = (SamzaSetState<Integer>) setState;
            samzaState.add(c.element().getValue());
            // The iterator consumed to compute the size has to be closed.
            int elementCount = Iterators.size(samzaState.readIterator().read());
            if (elementCount > 1) {
                // This second, only partially consumed iterator should be closed as well.
                final Iterator<Integer> partiallyRead = samzaState.readIterator().read();
                assertTrue(partiallyRead.hasNext());
                partiallyRead.next();
            }
        }
    };

    // Four elements for one key; 42 appears twice.
    pipeline
            .apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)))
            .apply(ParDo.of(fn));

    SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    options.setRunner(TestSamzaRunner.class);

    // Override the factory of store "foo" with the test storage engine.
    Map<String, String> configs = new HashMap<>(ConfigBuilder.localRunConfig());
    configs.put("stores.foo.factory", TestStorageEngine.class.getName());
    SamzaPipelineOptions pipelineOptions = pipeline.getOptions().as(SamzaPipelineOptions.class);
    pipelineOptions.setConfigOverride(configs);
    pipeline.run();

    // The test code creates 7 underlying iterators, and 1 more is created during state.clear().
    // All of them must have been closed.
    assertEquals(8, TestStore.iterators.size());
    TestStore.iterators.forEach(iter -> assertTrue(iter.closed));
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) DoFn(org.apache.beam.sdk.transforms.DoFn) SamzaSetState(org.apache.beam.runners.samza.state.SamzaSetState) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) SetState(org.apache.beam.sdk.state.SetState) Test(org.junit.Test)

Example 45 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

The method testUserStoreConfig of the class ConfigGeneratorTest.

/**
 * Checks the store configuration generated for a DoFn that declares a state cell named
 * {@code testState}: factory, key serde and message serde are asserted, no changelog is
 * configured at first, and a changelog entry is present once state durability is enabled.
 */
@Test
public void testUserStoreConfig() {
    SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    options.setJobName("TestStoreConfig");
    options.setRunner(SamzaRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    // A no-op stateful DoFn; only its state declaration matters for the generated config.
    DoFn<KV<String, String>, Void> statefulFn = new DoFn<KV<String, String>, Void>() {

        private static final String testState = "testState";

        @StateId(testState)
        private final StateSpec<ValueState<Integer>> state = StateSpecs.value();

        @ProcessElement
        public void processElement(ProcessContext context, @StateId(testState) ValueState<Integer> state) {
        }
    };
    pipeline
            .apply(Create.empty(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings())))
            .apply(ParDo.of(statefulFn));

    final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
    final ConfigBuilder configBuilder = new ConfigBuilder(options);

    // First pass: state durability has not been enabled, so no changelog is expected.
    SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
    final Config initialConfig = configBuilder.build();
    assertEquals(RocksDbKeyValueStorageEngineFactory.class.getName(), initialConfig.get("stores.testState.factory"));
    assertEquals("byteArraySerde", initialConfig.get("stores.testState.key.serde"));
    assertEquals("stateValueSerde", initialConfig.get("stores.testState.msg.serde"));
    assertNull(initialConfig.get("stores.testState.changelog"));

    // Second pass: with durable state the store gets a changelog.
    options.setStateDurable(true);
    SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
    final Config durableConfig = configBuilder.build();
    assertEquals("TestStoreConfig-1-testState-changelog", durableConfig.get("stores.testState.changelog"));
}
Also used : ZkConfig(org.apache.samza.config.ZkConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) RocksDbKeyValueStorageEngineFactory(org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory) PValue(org.apache.beam.sdk.values.PValue) Pipeline(org.apache.beam.sdk.Pipeline) StateSpec(org.apache.beam.sdk.state.StateSpec) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Aggregations

StateSpec (org.apache.beam.sdk.state.StateSpec)47 Test (org.junit.Test)38 KV (org.apache.beam.sdk.values.KV)35 Matchers.containsString (org.hamcrest.Matchers.containsString)24 Category (org.junit.experimental.categories.Category)24 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)23 ValueState (org.apache.beam.sdk.state.ValueState)21 DoFn (org.apache.beam.sdk.transforms.DoFn)19 ArrayList (java.util.ArrayList)10 CombiningState (org.apache.beam.sdk.state.CombiningState)10 Pipeline (org.apache.beam.sdk.Pipeline)9 List (java.util.List)8 TupleTag (org.apache.beam.sdk.values.TupleTag)8 Instant (org.joda.time.Instant)8 BagState (org.apache.beam.sdk.state.BagState)7 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)7 TupleTagList (org.apache.beam.sdk.values.TupleTagList)7 MapState (org.apache.beam.sdk.state.MapState)6 SetState (org.apache.beam.sdk.state.SetState)6 Timer (org.apache.beam.sdk.state.Timer)6