Search in sources :

Example 16 with ListStateDescriptor

use of org.apache.flink.api.common.state.ListStateDescriptor in project flink by apache.

the class StateBackendTestBase method testListState.

@Test
@SuppressWarnings("unchecked,rawtypes")
public void testListState() {
    try {
        CheckpointStreamFactory streamFactory = createStreamFactory();
        AbstractKeyedStateBackend<Integer> backend = createKeyedBackend(IntSerializer.INSTANCE);
        ListStateDescriptor<String> kvId = new ListStateDescriptor<>("id", String.class);
        kvId.initializeSerializerUnlessSet(new ExecutionConfig());
        TypeSerializer<Integer> keySerializer = IntSerializer.INSTANCE;
        TypeSerializer<VoidNamespace> namespaceSerializer = VoidNamespaceSerializer.INSTANCE;
        TypeSerializer<String> valueSerializer = kvId.getElementSerializer();
        ListState<String> state = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
        @SuppressWarnings("unchecked") InternalKvState<VoidNamespace> kvState = (InternalKvState<VoidNamespace>) state;
        Joiner joiner = Joiner.on(",");
        // some modifications to the state
        backend.setCurrentKey(1);
        assertEquals(null, state.get());
        assertEquals(null, getSerializedList(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        state.add("1");
        backend.setCurrentKey(2);
        assertEquals(null, state.get());
        assertEquals(null, getSerializedList(kvState, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer));
        state.add("2");
        backend.setCurrentKey(1);
        assertEquals("1", joiner.join(state.get()));
        assertEquals("1", joiner.join(getSerializedList(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        // draw a snapshot
        KeyGroupsStateHandle snapshot1 = FutureUtil.runIfNotDoneAndGet(backend.snapshot(682375462378L, 2, streamFactory, CheckpointOptions.forFullCheckpoint()));
        // make some more modifications
        backend.setCurrentKey(1);
        state.add("u1");
        backend.setCurrentKey(2);
        state.add("u2");
        backend.setCurrentKey(3);
        state.add("u3");
        // draw another snapshot
        KeyGroupsStateHandle snapshot2 = FutureUtil.runIfNotDoneAndGet(backend.snapshot(682375462379L, 4, streamFactory, CheckpointOptions.forFullCheckpoint()));
        // validate the original state
        backend.setCurrentKey(1);
        assertEquals("1,u1", joiner.join(state.get()));
        assertEquals("1,u1", joiner.join(getSerializedList(kvState, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.setCurrentKey(2);
        assertEquals("2,u2", joiner.join(state.get()));
        assertEquals("2,u2", joiner.join(getSerializedList(kvState, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.setCurrentKey(3);
        assertEquals("u3", joiner.join(state.get()));
        assertEquals("u3", joiner.join(getSerializedList(kvState, 3, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.dispose();
        // restore the first snapshot and validate it
        backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot1);
        snapshot1.discardState();
        ListState<String> restored1 = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
        @SuppressWarnings("unchecked") InternalKvState<VoidNamespace> restoredKvState1 = (InternalKvState<VoidNamespace>) restored1;
        backend.setCurrentKey(1);
        assertEquals("1", joiner.join(restored1.get()));
        assertEquals("1", joiner.join(getSerializedList(restoredKvState1, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.setCurrentKey(2);
        assertEquals("2", joiner.join(restored1.get()));
        assertEquals("2", joiner.join(getSerializedList(restoredKvState1, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.dispose();
        // restore the second snapshot and validate it
        backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot2);
        snapshot2.discardState();
        ListState<String> restored2 = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
        @SuppressWarnings("unchecked") InternalKvState<VoidNamespace> restoredKvState2 = (InternalKvState<VoidNamespace>) restored2;
        backend.setCurrentKey(1);
        assertEquals("1,u1", joiner.join(restored2.get()));
        assertEquals("1,u1", joiner.join(getSerializedList(restoredKvState2, 1, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.setCurrentKey(2);
        assertEquals("2,u2", joiner.join(restored2.get()));
        assertEquals("2,u2", joiner.join(getSerializedList(restoredKvState2, 2, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.setCurrentKey(3);
        assertEquals("u3", joiner.join(restored2.get()));
        assertEquals("u3", joiner.join(getSerializedList(restoredKvState2, 3, keySerializer, VoidNamespace.INSTANCE, namespaceSerializer, valueSerializer)));
        backend.dispose();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Joiner(com.google.common.base.Joiner) BlockerCheckpointStreamFactory(org.apache.flink.runtime.util.BlockerCheckpointStreamFactory) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) InternalKvState(org.apache.flink.runtime.state.internal.InternalKvState) Test(org.junit.Test)

Example 17 with ListStateDescriptor

use of org.apache.flink.api.common.state.ListStateDescriptor in project flink by apache.

the class StateBackendTestBase method testListStateRestoreWithWrongSerializers.

@Test
@SuppressWarnings("unchecked")
public void testListStateRestoreWithWrongSerializers() {
    try {
        CheckpointStreamFactory streamFactory = createStreamFactory();
        AbstractKeyedStateBackend<Integer> backend = createKeyedBackend(IntSerializer.INSTANCE);
        ListStateDescriptor<String> kvId = new ListStateDescriptor<>("id", String.class);
        ListState<String> state = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
        backend.setCurrentKey(1);
        state.add("1");
        backend.setCurrentKey(2);
        state.add("2");
        // draw a snapshot
        KeyGroupsStateHandle snapshot1 = FutureUtil.runIfNotDoneAndGet(backend.snapshot(682375462378L, 2, streamFactory, CheckpointOptions.forFullCheckpoint()));
        backend.dispose();
        // restore the first snapshot and validate it
        backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot1);
        snapshot1.discardState();
        @SuppressWarnings("unchecked") TypeSerializer<String> fakeStringSerializer = (TypeSerializer<String>) (TypeSerializer<?>) FloatSerializer.INSTANCE;
        try {
            kvId = new ListStateDescriptor<>("id", fakeStringSerializer);
            state = backend.getPartitionedState(VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE, kvId);
            state.get();
            fail("should recognize wrong serializers");
        } catch (IOException e) {
            if (!e.getMessage().contains("Trying to access state using wrong")) {
                fail("wrong exception " + e);
            }
        // expected
        } catch (Exception e) {
            fail("wrong exception " + e);
        }
        backend.dispose();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : BlockerCheckpointStreamFactory(org.apache.flink.runtime.util.BlockerCheckpointStreamFactory) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) IOException(java.io.IOException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) Test(org.junit.Test)

Example 18 with ListStateDescriptor

use of org.apache.flink.api.common.state.ListStateDescriptor in project flink by apache.

the class OperatorStateBackendTest method testSnapshotRestore.

@Test
public void testSnapshotRestore() throws Exception {
    DefaultOperatorStateBackend operatorStateBackend = createNewOperatorStateBackend();
    ListStateDescriptor<Serializable> stateDescriptor1 = new ListStateDescriptor<>("test1", new JavaSerializer<>());
    ListStateDescriptor<Serializable> stateDescriptor2 = new ListStateDescriptor<>("test2", new JavaSerializer<>());
    ListStateDescriptor<Serializable> stateDescriptor3 = new ListStateDescriptor<>("test3", new JavaSerializer<>());
    ListState<Serializable> listState1 = operatorStateBackend.getOperatorState(stateDescriptor1);
    ListState<Serializable> listState2 = operatorStateBackend.getOperatorState(stateDescriptor2);
    ListState<Serializable> listState3 = operatorStateBackend.getBroadcastOperatorState(stateDescriptor3);
    listState1.add(42);
    listState1.add(4711);
    listState2.add(7);
    listState2.add(13);
    listState2.add(23);
    listState3.add(17);
    listState3.add(18);
    listState3.add(19);
    listState3.add(20);
    CheckpointStreamFactory streamFactory = abstractStateBackend.createStreamFactory(new JobID(), "testOperator");
    OperatorStateHandle stateHandle = FutureUtil.runIfNotDoneAndGet(operatorStateBackend.snapshot(1, 1, streamFactory, CheckpointOptions.forFullCheckpoint()));
    try {
        operatorStateBackend.close();
        operatorStateBackend.dispose();
        //TODO this is temporarily casted to test already functionality that we do not yet expose through public API
        operatorStateBackend = (DefaultOperatorStateBackend) abstractStateBackend.createOperatorStateBackend(createMockEnvironment(), "testOperator");
        operatorStateBackend.restore(Collections.singletonList(stateHandle));
        assertEquals(3, operatorStateBackend.getRegisteredStateNames().size());
        listState1 = operatorStateBackend.getOperatorState(stateDescriptor1);
        listState2 = operatorStateBackend.getOperatorState(stateDescriptor2);
        listState3 = operatorStateBackend.getBroadcastOperatorState(stateDescriptor3);
        assertEquals(3, operatorStateBackend.getRegisteredStateNames().size());
        Iterator<Serializable> it = listState1.get().iterator();
        assertEquals(42, it.next());
        assertEquals(4711, it.next());
        assertTrue(!it.hasNext());
        it = listState2.get().iterator();
        assertEquals(7, it.next());
        assertEquals(13, it.next());
        assertEquals(23, it.next());
        assertTrue(!it.hasNext());
        it = listState3.get().iterator();
        assertEquals(17, it.next());
        assertEquals(18, it.next());
        assertEquals(19, it.next());
        assertEquals(20, it.next());
        assertTrue(!it.hasNext());
        operatorStateBackend.close();
        operatorStateBackend.dispose();
    } finally {
        stateHandle.discardState();
    }
}
Also used : Serializable(java.io.Serializable) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 19 with ListStateDescriptor

use of org.apache.flink.api.common.state.ListStateDescriptor in project flink by apache.

the class AllWindowedStream method aggregate.

/**
	 * Applies the given window function to each window. The window function is called for each
	 * evaluation of the window for each key individually. The output of the window function is
	 * interpreted as a regular non-windowed stream.
	 *
	 * <p>Arriving data is incrementally aggregated using the given aggregate function. This means
	 * that the window function typically has only a single value to process when called.
	 *
	 * @param aggregateFunction The aggregation function that is used for incremental aggregation.
	 * @param windowFunction The window function.
	 * @param accumulatorType Type information for the internal accumulator type of the aggregation function
	 * @param resultType Type information for the result type of the window function
	 *
	 * @return The data stream that is the result of applying the window function to the window.
	 *
	 * @param <ACC> The type of the AggregateFunction's accumulator
	 * @param <V> The type of AggregateFunction's result, and the WindowFunction's input  
	 * @param <R> The type of the elements in the resulting stream, equal to the
	 *            WindowFunction's result type
	 */
@PublicEvolving
public <ACC, V, R> SingleOutputStreamOperator<R> aggregate(AggregateFunction<T, ACC, V> aggregateFunction, AllWindowFunction<V, R, W> windowFunction, TypeInformation<ACC> accumulatorType, TypeInformation<V> aggregateResultType, TypeInformation<R> resultType) {
    checkNotNull(aggregateFunction, "aggregateFunction");
    checkNotNull(windowFunction, "windowFunction");
    checkNotNull(accumulatorType, "accumulatorType");
    checkNotNull(aggregateResultType, "aggregateResultType");
    checkNotNull(resultType, "resultType");
    if (aggregateFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("This aggregate function cannot be a RichFunction.");
    }
    //clean the closures
    windowFunction = input.getExecutionEnvironment().clean(windowFunction);
    aggregateFunction = input.getExecutionEnvironment().clean(aggregateFunction);
    final String callLocation = Utils.getCallLocationName();
    final String udfName = "AllWindowedStream." + callLocation;
    final String opName;
    final KeySelector<T, Byte> keySel = input.getKeySelector();
    OneInputStreamOperator<T, R> operator;
    if (evictor != null) {
        @SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
        operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableAllWindowFunction<>(new AggregateApplyAllWindowFunction<>(aggregateFunction, windowFunction)), trigger, evictor, allowedLateness, lateDataOutputTag);
    } else {
        AggregatingStateDescriptor<T, ACC, V> stateDesc = new AggregatingStateDescriptor<>("window-contents", aggregateFunction, accumulatorType.createSerializer(getExecutionEnvironment().getConfig()));
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
        operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueAllWindowFunction<>(windowFunction), trigger, allowedLateness, lateDataOutputTag);
    }
    return input.transform(opName, resultType, operator).forceNonParallel();
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichFunction(org.apache.flink.api.common.functions.RichFunction) AggregatingStateDescriptor(org.apache.flink.api.common.state.AggregatingStateDescriptor) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) InternalSingleValueAllWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalSingleValueAllWindowFunction) InternalIterableAllWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalIterableAllWindowFunction) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StreamElementSerializer(org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer) PublicEvolving(org.apache.flink.annotation.PublicEvolving)

Example 20 with ListStateDescriptor

use of org.apache.flink.api.common.state.ListStateDescriptor in project flink by apache.

the class WindowedStream method apply.

/**
	 * Applies the given window function to each window. The window function is called for each
	 * evaluation of the window for each key individually. The output of the window function is
	 * interpreted as a regular non-windowed stream.
	 *
	 * <p>
	 * Arriving data is incrementally aggregated using the given fold function.
	 *
	 * @param initialValue The initial value of the fold.
	 * @param foldFunction The fold function that is used for incremental aggregation.
	 * @param function The window function.
	 * @param resultType Type information for the result type of the window function
	 * @return The data stream that is the result of applying the window function to the window.
	 *
	 * @deprecated Use {@link #fold(Object, FoldFunction, WindowFunction, TypeInformation, TypeInformation)} instead.
	 */
@Deprecated
public <R> SingleOutputStreamOperator<R> apply(R initialValue, FoldFunction<T, R> foldFunction, WindowFunction<R, R, K, W> function, TypeInformation<R> resultType) {
    if (foldFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("FoldFunction of apply can not be a RichFunction.");
    }
    if (windowAssigner instanceof MergingWindowAssigner) {
        throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
    }
    //clean the closures
    function = input.getExecutionEnvironment().clean(function);
    foldFunction = input.getExecutionEnvironment().clean(foldFunction);
    String callLocation = Utils.getCallLocationName();
    String udfName = "WindowedStream." + callLocation;
    String opName;
    KeySelector<T, K> keySel = input.getKeySelector();
    OneInputStreamOperator<T, R> operator;
    if (evictor != null) {
        @SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
        operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableWindowFunction<>(new FoldApplyWindowFunction<>(initialValue, foldFunction, function, resultType)), trigger, evictor, allowedLateness, lateDataOutputTag);
    } else {
        FoldingStateDescriptor<T, R> stateDesc = new FoldingStateDescriptor<>("window-contents", initialValue, foldFunction, resultType.createSerializer(getExecutionEnvironment().getConfig()));
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
        operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueWindowFunction<>(function), trigger, allowedLateness, lateDataOutputTag);
    }
    return input.transform(opName, resultType, operator);
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichFunction(org.apache.flink.api.common.functions.RichFunction) InternalSingleValueWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalSingleValueWindowFunction) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) InternalIterableWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalIterableWindowFunction) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) MergingWindowAssigner(org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StreamElementSerializer(org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer)

Aggregations

ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)82 Test (org.junit.Test)60 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)49 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)37 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)33 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)32 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)31 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)29 StreamElementSerializer (org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer)27 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)27 EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger)19 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)18 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)18 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)17 RichFunction (org.apache.flink.api.common.functions.RichFunction)16 TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows)15 SlidingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows)12 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)11 PublicEvolving (org.apache.flink.annotation.PublicEvolving)9 Watermark (org.apache.flink.streaming.api.watermark.Watermark)9