Search in sources :

Example 16 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class WindowedStream method fold.

/**
 * Incrementally folds the elements of every window and passes the folded value to the given
 * window function. The window function is invoked once per window evaluation and per key, and
 * whatever it emits is treated as a regular non-windowed stream.
 *
 * <p>
 * When an evictor is configured, the raw elements are kept in list state and the fold is
 * applied when the window is evaluated; otherwise the fold is maintained in folding state.
 *
 * @param initialValue The value the fold starts from.
 * @param foldFunction The fold function used for the incremental aggregation.
 * @param function The window function that receives the folded value.
 * @param foldAccumulatorType Type information for the result type of the fold function
 * @param resultType Type information for the result type of the window function
 * @return The data stream that is the result of applying the window function to the window.
 * @throws UnsupportedOperationException if {@code foldFunction} is a {@code RichFunction}, or
 *         if the window assigner is a merging or a base-aligned assigner.
 */
@PublicEvolving
public <ACC, R> SingleOutputStreamOperator<R> fold(ACC initialValue, FoldFunction<T, ACC> foldFunction, WindowFunction<ACC, R, K, W> function, TypeInformation<ACC> foldAccumulatorType, TypeInformation<R> resultType) {
    // Reject the combinations that fold does not support before doing any work.
    if (foldFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("FoldFunction of fold can not be a RichFunction.");
    }
    if (windowAssigner instanceof MergingWindowAssigner) {
        throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
    }
    if (windowAssigner instanceof BaseAlignedWindowAssigner) {
        String assignerName = windowAssigner.getClass().getSimpleName();
        throw new UnsupportedOperationException("Fold cannot be used with a " + assignerName + " assigner.");
    }

    // Clean the closures of both user functions.
    function = input.getExecutionEnvironment().clean(function);
    foldFunction = input.getExecutionEnvironment().clean(foldFunction);

    // The call location is resolved directly in this method (not in a helper).
    final String udfName = "WindowedStream." + Utils.getCallLocationName();
    final KeySelector<T, K> keySelector = input.getKeySelector();

    final String operatorName;
    final OneInputStreamOperator<T, R> windowOperator;

    if (evictor == null) {
        // No evictor: the fold is maintained incrementally in folding state.
        FoldingStateDescriptor<T, ACC> foldingState = new FoldingStateDescriptor<>(
                "window-contents",
                initialValue,
                foldFunction,
                foldAccumulatorType.createSerializer(getExecutionEnvironment().getConfig()));

        operatorName = "TriggerWindow(" + windowAssigner + ", " + foldingState + ", " + trigger + ", " + udfName + ")";

        windowOperator = new WindowOperator<>(
                windowAssigner,
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                keySelector,
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                foldingState,
                new InternalSingleValueWindowFunction<>(function),
                trigger,
                allowedLateness,
                lateDataOutputTag);
    } else {
        // With an evictor: buffer the stream records in list state and fold them on evaluation.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        TypeSerializer<StreamRecord<T>> recordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> bufferState = new ListStateDescriptor<>("window-contents", recordSerializer);

        operatorName = "TriggerWindow(" + windowAssigner + ", " + bufferState + ", " + trigger + ", " + evictor + ", " + udfName + ")";

        windowOperator = new EvictingWindowOperator<>(
                windowAssigner,
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                keySelector,
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                bufferState,
                new InternalIterableWindowFunction<>(new FoldApplyWindowFunction<>(initialValue, foldFunction, function, foldAccumulatorType)),
                trigger,
                evictor,
                allowedLateness,
                lateDataOutputTag);
    }

    return input.transform(operatorName, resultType, windowOperator);
}
Also used : InternalSingleValueWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalSingleValueWindowFunction) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) InternalIterableWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalIterableWindowFunction) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StreamElementSerializer(org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichFunction(org.apache.flink.api.common.functions.RichFunction) MergingWindowAssigner(org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner) BaseAlignedWindowAssigner(org.apache.flink.streaming.api.windowing.assigners.BaseAlignedWindowAssigner) PublicEvolving(org.apache.flink.annotation.PublicEvolving)

Example 17 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class EvictingWindowOperator method processElement.

/**
 * Processes one incoming element: asks the window assigner for the windows the element
 * belongs to and, for every such window that is not late, adds the element to the evicting
 * window state, invokes the trigger, emits the window contents if the trigger fires, clears
 * the state if the trigger purges, and registers a cleanup timer for the window.
 *
 * <p>
 * For a {@link MergingWindowAssigner} the element's windows are first added to the merging
 * window set, which may merge them with existing windows; the rest of the processing then
 * works on the resulting (possibly merged) window and its state window.
 *
 * @param element the stream record to add to its windows
 * @throws Exception propagated from the assigner, state, trigger or window function calls
 */
@Override
public void processElement(StreamRecord<IN> element) throws Exception {
    Collection<W> elementWindows = windowAssigner.assignWindows(element.getValue(), element.getTimestamp(), windowAssignerContext);
    // Captured as final so the anonymous merge callback below can use it.
    @SuppressWarnings("unchecked") final K key = (K) getKeyedStateBackend().getCurrentKey();
    if (windowAssigner instanceof MergingWindowAssigner) {
        MergingWindowSet<W> mergingWindows = getMergingWindowSet();
        for (W window : elementWindows) {
            // adding the new window might result in a merge, in that case the actualWindow
            // is the merged window and we work with that. If we don't merge then
            // actualWindow == window
            W actualWindow = mergingWindows.addWindow(window, new MergingWindowSet.MergeFunction<W>() {

                @Override
                public void merge(W mergeResult, Collection<W> mergedWindows, W stateWindowResult, Collection<W> mergedStateWindows) throws Exception {
                    // Notify the trigger context of the merge, with the merge result as current window.
                    context.key = key;
                    context.window = mergeResult;
                    context.onMerge(mergedWindows);
                    // Clear the trigger context and delete the cleanup timer of every window that was merged away.
                    for (W m : mergedWindows) {
                        context.window = m;
                        context.clear();
                        deleteCleanupTimer(m);
                    }
                    // merge the merged state windows into the newly resulting state window
                    evictingWindowState.mergeNamespaces(stateWindowResult, mergedStateWindows);
                }
            });
            // check if the window is already inactive
            if (isWindowLate(actualWindow)) {
                mergingWindows.retireWindow(actualWindow);
                continue;
            }
            // The element is stored under the state window of the (possibly merged) window.
            W stateWindow = mergingWindows.getStateWindow(actualWindow);
            if (stateWindow == null) {
                throw new IllegalStateException("Window " + window + " is not in in-flight window set.");
            }
            evictingWindowState.setCurrentNamespace(stateWindow);
            evictingWindowState.add(element);
            // Point the trigger and evictor contexts at the current key and window before invoking the trigger.
            context.key = key;
            context.window = actualWindow;
            evictorContext.key = key;
            evictorContext.window = actualWindow;
            TriggerResult triggerResult = context.onElement(element);
            if (triggerResult.isFire()) {
                Iterable<StreamRecord<IN>> contents = evictingWindowState.get();
                if (contents == null) {
                    // if we have no state, there is nothing to do
                    // NOTE(review): this continue also skips the purge check and
                    // registerCleanupTimer below for this window.
                    continue;
                }
                emitWindowContents(actualWindow, contents, evictingWindowState);
            }
            if (triggerResult.isPurge()) {
                evictingWindowState.clear();
            }
            registerCleanupTimer(actualWindow);
        }
        // Persist the merging window set once all windows of the element have been handled.
        mergingWindows.persist();
    } else {
        for (W window : elementWindows) {
            // check if the window is already inactive
            if (isWindowLate(window)) {
                continue;
            }
            // Without merging, the window itself is the state namespace.
            evictingWindowState.setCurrentNamespace(window);
            evictingWindowState.add(element);
            // Point the trigger and evictor contexts at the current key and window before invoking the trigger.
            context.key = key;
            context.window = window;
            evictorContext.key = key;
            evictorContext.window = window;
            TriggerResult triggerResult = context.onElement(element);
            if (triggerResult.isFire()) {
                Iterable<StreamRecord<IN>> contents = evictingWindowState.get();
                if (contents == null) {
                    // if we have no state, there is nothing to do
                    // NOTE(review): this continue also skips the purge check and
                    // registerCleanupTimer below for this window.
                    continue;
                }
                emitWindowContents(window, contents, evictingWindowState);
            }
            if (triggerResult.isPurge()) {
                evictingWindowState.clear();
            }
            registerCleanupTimer(window);
        }
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) MergingWindowAssigner(org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner) TriggerResult(org.apache.flink.streaming.api.windowing.triggers.TriggerResult)

Example 18 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class EvictingWindowOperator method onProcessingTime.

/**
 * Handles a processing-time timer: points the trigger and evictor contexts at the timer's key
 * and window, selects the matching state namespace, lets the trigger decide whether to fire
 * and/or purge, and clears all window state when the timer is the window's cleanup timer of a
 * non-event-time assigner.
 *
 * @param timer the timer that fired; its key and namespace identify the key and window
 * @throws Exception propagated from the state, trigger or window function calls
 */
@Override
public void onProcessingTime(InternalTimer<K, W> timer) throws Exception {
    context.key = timer.getKey();
    context.window = timer.getNamespace();
    evictorContext.key = timer.getKey();
    evictorContext.window = timer.getNamespace();

    // Stays null for non-merging assigners.
    MergingWindowSet<W> windowSet = null;
    if (!(windowAssigner instanceof MergingWindowAssigner)) {
        evictingWindowState.setCurrentNamespace(context.window);
    } else {
        windowSet = getMergingWindowSet();
        W backingWindow = windowSet.getStateWindow(context.window);
        if (backingWindow == null) {
            // The merging window set has no state window for the timer's window,
            // so there is nothing to fire or clean up.
            return;
        }
        evictingWindowState.setCurrentNamespace(backingWindow);
    }

    Iterable<StreamRecord<IN>> windowContents = evictingWindowState.get();
    if (windowContents != null) {
        TriggerResult result = context.onProcessingTime(timer.getTimestamp());
        if (result.isFire()) {
            emitWindowContents(context.window, windowContents, evictingWindowState);
        }
        if (result.isPurge()) {
            evictingWindowState.clear();
        }
    }

    // Only non-event-time assigners clean up window state from a processing-time timer.
    if (!windowAssigner.isEventTime()) {
        if (isCleanupTime(context.window, timer.getTimestamp())) {
            clearAllState(context.window, evictingWindowState, windowSet);
        }
    }

    if (windowSet != null) {
        // need to make sure to update the merging state in state
        windowSet.persist();
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TriggerResult(org.apache.flink.streaming.api.windowing.triggers.TriggerResult) MergingWindowAssigner(org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner)

Example 19 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class StreamIterationHead method run.

// ------------------------------------------------------------------------
/**
 * Runs the iteration head: registers a single-slot feedback queue with the
 * {@code BlockingQueueBroker} under an ID derived from the job ID, the iteration ID and the
 * subtask index, then forwards every record taken from that queue to all stream outputs.
 * The loop ends when {@code running} becomes false or, if an iteration wait time is
 * configured, when no record arrives within that time. The queue is always removed from the
 * broker on exit.
 *
 * @throws Exception if the task configuration has no iteration ID, or if reading from the
 *         queue or emitting to an output fails
 */
@Override
protected void run() throws Exception {
    final String iterationId = getConfiguration().getIterationId();
    if (iterationId == null || iterationId.isEmpty()) {
        throw new Exception("Missing iteration ID in the task configuration");
    }

    final String brokerID = createBrokerIdString(
            getEnvironment().getJobID(),
            iterationId,
            getEnvironment().getTaskInfo().getIndexOfThisSubtask());

    // A wait time of zero or less means: block until a record arrives.
    final long iterationWaitTime = getConfiguration().getIterationWaitTime();
    final boolean shouldWait = iterationWaitTime > 0;

    final BlockingQueue<StreamRecord<OUT>> dataChannel = new ArrayBlockingQueue<StreamRecord<OUT>>(1);

    // Publish the queue so the iteration tail can pick it up.
    BlockingQueueBroker.INSTANCE.handIn(brokerID, dataChannel);
    LOG.info("Iteration head {} added feedback queue under {}", getName(), brokerID);

    try {
        @SuppressWarnings("unchecked")
        RecordWriterOutput<OUT>[] outputs = (RecordWriterOutput<OUT>[]) getStreamOutputs();

        // If timestamps are enabled, emit a Long.MAX_VALUE watermark on every output to
        // remove cyclic watermark dependencies.
        if (isSerializingTimestamps()) {
            for (RecordWriterOutput<OUT> output : outputs) {
                output.emitWatermark(new Watermark(Long.MAX_VALUE));
            }
        }

        while (running) {
            final StreamRecord<OUT> nextRecord;
            if (shouldWait) {
                nextRecord = dataChannel.poll(iterationWaitTime, TimeUnit.MILLISECONDS);
            } else {
                nextRecord = dataChannel.take();
            }

            if (nextRecord == null) {
                // Nothing arrived within the wait time: done.
                break;
            }

            for (RecordWriterOutput<OUT> output : outputs) {
                output.collect(nextRecord);
            }
        }
    } finally {
        // make sure that we remove the queue from the broker, to prevent a resource leak
        BlockingQueueBroker.INSTANCE.remove(brokerID);
        LOG.info("Iteration head {} removed feedback queue under {}", getName(), brokerID);
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 20 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class AsyncWaitOperatorTest method testTimeoutCleanup.

/**
 * FLINK-5652
 * Tests that registered timers are properly canceled upon completion of a
 * {@link StreamRecordQueueEntry} in order to avoid resource leaks because TriggerTasks hold
 * a reference on the StreamRecordQueueEntry.
 *
 * <p>
 * The test runs a single element through an {@code AsyncWaitOperator} backed by a mocked
 * {@code ProcessingTimeService} and then checks that the element was emitted, that a timeout
 * timer was registered for {@code timestamp + timeout}, and that the returned
 * {@code ScheduledFuture} was cancelled.
 */
@Test
public void testTimeoutCleanup() throws Exception {
    final Object lock = new Object();
    final long timeout = 100000L;
    final long timestamp = 1L;
    // The mocked time service is stubbed to report `timestamp`, so this is the time the
    // timeout timer is expected to be registered for. It is computed from the constants
    // instead of calling the mock inside the verify(...) argument list further down: a call
    // on the mock while its verification is pending would itself be taken as the verified
    // invocation, so registerTimer would not be the call that gets checked.
    final long expectedTimeoutTimestamp = timestamp + timeout;

    // Mocked task environment.
    Environment environment = mock(Environment.class);
    when(environment.getMetricGroup()).thenReturn(new UnregisteredTaskMetricsGroup());
    when(environment.getTaskManagerInfo()).thenReturn(new TestingTaskManagerRuntimeInfo());
    when(environment.getUserClassLoader()).thenReturn(getClass().getClassLoader());
    when(environment.getTaskInfo()).thenReturn(new TaskInfo("testTask", 1, 0, 1, 0));

    // Mocked time service: fixed current time, and every registered timer yields scheduledFuture.
    ScheduledFuture<?> scheduledFuture = mock(ScheduledFuture.class);
    ProcessingTimeService processingTimeService = mock(ProcessingTimeService.class);
    when(processingTimeService.getCurrentProcessingTime()).thenReturn(timestamp);
    doReturn(scheduledFuture).when(processingTimeService).registerTimer(anyLong(), any(ProcessingTimeCallback.class));

    // Mocked containing task wiring the environment, checkpoint lock and time service together.
    StreamTask<?, ?> containingTask = mock(StreamTask.class);
    when(containingTask.getEnvironment()).thenReturn(environment);
    when(containingTask.getCheckpointLock()).thenReturn(lock);
    when(containingTask.getProcessingTimeService()).thenReturn(processingTimeService);

    StreamConfig streamConfig = mock(StreamConfig.class);
    doReturn(IntSerializer.INSTANCE).when(streamConfig).getTypeSerializerIn1(any(ClassLoader.class));
    Output<StreamRecord<Integer>> output = mock(Output.class);

    // The async function completes immediately with its input.
    AsyncWaitOperator<Integer, Integer> operator = new AsyncWaitOperator<>(new AsyncFunction<Integer, Integer>() {

        private static final long serialVersionUID = -3718276118074877073L;

        @Override
        public void asyncInvoke(Integer input, AsyncCollector<Integer> collector) throws Exception {
            collector.collect(Collections.singletonList(input));
        }
    }, timeout, 1, AsyncDataStream.OutputMode.UNORDERED);
    operator.setup(containingTask, streamConfig, output);
    operator.open();

    final StreamRecord<Integer> streamRecord = new StreamRecord<>(42, timestamp);
    synchronized (lock) {
        // processing an element will register a timeout
        operator.processElement(streamRecord);
    }
    synchronized (lock) {
        // closing the operator waits until all inputs have been processed
        operator.close();
    }

    // check that we actually outputted the result of the single input
    verify(output).collect(eq(streamRecord));
    // check that the timeout timer was registered for the expected time
    verify(processingTimeService).registerTimer(eq(expectedTimeoutTimestamp), any(ProcessingTimeCallback.class));
    // check that we have cancelled our registered timeout
    verify(scheduledFuture).cancel(eq(true));
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) TimeoutException(java.util.concurrent.TimeoutException) ExecutionException(java.util.concurrent.ExecutionException) TaskInfo(org.apache.flink.api.common.TaskInfo) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) ProcessingTimeCallback(org.apache.flink.streaming.runtime.tasks.ProcessingTimeCallback) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) ProcessingTimeService(org.apache.flink.streaming.runtime.tasks.ProcessingTimeService) Environment(org.apache.flink.runtime.execution.Environment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamMockEnvironment(org.apache.flink.streaming.runtime.tasks.StreamMockEnvironment) Test(org.junit.Test)

Aggregations

StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)76 Test (org.junit.Test)50 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)27 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)27 StreamElementSerializer (org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer)27 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)21 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)20 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)19 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)19 Watermark (org.apache.flink.streaming.api.watermark.Watermark)17 RichFunction (org.apache.flink.api.common.functions.RichFunction)16 ArrayList (java.util.ArrayList)14 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)14 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)14 Map (java.util.Map)11 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)11 Event (org.apache.flink.cep.Event)11 HashMap (java.util.HashMap)10 PublicEvolving (org.apache.flink.annotation.PublicEvolving)9 MergingWindowAssigner (org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner)9