Search in sources :

Example 66 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class WindowedStream method aggregate.

/**
 * Applies the given window function to each window. The window function is invoked for every
 * evaluation of the window, once per key, and its output is interpreted as a regular
 * non-windowed stream.
 *
 * <p>Arriving data is incrementally aggregated using the given aggregate function, so the
 * window function typically has only a single value to process when called.
 *
 * <p>If an evictor is configured, the window contents are kept as a list of stream records and
 * the aggregate function is applied through an {@code AggregateApplyWindowFunction};
 * otherwise an aggregating state is used together with the plain window operator.
 *
 * @param aggregateFunction The aggregation function that is used for incremental aggregation.
 *                          Must not be a {@code RichFunction}.
 * @param windowFunction The window function.
 * @param accumulatorType Type information for the internal accumulator type of the
 *                        aggregation function.
 * @param aggregateResultType Type information for the result type of the aggregation function.
 *                            It is checked for null here but not otherwise used by this method.
 * @param resultType Type information for the result type of the window function.
 *
 * @return The data stream that is the result of applying the window function to the window.
 *
 * @throws UnsupportedOperationException if the aggregate function is a {@code RichFunction}.
 *
 * @param <ACC> The type of the AggregateFunction's accumulator
 * @param <V> The type of AggregateFunction's result, and the WindowFunction's input
 * @param <R> The type of the elements in the resulting stream, equal to the
 *            WindowFunction's result type
 */
@PublicEvolving
public <ACC, V, R> SingleOutputStreamOperator<R> aggregate(
        AggregateFunction<T, ACC, V> aggregateFunction,
        WindowFunction<V, R, K, W> windowFunction,
        TypeInformation<ACC> accumulatorType,
        TypeInformation<V> aggregateResultType,
        TypeInformation<R> resultType) {

    // every argument is mandatory
    checkNotNull(aggregateFunction, "aggregateFunction");
    checkNotNull(windowFunction, "windowFunction");
    checkNotNull(accumulatorType, "accumulatorType");
    checkNotNull(aggregateResultType, "aggregateResultType");
    checkNotNull(resultType, "resultType");

    if (aggregateFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("This aggregate function cannot be a RichFunction.");
    }

    // clean the closures of both user functions
    windowFunction = input.getExecutionEnvironment().clean(windowFunction);
    aggregateFunction = input.getExecutionEnvironment().clean(aggregateFunction);

    final String udfName = "WindowedStream." + Utils.getCallLocationName();
    final KeySelector<T, K> keySelector = input.getKeySelector();

    final String operatorName;
    final OneInputStreamOperator<T, R> windowOperator;

    if (evictor == null) {
        // no evictor: aggregate incrementally inside an aggregating state
        AggregatingStateDescriptor<T, ACC, V> aggregatingState = new AggregatingStateDescriptor<>(
                "window-contents",
                aggregateFunction,
                accumulatorType.createSerializer(getExecutionEnvironment().getConfig()));

        operatorName = "TriggerWindow(" + windowAssigner + ", " + aggregatingState + ", " + trigger + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer = windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<K> keySerializer = input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        windowOperator = new WindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                aggregatingState,
                new InternalSingleValueWindowFunction<>(windowFunction),
                trigger,
                allowedLateness,
                lateDataOutputTag);
    } else {
        // evictor present: keep the raw stream records in list state and aggregate on evaluation
        @SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> recordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> listState = new ListStateDescriptor<>("window-contents", recordSerializer);

        operatorName = "TriggerWindow(" + windowAssigner + ", " + listState + ", " + trigger + ", " + evictor + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer = windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<K> keySerializer = input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        windowOperator = new EvictingWindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                listState,
                new InternalIterableWindowFunction<>(new AggregateApplyWindowFunction<>(aggregateFunction, windowFunction)),
                trigger,
                evictor,
                allowedLateness,
                lateDataOutputTag);
    }

    return input.transform(operatorName, resultType, windowOperator);
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichFunction(org.apache.flink.api.common.functions.RichFunction) AggregatingStateDescriptor(org.apache.flink.api.common.state.AggregatingStateDescriptor) InternalSingleValueWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalSingleValueWindowFunction) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) InternalIterableWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalIterableWindowFunction) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StreamElementSerializer(org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer) PublicEvolving(org.apache.flink.annotation.PublicEvolving)

Example 67 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class OperatorChain method createOutputCollector.

// ------------------------------------------------------------------------
//  initialization utilities
// ------------------------------------------------------------------------
/**
 * Builds the output that an operator of this chain emits its records to.
 *
 * <p>One output is gathered per non-chained (network) edge and one per chained edge; chained
 * edges are resolved through {@code createChainedOperator}. If output selectors are configured,
 * all gathered outputs are wrapped in a directed output. Without selectors, a single gathered
 * output is returned as is, and any other number of outputs (including zero) is wrapped in a
 * broadcasting collector. When object reuse is enabled, the copying variants of the wrappers
 * are used.
 *
 * @param containingTask the task the chain runs in; consulted for the execution config
 * @param operatorConfig the configuration of the operator whose output is created
 * @param chainedConfigs configurations of the chained operators, looked up by edge target id
 * @param userCodeClassloader class loader used to read edges and selectors from the config
 * @param streamOutputs the record writer outputs, looked up by non-chained edge
 * @param allOperators passed through to {@code createChainedOperator}
 * @param <T> the type of the records emitted to the output
 * @return the output to hand to the operator
 */
private <T> Output<StreamRecord<T>> createOutputCollector(
        StreamTask<?, ?> containingTask,
        StreamConfig operatorConfig,
        Map<Integer, StreamConfig> chainedConfigs,
        ClassLoader userCodeClassloader,
        Map<StreamEdge, RecordWriterOutput<?>> streamOutputs,
        List<StreamOperator<?>> allOperators) {

    List<Tuple2<Output<StreamRecord<T>>, StreamEdge>> collected = new ArrayList<>(4);

    // network outputs: one record writer output per non-chained edge
    for (StreamEdge networkEdge : operatorConfig.getNonChainedOutputs(userCodeClassloader)) {
        @SuppressWarnings("unchecked") RecordWriterOutput<T> writerOutput = (RecordWriterOutput<T>) streamOutputs.get(networkEdge);
        // explicit type arguments: the tuple must be typed on Output, not RecordWriterOutput
        collected.add(new Tuple2<Output<StreamRecord<T>>, StreamEdge>(writerOutput, networkEdge));
    }

    // chained outputs: create the chained operator behind each chained edge
    for (StreamEdge chainedEdge : operatorConfig.getChainedOutputs(userCodeClassloader)) {
        int targetId = chainedEdge.getTargetId();
        StreamConfig targetConfig = chainedConfigs.get(targetId);
        Output<StreamRecord<T>> chainedOutput = createChainedOperator(
                containingTask,
                targetConfig,
                chainedConfigs,
                userCodeClassloader,
                streamOutputs,
                allOperators,
                chainedEdge.getOutputTag());
        collected.add(new Tuple2<>(chainedOutput, chainedEdge));
    }

    // multiple or directed outputs have to be wrapped as one output
    List<OutputSelector<T>> selectors = operatorConfig.getOutputSelectors(userCodeClassloader);
    boolean directed = selectors != null && !selectors.isEmpty();

    if (directed) {
        // with object reuse enabled the copying variant is used,
        // otherwise multi-chaining would not work correctly
        if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
            return new CopyingDirectedOutput<>(selectors, collected);
        }
        return new DirectedOutput<>(selectors, collected);
    }

    // simple path, no selector necessary
    if (collected.size() == 1) {
        return collected.get(0).f0;
    }

    // send to N outputs; note that this includes the special case of sending to zero outputs
    @SuppressWarnings({ "unchecked", "rawtypes" }) Output<StreamRecord<T>>[] fanOut = new Output[collected.size()];
    int slot = 0;
    for (Tuple2<Output<StreamRecord<T>>, StreamEdge> entry : collected) {
        fanOut[slot++] = entry.f0;
    }

    // with object reuse enabled the copying variant is used,
    // otherwise multi-chaining would not work correctly
    if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
        return new CopyingBroadcastingOutputCollector<>(fanOut, this);
    }
    return new BroadcastingOutputCollector<>(fanOut, this);
}
Also used : CopyingDirectedOutput(org.apache.flink.streaming.api.collector.selector.CopyingDirectedOutput) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ArrayList(java.util.ArrayList) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) CopyingDirectedOutput(org.apache.flink.streaming.api.collector.selector.CopyingDirectedOutput) DirectedOutput(org.apache.flink.streaming.api.collector.selector.DirectedOutput) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput) OutputSelector(org.apache.flink.streaming.api.collector.selector.OutputSelector) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Output(org.apache.flink.streaming.api.operators.Output) CopyingDirectedOutput(org.apache.flink.streaming.api.collector.selector.CopyingDirectedOutput) DirectedOutput(org.apache.flink.streaming.api.collector.selector.DirectedOutput) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput)

Example 68 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class EvictingWindowOperator method onEventTime.

/**
 * Handles a firing event-time timer.
 *
 * <p>The key and window of the timer are installed in both the trigger context and the evictor
 * context, the window state is pointed at the matching namespace, and the trigger is asked for
 * its decision. Depending on that decision the window contents are emitted and/or the window
 * state is cleared. Finally, all state of the window is cleaned up once its cleanup time is
 * reached, and the merging window set (if any) is persisted.
 *
 * @param timer the timer that fired; carries the key, the window (namespace) and the timestamp
 */
@Override
public void onEventTime(InternalTimer<K, W> timer) throws Exception {
    context.key = timer.getKey();
    context.window = timer.getNamespace();
    evictorContext.key = timer.getKey();
    evictorContext.window = timer.getNamespace();

    // stays null unless the assigner is a merging assigner
    MergingWindowSet<W> windowSet = null;

    if (!(windowAssigner instanceof MergingWindowAssigner)) {
        // plain windows keep their state directly under the window itself
        evictingWindowState.setCurrentNamespace(context.window);
    } else {
        windowSet = getMergingWindowSet();
        W backingWindow = windowSet.getStateWindow(context.window);
        if (backingWindow == null) {
            // the merging window set knows no state window for this timer's window,
            // so there is no window state to evaluate and nothing to do
            return;
        }
        evictingWindowState.setCurrentNamespace(backingWindow);
    }

    Iterable<StreamRecord<IN>> bufferedRecords = evictingWindowState.get();
    if (bufferedRecords != null) {
        TriggerResult decision = context.onEventTime(timer.getTimestamp());
        if (decision.isFire()) {
            emitWindowContents(context.window, bufferedRecords, evictingWindowState);
        }
        if (decision.isPurge()) {
            evictingWindowState.clear();
        }
    }

    boolean cleanupDue = windowAssigner.isEventTime() && isCleanupTime(context.window, timer.getTimestamp());
    if (cleanupDue) {
        clearAllState(context.window, evictingWindowState, windowSet);
    }

    if (windowSet != null) {
        // need to make sure to update the merging state in state
        windowSet.persist();
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TriggerResult(org.apache.flink.streaming.api.windowing.triggers.TriggerResult) MergingWindowAssigner(org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner)

Example 69 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class AsyncWaitOperatorTest method testClosingWithBlockedEmitter.

/**
 * Test case for FLINK-5638: verifies that the async wait operator can be closed even if the
 * emitter is currently waiting on the checkpoint lock (e.g. in the case of two chained async
 * wait operators where the latter operator's queue is currently full).
 *
 * <p>Note that this test does not enforce the exact strict ordering because with the fix it is
 * no longer possible. However, it provokes the described situation without the fix.
 *
 * <p>The mocked output blocks inside {@code collect} (waiting on the checkpoint lock) until the
 * closing latch is triggered; the test then closes the operator while holding that lock and
 * checks that the environment was never failed externally.
 */
@Test(timeout = 10000L)
public void testClosingWithBlockedEmitter() throws Exception {
    final Object checkpointLock = new Object();
    ArgumentCaptor<Throwable> capturedFailure = ArgumentCaptor.forClass(Throwable.class);

    // environment mock; captures any failure reported through failExternally
    Environment env = mock(Environment.class);
    when(env.getMetricGroup()).thenReturn(new UnregisteredTaskMetricsGroup());
    when(env.getTaskManagerInfo()).thenReturn(new TestingTaskManagerRuntimeInfo());
    when(env.getUserClassLoader()).thenReturn(getClass().getClassLoader());
    when(env.getTaskInfo()).thenReturn(new TaskInfo("testTask", 1, 0, 1, 0));
    doNothing().when(env).failExternally(capturedFailure.capture());

    // task mock that hands out the environment and the checkpoint lock
    StreamTask<?, ?> task = mock(StreamTask.class);
    when(task.getEnvironment()).thenReturn(env);
    when(task.getCheckpointLock()).thenReturn(checkpointLock);
    when(task.getProcessingTimeService()).thenReturn(new TestProcessingTimeService());

    StreamConfig config = mock(StreamConfig.class);
    doReturn(IntSerializer.INSTANCE).when(config).getTypeSerializerIn1(any(ClassLoader.class));

    final OneShotLatch closingLatch = new OneShotLatch();
    final OneShotLatch outputLatch = new OneShotLatch();

    // the output signals that it was reached and then blocks until the operator is closing
    final Answer<Object> blockUntilClosing = new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            assertTrue("Output should happen under the checkpoint lock.", Thread.currentThread().holdsLock(checkpointLock));
            outputLatch.trigger();
            // wait until we're in the closing method of the operator
            while (!closingLatch.isTriggered()) {
                checkpointLock.wait();
            }
            return null;
        }
    };

    Output<StreamRecord<Integer>> blockingOutput = mock(Output.class);
    doAnswer(blockUntilClosing).when(blockingOutput).collect(any(StreamRecord.class));

    AsyncWaitOperator<Integer, Integer> asyncOperator = new TestAsyncWaitOperator<>(
            new MyAsyncFunction(),
            1000L,
            1,
            AsyncDataStream.OutputMode.ORDERED,
            closingLatch);
    asyncOperator.setup(task, config, blockingOutput);
    asyncOperator.open();

    synchronized (checkpointLock) {
        asyncOperator.processElement(new StreamRecord<>(42));
    }

    // proceed only once the output has been reached
    outputLatch.await();

    synchronized (checkpointLock) {
        asyncOperator.close();
    }

    // check that no concurrent exception has occurred
    try {
        verify(env, never()).failExternally(any(Throwable.class));
    } catch (Error verificationError) {
        // add the exception occurring in the emitter thread (root cause) as a suppressed
        // exception
        verificationError.addSuppressed(capturedFailure.getValue());
        throw verificationError;
    }
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Environment(org.apache.flink.runtime.execution.Environment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamMockEnvironment(org.apache.flink.streaming.runtime.tasks.StreamMockEnvironment) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Test(org.junit.Test)

Example 70 with StreamRecord

use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.

the class EmitterTest method testEmitterWithOrderedQueue.

/**
 * Tests that the emitter outputs completed stream element queue entries.
 *
 * <p>Three record entries and one watermark entry are put into an ordered queue and the record
 * entries are then completed in a different order than they were queued. The collected output
 * is expected to follow the order in which the entries were put into the queue.
 */
@Test
public void testEmitterWithOrderedQueue() throws Exception {
    Object checkpointLock = new Object();

    List<StreamElement> collected = new ArrayList<>();
    Output<StreamRecord<Integer>> collectingOutput = new CollectorOutput<>(collected);

    List<StreamElement> expectedOutput = Arrays.asList(
            new StreamRecord<>(1, 0L),
            new StreamRecord<>(2, 0L),
            new StreamRecord<>(3, 1L),
            new StreamRecord<>(4, 1L),
            new Watermark(3L),
            new StreamRecord<>(5, 4L),
            new StreamRecord<>(6, 4L));

    OperatorActions actions = mock(OperatorActions.class);

    final int capacity = 5;
    StreamElementQueue elementQueue = new OrderedStreamElementQueue(capacity, executor, actions);

    final Emitter<Integer> emitter = new Emitter<>(checkpointLock, collectingOutput, elementQueue, actions);
    final Thread emitterThread = new Thread(emitter);
    emitterThread.start();

    try {
        StreamRecordQueueEntry<Integer> firstEntry = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L));
        StreamRecordQueueEntry<Integer> secondEntry = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L));
        WatermarkQueueEntry watermarkEntry = new WatermarkQueueEntry(new Watermark(3L));
        StreamRecordQueueEntry<Integer> thirdEntry = new StreamRecordQueueEntry<>(new StreamRecord<>(3, 4L));

        // enqueue in order: record, record, watermark, record
        elementQueue.put(firstEntry);
        elementQueue.put(secondEntry);
        elementQueue.put(watermarkEntry);
        elementQueue.put(thirdEntry);

        // complete the record entries out of order
        secondEntry.collect(Arrays.asList(3, 4));
        firstEntry.collect(Arrays.asList(1, 2));
        thirdEntry.collect(Arrays.asList(5, 6));

        // wait under the lock until the queue has been drained
        synchronized (checkpointLock) {
            while (!elementQueue.isEmpty()) {
                checkpointLock.wait();
            }
        }

        Assert.assertEquals(expectedOutput, collected);
    } finally {
        emitter.stop();
        emitterThread.interrupt();
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ArrayList(java.util.ArrayList) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) StreamElementQueue(org.apache.flink.streaming.api.operators.async.queue.StreamElementQueue) OrderedStreamElementQueue(org.apache.flink.streaming.api.operators.async.queue.OrderedStreamElementQueue) OrderedStreamElementQueue(org.apache.flink.streaming.api.operators.async.queue.OrderedStreamElementQueue) CollectorOutput(org.apache.flink.streaming.util.CollectorOutput) Watermark(org.apache.flink.streaming.api.watermark.Watermark) StreamRecordQueueEntry(org.apache.flink.streaming.api.operators.async.queue.StreamRecordQueueEntry) WatermarkQueueEntry(org.apache.flink.streaming.api.operators.async.queue.WatermarkQueueEntry) Test(org.junit.Test)

Aggregations

StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)76 Test (org.junit.Test)50 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)27 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)27 StreamElementSerializer (org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer)27 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)21 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)20 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)19 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)19 Watermark (org.apache.flink.streaming.api.watermark.Watermark)17 RichFunction (org.apache.flink.api.common.functions.RichFunction)16 ArrayList (java.util.ArrayList)14 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)14 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)14 Map (java.util.Map)11 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)11 Event (org.apache.flink.cep.Event)11 HashMap (java.util.HashMap)10 PublicEvolving (org.apache.flink.annotation.PublicEvolving)9 MergingWindowAssigner (org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner)9