Use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
The class WindowedStream, method aggregate.
/**
* Applies the given window function to each window. The window function is called for each
* evaluation of the window for each key individually. The output of the window function is
* interpreted as a regular non-windowed stream.
*
* <p>Arriving data is incrementally aggregated using the given aggregate function. This means
* that the window function typically has only a single value to process when called.
*
* @param aggregateFunction The aggregation function that is used for incremental aggregation.
* @param windowFunction The window function.
* @param accumulatorType Type information for the internal accumulator type of the aggregate function
* @param aggregateResultType Type information for the result type of the aggregate function
* @param resultType Type information for the result type of the window function
*
* @return The data stream that is the result of applying the window function to the window.
*
* @param <ACC> The type of the AggregateFunction's accumulator
* @param <V> The type of AggregateFunction's result, and the WindowFunction's input
* @param <R> The type of the elements in the resulting stream, equal to the
* WindowFunction's result type
*/
@PublicEvolving
public <ACC, V, R> SingleOutputStreamOperator<R> aggregate(
        AggregateFunction<T, ACC, V> aggregateFunction,
        WindowFunction<V, R, K, W> windowFunction,
        TypeInformation<ACC> accumulatorType,
        TypeInformation<V> aggregateResultType,
        TypeInformation<R> resultType) {

    checkNotNull(aggregateFunction, "aggregateFunction");
    checkNotNull(windowFunction, "windowFunction");
    checkNotNull(accumulatorType, "accumulatorType");
    checkNotNull(aggregateResultType, "aggregateResultType");
    checkNotNull(resultType, "resultType");

    if (aggregateFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("This aggregate function cannot be a RichFunction.");
    }

    // clean the closures
    windowFunction = input.getExecutionEnvironment().clean(windowFunction);
    aggregateFunction = input.getExecutionEnvironment().clean(aggregateFunction);

    String callLocation = Utils.getCallLocationName();
    String udfName = "WindowedStream." + callLocation;

    String opName;
    KeySelector<T, K> keySel = input.getKeySelector();

    OneInputStreamOperator<T, R> operator;

    if (evictor != null) {
        @SuppressWarnings({"unchecked", "rawtypes"})
        TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>)
                new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));

        ListStateDescriptor<StreamRecord<T>> stateDesc =
                new ListStateDescriptor<>("window-contents", streamRecordSerializer);

        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";

        operator = new EvictingWindowOperator<>(
                windowAssigner,
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                keySel,
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                stateDesc,
                new InternalIterableWindowFunction<>(new AggregateApplyWindowFunction<>(aggregateFunction, windowFunction)),
                trigger,
                evictor,
                allowedLateness,
                lateDataOutputTag);
    } else {
        AggregatingStateDescriptor<T, ACC, V> stateDesc = new AggregatingStateDescriptor<>(
                "window-contents",
                aggregateFunction,
                accumulatorType.createSerializer(getExecutionEnvironment().getConfig()));

        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";

        operator = new WindowOperator<>(
                windowAssigner,
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
                keySel,
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                stateDesc,
                new InternalSingleValueWindowFunction<>(windowFunction),
                trigger,
                allowedLateness,
                lateDataOutputTag);
    }

    return input.transform(opName, resultType, operator);
}
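At the call site, this overload is typically reached through the simpler two-argument aggregate(AggregateFunction, WindowFunction), which derives the type information automatically. A minimal usage sketch, assuming input is a DataStream<Tuple2<String, Long>>; the AverageAggregate class and the pipeline around it are illustrative, not taken from the snippet above (note also that AggregateFunction.add returns the accumulator in Flink 1.4+, while it returned void in 1.3):

// Hedged sketch: incremental per-key average over one-minute tumbling windows.
public class AverageAggregate
        implements AggregateFunction<Tuple2<String, Long>, Tuple2<Long, Long>, Double> {

    @Override
    public Tuple2<Long, Long> createAccumulator() {
        return new Tuple2<>(0L, 0L);
    }

    @Override
    public Tuple2<Long, Long> add(Tuple2<String, Long> value, Tuple2<Long, Long> acc) {
        return new Tuple2<>(acc.f0 + value.f1, acc.f1 + 1L);
    }

    @Override
    public Double getResult(Tuple2<Long, Long> acc) {
        return acc.f1 == 0L ? 0.0 : ((double) acc.f0) / acc.f1;
    }

    @Override
    public Tuple2<Long, Long> merge(Tuple2<Long, Long> a, Tuple2<Long, Long> b) {
        return new Tuple2<>(a.f0 + b.f0, a.f1 + b.f1);
    }
}

// Because the aggregation runs incrementally, the WindowFunction sees exactly
// one pre-aggregated value per window:
DataStream<Double> averages = input
        .keyBy(0)
        .window(TumblingEventTimeWindows.of(Time.minutes(1)))
        .aggregate(new AverageAggregate(), new WindowFunction<Double, Double, Tuple, TimeWindow>() {
            @Override
            public void apply(Tuple key, TimeWindow window, Iterable<Double> values, Collector<Double> out) {
                out.collect(values.iterator().next()); // the single incremental result
            }
        });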
Use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
The class OperatorChain, method createOutputCollector.
// ------------------------------------------------------------------------
// initialization utilities
// ------------------------------------------------------------------------
private <T> Output<StreamRecord<T>> createOutputCollector(
        StreamTask<?, ?> containingTask,
        StreamConfig operatorConfig,
        Map<Integer, StreamConfig> chainedConfigs,
        ClassLoader userCodeClassloader,
        Map<StreamEdge, RecordWriterOutput<?>> streamOutputs,
        List<StreamOperator<?>> allOperators) {

    List<Tuple2<Output<StreamRecord<T>>, StreamEdge>> allOutputs = new ArrayList<>(4);

    // create collectors for the network outputs
    for (StreamEdge outputEdge : operatorConfig.getNonChainedOutputs(userCodeClassloader)) {
        @SuppressWarnings("unchecked")
        RecordWriterOutput<T> output = (RecordWriterOutput<T>) streamOutputs.get(outputEdge);
        allOutputs.add(new Tuple2<Output<StreamRecord<T>>, StreamEdge>(output, outputEdge));
    }

    // create collectors for the chained outputs
    for (StreamEdge outputEdge : operatorConfig.getChainedOutputs(userCodeClassloader)) {
        int outputId = outputEdge.getTargetId();
        StreamConfig chainedOpConfig = chainedConfigs.get(outputId);

        Output<StreamRecord<T>> output = createChainedOperator(
                containingTask, chainedOpConfig, chainedConfigs,
                userCodeClassloader, streamOutputs, allOperators, outputEdge.getOutputTag());
        allOutputs.add(new Tuple2<>(output, outputEdge));
    }

    // if there are multiple outputs, or the outputs are directed, we need to
    // wrap them as one output
    List<OutputSelector<T>> selectors = operatorConfig.getOutputSelectors(userCodeClassloader);

    if (selectors == null || selectors.isEmpty()) {
        // simple path, no selector necessary
        if (allOutputs.size() == 1) {
            return allOutputs.get(0).f0;
        } else {
            // send to N outputs. Note that this includes the special case
            // of sending to zero outputs
            @SuppressWarnings({"unchecked", "rawtypes"})
            Output<StreamRecord<T>>[] asArray = new Output[allOutputs.size()];
            for (int i = 0; i < allOutputs.size(); i++) {
                asArray[i] = allOutputs.get(i).f0;
            }

            // if the chaining output does not copy, we need to copy in the broadcast output,
            // otherwise multi-chaining would not work correctly.
            if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
                return new CopyingBroadcastingOutputCollector<>(asArray, this);
            } else {
                return new BroadcastingOutputCollector<>(asArray, this);
            }
        }
    } else {
        // selector present, route via a directed output; the same copy-on-reuse rule
        // applies, otherwise multi-chaining would not work correctly.
        if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
            return new CopyingDirectedOutput<>(selectors, allOutputs);
        } else {
            return new DirectedOutput<>(selectors, allOutputs);
        }
    }
}
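The object-reuse branch at the end is the subtle part: when ExecutionConfig#isObjectReuseEnabled() is true, the same StreamRecord instance may be reused and mutated after collect() returns, so each fan-out target needs its own copy. A simplified, illustrative sketch of the distinction (not Flink's actual collector classes; the real implementations differ in detail, e.g. what exactly gets copied):

// Illustrative only: condenses what BroadcastingOutputCollector and its
// copying variant differ on.
static <T> void broadcast(
        Output<StreamRecord<T>>[] outputs,
        StreamRecord<T> record,
        boolean objectReuseEnabled) {

    for (Output<StreamRecord<T>> out : outputs) {
        if (objectReuseEnabled) {
            // upstream may reuse and mutate 'record' after collect() returns,
            // so each target gets its own StreamRecord wrapper
            out.collect(record.copy(record.getValue()));
        } else {
            // each record instance is used exactly once upstream,
            // so the same instance can safely be shared across all targets
            out.collect(record);
        }
    }
}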
Use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
The class EvictingWindowOperator, method onEventTime.
@Override
public void onEventTime(InternalTimer<K, W> timer) throws Exception {
    context.key = timer.getKey();
    context.window = timer.getNamespace();
    evictorContext.key = timer.getKey();
    evictorContext.window = timer.getNamespace();

    MergingWindowSet<W> mergingWindows = null;

    if (windowAssigner instanceof MergingWindowAssigner) {
        mergingWindows = getMergingWindowSet();
        W stateWindow = mergingWindows.getStateWindow(context.window);
        if (stateWindow == null) {
            // Timer firing for a non-existent window; this can only happen if a
            // trigger did not clean up its timers. We have already cleared the merging
            // window and therefore the Trigger state, however, so nothing to do.
            return;
        } else {
            evictingWindowState.setCurrentNamespace(stateWindow);
        }
    } else {
        evictingWindowState.setCurrentNamespace(context.window);
    }

    Iterable<StreamRecord<IN>> contents = evictingWindowState.get();

    if (contents != null) {
        TriggerResult triggerResult = context.onEventTime(timer.getTimestamp());
        if (triggerResult.isFire()) {
            emitWindowContents(context.window, contents, evictingWindowState);
        }
        if (triggerResult.isPurge()) {
            evictingWindowState.clear();
        }
    }

    if (windowAssigner.isEventTime() && isCleanupTime(context.window, timer.getTimestamp())) {
        clearAllState(context.window, evictingWindowState, mergingWindows);
    }

    if (mergingWindows != null) {
        // need to make sure to update the merging state in state
        mergingWindows.persist();
    }
}
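For context, a job reaches this code path by attaching an evictor to a keyed window, which makes WindowedStream build an EvictingWindowOperator (storing raw StreamRecords, as the aggregate snippet above shows). A hedged end-to-end sketch; the data, timestamp extraction, and reduce logic are illustrative assumptions:

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

DataStream<Tuple2<String, Integer>> input = env
        .fromElements(Tuple2.of("a", 1), Tuple2.of("a", 2), Tuple2.of("b", 3))
        .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<String, Integer>>() {
            @Override
            public long extractAscendingTimestamp(Tuple2<String, Integer> element) {
                return element.f1; // illustrative: treat the value as an event-time millisecond
            }
        });

input.keyBy(0)
     .window(TumblingEventTimeWindows.of(Time.seconds(10)))
     .trigger(EventTimeTrigger.create())   // its event-time timers fire onEventTime above
     .evictor(CountEvictor.of(100))        // attaching an evictor selects EvictingWindowOperator
     .reduce((a, b) -> Tuple2.of(a.f0, a.f1 + b.f1))
     .print();

env.execute("evicting-window-sketch");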
Use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
The class AsyncWaitOperatorTest, method testClosingWithBlockedEmitter.
/**
* Test case for FLINK-5638: Tests that the async wait operator can be closed even if the
* emitter is currently waiting on the checkpoint lock (e.g. in the case of two chained async
* wait operators where the latter operator's queue is currently full).
*
* <p>Note that this test does not enforce an exact strict ordering, because that is no
* longer possible with the fix in place. However, it provokes the described situation without the fix.
*/
@Test(timeout = 10000L)
public void testClosingWithBlockedEmitter() throws Exception {
    final Object lock = new Object();

    ArgumentCaptor<Throwable> failureReason = ArgumentCaptor.forClass(Throwable.class);

    Environment environment = mock(Environment.class);
    when(environment.getMetricGroup()).thenReturn(new UnregisteredTaskMetricsGroup());
    when(environment.getTaskManagerInfo()).thenReturn(new TestingTaskManagerRuntimeInfo());
    when(environment.getUserClassLoader()).thenReturn(getClass().getClassLoader());
    when(environment.getTaskInfo()).thenReturn(new TaskInfo("testTask", 1, 0, 1, 0));
    doNothing().when(environment).failExternally(failureReason.capture());

    StreamTask<?, ?> containingTask = mock(StreamTask.class);
    when(containingTask.getEnvironment()).thenReturn(environment);
    when(containingTask.getCheckpointLock()).thenReturn(lock);
    when(containingTask.getProcessingTimeService()).thenReturn(new TestProcessingTimeService());

    StreamConfig streamConfig = mock(StreamConfig.class);
    doReturn(IntSerializer.INSTANCE).when(streamConfig).getTypeSerializerIn1(any(ClassLoader.class));

    final OneShotLatch closingLatch = new OneShotLatch();
    final OneShotLatch outputLatch = new OneShotLatch();

    Output<StreamRecord<Integer>> output = mock(Output.class);
    doAnswer(new Answer() {
        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            assertTrue("Output should happen under the checkpoint lock.",
                    Thread.currentThread().holdsLock(lock));

            outputLatch.trigger();

            // wait until we're in the closing method of the operator
            while (!closingLatch.isTriggered()) {
                lock.wait();
            }

            return null;
        }
    }).when(output).collect(any(StreamRecord.class));

    AsyncWaitOperator<Integer, Integer> operator = new TestAsyncWaitOperator<>(
            new MyAsyncFunction(), 1000L, 1, AsyncDataStream.OutputMode.ORDERED, closingLatch);

    operator.setup(containingTask, streamConfig, output);
    operator.open();

    synchronized (lock) {
        operator.processElement(new StreamRecord<>(42));
    }

    outputLatch.await();

    synchronized (lock) {
        operator.close();
    }

    // check that no concurrent exception has occurred
    try {
        verify(environment, never()).failExternally(any(Throwable.class));
    } catch (Error e) {
        // add the exception occurring in the emitter thread (root cause) as a
        // suppressed exception
        e.addSuppressed(failureReason.getValue());
        throw e;
    }
}
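The user-facing entry point for the operator under test is AsyncDataStream. A hedged sketch of ordered async I/O, assuming an existing env and using a doubling "lookup" as a stand-in for MyAsyncFunction; note that the callback type changed across Flink versions (AsyncCollector in early releases, ResultFuture later), and this uses the newer shape:

DataStream<Integer> in = env.fromElements(1, 2, 3);

DataStream<Integer> out = AsyncDataStream.orderedWait(
        in,
        new AsyncFunction<Integer, Integer>() {
            @Override
            public void asyncInvoke(Integer value, ResultFuture<Integer> resultFuture) {
                // stand-in for a real asynchronous lookup (e.g. a database client)
                CompletableFuture
                        .supplyAsync(() -> value * 2)
                        .thenAccept(result -> resultFuture.complete(Collections.singleton(result)));
            }
        },
        1000L, TimeUnit.MILLISECONDS,  // timeout per element
        1);                            // queue capacity of 1, matching the test above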
Use of org.apache.flink.streaming.runtime.streamrecord.StreamRecord in project flink by apache.
The class EmitterTest, method testEmitterWithOrderedQueue.
/**
 * Tests that the emitter outputs completed stream element queue entries in the order in
 * which they were enqueued, even when the entries complete out of order.
 */
@Test
public void testEmitterWithOrderedQueue() throws Exception {
    Object lock = new Object();
    List<StreamElement> list = new ArrayList<>();
    Output<StreamRecord<Integer>> output = new CollectorOutput<>(list);

    List<StreamElement> expected = Arrays.asList(
            new StreamRecord<>(1, 0L),
            new StreamRecord<>(2, 0L),
            new StreamRecord<>(3, 1L),
            new StreamRecord<>(4, 1L),
            new Watermark(3L),
            new StreamRecord<>(5, 4L),
            new StreamRecord<>(6, 4L));

    OperatorActions operatorActions = mock(OperatorActions.class);

    final int capacity = 5;

    StreamElementQueue queue = new OrderedStreamElementQueue(capacity, executor, operatorActions);

    final Emitter<Integer> emitter = new Emitter<>(lock, output, queue, operatorActions);

    final Thread emitterThread = new Thread(emitter);
    emitterThread.start();

    try {
        StreamRecordQueueEntry<Integer> record1 = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L));
        StreamRecordQueueEntry<Integer> record2 = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L));
        WatermarkQueueEntry watermark1 = new WatermarkQueueEntry(new Watermark(3L));
        StreamRecordQueueEntry<Integer> record3 = new StreamRecordQueueEntry<>(new StreamRecord<>(3, 4L));

        queue.put(record1);
        queue.put(record2);
        queue.put(watermark1);
        queue.put(record3);

        // complete the entries out of order; the ordered queue must still emit
        // them in insertion order
        record2.collect(Arrays.asList(3, 4));
        record1.collect(Arrays.asList(1, 2));
        record3.collect(Arrays.asList(5, 6));

        synchronized (lock) {
            while (!queue.isEmpty()) {
                lock.wait();
            }
        }

        Assert.assertEquals(expected, list);
    } finally {
        emitter.stop();
        emitterThread.interrupt();
    }
}
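For contrast, the unordered queue variant emits results in completion order rather than insertion order, with watermarks as the only reordering barriers. A hedged sketch, assuming UnorderedStreamElementQueue shares the constructor shape used above:

// Assumption: same (capacity, executor, operatorActions) constructor as the ordered queue.
StreamElementQueue unorderedQueue =
        new UnorderedStreamElementQueue(capacity, executor, operatorActions);

// With this queue, completing record2 before record1 (as in the test above) could
// emit (3, 1L) and (4, 1L) before (1, 0L) and (2, 0L). The Watermark(3L) entry still
// acts as a barrier: record3, enqueued after it, can never overtake it.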