use of org.apache.flink.annotation.PublicEvolving in project flink by apache.
the class AllWindowedStream method aggregate.
/**
 * Applies the given window function to each window. The window function is called for each
 * evaluation of the window, and its output is interpreted as a regular non-windowed stream.
 *
 * <p>Arriving data is incrementally aggregated using the given aggregate function. This means
 * that the window function typically has only a single value to process when called.
 *
 * <p>The returned operator is forced to be non-parallel.
 *
 * @param aggregateFunction The aggregation function that is used for incremental aggregation.
 *                          Must not be a {@code RichFunction}.
 * @param windowFunction The window function.
 * @param accumulatorType Type information for the internal accumulator type of the aggregation function
 * @param aggregateResultType Type information for the result type of the aggregation function;
 *                            must not be null
 * @param resultType Type information for the result type of the window function
 *
 * @return The data stream that is the result of applying the window function to the window.
 *
 * @throws UnsupportedOperationException if {@code aggregateFunction} is a {@code RichFunction}
 *
 * @param <ACC> The type of the AggregateFunction's accumulator
 * @param <V> The type of AggregateFunction's result, and the WindowFunction's input
 * @param <R> The type of the elements in the resulting stream, equal to the
 *            WindowFunction's result type
 */
@PublicEvolving
public <ACC, V, R> SingleOutputStreamOperator<R> aggregate(
        AggregateFunction<T, ACC, V> aggregateFunction,
        AllWindowFunction<V, R, W> windowFunction,
        TypeInformation<ACC> accumulatorType,
        TypeInformation<V> aggregateResultType,
        TypeInformation<R> resultType) {

    // Argument validation; the order determines which message is reported first.
    checkNotNull(aggregateFunction, "aggregateFunction");
    checkNotNull(windowFunction, "windowFunction");
    checkNotNull(accumulatorType, "accumulatorType");
    checkNotNull(aggregateResultType, "aggregateResultType");
    checkNotNull(resultType, "resultType");

    if (aggregateFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("This aggregate function cannot be a RichFunction.");
    }

    // Run both user functions through the closure cleaner.
    AllWindowFunction<V, R, W> cleanedWindowFn = input.getExecutionEnvironment().clean(windowFunction);
    AggregateFunction<T, ACC, V> cleanedAggregateFn = input.getExecutionEnvironment().clean(aggregateFunction);

    // NOTE: getCallLocationName() must be invoked directly from this method; routing it
    // through a helper would add a stack frame and may change the reported location.
    String udfName = "AllWindowedStream." + Utils.getCallLocationName();

    KeySelector<T, Byte> keySelector = input.getKeySelector();

    final String opName;
    final OneInputStreamOperator<T, R> operator;

    if (evictor == null) {
        // No evictor: keep only the accumulator in aggregating state.
        AggregatingStateDescriptor<T, ACC, V> aggregatingState = new AggregatingStateDescriptor<>(
                "window-contents",
                cleanedAggregateFn,
                accumulatorType.createSerializer(getExecutionEnvironment().getConfig()));

        opName = "TriggerWindow(" + windowAssigner + ", " + aggregatingState + ", "
                + trigger + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<Byte> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new WindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                aggregatingState,
                new InternalSingleValueAllWindowFunction<>(cleanedWindowFn),
                trigger,
                allowedLateness,
                lateDataOutputTag);
    } else {
        // Evictor present: the stream records themselves are kept in list state, and the
        // aggregate function is wrapped together with the window function.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        TypeSerializer<StreamRecord<T>> recordSerializer = (TypeSerializer<StreamRecord<T>>)
                new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));

        ListStateDescriptor<StreamRecord<T>> listState =
                new ListStateDescriptor<>("window-contents", recordSerializer);

        opName = "TriggerWindow(" + windowAssigner + ", " + listState + ", "
                + trigger + ", " + evictor + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<Byte> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new EvictingWindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                listState,
                new InternalIterableAllWindowFunction<>(
                        new AggregateApplyAllWindowFunction<>(cleanedAggregateFn, cleanedWindowFn)),
                trigger,
                evictor,
                allowedLateness,
                lateDataOutputTag);
    }

    return input.transform(opName, resultType, operator).forceNonParallel();
}
use of org.apache.flink.annotation.PublicEvolving in project flink by apache.
the class StreamExecutionEnvironment method readFile.
/**
 * Reads the contents of the user-specified {@code filePath} based on the given {@link FileInputFormat}.
 * Depending on the provided {@link FileProcessingMode}, the source either monitors the path and reacts
 * to new data, or processes the path once and exits.
 *
 * <p>This variant sets the given {@code filter} on the input format, derives the produced type from the
 * input format, and then delegates to the {@code readFile} overload that takes an explicit
 * {@link TypeInformation}.
 *
 * <p>See {@link #readFile(FileInputFormat, String, FileProcessingMode, long)}
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param watchType
 * 		The mode in which the source should operate, i.e. monitor path and react to new data, or process once and exit
 * @param interval
 * 		In the case of periodic path monitoring, this specifies the interval (in millis) between consecutive path scans
 * @param filter
 * 		The files to be excluded from the processing
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data read from the given file
 * @throws InvalidProgramException
 * 		If the type produced by the input format cannot be determined automatically; the original
 * 		failure is attached as the cause
 *
 * @deprecated Use {@link FileInputFormat#setFilesFilter(FilePathFilter)} to set a filter and
 * 		{@link StreamExecutionEnvironment#readFile(FileInputFormat, String, FileProcessingMode, long)}
 */
@PublicEvolving
@Deprecated
public <OUT> DataStreamSource<OUT> readFile(
        FileInputFormat<OUT> inputFormat,
        String filePath,
        FileProcessingMode watchType,
        long interval,
        FilePathFilter filter) {

    inputFormat.setFilesFilter(filter);

    TypeInformation<OUT> typeInformation;
    try {
        typeInformation = TypeExtractor.getInputFormatTypes(inputFormat);
    } catch (Exception e) {
        // Keep the underlying extraction failure as the cause so that the reason the type
        // could not be determined is not lost from the stack trace.
        throw new InvalidProgramException("The type returned by the input format could not be " + "automatically determined. Please specify the TypeInformation of the produced type " + "explicitly by using the 'createInput(InputFormat, TypeInformation)' method instead.", e);
    }

    return readFile(inputFormat, filePath, watchType, interval, typeInformation);
}
use of org.apache.flink.annotation.PublicEvolving in project flink by apache.
the class AllWindowedStream method fold.
/**
 * Applies the given window function to each window. The window function is called for each
 * evaluation of the window, and its output is interpreted as a regular non-windowed stream.
 *
 * <p>Arriving data is incrementally aggregated using the given fold function.
 *
 * <p>The returned operator is forced to be non-parallel.
 *
 * @param initialValue The initial value of the fold.
 * @param foldFunction The fold function that is used for incremental aggregation.
 *                     Must not be a {@code RichFunction}.
 * @param function The window function.
 * @param foldAccumulatorType Type information for the result type of the fold function
 * @param resultType Type information for the result type of the window function
 * @return The data stream that is the result of applying the window function to the window.
 * @throws UnsupportedOperationException if {@code foldFunction} is a {@code RichFunction}, or if
 *         this stream's window assigner is a {@code MergingWindowAssigner}
 */
@PublicEvolving
public <ACC, R> SingleOutputStreamOperator<R> fold(
        ACC initialValue,
        FoldFunction<T, ACC> foldFunction,
        AllWindowFunction<ACC, R, W> function,
        TypeInformation<ACC> foldAccumulatorType,
        TypeInformation<R> resultType) {

    // Guard clauses: reject unsupported configurations up front.
    if (foldFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("FoldFunction of fold can not be a RichFunction.");
    }
    if (windowAssigner instanceof MergingWindowAssigner) {
        throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
    }

    // Run both user functions through the closure cleaner.
    AllWindowFunction<ACC, R, W> cleanedWindowFn = input.getExecutionEnvironment().clean(function);
    FoldFunction<T, ACC> cleanedFoldFn = input.getExecutionEnvironment().clean(foldFunction);

    // NOTE: getCallLocationName() must be invoked directly from this method; routing it
    // through a helper would add a stack frame and may change the reported location.
    final String udfName = "AllWindowedStream." + Utils.getCallLocationName();

    final KeySelector<T, Byte> keySelector = input.getKeySelector();

    final String opName;
    final OneInputStreamOperator<T, R> operator;

    if (evictor == null) {
        // No evictor: keep only the folded value in folding state.
        FoldingStateDescriptor<T, ACC> foldingState = new FoldingStateDescriptor<>(
                "window-contents",
                initialValue,
                cleanedFoldFn,
                foldAccumulatorType.createSerializer(getExecutionEnvironment().getConfig()));

        opName = "TriggerWindow(" + windowAssigner + ", " + foldingState + ", "
                + trigger + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<Byte> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new WindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                foldingState,
                new InternalSingleValueAllWindowFunction<>(cleanedWindowFn),
                trigger,
                allowedLateness,
                lateDataOutputTag);
    } else {
        // Evictor present: the stream records themselves are kept in list state, and the
        // fold function is wrapped together with the window function.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        TypeSerializer<StreamRecord<T>> recordSerializer = (TypeSerializer<StreamRecord<T>>)
                new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));

        ListStateDescriptor<StreamRecord<T>> listState =
                new ListStateDescriptor<>("window-contents", recordSerializer);

        opName = "TriggerWindow(" + windowAssigner + ", " + listState + ", "
                + trigger + ", " + evictor + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<Byte> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new EvictingWindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                listState,
                new InternalIterableAllWindowFunction<>(
                        new FoldApplyAllWindowFunction<>(initialValue, cleanedFoldFn, cleanedWindowFn, foldAccumulatorType)),
                trigger,
                evictor,
                allowedLateness,
                lateDataOutputTag);
    }

    return input.transform(opName, resultType, operator).forceNonParallel();
}
use of org.apache.flink.annotation.PublicEvolving in project flink by apache.
the class AllWindowedStream method reduce.
/**
 * Applies the given window function to each window. The window function is called for each
 * evaluation of the window, and its output is interpreted as a regular non-windowed stream.
 *
 * <p>Arriving data is incrementally aggregated using the given reducer.
 *
 * <p>The returned operator is forced to be non-parallel.
 *
 * @param reduceFunction The reduce function that is used for incremental aggregation.
 *                       Must not be a {@code RichFunction}.
 * @param function The process window function.
 * @param resultType Type information for the result type of the window function
 * @return The data stream that is the result of applying the window function to the window.
 * @throws UnsupportedOperationException if {@code reduceFunction} is a {@code RichFunction}
 */
@PublicEvolving
public <R> SingleOutputStreamOperator<R> reduce(
        ReduceFunction<T> reduceFunction,
        ProcessAllWindowFunction<T, R, W> function,
        TypeInformation<R> resultType) {

    if (reduceFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("ReduceFunction of reduce can not be a RichFunction.");
    }

    // Run both user functions through the closure cleaner.
    ProcessAllWindowFunction<T, R, W> cleanedWindowFn = input.getExecutionEnvironment().clean(function);
    ReduceFunction<T> cleanedReduceFn = input.getExecutionEnvironment().clean(reduceFunction);

    // NOTE: getCallLocationName() must be invoked directly from this method; routing it
    // through a helper would add a stack frame and may change the reported location.
    final String udfName = "AllWindowedStream." + Utils.getCallLocationName();

    final KeySelector<T, Byte> keySelector = input.getKeySelector();

    final String opName;
    final OneInputStreamOperator<T, R> operator;

    if (evictor == null) {
        // No evictor: keep only the reduced value in reducing state.
        ReducingStateDescriptor<T> reducingState = new ReducingStateDescriptor<>(
                "window-contents",
                cleanedReduceFn,
                input.getType().createSerializer(getExecutionEnvironment().getConfig()));

        opName = "TriggerWindow(" + windowAssigner + ", " + reducingState + ", "
                + trigger + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<Byte> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new WindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                reducingState,
                new InternalSingleValueProcessAllWindowFunction<>(cleanedWindowFn),
                trigger,
                allowedLateness,
                lateDataOutputTag);
    } else {
        // Evictor present: the stream records themselves are kept in list state, and the
        // reduce function is wrapped together with the process window function.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        TypeSerializer<StreamRecord<T>> recordSerializer = (TypeSerializer<StreamRecord<T>>)
                new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));

        ListStateDescriptor<StreamRecord<T>> listState =
                new ListStateDescriptor<>("window-contents", recordSerializer);

        opName = "TriggerWindow(" + windowAssigner + ", " + listState + ", "
                + trigger + ", " + evictor + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<Byte> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new EvictingWindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                listState,
                new InternalIterableProcessAllWindowFunction<>(
                        new ReduceApplyProcessAllWindowFunction<>(cleanedReduceFn, cleanedWindowFn)),
                trigger,
                evictor,
                allowedLateness,
                lateDataOutputTag);
    }

    return input.transform(opName, resultType, operator).forceNonParallel();
}
use of org.apache.flink.annotation.PublicEvolving in project flink by apache.
the class WindowedStream method aggregate.
/**
 * Applies the given window function to each window. The window function is called for each
 * evaluation of the window for each key individually. The output of the window function is
 * interpreted as a regular non-windowed stream.
 *
 * <p>Arriving data is incrementally aggregated using the given aggregate function. This means
 * that the window function typically has only a single value to process when called.
 *
 * @param aggregateFunction The aggregation function that is used for incremental aggregation.
 *                          Must not be a {@code RichFunction}.
 * @param windowFunction The window function.
 * @param accumulatorType Type information for the internal accumulator type of the aggregation function
 * @param aggregateResultType Type information for the result type of the aggregation function;
 *                            must not be null
 * @param resultType Type information for the result type of the window function
 *
 * @return The data stream that is the result of applying the window function to the window.
 *
 * @throws UnsupportedOperationException if {@code aggregateFunction} is a {@code RichFunction}
 *
 * @param <ACC> The type of the AggregateFunction's accumulator
 * @param <V> The type of AggregateFunction's result, and the WindowFunction's input
 * @param <R> The type of the elements in the resulting stream, equal to the
 *            WindowFunction's result type
 */
@PublicEvolving
public <ACC, V, R> SingleOutputStreamOperator<R> aggregate(
        AggregateFunction<T, ACC, V> aggregateFunction,
        ProcessWindowFunction<V, R, K, W> windowFunction,
        TypeInformation<ACC> accumulatorType,
        TypeInformation<V> aggregateResultType,
        TypeInformation<R> resultType) {

    // Argument validation; the order determines which message is reported first.
    checkNotNull(aggregateFunction, "aggregateFunction");
    checkNotNull(windowFunction, "windowFunction");
    checkNotNull(accumulatorType, "accumulatorType");
    checkNotNull(aggregateResultType, "aggregateResultType");
    checkNotNull(resultType, "resultType");

    if (aggregateFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("This aggregate function cannot be a RichFunction.");
    }

    // Run both user functions through the closure cleaner.
    ProcessWindowFunction<V, R, K, W> cleanedWindowFn = input.getExecutionEnvironment().clean(windowFunction);
    AggregateFunction<T, ACC, V> cleanedAggregateFn = input.getExecutionEnvironment().clean(aggregateFunction);

    // NOTE: getCallLocationName() must be invoked directly from this method; routing it
    // through a helper would add a stack frame and may change the reported location.
    final String udfName = "WindowedStream." + Utils.getCallLocationName();

    final KeySelector<T, K> keySelector = input.getKeySelector();

    final String opName;
    final OneInputStreamOperator<T, R> operator;

    if (evictor == null) {
        // No evictor: keep only the accumulator in aggregating state.
        AggregatingStateDescriptor<T, ACC, V> aggregatingState = new AggregatingStateDescriptor<>(
                "window-contents",
                cleanedAggregateFn,
                accumulatorType.createSerializer(getExecutionEnvironment().getConfig()));

        opName = "TriggerWindow(" + windowAssigner + ", " + aggregatingState + ", "
                + trigger + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<K> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new WindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                aggregatingState,
                new InternalSingleValueProcessWindowFunction<>(cleanedWindowFn),
                trigger,
                allowedLateness,
                lateDataOutputTag);
    } else {
        // Evictor present: the stream records themselves are kept in list state, and the
        // aggregate function is handed to the internal window function alongside the
        // process window function.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        TypeSerializer<StreamRecord<T>> recordSerializer = (TypeSerializer<StreamRecord<T>>)
                new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));

        ListStateDescriptor<StreamRecord<T>> listState =
                new ListStateDescriptor<>("window-contents", recordSerializer);

        opName = "TriggerWindow(" + windowAssigner + ", " + listState + ", "
                + trigger + ", " + evictor + ", " + udfName + ")";

        TypeSerializer<W> windowSerializer =
                windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig());
        TypeSerializer<K> keySerializer =
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig());

        operator = new EvictingWindowOperator<>(
                windowAssigner,
                windowSerializer,
                keySelector,
                keySerializer,
                listState,
                new InternalAggregateProcessWindowFunction<>(cleanedAggregateFn, cleanedWindowFn),
                trigger,
                evictor,
                allowedLateness,
                lateDataOutputTag);
    }

    return input.transform(opName, resultType, operator);
}
Aggregations