Use of org.apache.flink.annotation.Internal in project flink by apache.
The class WindowedStream, method fold().
/**
* Applies the given window function to each window. The window function is called for each
* evaluation of the window for each key individually. The output of the window function is
* interpreted as a regular non-windowed stream.
*
* <p>Arriving data is incrementally aggregated using the given fold function.
*
* @param initialValue The initial value to be passed to the first invocation of the fold function.
* @param foldFunction The fold function.
* @param windowFunction The process window function.
* @param foldResultType The result type of the fold function.
* @param windowResultType The result type of the process window function.
* @return The data stream that is the result of applying the fold function to the window.
*/
@Internal
public <R, ACC> SingleOutputStreamOperator<R> fold(
        ACC initialValue,
        FoldFunction<T, ACC> foldFunction,
        ProcessWindowFunction<ACC, R, K, W> windowFunction,
        TypeInformation<ACC> foldResultType,
        TypeInformation<R> windowResultType) {
if (foldFunction instanceof RichFunction) {
throw new UnsupportedOperationException("FoldFunction can not be a RichFunction.");
}
if (windowAssigner instanceof MergingWindowAssigner) {
throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
}
//clean the closures
windowFunction = input.getExecutionEnvironment().clean(windowFunction);
foldFunction = input.getExecutionEnvironment().clean(foldFunction);
String callLocation = Utils.getCallLocationName();
String udfName = "WindowedStream." + callLocation;
String opName;
KeySelector<T, K> keySel = input.getKeySelector();
OneInputStreamOperator<T, R> operator;
if (evictor != null) {
@SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
operator = new EvictingWindowOperator<>(
        windowAssigner,
        windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
        keySel,
        input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
        stateDesc,
        new InternalIterableProcessWindowFunction<>(
                new FoldApplyProcessWindowFunction<>(initialValue, foldFunction, windowFunction, foldResultType)),
        trigger,
        evictor,
        allowedLateness,
        lateDataOutputTag);
} else {
FoldingStateDescriptor<T, ACC> stateDesc = new FoldingStateDescriptor<>(
        "window-contents",
        initialValue,
        foldFunction,
        foldResultType.createSerializer(getExecutionEnvironment().getConfig()));
opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
operator = new WindowOperator<>(
        windowAssigner,
        windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
        keySel,
        input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
        stateDesc,
        new InternalSingleValueProcessWindowFunction<>(windowFunction),
        trigger,
        allowedLateness,
        lateDataOutputTag);
}
return input.transform(opName, windowResultType, operator);
}
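For context, here is a minimal usage sketch of the public fold(initialValue, foldFunction, windowFunction) overload that delegates to the @Internal variant above, assuming a Flink 1.3/1.4-era DataStream API (the fold-based window API has since been deprecated in favor of aggregate()). The input data, key selector, and window size are illustrative only.

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

public class FoldProcessWindowExample {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> input =
                env.fromElements(Tuple2.of("a", 1), Tuple2.of("a", 2), Tuple2.of("b", 3));

        input
            .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                @Override
                public String getKey(Tuple2<String, Integer> value) {
                    return value.f0;
                }
            })
            .timeWindow(Time.seconds(5))
            // incrementally fold each window's elements into a running sum ...
            .fold(0, new FoldFunction<Tuple2<String, Integer>, Integer>() {
                @Override
                public Integer fold(Integer accumulator, Tuple2<String, Integer> value) {
                    return accumulator + value.f1;
                }
            // ... and emit one result per key and window via the ProcessWindowFunction
            }, new ProcessWindowFunction<Integer, String, String, TimeWindow>() {
                @Override
                public void process(String key, Context context, Iterable<Integer> sums, Collector<String> out) {
                    out.collect(key + " -> " + sums.iterator().next() + " in " + context.window());
                }
            })
            .print();

        env.execute("fold with ProcessWindowFunction");
    }
}

Without an evictor this runs on the FoldingStateDescriptor branch above, so the ProcessWindowFunction receives a single pre-folded value per window; with an evictor the raw elements are kept in list state and folded at firing time.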
Use of org.apache.flink.annotation.Internal in project flink by apache.
The class TypeExtractor, method getTypeInfoFactory().
// --------------------------------------------------------------------------------------------
// Utility methods
// --------------------------------------------------------------------------------------------
/**
* Returns the type information factory for a type using the factory registry or annotations.
*/
@SuppressWarnings("unchecked")
@Internal
public static <OUT> TypeInfoFactory<OUT> getTypeInfoFactory(Type t) {
final Class<?> factoryClass;
if (registeredTypeInfoFactories.containsKey(t)) {
factoryClass = registeredTypeInfoFactories.get(t);
} else {
if (!isClassType(t) || !typeToClass(t).isAnnotationPresent(TypeInfo.class)) {
return null;
}
final TypeInfo typeInfoAnnotation = typeToClass(t).getAnnotation(TypeInfo.class);
factoryClass = typeInfoAnnotation.value();
// check for valid factory class
if (!TypeInfoFactory.class.isAssignableFrom(factoryClass)) {
throw new InvalidTypesException("TypeInfo annotation does not specify a valid TypeInfoFactory.");
}
}
// instantiate
return (TypeInfoFactory<OUT>) InstantiationUtil.instantiate(factoryClass);
}
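A minimal sketch of how this factory lookup is driven from user code, assuming the @TypeInfo / TypeInfoFactory API in org.apache.flink.api.common.typeinfo; the MyId type and its factory are hypothetical, and getTypeInfoFactory is normally reached indirectly through createTypeInfo rather than called directly.

import java.lang.reflect.Type;
import java.util.Map;
import org.apache.flink.api.common.typeinfo.TypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInfoFactory;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.GenericTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;

public class TypeInfoFactoryExample {

    // hypothetical value type; the annotation points the extractor at a factory
    // instead of letting it analyze the class reflectively
    @TypeInfo(MyIdFactory.class)
    public static class MyId {
        public String raw;
    }

    // the factory named by the annotation; it must be instantiable via a public no-arg constructor
    public static class MyIdFactory extends TypeInfoFactory<MyId> {
        @Override
        public TypeInformation<MyId> createTypeInfo(Type t, Map<String, TypeInformation<?>> genericParameters) {
            // keep it simple: fall back to generic (Kryo-based) serialization for MyId
            return new GenericTypeInfo<>(MyId.class);
        }
    }

    public static void main(String[] args) {
        // getTypeInfoFactory(...) resolves MyIdFactory from the @TypeInfo annotation on MyId
        TypeInfoFactory<MyId> factory = TypeExtractor.getTypeInfoFactory(MyId.class);
        System.out.println(factory.getClass().getSimpleName());

        // the regular extraction entry point ends up consulting the same factory
        TypeInformation<MyId> info = TypeExtractor.createTypeInfo(MyId.class);
        System.out.println(info);
    }
}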
Use of org.apache.flink.annotation.Internal in project flink by apache.
The class FileSystem, method getUnguardedFileSystem().
@Internal
public static FileSystem getUnguardedFileSystem(URI uri) throws IOException {
FileSystem fs;
URI asked = uri;
synchronized (SYNCHRONIZATION_OBJECT) {
if (uri.getScheme() == null) {
try {
if (defaultScheme == null) {
defaultScheme = new URI(ConfigConstants.DEFAULT_FILESYSTEM_SCHEME);
}
uri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(), defaultScheme.getPort(), uri.getPath(), null, null);
} catch (URISyntaxException e) {
try {
if (defaultScheme.getScheme().equals("file")) {
uri = new URI("file", null, new Path(new File(uri.getPath()).getAbsolutePath()).toUri().getPath(), null);
}
} catch (URISyntaxException ex) {
// we tried to repair it, but could not. report the scheme error
throw new IOException("The URI '" + uri.toString() + "' is not valid.");
}
}
}
if (uri.getScheme() == null) {
throw new IOException("The URI '" + uri + "' is invalid.\n" + "The fs.default-scheme = " + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + uri + ".");
}
if (uri.getScheme().equals("file") && uri.getAuthority() != null && !uri.getAuthority().isEmpty()) {
String supposedUri = "file:///" + uri.getAuthority() + uri.getPath();
throw new IOException("Found local file path with authority '" + uri.getAuthority() + "' in path '" + uri.toString() + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
}
final FSKey key = new FSKey(uri.getScheme(), uri.getAuthority());
// See if there is a file system object in the cache
if (CACHE.containsKey(key)) {
return CACHE.get(key);
}
if (!isFlinkSupportedScheme(uri.getScheme())) {
// no built-in support for this file system; falling back to Hadoop's FileSystem implementation.
Class<?> wrapperClass = getHadoopWrapperClassNameForFileSystem(uri.getScheme());
if (wrapperClass != null) {
// Hadoop has support for this file system
FSKey wrappedKey = new FSKey(HADOOP_WRAPPER_SCHEME + "+" + uri.getScheme(), uri.getAuthority());
if (CACHE.containsKey(wrappedKey)) {
return CACHE.get(wrappedKey);
}
// cache didn't contain the file system. instantiate it:
// by now we know that the HadoopFileSystem wrapper can wrap the file system.
fs = instantiateHadoopFileSystemWrapper(wrapperClass);
fs.initialize(uri);
CACHE.put(wrappedKey, fs);
} else {
// we can not read from this file system.
throw new IOException("No file system found with scheme " + uri.getScheme() + ", referenced in file URI '" + uri.toString() + "'.");
}
} else {
// we end up here if the file system has built-in Flink support.
String fsClass = FSDIRECTORY.get(uri.getScheme());
if (fsClass.equals(HADOOP_WRAPPER_FILESYSTEM_CLASS)) {
fs = instantiateHadoopFileSystemWrapper(null);
} else {
fs = instantiateFileSystem(fsClass);
}
// Initialize new file system object
fs.initialize(uri);
// Add new file system object to cache
CACHE.put(key, fs);
}
}
return fs;
}
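A minimal usage sketch, assuming the public FileSystem.get(URI) entry point, which resolves and caches the concrete file system for a scheme much like getUnguardedFileSystem above; the file path is hypothetical.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class FileSystemLookupExample {

    public static void main(String[] args) throws IOException {
        // resolve the FileSystem implementation for the URI's scheme ("file" here)
        URI uri = URI.create("file:///tmp/flink-example.txt");   // hypothetical path
        FileSystem fs = FileSystem.get(uri);

        Path path = new Path(uri);
        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            System.out.println(path + " has " + status.getLen() + " bytes");

            // read the first line through the Flink file system abstraction
            try (FSDataInputStream in = fs.open(path);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
                System.out.println("first line: " + reader.readLine());
            }
        } else {
            System.out.println(path + " does not exist");
        }
    }
}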
Use of org.apache.flink.annotation.Internal in project flink by apache.
The class ExecutionEnvironment, method createProgramPlan().
/**
* Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
* and operations and how they interact, as an isolated unit that can be executed with a
* {@link org.apache.flink.api.common.PlanExecutor}. Obtaining a plan and starting it with an
* executor is an alternative way to run a program and is only possible if the program consists
* only of distributed operations.
*
* @param jobName The name attached to the plan (displayed in logs and monitoring).
* @param clearSinks Whether to clear the defined data sinks afterwards, so that the next execution starts a new stage.
* @return The program's plan.
*/
@Internal
public Plan createProgramPlan(String jobName, boolean clearSinks) {
if (this.sinks.isEmpty()) {
if (wasExecuted) {
throw new RuntimeException("No new data sinks have been defined since the " + "last execution. The last execution refers to the latest call to " + "'execute()', 'count()', 'collect()', or 'print()'.");
} else {
throw new RuntimeException("No data sinks have been created yet. " + "A program needs at least one sink that consumes data. " + "Examples are writing the data set or printing it.");
}
}
if (jobName == null) {
jobName = getDefaultName();
}
OperatorTranslation translator = new OperatorTranslation();
Plan plan = translator.translateToPlan(this.sinks, jobName);
if (getParallelism() > 0) {
plan.setDefaultParallelism(getParallelism());
}
plan.setExecutionConfig(getConfig());
// Check plan for GenericTypeInfo's and register the types at the serializers.
if (!config.isAutoTypeRegistrationDisabled()) {
plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() {
private final HashSet<Class<?>> deduplicator = new HashSet<>();
@Override
public boolean preVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {
OperatorInformation<?> opInfo = visitable.getOperatorInfo();
Serializers.recursivelyRegisterType(opInfo.getOutputType(), config, deduplicator);
return true;
}
@Override
public void postVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {
}
});
}
try {
registerCachedFilesWithPlan(plan);
} catch (Exception e) {
throw new RuntimeException("Error while registering cached files: " + e.getMessage(), e);
}
// clear all the sinks such that the next execution does not redo everything
if (clearSinks) {
this.sinks.clear();
wasExecuted = true;
}
// All types are registered now. Print information.
int registeredTypes = config.getRegisteredKryoTypes().size() + config.getRegisteredPojoTypes().size() + config.getRegisteredTypesWithKryoSerializerClasses().size() + config.getRegisteredTypesWithKryoSerializers().size();
int defaultKryoSerializers = config.getDefaultKryoSerializers().size() + config.getDefaultKryoSerializerClasses().size();
LOG.info("The job has {} registered types and {} default Kryo serializers", registeredTypes, defaultKryoSerializers);
if (config.isForceKryoEnabled() && config.isForceAvroEnabled()) {
LOG.warn("In the ExecutionConfig, both Avro and Kryo are enforced. Using Kryo serializer");
}
if (config.isForceKryoEnabled()) {
LOG.info("Using KryoSerializer for serializing POJOs");
}
if (config.isForceAvroEnabled()) {
LOG.info("Using AvroSerializer for serializing POJOs");
}
if (LOG.isDebugEnabled()) {
LOG.debug("Registered Kryo types: {}", config.getRegisteredKryoTypes().toString());
LOG.debug("Registered Kryo with Serializers types: {}", config.getRegisteredTypesWithKryoSerializers().entrySet().toString());
LOG.debug("Registered Kryo with Serializer Classes types: {}", config.getRegisteredTypesWithKryoSerializerClasses().entrySet().toString());
LOG.debug("Registered Kryo default Serializers: {}", config.getDefaultKryoSerializers().entrySet().toString());
LOG.debug("Registered Kryo default Serializers Classes {}", config.getDefaultKryoSerializerClasses().entrySet().toString());
LOG.debug("Registered POJO types: {}", config.getRegisteredPojoTypes().toString());
// print information about static code analysis
LOG.debug("Static code analysis mode: {}", config.getCodeAnalysisMode());
}
return plan;
}
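A minimal sketch of obtaining a Plan without executing it, assuming the public createProgramPlan(String) overload of the DataSet API's ExecutionEnvironment, which in this version forwards to the method above with clearSinks set to true; the data and job name are made up.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class ProgramPlanExample {

    public static void main(String[] args) {
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

        DataSet<String> words = env.fromElements("flink", "plan", "example");

        // createProgramPlan(...) requires at least one sink; register one that discards its input
        words.output(new DiscardingOutputFormat<String>());

        // build the plan without running it
        Plan plan = env.createProgramPlan("plan-inspection-example");

        System.out.println("job name: " + plan.getJobName());
        System.out.println("default parallelism: " + plan.getDefaultParallelism());
    }
}

Because the sinks are cleared when the plan is created, obtaining a second plan from the same environment requires defining new sinks first, as the exception messages above spell out.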
Use of org.apache.flink.annotation.Internal in project flink by apache.
The class AggregateOperator, method translateToDataFlow().
@SuppressWarnings("unchecked")
@Override
@Internal
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
// sanity check
if (this.aggregationFunctions.isEmpty() || this.aggregationFunctions.size() != this.fields.size()) {
throw new IllegalStateException();
}
// construct the aggregation function
AggregationFunction<Object>[] aggFunctions = new AggregationFunction[this.aggregationFunctions.size()];
int[] fields = new int[this.fields.size()];
StringBuilder genName = new StringBuilder();
for (int i = 0; i < fields.length; i++) {
aggFunctions[i] = (AggregationFunction<Object>) this.aggregationFunctions.get(i);
fields[i] = this.fields.get(i);
genName.append(aggFunctions[i].toString()).append('(').append(fields[i]).append(')').append(',');
}
genName.append(" at ").append(aggregateLocationName);
genName.setLength(genName.length() - 1);
@SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> function = new AggregatingUdf(aggFunctions, fields);
String name = getName() != null ? getName() : genName.toString();
// distinguish between grouped reduce and non-grouped reduce
if (this.grouping == null) {
// non grouped aggregation
UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, new int[0], name);
po.setCombinable(true);
// set input
po.setInput(input);
// set parallelism
po.setParallelism(this.getParallelism());
return po;
}
if (this.grouping.getKeys() instanceof Keys.ExpressionKeys) {
// grouped aggregation
int[] logicalKeyPositions = this.grouping.getKeys().computeLogicalKeyPositions();
UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, logicalKeyPositions, name);
po.setCombinable(true);
po.setInput(input);
po.setParallelism(this.getParallelism());
po.setCustomPartitioner(grouping.getCustomPartitioner());
SingleInputSemanticProperties props = new SingleInputSemanticProperties();
for (int keyField : logicalKeyPositions) {
boolean keyFieldUsedInAgg = false;
for (int aggField : fields) {
if (keyField == aggField) {
keyFieldUsedInAgg = true;
break;
}
}
if (!keyFieldUsedInAgg) {
props.addForwardedField(keyField, keyField);
}
}
po.setSemanticProperties(props);
return po;
} else if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
} else {
throw new UnsupportedOperationException("Unrecognized key type.");
}
}
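A minimal sketch of the user-facing DataSet aggregation that this translation handles, assuming the org.apache.flink.api.java.aggregation.Aggregations enum; grouping on a tuple field exercises the ExpressionKeys branch above, and the data is made up.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple3;

public class AggregateExample {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple3<String, Integer, Double>> sales = env.fromElements(
                Tuple3.of("books", 3, 30.0),
                Tuple3.of("books", 1, 12.5),
                Tuple3.of("games", 2, 80.0));

        // group on field 0, then SUM field 1 and MAX field 2;
        // the chained and(...) call adds a second aggregation to the same AggregateOperator
        DataSet<Tuple3<String, Integer, Double>> result = sales
                .groupBy(0)
                .aggregate(Aggregations.SUM, 1)
                .and(Aggregations.MAX, 2);

        result.print();
    }
}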