Example 1 with Internal

Use of org.apache.flink.annotation.Internal in project flink by apache.

The class WindowedStream, method fold.

/**
	 * Applies the given window function to each window. The window function is called for each
	 * evaluation of the window for each key individually. The output of the window function is
	 * interpreted as a regular non-windowed stream.
	 *
	 * <p>Arriving data is incrementally aggregated using the given fold function.
	 *
	 * @param initialValue The initial value to be passed to the first invocation of the fold function.
	 * @param foldFunction The fold function.
	 * @param windowFunction The process window function.
	 * @param foldResultType The result type of the fold function.
	 * @param windowResultType The process window function result type.
	 * @return The data stream that is the result of applying the fold function to the window.
	 */
@Internal
public <R, ACC> SingleOutputStreamOperator<R> fold(
        ACC initialValue,
        FoldFunction<T, ACC> foldFunction,
        ProcessWindowFunction<ACC, R, K, W> windowFunction,
        TypeInformation<ACC> foldResultType,
        TypeInformation<R> windowResultType) {
    if (foldFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("FoldFunction can not be a RichFunction.");
    }
    if (windowAssigner instanceof MergingWindowAssigner) {
        throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
    }
    // clean the closures
    windowFunction = input.getExecutionEnvironment().clean(windowFunction);
    foldFunction = input.getExecutionEnvironment().clean(foldFunction);
    String callLocation = Utils.getCallLocationName();
    String udfName = "WindowedStream." + callLocation;
    String opName;
    KeySelector<T, K> keySel = input.getKeySelector();
    OneInputStreamOperator<T, R> operator;
    if (evictor != null) {
        @SuppressWarnings({ "unchecked", "rawtypes" }) TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
        operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableProcessWindowFunction<>(new FoldApplyProcessWindowFunction<>(initialValue, foldFunction, windowFunction, foldResultType)), trigger, evictor, allowedLateness, lateDataOutputTag);
    } else {
        FoldingStateDescriptor<T, ACC> stateDesc = new FoldingStateDescriptor<>("window-contents", initialValue, foldFunction, foldResultType.createSerializer(getExecutionEnvironment().getConfig()));
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
        operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueProcessWindowFunction<>(windowFunction), trigger, allowedLateness, lateDataOutputTag);
    }
    return input.transform(opName, windowResultType, operator);
}
Also used: StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord), RichFunction (org.apache.flink.api.common.functions.RichFunction), ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor), FoldingStateDescriptor (org.apache.flink.api.common.state.FoldingStateDescriptor), MergingWindowAssigner (org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner), TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer), InternalSingleValueProcessWindowFunction (org.apache.flink.streaming.runtime.operators.windowing.functions.InternalSingleValueProcessWindowFunction), StreamElementSerializer (org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer), InternalIterableProcessWindowFunction (org.apache.flink.streaming.runtime.operators.windowing.functions.InternalIterableProcessWindowFunction), Internal (org.apache.flink.annotation.Internal)
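
For orientation, a minimal usage sketch of the public fold overload that delegates to this @Internal method. It is written against the Flink 1.3-era API quoted above; the class name FoldExample, the data, and the key/window choices are illustrative assumptions, not taken from the Flink sources.

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

public class FoldExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Tuple2<String, Integer>> input = env.fromElements(
                Tuple2.of("a", 1), Tuple2.of("a", 2), Tuple2.of("b", 3));
        input.keyBy(0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                // incrementally sum the Integer field per key and window ...
                .fold(0, new FoldFunction<Tuple2<String, Integer>, Integer>() {
                    @Override
                    public Integer fold(Integer acc, Tuple2<String, Integer> value) {
                        return acc + value.f1;
                    }
                // ... then attach window metadata when the window fires
                }, new ProcessWindowFunction<Integer, String, Tuple, TimeWindow>() {
                    @Override
                    public void process(Tuple key, Context ctx, Iterable<Integer> folded, Collector<String> out) {
                        // the fold result arrives as a single-element Iterable
                        out.collect(key + " @ " + ctx.window() + ": " + folded.iterator().next());
                    }
                })
                .print();
        env.execute("fold-sketch");
    }
}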

Example 2 with Internal

Use of org.apache.flink.annotation.Internal in project flink by apache.

The class TypeExtractor, method getTypeInfoFactory.

// --------------------------------------------------------------------------------------------
//  Utility methods
// --------------------------------------------------------------------------------------------
/**
	 * Returns the type information factory for a type using the factory registry or annotations.
	 */
@Internal
public static <OUT> TypeInfoFactory<OUT> getTypeInfoFactory(Type t) {
    final Class<?> factoryClass;
    if (registeredTypeInfoFactories.containsKey(t)) {
        factoryClass = registeredTypeInfoFactories.get(t);
    } else {
        if (!isClassType(t) || !typeToClass(t).isAnnotationPresent(TypeInfo.class)) {
            return null;
        }
        final TypeInfo typeInfoAnnotation = typeToClass(t).getAnnotation(TypeInfo.class);
        factoryClass = typeInfoAnnotation.value();
        // check for valid factory class
        if (!TypeInfoFactory.class.isAssignableFrom(factoryClass)) {
            throw new InvalidTypesException("TypeInfo annotation does not specify a valid TypeInfoFactory.");
        }
    }
    // instantiate
    return (TypeInfoFactory<OUT>) InstantiationUtil.instantiate(factoryClass);
}
Also used: InvalidTypesException (org.apache.flink.api.common.functions.InvalidTypesException), BasicArrayTypeInfo (org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo), BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo), SqlTimeTypeInfo (org.apache.flink.api.common.typeinfo.SqlTimeTypeInfo), PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo), TypeInfo (org.apache.flink.api.common.typeinfo.TypeInfo), TypeInfoFactory (org.apache.flink.api.common.typeinfo.TypeInfoFactory), Internal (org.apache.flink.annotation.Internal)
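
A hedged sketch of the annotation branch this method resolves: a user type declares its factory via @TypeInfo, and the factory is validated and instantiated as shown above. The names MyType and MyTypeInfoFactory are invented for illustration.

import java.lang.reflect.Type;
import java.util.Map;

import org.apache.flink.api.common.typeinfo.TypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInfoFactory;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.GenericTypeInfo;

// user type that points the TypeExtractor at its factory
@TypeInfo(MyTypeInfoFactory.class)
class MyType {
    public String value;
}

class MyTypeInfoFactory extends TypeInfoFactory<MyType> {
    @Override
    public TypeInformation<MyType> createTypeInfo(Type t, Map<String, TypeInformation<?>> genericParameters) {
        // sketch only: fall back to generic (Kryo-based) type information
        return new GenericTypeInfo<>(MyType.class);
    }
}

With this in place, TypeExtractor.getTypeInfoFactory(MyType.class) takes the annotation branch and instantiates MyTypeInfoFactory; a type present in registeredTypeInfoFactories would be served from the registry branch instead.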

Example 3 with Internal

Use of org.apache.flink.annotation.Internal in project flink by apache.

The class FileSystem, method getUnguardedFileSystem.

@Internal
public static FileSystem getUnguardedFileSystem(URI uri) throws IOException {
    FileSystem fs;
    URI asked = uri;
    synchronized (SYNCHRONIZATION_OBJECT) {
        if (uri.getScheme() == null) {
            try {
                if (defaultScheme == null) {
                    defaultScheme = new URI(ConfigConstants.DEFAULT_FILESYSTEM_SCHEME);
                }
                uri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(), defaultScheme.getPort(), uri.getPath(), null, null);
            } catch (URISyntaxException e) {
                try {
                    if (defaultScheme.getScheme().equals("file")) {
                        uri = new URI("file", null, new Path(new File(uri.getPath()).getAbsolutePath()).toUri().getPath(), null);
                    }
                } catch (URISyntaxException ex) {
                    // we tried to repair it, but could not. report the scheme error
                    throw new IOException("The URI '" + uri.toString() + "' is not valid.");
                }
            }
        }
        if (uri.getScheme() == null) {
            throw new IOException("The URI '" + uri + "' is invalid.\n" + "The fs.default-scheme = " + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + uri + ".");
        }
        if (uri.getScheme().equals("file") && uri.getAuthority() != null && !uri.getAuthority().isEmpty()) {
            String supposedUri = "file:///" + uri.getAuthority() + uri.getPath();
            throw new IOException("Found local file path with authority '" + uri.getAuthority() + "' in path '" + uri.toString() + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
        }
        final FSKey key = new FSKey(uri.getScheme(), uri.getAuthority());
        // See if there is a file system object in the cache
        if (CACHE.containsKey(key)) {
            return CACHE.get(key);
        }
        if (!isFlinkSupportedScheme(uri.getScheme())) {
            // no built-in support for this file system; falling back to Hadoop's FileSystem impl.
            Class<?> wrapperClass = getHadoopWrapperClassNameForFileSystem(uri.getScheme());
            if (wrapperClass != null) {
                // Hadoop has support for this file system
                FSKey wrappedKey = new FSKey(HADOOP_WRAPPER_SCHEME + "+" + uri.getScheme(), uri.getAuthority());
                if (CACHE.containsKey(wrappedKey)) {
                    return CACHE.get(wrappedKey);
                }
                // cache didn't contain the file system. instantiate it:
                // by now we know that the HadoopFileSystem wrapper can wrap the file system.
                fs = instantiateHadoopFileSystemWrapper(wrapperClass);
                fs.initialize(uri);
                CACHE.put(wrappedKey, fs);
            } else {
                // we cannot read from this file system.
                throw new IOException("No file system found with scheme " + uri.getScheme() + ", referenced in file URI '" + uri.toString() + "'.");
            }
        } else {
            // we end up here if we have a file system with built-in Flink support.
            String fsClass = FSDIRECTORY.get(uri.getScheme());
            if (fsClass.equals(HADOOP_WRAPPER_FILESYSTEM_CLASS)) {
                fs = instantiateHadoopFileSystemWrapper(null);
            } else {
                fs = instantiateFileSystem(fsClass);
            }
            // Initialize new file system object
            fs.initialize(uri);
            // Add new file system object to cache
            CACHE.put(key, fs);
        }
    }
    return fs;
}
Also used: LocalFileSystem (org.apache.flink.core.fs.local.LocalFileSystem), URISyntaxException (java.net.URISyntaxException), IOException (java.io.IOException), URI (java.net.URI), File (java.io.File), Internal (org.apache.flink.annotation.Internal)
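
A small, hedged usage sketch (the path and class name are made up). Application code would normally go through the guarded FileSystem.get(URI); getUnguardedFileSystem(URI) is the @Internal entry point quoted above.

import java.io.IOException;
import java.net.URI;

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class FsLookupSketch {
    public static void main(String[] args) throws IOException {
        // fully qualified URI, so none of the default-scheme repair logic runs
        URI uri = URI.create("file:///tmp/flink-demo");
        FileSystem fs = FileSystem.getUnguardedFileSystem(uri);
        // a second lookup with the same scheme/authority is served from CACHE
        System.out.println(fs.exists(new Path(uri)));
    }
}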

Example 4 with Internal

Use of org.apache.flink.annotation.Internal in project flink by apache.

The class ExecutionEnvironment, method createProgramPlan.

/**
	 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
	 * and operations and how they interact, as an isolated unit that can be executed with a
	 * {@link org.apache.flink.api.common.PlanExecutor}. Obtaining a plan and starting it with an
	 * executor is an alternative way to run a program and is only possible if the program consists
	 * only of distributed operations.
	 *
	 * @param jobName The name attached to the plan (displayed in logs and monitoring).
	 * @param clearSinks Whether or not to start a new stage of execution.
	 * @return The program's plan.
	 */
@Internal
public Plan createProgramPlan(String jobName, boolean clearSinks) {
    if (this.sinks.isEmpty()) {
        if (wasExecuted) {
            throw new RuntimeException("No new data sinks have been defined since the " + "last execution. The last execution refers to the latest call to " + "'execute()', 'count()', 'collect()', or 'print()'.");
        } else {
            throw new RuntimeException("No data sinks have been created yet. " + "A program needs at least one sink that consumes data. " + "Examples are writing the data set or printing it.");
        }
    }
    if (jobName == null) {
        jobName = getDefaultName();
    }
    OperatorTranslation translator = new OperatorTranslation();
    Plan plan = translator.translateToPlan(this.sinks, jobName);
    if (getParallelism() > 0) {
        plan.setDefaultParallelism(getParallelism());
    }
    plan.setExecutionConfig(getConfig());
    // Check the plan for GenericTypeInfos and register the types with the serializers.
    if (!config.isAutoTypeRegistrationDisabled()) {
        plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() {

            private final HashSet<Class<?>> deduplicator = new HashSet<>();

            @Override
            public boolean preVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {
                OperatorInformation<?> opInfo = visitable.getOperatorInfo();
                Serializers.recursivelyRegisterType(opInfo.getOutputType(), config, deduplicator);
                return true;
            }

            @Override
            public void postVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {
            }
        });
    }
    try {
        registerCachedFilesWithPlan(plan);
    } catch (Exception e) {
        throw new RuntimeException("Error while registering cached files: " + e.getMessage(), e);
    }
    // clear all the sinks such that the next execution does not redo everything
    if (clearSinks) {
        this.sinks.clear();
        wasExecuted = true;
    }
    // All types are registered now. Print information.
    int registeredTypes = config.getRegisteredKryoTypes().size() + config.getRegisteredPojoTypes().size() + config.getRegisteredTypesWithKryoSerializerClasses().size() + config.getRegisteredTypesWithKryoSerializers().size();
    int defaultKryoSerializers = config.getDefaultKryoSerializers().size() + config.getDefaultKryoSerializerClasses().size();
    LOG.info("The job has {} registered types and {} default Kryo serializers", registeredTypes, defaultKryoSerializers);
    if (config.isForceKryoEnabled() && config.isForceAvroEnabled()) {
        LOG.warn("In the ExecutionConfig, both Avro and Kryo are enforced. Using Kryo serializer");
    }
    if (config.isForceKryoEnabled()) {
        LOG.info("Using KryoSerializer for serializing POJOs");
    }
    if (config.isForceAvroEnabled()) {
        LOG.info("Using AvroSerializer for serializing POJOs");
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Registered Kryo types: {}", config.getRegisteredKryoTypes().toString());
        LOG.debug("Registered Kryo with Serializers types: {}", config.getRegisteredTypesWithKryoSerializers().entrySet().toString());
        LOG.debug("Registered Kryo with Serializer Classes types: {}", config.getRegisteredTypesWithKryoSerializerClasses().entrySet().toString());
        LOG.debug("Registered Kryo default Serializers: {}", config.getDefaultKryoSerializers().entrySet().toString());
        LOG.debug("Registered Kryo default Serializers Classes {}", config.getDefaultKryoSerializerClasses().entrySet().toString());
        LOG.debug("Registered POJO types: {}", config.getRegisteredPojoTypes().toString());
        // print information about static code analysis
        LOG.debug("Static code analysis mode: {}", config.getCodeAnalysisMode());
    }
    return plan;
}
Also used: Operator (org.apache.flink.api.java.operators.Operator), OperatorInformation (org.apache.flink.api.common.operators.OperatorInformation), Plan (org.apache.flink.api.common.Plan), InvalidProgramException (org.apache.flink.api.common.InvalidProgramException), IOException (java.io.IOException), OperatorTranslation (org.apache.flink.api.java.operators.OperatorTranslation), HashSet (java.util.HashSet), Internal (org.apache.flink.annotation.Internal)
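
A hedged sketch of obtaining the Plan without running the job (class name and job name are invented). The DiscardingOutputFormat sink exists only to satisfy the at-least-one-sink check at the top of the method.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class PlanSketch {
    public static void main(String[] args) {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // register a sink; without one, createProgramPlan throws (see above)
        env.fromElements(1, 2, 3).output(new DiscardingOutputFormat<Integer>());
        // clearSinks = false keeps the sinks registered for a later execute()
        Plan plan = env.createProgramPlan("plan-sketch", false);
        System.out.println(plan.getJobName());
    }
}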

Example 5 with Internal

Use of org.apache.flink.annotation.Internal in project flink by apache.

The class AggregateOperator, method translateToDataFlow.

@SuppressWarnings("unchecked")
@Override
@Internal
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
    // sanity check
    if (this.aggregationFunctions.isEmpty() || this.aggregationFunctions.size() != this.fields.size()) {
        throw new IllegalStateException();
    }
    // construct the aggregation function
    AggregationFunction<Object>[] aggFunctions = new AggregationFunction[this.aggregationFunctions.size()];
    int[] fields = new int[this.fields.size()];
    StringBuilder genName = new StringBuilder();
    for (int i = 0; i < fields.length; i++) {
        aggFunctions[i] = (AggregationFunction<Object>) this.aggregationFunctions.get(i);
        fields[i] = this.fields.get(i);
        genName.append(aggFunctions[i].toString()).append('(').append(fields[i]).append(')').append(',');
    }
    genName.append(" at ").append(aggregateLocationName);
    genName.setLength(genName.length() - 1);
    @SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> function = new AggregatingUdf(aggFunctions, fields);
    String name = getName() != null ? getName() : genName.toString();
    // distinguish between grouped reduce and non-grouped reduce
    if (this.grouping == null) {
        // non-grouped aggregation
        UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, new int[0], name);
        po.setCombinable(true);
        // set input
        po.setInput(input);
        // set parallelism
        po.setParallelism(this.getParallelism());
        return po;
    }
    if (this.grouping.getKeys() instanceof Keys.ExpressionKeys) {
        // grouped aggregation
        int[] logicalKeyPositions = this.grouping.getKeys().computeLogicalKeyPositions();
        UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, logicalKeyPositions, name);
        po.setCombinable(true);
        po.setInput(input);
        po.setParallelism(this.getParallelism());
        po.setCustomPartitioner(grouping.getCustomPartitioner());
        SingleInputSemanticProperties props = new SingleInputSemanticProperties();
        for (int keyField : logicalKeyPositions) {
            boolean keyFieldUsedInAgg = false;
            for (int aggField : fields) {
                if (keyField == aggField) {
                    keyFieldUsedInAgg = true;
                    break;
                }
            }
            if (!keyFieldUsedInAgg) {
                props.addForwardedField(keyField, keyField);
            }
        }
        po.setSemanticProperties(props);
        return po;
    } else if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
        throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used: GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction), RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction), AggregationFunction (org.apache.flink.api.java.aggregation.AggregationFunction), UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation), Keys (org.apache.flink.api.common.operators.Keys), GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase), SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties), Internal (org.apache.flink.annotation.Internal)
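
For orientation, a hedged sketch of the user-facing aggregation that this translation compiles (class name and data are invented). The groupBy call below exercises the grouped-aggregation branch, where the key field (position 0) is forwarded via the SingleInputSemanticProperties built above.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple3;

import static org.apache.flink.api.java.aggregation.Aggregations.MIN;
import static org.apache.flink.api.java.aggregation.Aggregations.SUM;

public class AggregateSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple3<String, Integer, Integer>> ds = env.fromElements(
                Tuple3.of("a", 1, 7), Tuple3.of("a", 2, 5), Tuple3.of("b", 3, 9));
        ds.groupBy(0)        // grouped path: ExpressionKeys on field 0
          .aggregate(SUM, 1) // sum field 1 within each group
          .and(MIN, 2)       // chained second aggregation in the same reduce
          .print();          // expected output: (a,3,5) and (b,3,9)
    }
}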

Aggregations

Internal (org.apache.flink.annotation.Internal): 12 usages
SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys): 4 usages
SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties): 4 usages
BasicArrayTypeInfo (org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo): 3 usages
BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo): 3 usages
PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo): 3 usages
IOException (java.io.IOException): 2 usages
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 2 usages
RichFunction (org.apache.flink.api.common.functions.RichFunction): 2 usages
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 2 usages
CompositeType (org.apache.flink.api.common.typeutils.CompositeType): 2 usages
TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer): 2 usages
TupleTypeInfoBase (org.apache.flink.api.java.typeutils.TupleTypeInfoBase): 2 usages
InternalIterableProcessWindowFunction (org.apache.flink.streaming.runtime.operators.windowing.functions.InternalIterableProcessWindowFunction): 2 usages
InternalSingleValueProcessWindowFunction (org.apache.flink.streaming.runtime.operators.windowing.functions.InternalSingleValueProcessWindowFunction): 2 usages
StreamElementSerializer (org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer): 2 usages
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord): 2 usages
File (java.io.File): 1 usage
URI (java.net.URI): 1 usage
URISyntaxException (java.net.URISyntaxException): 1 usage