use of org.apache.flink.annotation.Internal in project flink by apache.
the class FileSystem, method getUnguardedFileSystem.
@Internal
public static FileSystem getUnguardedFileSystem(URI uri) throws IOException {
    FileSystem fs;
    URI asked = uri;
    synchronized (SYNCHRONIZATION_OBJECT) {
        if (uri.getScheme() == null) {
            try {
                if (defaultScheme == null) {
                    defaultScheme = new URI(ConfigConstants.DEFAULT_FILESYSTEM_SCHEME);
                }
                uri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(), defaultScheme.getPort(), uri.getPath(), null, null);
            } catch (URISyntaxException e) {
                try {
                    if (defaultScheme.getScheme().equals("file")) {
                        uri = new URI("file", null, new Path(new File(uri.getPath()).getAbsolutePath()).toUri().getPath(), null);
                    }
                } catch (URISyntaxException ex) {
                    // we tried to repair the URI but could not; report the scheme error
                    throw new IOException("The URI '" + uri.toString() + "' is not valid.");
                }
            }
        }
        if (uri.getScheme() == null) {
            throw new IOException("The URI '" + uri + "' is invalid.\n" + "The fs.default-scheme = " + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + uri + ".");
        }
        if (uri.getScheme().equals("file") && uri.getAuthority() != null && !uri.getAuthority().isEmpty()) {
            String supposedUri = "file:///" + uri.getAuthority() + uri.getPath();
            throw new IOException("Found local file path with authority '" + uri.getAuthority() + "' in path '" + uri.toString() + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
        }
        final FSKey key = new FSKey(uri.getScheme(), uri.getAuthority());
        // see if there is a file system object in the cache
        if (CACHE.containsKey(key)) {
            return CACHE.get(key);
        }
        if (!isFlinkSupportedScheme(uri.getScheme())) {
            // no built-in support for this file system; fall back to Hadoop's FileSystem impl
            Class<?> wrapperClass = getHadoopWrapperClassNameForFileSystem(uri.getScheme());
            if (wrapperClass != null) {
                // Hadoop has support for this file system
                FSKey wrappedKey = new FSKey(HADOOP_WRAPPER_SCHEME + "+" + uri.getScheme(), uri.getAuthority());
                if (CACHE.containsKey(wrappedKey)) {
                    return CACHE.get(wrappedKey);
                }
                // the cache did not contain the file system, so instantiate it;
                // by now we know that the HadoopFileSystem wrapper can wrap the file system
                fs = instantiateHadoopFileSystemWrapper(wrapperClass);
                fs.initialize(uri);
                CACHE.put(wrappedKey, fs);
            } else {
                // we cannot read from this file system
                throw new IOException("No file system found with scheme " + uri.getScheme() + ", referenced in file URI '" + uri.toString() + "'.");
            }
        } else {
            // we end up here for file systems with built-in Flink support
            String fsClass = FSDIRECTORY.get(uri.getScheme());
            if (fsClass.equals(HADOOP_WRAPPER_FILESYSTEM_CLASS)) {
                fs = instantiateHadoopFileSystemWrapper(null);
            } else {
                fs = instantiateFileSystem(fsClass);
            }
            // initialize the new file system object
            fs.initialize(uri);
            // add the new file system object to the cache
            CACHE.put(key, fs);
        }
    }
    return fs;
}
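For orientation, a minimal caller sketch. It is not part of the Flink sources; the class name and the file:///tmp URI are invented for illustration, and it only assumes flink-core on the classpath.

import org.apache.flink.core.fs.FileSystem;

import java.net.URI;

// Hypothetical caller of the internal lookup shown above.
public class UnguardedFileSystemExample {
    public static void main(String[] args) throws Exception {
        // Unlike FileSystem.get(), the "unguarded" variant returns the cached
        // instance directly, without wrapping it in a closing safety net.
        FileSystem fs = FileSystem.getUnguardedFileSystem(URI.create("file:///tmp"));
        System.out.println(fs.getClass().getName());
    }
}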
use of org.apache.flink.annotation.Internal in project flink by apache.
the class WindowedStream, method fold.
/**
 * Applies the given window function to each window. The window function is called for each
 * evaluation of the window for each key individually. The output of the window function is
 * interpreted as a regular non-windowed stream.
 *
 * <p>Arriving data is incrementally aggregated using the given fold function.
 *
 * @param initialValue The initial value to be passed to the first invocation of the fold function.
 * @param foldFunction The fold function.
 * @param windowFunction The process window function.
 * @param foldResultType The result type of the fold function.
 * @param windowResultType The result type of the process window function.
 * @return The data stream that is the result of applying the fold function to the window.
 */
@Internal
public <R, ACC> SingleOutputStreamOperator<R> fold(ACC initialValue, FoldFunction<T, ACC> foldFunction, ProcessWindowFunction<ACC, R, K, W> windowFunction, TypeInformation<ACC> foldResultType, TypeInformation<R> windowResultType) {
    if (foldFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("FoldFunction can not be a RichFunction.");
    }
    if (windowAssigner instanceof MergingWindowAssigner) {
        throw new UnsupportedOperationException("Fold cannot be used with a merging WindowAssigner.");
    }
    // clean the closures
    windowFunction = input.getExecutionEnvironment().clean(windowFunction);
    foldFunction = input.getExecutionEnvironment().clean(foldFunction);
    String callLocation = Utils.getCallLocationName();
    String udfName = "WindowedStream." + callLocation;
    String opName;
    KeySelector<T, K> keySel = input.getKeySelector();
    OneInputStreamOperator<T, R> operator;
    if (evictor != null) {
        @SuppressWarnings({ "unchecked", "rawtypes" })
        TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
        operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableProcessWindowFunction<>(new FoldApplyProcessWindowFunction<>(initialValue, foldFunction, windowFunction, foldResultType)), trigger, evictor, allowedLateness, lateDataOutputTag);
    } else {
        FoldingStateDescriptor<T, ACC> stateDesc = new FoldingStateDescriptor<>("window-contents", initialValue, foldFunction, foldResultType.createSerializer(getExecutionEnvironment().getConfig()));
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
        operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueProcessWindowFunction<>(windowFunction), trigger, allowedLateness, lateDataOutputTag);
    }
    return input.transform(opName, windowResultType, operator);
}
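This internal overload is reached through the public fold variants. A minimal, hypothetical sketch of one of them, assuming a Flink 1.x flink-streaming-java dependency (fold is deprecated in later releases in favor of aggregate); all names and data are invented:

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

public class WindowFoldExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements("a", "b", "a")
            .keyBy(new KeySelector<String, String>() {
                @Override
                public String getKey(String value) {
                    return value;
                }
            })
            .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
            // counts elements per key and window; the public overloads funnel
            // into internal ones like the method shown above
            .fold(0, new FoldFunction<String, Integer>() {
                @Override
                public Integer fold(Integer accumulator, String value) {
                    return accumulator + 1;
                }
            })
            .print();
        env.execute("window fold example");
    }
}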
use of org.apache.flink.annotation.Internal in project flink by apache.
the class CoGroupOperator, method translateToDataFlow.
@Override
@Internal
protected org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> translateToDataFlow(Operator<I1> input1, Operator<I2> input2) {
    String name = getName() != null ? getName() : "CoGroup at " + defaultName;
    try {
        keys1.areCompatible(keys2);
    } catch (IncompatibleKeysException e) {
        throw new InvalidProgramException("The types of the key fields do not match.", e);
    }
    final org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> po;
    if (keys1 instanceof SelectorFunctionKeys && keys2 instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked")
        SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;
        @SuppressWarnings("unchecked")
        SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;
        po = translateSelectorFunctionCoGroup(selectorKeys1, selectorKeys2, function, getResultType(), name, input1, input2);
        po.setParallelism(getParallelism());
        po.setCustomPartitioner(customPartitioner);
    } else if (keys2 instanceof SelectorFunctionKeys) {
        int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
        @SuppressWarnings("unchecked")
        SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;
        po = translateSelectorFunctionCoGroupRight(logicalKeyPositions1, selectorKeys2, function, getInput1Type(), getResultType(), name, input1, input2);
        po.setParallelism(getParallelism());
        po.setCustomPartitioner(customPartitioner);
    } else if (keys1 instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked")
        SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;
        int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
        po = translateSelectorFunctionCoGroupLeft(selectorKeys1, logicalKeyPositions2, function, getInput2Type(), getResultType(), name, input1, input2);
    } else if (keys1 instanceof Keys.ExpressionKeys && keys2 instanceof Keys.ExpressionKeys) {
        try {
            keys1.areCompatible(keys2);
        } catch (IncompatibleKeysException e) {
            throw new InvalidProgramException("The types of the key fields do not match.", e);
        }
        int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
        int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
        CoGroupOperatorBase<I1, I2, OUT, CoGroupFunction<I1, I2, OUT>> op = new CoGroupOperatorBase<>(function, new BinaryOperatorInformation<>(getInput1Type(), getInput2Type(), getResultType()), logicalKeyPositions1, logicalKeyPositions2, name);
        op.setFirstInput(input1);
        op.setSecondInput(input2);
        po = op;
    } else {
        throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
    }
    // configure shared characteristics
    po.setParallelism(getParallelism());
    po.setCustomPartitioner(customPartitioner);
    if (groupSortKeyOrderFirst.size() > 0) {
        Ordering o = new Ordering();
        for (Pair<Integer, Order> entry : groupSortKeyOrderFirst) {
            o.appendOrdering(entry.getLeft(), null, entry.getRight());
        }
        po.setGroupOrderForInputOne(o);
    }
    if (groupSortKeyOrderSecond.size() > 0) {
        Ordering o = new Ordering();
        for (Pair<Integer, Order> entry : groupSortKeyOrderSecond) {
            o.appendOrdering(entry.getLeft(), null, entry.getRight());
        }
        po.setGroupOrderForInputTwo(o);
    }
    return po;
}
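A minimal DataSet sketch of the public coGroup API whose plan translation is shown above; the data and class name are invented, and it assumes a flink-java (DataSet API) dependency:

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class CoGroupExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Integer>> left = env.fromElements(Tuple2.of("a", 1), Tuple2.of("b", 2));
        DataSet<Tuple2<String, Integer>> right = env.fromElements(Tuple2.of("a", 10));
        left.coGroup(right)
            // position keys on both sides, i.e. the ExpressionKeys branch above
            .where(0)
            .equalTo(0)
            .with(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
                @Override
                public void coGroup(Iterable<Tuple2<String, Integer>> first, Iterable<Tuple2<String, Integer>> second, Collector<String> out) {
                    // count how many records each side contributes per key
                    int leftCount = 0;
                    int rightCount = 0;
                    for (Tuple2<String, Integer> ignored : first) {
                        leftCount++;
                    }
                    for (Tuple2<String, Integer> ignored : second) {
                        rightCount++;
                    }
                    out.collect(leftCount + " left / " + rightCount + " right");
                }
            })
            .print();
    }
}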
use of org.apache.flink.annotation.Internal in project flink by apache.
the class GroupCombineOperator, method getSemanticProperties.
@Override
@Internal
public SingleInputSemanticProperties getSemanticProperties() {
    SingleInputSemanticProperties props = super.getSemanticProperties();
    // offset semantic information by the extracted key fields
    if (props != null && this.grouper != null && this.grouper.keys instanceof SelectorFunctionKeys) {
        int offset = ((SelectorFunctionKeys<?, ?>) this.grouper.keys).getKeyType().getTotalFields();
        if (this.grouper instanceof SortedGrouping) {
            offset += ((SortedGrouping<?>) this.grouper).getSortSelectionFunctionKey().getKeyType().getTotalFields();
        }
        props = SemanticPropUtil.addSourceFieldOffset(props, this.getInputType().getTotalFields(), offset);
    }
    return props;
}
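A sketch of the scenario this method handles: grouping on a KeySelector makes Flink prepend the extracted key fields to each record internally, so forwarded-field information must be offset accordingly. Hypothetical names and data, assuming a flink-java dependency:

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class GroupCombineExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Integer>> data =
            env.fromElements(Tuple2.of("a", 1), Tuple2.of("a", 2), Tuple2.of("b", 3));
        data
            // SelectorFunctionKeys: the extracted key is prepended to each record,
            // which is the case getSemanticProperties() above compensates for
            .groupBy(new KeySelector<Tuple2<String, Integer>, String>() {
                @Override
                public String getKey(Tuple2<String, Integer> value) {
                    return value.f0;
                }
            })
            .combineGroup(new GroupCombineFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {
                @Override
                public void combine(Iterable<Tuple2<String, Integer>> values, Collector<Tuple2<String, Integer>> out) {
                    // partial per-key sum, emitted as a pre-aggregated record
                    String key = null;
                    int sum = 0;
                    for (Tuple2<String, Integer> t : values) {
                        key = t.f0;
                        sum += t.f1;
                    }
                    out.collect(Tuple2.of(key, sum));
                }
            })
            .print();
    }
}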
use of org.apache.flink.annotation.Internal in project flink by apache.
the class WindowedStream, method reduce.
/**
* Applies the given window function to each window. The window function is called for each
* evaluation of the window for each key individually. The output of the window function is
* interpreted as a regular non-windowed stream.
*
* <p>Arriving data is incrementally aggregated using the given reducer.
*
* @param reduceFunction The reduce function that is used for incremental aggregation.
* @param function The window function.
* @param resultType Type information for the result type of the window function.
* @return The data stream that is the result of applying the window function to the window.
*/
@Internal
public <R> SingleOutputStreamOperator<R> reduce(ReduceFunction<T> reduceFunction, ProcessWindowFunction<T, R, K, W> function, TypeInformation<R> resultType) {
    if (reduceFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("ReduceFunction of apply can not be a RichFunction.");
    }
    // clean the closures
    function = input.getExecutionEnvironment().clean(function);
    reduceFunction = input.getExecutionEnvironment().clean(reduceFunction);
    String callLocation = Utils.getCallLocationName();
    String udfName = "WindowedStream." + callLocation;
    String opName;
    KeySelector<T, K> keySel = input.getKeySelector();
    OneInputStreamOperator<T, R> operator;
    if (evictor != null) {
        @SuppressWarnings({ "unchecked", "rawtypes" })
        TypeSerializer<StreamRecord<T>> streamRecordSerializer = (TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> stateDesc = new ListStateDescriptor<>("window-contents", streamRecordSerializer);
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + evictor + ", " + udfName + ")";
        operator = new EvictingWindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalIterableProcessWindowFunction<>(new ReduceApplyProcessWindowFunction<>(reduceFunction, function)), trigger, evictor, allowedLateness, lateDataOutputTag);
    } else {
        ReducingStateDescriptor<T> stateDesc = new ReducingStateDescriptor<>("window-contents", reduceFunction, input.getType().createSerializer(getExecutionEnvironment().getConfig()));
        opName = "TriggerWindow(" + windowAssigner + ", " + stateDesc + ", " + trigger + ", " + udfName + ")";
        operator = new WindowOperator<>(windowAssigner, windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()), keySel, input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), stateDesc, new InternalSingleValueProcessWindowFunction<>(function), trigger, allowedLateness, lateDataOutputTag);
    }
    return input.transform(opName, resultType, operator);
}
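A minimal, hypothetical sketch of the public reduce overload that leads here, assuming a Flink 1.3+ flink-streaming-java dependency; names and data are invented. Because the reducer pre-aggregates incrementally, the process function sees exactly one (already reduced) element per key and window:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

public class WindowReduceExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements(Tuple2.of("a", 1), Tuple2.of("a", 2), Tuple2.of("b", 3))
            .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                @Override
                public String getKey(Tuple2<String, Integer> value) {
                    return value.f0;
                }
            })
            .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
            .reduce(
                // incremental per-key sum inside the window
                new ReduceFunction<Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
                        return Tuple2.of(a.f0, a.f1 + b.f1);
                    }
                },
                // receives only the single reduced element plus window metadata
                new ProcessWindowFunction<Tuple2<String, Integer>, String, String, TimeWindow>() {
                    @Override
                    public void process(String key, Context ctx, Iterable<Tuple2<String, Integer>> elements, Collector<String> out) {
                        Tuple2<String, Integer> sum = elements.iterator().next();
                        out.collect(key + " @ " + ctx.window() + ": " + sum.f1);
                    }
                })
            .print();
        env.execute("window reduce example");
    }
}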