use of org.apache.flink.runtime.checkpoint.OperatorState in project flink by apache.
the class SavepointReader method readUnionState.
/**
 * Reads an operator's {@code UnionState} out of a {@code Savepoint} for the case where the state
 * was written with a custom serializer, i.e. a serializer other than the one {@code
 * TypeInformation#createSerializer} would return.
 *
 * @param uid The uid of the operator whose state is read.
 * @param name The (unique) name under which the state was registered.
 * @param typeInfo The type of the elements held in the state.
 * @param serializer The serializer that was used to write the elements into state.
 * @param <T> The type of the values contained in the union state.
 * @return A {@code DataStream} over the elements found in the state.
 * @throws IOException If the savepoint path is invalid or the uid does not exist.
 */
public <T> DataStream<T> readUnionState(String uid, String name, TypeInformation<T> typeInfo, TypeSerializer<T> serializer) throws IOException {
    // Resolve the operator first so an unknown uid surfaces before anything else is built.
    final OperatorState stateOfOperator = metadata.getOperatorState(uid);

    // The descriptor carries the caller-supplied serializer rather than one derived from typeInfo.
    final ListStateDescriptor<T> unionDescriptor = new ListStateDescriptor<>(name, serializer);

    return SourceBuilder.fromFormat(
            env,
            new UnionStateInputFormat<T>(
                    stateOfOperator,
                    MutableConfig.of(env.getConfiguration()),
                    stateBackend,
                    unionDescriptor),
            typeInfo);
}
use of org.apache.flink.runtime.checkpoint.OperatorState in project flink by apache.
the class SavepointReader method readBroadcastState.
/**
 * Reads an operator's {@code BroadcastState} out of a {@code Savepoint} and exposes every entry
 * as a key-value tuple.
 *
 * @param uid The uid of the operator whose state is read.
 * @param name The (unique) name under which the state was registered.
 * @param keyTypeInfo The type information describing the keys in the state.
 * @param valueTypeInfo The type information describing the values in the state.
 * @param <K> The type of the keys in the state.
 * @param <V> The type of the values in the state.
 * @return A {@code DataStream} of key-value pairs taken from the state.
 * @throws IOException If the savepoint does not contain the specified uid.
 */
public <K, V> DataStream<Tuple2<K, V>> readBroadcastState(String uid, String name, TypeInformation<K> keyTypeInfo, TypeInformation<V> valueTypeInfo) throws IOException {
    final OperatorState stateOfOperator = metadata.getOperatorState(uid);

    // Broadcast state is map-shaped, hence a map descriptor built from the key/value types.
    final MapStateDescriptor<K, V> broadcastDescriptor =
            new MapStateDescriptor<>(name, keyTypeInfo, valueTypeInfo);

    final BroadcastStateInputFormat<K, V> broadcastFormat =
            new BroadcastStateInputFormat<>(
                    stateOfOperator,
                    MutableConfig.of(env.getConfiguration()),
                    stateBackend,
                    broadcastDescriptor);

    // Each emitted record is a (key, value) tuple, so the stream's type is the matching tuple type.
    final TupleTypeInfo<Tuple2<K, V>> entryTypeInfo =
            new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo);

    return SourceBuilder.fromFormat(env, broadcastFormat, entryTypeInfo);
}
use of org.apache.flink.runtime.checkpoint.OperatorState in project flink by apache.
the class Savepoint method load.
/**
 * Loads an existing savepoint so that its state can be queried, modified, or extended.
 *
 * <p>The maximum parallelism recorded for the returned savepoint is the largest max parallelism
 * found among the operator states stored in the savepoint's metadata.
 *
 * @param env The execution environment used to transform the savepoint.
 * @param path The path to an existing savepoint on disk.
 * @param stateBackend The state backend of the savepoint; must not be null.
 * @return An {@code ExistingSavepoint} backed by the loaded metadata and the given state backend.
 * @throws IOException If loading the savepoint fails.
 * @throws RuntimeException If the savepoint contains no operator state at all.
 * @see #load(ExecutionEnvironment, String)
 */
public static ExistingSavepoint load(ExecutionEnvironment env, String path, StateBackend stateBackend) throws IOException {
    // Reject a missing backend before touching the file system.
    Preconditions.checkNotNull(stateBackend, "The state backend must not be null");

    final CheckpointMetadata checkpointMetadata = SavepointLoader.loadSavepointMetadata(path);

    final int highestMaxParallelism =
            checkpointMetadata.getOperatorStates().stream()
                    .mapToInt(OperatorState::getMaxParallelism)
                    .max()
                    .orElseThrow(() -> new RuntimeException("Savepoint must contain at least one operator state."));

    return new ExistingSavepoint(
            env,
            new SavepointMetadata(
                    highestMaxParallelism,
                    checkpointMetadata.getMasterStates(),
                    checkpointMetadata.getOperatorStates()),
            stateBackend);
}
use of org.apache.flink.runtime.checkpoint.OperatorState in project flink by apache.
the class Savepoint method load.
/**
 * Loads an existing savepoint so that its state can be queried, modified, or extended. No state
 * backend is supplied here; the savepoint will be read using the state backend defined via the
 * cluster's configuration.
 *
 * <p>The maximum parallelism recorded for the returned savepoint is the largest max parallelism
 * found among the operator states stored in the savepoint's metadata.
 *
 * @param env The execution environment used to transform the savepoint.
 * @param path The path to an existing savepoint on disk.
 * @return An {@code ExistingSavepoint} backed by the loaded metadata, with no explicit backend.
 * @throws IOException If loading the savepoint fails.
 * @throws RuntimeException If the savepoint contains no operator state at all.
 * @see #load(ExecutionEnvironment, String, StateBackend)
 */
public static ExistingSavepoint load(ExecutionEnvironment env, String path) throws IOException {
    final CheckpointMetadata checkpointMetadata = SavepointLoader.loadSavepointMetadata(path);

    // Track the largest max parallelism seen; the flag distinguishes "empty" from any real value.
    boolean foundOperatorState = false;
    int highestMaxParallelism = Integer.MIN_VALUE;
    for (OperatorState candidate : checkpointMetadata.getOperatorStates()) {
        final int candidateMaxParallelism = candidate.getMaxParallelism();
        if (!foundOperatorState || candidateMaxParallelism > highestMaxParallelism) {
            highestMaxParallelism = candidateMaxParallelism;
        }
        foundOperatorState = true;
    }
    if (!foundOperatorState) {
        throw new RuntimeException("Savepoint must contain at least one operator state.");
    }

    final SavepointMetadata loadedMetadata =
            new SavepointMetadata(
                    highestMaxParallelism,
                    checkpointMetadata.getMasterStates(),
                    checkpointMetadata.getOperatorStates());

    // A null backend is passed deliberately: this overload takes no backend from the caller.
    return new ExistingSavepoint(env, loadedMetadata, null);
}
use of org.apache.flink.runtime.checkpoint.OperatorState in project flink by apache.
the class WritableSavepoint method write.
/**
 * Write out a new or updated savepoint.
 *
 * <p>Operator states produced by the newly added transformations are combined with the states
 * of any operators that already exist in the metadata; the combined set is then reduced together
 * with the master states and handed to a {@code SavepointOutputFormat} targeting {@code path}.
 *
 * @param path The path to where the savepoint should be written.
 */
public final void write(String path) {
    final Path targetPath = new Path(path);

    final List<BootstrapTransformationWithID<?>> addedTransformations = metadata.getNewOperators();
    final DataSet<OperatorState> addedStates =
            writeOperatorStates(addedTransformations, configuration, targetPath);

    final List<OperatorState> retainedOperators = metadata.getExistingOperators();

    // Start from the new states only; widen to the union when there is pre-existing state.
    DataSet<OperatorState> mergedStates = addedStates;
    if (!retainedOperators.isEmpty()) {
        final DataSet<OperatorState> retainedStates =
                addedStates
                        .getExecutionEnvironment()
                        .fromCollection(retainedOperators)
                        .name("existingOperatorStates");

        // Paths extracted from the retained operator states are fed to a FileCopyFunction
        // constructed with the destination path; this sink is registered before the final one.
        retainedStates
                .flatMap(new StatePathExtractor())
                .setParallelism(1)
                .output(new FileCopyFunction(path));

        mergedStates = addedStates.union(retainedStates);
    }

    mergedStates
            .reduceGroup(new MergeOperatorStates(metadata.getMasterStates()))
            .name("reduce(OperatorState)")
            .output(new SavepointOutputFormat(targetPath))
            .name(path);
}
Aggregations