Use of org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory in the Apache Flink project.
From the class StreamGraphHasherV2, method generateDeterministicHash.
/**
 * Computes a deterministic hash for a stream node.
 *
 * <p>The hash starts from a node-local contribution (one for the node itself plus one for
 * every chainable out edge) and is then mixed with the previously computed hash of each
 * input node.
 *
 * @param node the node to compute the hash for
 * @param hasher hasher that receives the node-local contributions
 * @param hashes hashes computed so far, looked up by the source ID of each in edge
 * @param isChainingEnabled passed through to the chainability check of each out edge
 * @param streamGraph the graph used for the chainability check and for error reporting
 * @return the hash bytes of the node
 * @throws IllegalStateException if no hash has been computed yet for one of the input nodes
 */
private byte[] generateDeterministicHash(StreamNode node, Hasher hasher, Map<Integer, byte[]> hashes, boolean isChainingEnabled, StreamGraph streamGraph) {
    // The number of hashes computed so far acts as the node's identity. The node's own ID
    // is unsuitable: it comes from a static counter, so two identical programs would end
    // up with different hashes.
    final int localId = hashes.size();
    generateNodeLocalHash(hasher, localId);

    // Every chainable out edge contributes the same local ID once more; chained nodes are
    // folded into this node and do not add an entry of their own to the hash map.
    for (StreamEdge candidate : node.getOutEdges()) {
        if (!isChainable(candidate, isChainingEnabled, streamGraph)) {
            continue;
        }
        generateNodeLocalHash(hasher, localId);
    }

    final byte[] result = hasher.hash().asBytes();

    // All input nodes must already have their hash stored in the map before this method
    // is called; a missing entry is treated as an illegal state.
    for (StreamEdge incoming : node.getInEdges()) {
        final byte[] inputHash = hashes.get(incoming.getSourceId());
        if (inputHash == null) {
            throw new IllegalStateException("Missing hash for input node " + streamGraph.getSourceVertex(incoming) + ". Cannot generate hash for " + node + ".");
        }
        // Mix the input hash into the result, byte by byte.
        int i = 0;
        while (i < result.length) {
            result[i] = (byte) ((result[i] * 37) ^ inputHash[i]);
            i++;
        }
    }

    if (!LOG.isDebugEnabled()) {
        return result;
    }

    // Only operator factories wrapping a user function can report a function class name.
    final String udfClassName =
            node.getOperatorFactory() instanceof UdfStreamOperatorFactory
                    ? ((UdfStreamOperatorFactory) node.getOperatorFactory()).getUserFunctionClassName()
                    : "";
    LOG.debug("Generated hash '" + byteToHexString(result) + "' for node " + "'" + node.toString() + "' {id: " + node.getId() + ", " + "parallelism: " + node.getParallelism() + ", " + "user function: " + udfClassName + "}");
    return result;
}
Use of org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory in the Apache Flink project.
From the class StreamingJobGraphGenerator, method configureCheckpointing.
/**
 * Builds the {@code JobCheckpointingSettings} from the stream graph's checkpoint
 * configuration and registers them on the job graph as its snapshot settings.
 *
 * <p>The master-side hooks, the state backend and the checkpoint storage may carry
 * user-defined code and are therefore stored as eagerly serialized values.
 *
 * @throws IllegalStateException if externalized checkpoints are enabled but no cleanup
 *     mode is configured
 * @throws FlinkRuntimeException if the hooks, the state backend or the checkpoint storage
 *     cannot be serialized
 */
private void configureCheckpointing() {
    final CheckpointConfig cfg = streamGraph.getCheckpointConfig();

    // An interval of Long.MAX_VALUE means that periodic checkpointing is disabled.
    final long configuredInterval = cfg.getCheckpointInterval();
    final long interval = configuredInterval < MINIMAL_CHECKPOINT_TIME ? Long.MAX_VALUE : configuredInterval;

    // --- retention policy ---
    final CheckpointRetentionPolicy retentionAfterTermination;
    if (!cfg.isExternalizedCheckpointsEnabled()) {
        retentionAfterTermination = CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION;
    } else {
        final CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode = cfg.getExternalizedCheckpointCleanup();
        // Sanity check
        if (cleanupMode == null) {
            throw new IllegalStateException("Externalized checkpoints enabled, but no cleanup mode configured.");
        }
        if (cleanupMode.deleteOnCancellation()) {
            retentionAfterTermination = CheckpointRetentionPolicy.RETAIN_ON_FAILURE;
        } else {
            retentionAfterTermination = CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION;
        }
    }

    // --- master-side checkpoint hooks ---
    final ArrayList<MasterTriggerRestoreHook.Factory> hookFactories = new ArrayList<>();
    for (StreamNode streamNode : streamGraph.getStreamNodes()) {
        if (!(streamNode.getOperatorFactory() instanceof UdfStreamOperatorFactory)) {
            continue;
        }
        final Function userFunction = ((UdfStreamOperatorFactory) streamNode.getOperatorFactory()).getUserFunction();
        if (userFunction instanceof WithMasterCheckpointHook) {
            hookFactories.add(new FunctionMasterCheckpointHookFactory((WithMasterCheckpointHook<?>) userFunction));
        }
    }

    // Hooks can contain user-defined code, so they are serialized eagerly.
    final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks;
    if (!hookFactories.isEmpty()) {
        final MasterTriggerRestoreHook.Factory[] hookArray = hookFactories.toArray(new MasterTriggerRestoreHook.Factory[0]);
        try {
            serializedHooks = new SerializedValue<>(hookArray);
        } catch (IOException e) {
            throw new FlinkRuntimeException("Trigger/restore hook is not serializable", e);
        }
    } else {
        serializedHooks = null;
    }

    // The state backend can contain user-defined code, so it is serialized eagerly.
    final SerializedValue<StateBackend> serializedStateBackend;
    if (streamGraph.getStateBackend() != null) {
        try {
            serializedStateBackend = new SerializedValue<StateBackend>(streamGraph.getStateBackend());
        } catch (IOException e) {
            throw new FlinkRuntimeException("State backend is not serializable", e);
        }
    } else {
        serializedStateBackend = null;
    }

    // The checkpoint storage can contain user-defined code, so it is serialized eagerly.
    final SerializedValue<CheckpointStorage> serializedCheckpointStorage;
    if (streamGraph.getCheckpointStorage() != null) {
        try {
            serializedCheckpointStorage = new SerializedValue<>(streamGraph.getCheckpointStorage());
        } catch (IOException e) {
            throw new FlinkRuntimeException("Checkpoint storage is not serializable", e);
        }
    } else {
        serializedCheckpointStorage = null;
    }

    // --- assemble the coordinator configuration ---
    final CheckpointCoordinatorConfiguration coordinatorConfiguration =
            CheckpointCoordinatorConfiguration.builder()
                    .setCheckpointInterval(interval)
                    .setCheckpointTimeout(cfg.getCheckpointTimeout())
                    .setMinPauseBetweenCheckpoints(cfg.getMinPauseBetweenCheckpoints())
                    .setMaxConcurrentCheckpoints(cfg.getMaxConcurrentCheckpoints())
                    .setCheckpointRetentionPolicy(retentionAfterTermination)
                    .setExactlyOnce(getCheckpointingMode(cfg) == CheckpointingMode.EXACTLY_ONCE)
                    .setTolerableCheckpointFailureNumber(cfg.getTolerableCheckpointFailureNumber())
                    .setUnalignedCheckpointsEnabled(cfg.isUnalignedCheckpointsEnabled())
                    .setCheckpointIdOfIgnoredInFlightData(cfg.getCheckpointIdOfIgnoredInFlightData())
                    .setAlignedCheckpointTimeout(cfg.getAlignedCheckpointTimeout().toMillis())
                    .setEnableCheckpointsAfterTasksFinish(streamGraph.isEnableCheckpointsAfterTasksFinish())
                    .build();

    // --- done, hand the settings to the job graph ---
    jobGraph.setSnapshotSettings(
            new JobCheckpointingSettings(
                    coordinatorConfiguration,
                    serializedStateBackend,
                    streamGraph.isChangelogStateBackendEnabled(),
                    serializedCheckpointStorage,
                    serializedHooks));
}
Aggregations