Search in sources :

Example 1 with UdfStreamOperatorFactory

use of org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory in project flink by apache.

In class StreamGraphHasherV2, method generateDeterministicHash.

/**
 * Computes the deterministic hash of a single stream node.
 *
 * <p>The hash is seeded from node-local properties fed into {@code hasher} and is then
 * combined, byte by byte, with the already-computed hash of every input node.
 *
 * @param node the node whose hash is generated
 * @param hasher the hasher that receives the node-local properties
 * @param hashes the hashes computed so far, looked up by the source ID of each input edge
 * @param isChainingEnabled forwarded to the chainability check of each output edge
 * @param streamGraph the graph the node belongs to
 * @return the hash bytes for {@code node}
 * @throws IllegalStateException if an input node has no entry in {@code hashes}
 */
private byte[] generateDeterministicHash(StreamNode node, Hasher hasher, Map<Integer, byte[]> hashes, boolean isChainingEnabled, StreamGraph streamGraph) {
    // The number of hashes computed so far serves as this node's ID. The node's own ID
    // is unsuitable: it comes from a static counter, so two identical programs would
    // end up with different hashes.
    generateNodeLocalHash(hasher, hashes.size());

    // Every chainable output edge contributes the same size-based ID once more; the
    // chained nodes do not get a hash entry of their own here.
    for (StreamEdge outgoing : node.getOutEdges()) {
        if (!isChainable(outgoing, isChainingEnabled, streamGraph)) {
            continue;
        }
        generateNodeLocalHash(hasher, hashes.size());
    }

    final byte[] nodeHash = hasher.hash().asBytes();

    // The caller must have stored the hash of every input node in `hashes` before
    // calling this method; a missing entry is reported by the sanity check below.
    for (StreamEdge incoming : node.getInEdges()) {
        final byte[] upstreamHash = hashes.get(incoming.getSourceId());
        if (upstreamHash == null) {
            throw new IllegalStateException("Missing hash for input node " + streamGraph.getSourceVertex(incoming) + ". Cannot generate hash for " + node + ".");
        }
        // Fold the input's hash into this node's hash, one byte at a time.
        for (int pos = 0; pos < nodeHash.length; pos++) {
            nodeHash[pos] = (byte) (nodeHash[pos] * 37 ^ upstreamHash[pos]);
        }
    }

    if (!LOG.isDebugEnabled()) {
        return nodeHash;
    }

    // Debug output only: the user function class name is reported for UDF operators
    // and left empty for everything else.
    final String udfClassName = node.getOperatorFactory() instanceof UdfStreamOperatorFactory
            ? ((UdfStreamOperatorFactory) node.getOperatorFactory()).getUserFunctionClassName()
            : "";
    LOG.debug("Generated hash '" + byteToHexString(nodeHash) + "' for node '" + node.toString() + "' {id: " + node.getId() + ", parallelism: " + node.getParallelism() + ", user function: " + udfClassName + "}");
    return nodeHash;
}
Also used : UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) StringUtils.byteToHexString(org.apache.flink.util.StringUtils.byteToHexString)

Example 2 with UdfStreamOperatorFactory

use of org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory in project flink by apache.

In class StreamingJobGraphGenerator, method configureCheckpointing.

/**
 * Builds the {@link JobCheckpointingSettings} from the stream graph's checkpoint
 * configuration and registers them on the job graph as its snapshot settings.
 *
 * @throws IllegalStateException if externalized checkpoints are enabled but no cleanup
 *     mode is configured
 * @throws FlinkRuntimeException if the master hooks, the state backend, or the checkpoint
 *     storage cannot be serialized
 */
private void configureCheckpointing() {
    final CheckpointConfig cfg = streamGraph.getCheckpointConfig();

    // An interval below the minimum is replaced by Long.MAX_VALUE, which stands for
    // "periodic checkpointing disabled".
    final long configuredInterval = cfg.getCheckpointInterval();
    final long interval = configuredInterval < MINIMAL_CHECKPOINT_TIME ? Long.MAX_VALUE : configuredInterval;

    // --- retention policy ---
    final CheckpointRetentionPolicy retentionAfterTermination;
    if (!cfg.isExternalizedCheckpointsEnabled()) {
        retentionAfterTermination = CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION;
    } else {
        final CheckpointConfig.ExternalizedCheckpointCleanup cleanup = cfg.getExternalizedCheckpointCleanup();
        // Sanity check: externalized checkpoints require a cleanup mode.
        if (cleanup == null) {
            throw new IllegalStateException("Externalized checkpoints enabled, but no cleanup mode configured.");
        }
        if (cleanup.deleteOnCancellation()) {
            retentionAfterTermination = CheckpointRetentionPolicy.RETAIN_ON_FAILURE;
        } else {
            retentionAfterTermination = CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION;
        }
    }

    // --- master-side checkpoint hooks ---
    // Collect a hook factory for every user function that implements WithMasterCheckpointHook.
    final ArrayList<MasterTriggerRestoreHook.Factory> hooks = new ArrayList<>();
    for (StreamNode node : streamGraph.getStreamNodes()) {
        if (!(node.getOperatorFactory() instanceof UdfStreamOperatorFactory)) {
            continue;
        }
        final Function userFunction = ((UdfStreamOperatorFactory) node.getOperatorFactory()).getUserFunction();
        if (userFunction instanceof WithMasterCheckpointHook) {
            hooks.add(new FunctionMasterCheckpointHookFactory((WithMasterCheckpointHook<?>) userFunction));
        }
    }

    // Hooks may contain user-defined code, so they are stored as an eagerly
    // serialized value (or null when there are none).
    final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks;
    if (!hooks.isEmpty()) {
        try {
            final MasterTriggerRestoreHook.Factory[] hookArray = hooks.toArray(new MasterTriggerRestoreHook.Factory[0]);
            serializedHooks = new SerializedValue<>(hookArray);
        } catch (IOException e) {
            throw new FlinkRuntimeException("Trigger/restore hook is not serializable", e);
        }
    } else {
        serializedHooks = null;
    }

    // The state backend may contain user-defined code as well, so it is also
    // serialized eagerly (or left null when none is set).
    final SerializedValue<StateBackend> serializedStateBackend;
    if (streamGraph.getStateBackend() != null) {
        try {
            serializedStateBackend = new SerializedValue<StateBackend>(streamGraph.getStateBackend());
        } catch (IOException e) {
            throw new FlinkRuntimeException("State backend is not serializable", e);
        }
    } else {
        serializedStateBackend = null;
    }

    // Same treatment for the checkpoint storage.
    final SerializedValue<CheckpointStorage> serializedCheckpointStorage;
    if (streamGraph.getCheckpointStorage() != null) {
        try {
            serializedCheckpointStorage = new SerializedValue<>(streamGraph.getCheckpointStorage());
        } catch (IOException e) {
            throw new FlinkRuntimeException("Checkpoint storage is not serializable", e);
        }
    } else {
        serializedCheckpointStorage = null;
    }

    // --- assemble the coordinator configuration and the final settings ---
    final CheckpointCoordinatorConfiguration coordinatorConfiguration =
            CheckpointCoordinatorConfiguration.builder()
                    .setCheckpointInterval(interval)
                    .setCheckpointTimeout(cfg.getCheckpointTimeout())
                    .setMinPauseBetweenCheckpoints(cfg.getMinPauseBetweenCheckpoints())
                    .setMaxConcurrentCheckpoints(cfg.getMaxConcurrentCheckpoints())
                    .setCheckpointRetentionPolicy(retentionAfterTermination)
                    .setExactlyOnce(getCheckpointingMode(cfg) == CheckpointingMode.EXACTLY_ONCE)
                    .setTolerableCheckpointFailureNumber(cfg.getTolerableCheckpointFailureNumber())
                    .setUnalignedCheckpointsEnabled(cfg.isUnalignedCheckpointsEnabled())
                    .setCheckpointIdOfIgnoredInFlightData(cfg.getCheckpointIdOfIgnoredInFlightData())
                    .setAlignedCheckpointTimeout(cfg.getAlignedCheckpointTimeout().toMillis())
                    .setEnableCheckpointsAfterTasksFinish(streamGraph.isEnableCheckpointsAfterTasksFinish())
                    .build();
    jobGraph.setSnapshotSettings(
            new JobCheckpointingSettings(
                    coordinatorConfiguration,
                    serializedStateBackend,
                    streamGraph.isChangelogStateBackendEnabled(),
                    serializedCheckpointStorage,
                    serializedHooks));
}
Also used : UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) WithMasterCheckpointHook(org.apache.flink.streaming.api.checkpoint.WithMasterCheckpointHook) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ArrayList(java.util.ArrayList) YieldingOperatorFactory(org.apache.flink.streaming.api.operators.YieldingOperatorFactory) LoggerFactory(org.slf4j.LoggerFactory) UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) IOException(java.io.IOException) MasterTriggerRestoreHook(org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook) StateBackend(org.apache.flink.runtime.state.StateBackend) Function(org.apache.flink.api.common.functions.Function) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy)

Aggregations

UdfStreamOperatorFactory (org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Function (org.apache.flink.api.common.functions.Function)1 CheckpointRetentionPolicy (org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy)1 MasterTriggerRestoreHook (org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook)1 JobCheckpointingSettings (org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings)1 CheckpointStorage (org.apache.flink.runtime.state.CheckpointStorage)1 StateBackend (org.apache.flink.runtime.state.StateBackend)1 WithMasterCheckpointHook (org.apache.flink.streaming.api.checkpoint.WithMasterCheckpointHook)1 CheckpointConfig (org.apache.flink.streaming.api.environment.CheckpointConfig)1 SourceOperatorFactory (org.apache.flink.streaming.api.operators.SourceOperatorFactory)1 StreamOperatorFactory (org.apache.flink.streaming.api.operators.StreamOperatorFactory)1 YieldingOperatorFactory (org.apache.flink.streaming.api.operators.YieldingOperatorFactory)1 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)1 StringUtils.byteToHexString (org.apache.flink.util.StringUtils.byteToHexString)1 LoggerFactory (org.slf4j.LoggerFactory)1