
Example 1 with InitialPosition

Use of org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.InitialPosition in project flink by apache.

From class KinesisDataFetcher, method runFetcher.

/**
	 * Starts the fetcher. After starting the fetcher, it can only
	 * be stopped by calling {@link KinesisDataFetcher#shutdownFetcher()}.
	 *
	 * @throws Exception the first error or exception thrown by the fetcher or any of the threads created by the fetcher.
	 */
public void runFetcher() throws Exception {
    // check that we are running before proceeding
    if (!running) {
        return;
    }
    this.mainThread = Thread.currentThread();
    //  1. query for any new shards that may have been created while the Kinesis consumer was not running,
    //     and register them to the subscribedShardState list.
    if (LOG.isDebugEnabled()) {
        String logFormat = (!isRestoredFromFailure) ? "Subtask {} is trying to discover initial shards ..." : "Subtask {} is trying to discover any new shards that were created while the consumer wasn't " + "running due to failure ...";
        LOG.debug(logFormat, indexOfThisConsumerSubtask);
    }
    List<KinesisStreamShard> newShardsCreatedWhileNotRunning = discoverNewShardsToSubscribe();
    for (KinesisStreamShard shard : newShardsCreatedWhileNotRunning) {
        // the starting state for new shards created while the consumer wasn't running depends on whether or not
        // we are starting fresh (not restoring from a checkpoint); when we are starting fresh, this simply means
        // all existing shards of streams we are subscribing to are new shards; when we are restoring from checkpoint,
        // any new shards due to Kinesis resharding from the time of the checkpoint will be considered new shards.
        InitialPosition initialPosition = InitialPosition.valueOf(configProps.getProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, ConsumerConfigConstants.DEFAULT_STREAM_INITIAL_POSITION));
        SentinelSequenceNumber startingStateForNewShard = (isRestoredFromFailure) ? SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM : initialPosition.toSentinelSequenceNumber();
        if (LOG.isInfoEnabled()) {
            String logFormat = (!isRestoredFromFailure) ? "Subtask {} will be seeded with initial shard {}, starting state set as sequence number {}" : "Subtask {} will be seeded with new shard {} that was created while the consumer wasn't " + "running due to failure, starting state set as sequence number {}";
            LOG.info(logFormat, indexOfThisConsumerSubtask, shard.toString(), startingStateForNewShard.get());
        }
        registerNewSubscribedShardState(new KinesisStreamShardState(shard, startingStateForNewShard.get()));
    }
    //  2. check that there is at least one shard in the subscribed streams to consume from (can be done by
    //     checking if at least one value in subscribedStreamsToLastDiscoveredShardIds is not null)
    boolean hasShards = false;
    StringBuilder streamsWithNoShardsFound = new StringBuilder();
    for (Map.Entry<String, String> streamToLastDiscoveredShardEntry : subscribedStreamsToLastDiscoveredShardIds.entrySet()) {
        if (streamToLastDiscoveredShardEntry.getValue() != null) {
            hasShards = true;
        } else {
            streamsWithNoShardsFound.append(streamToLastDiscoveredShardEntry.getKey()).append(", ");
        }
    }
    if (streamsWithNoShardsFound.length() != 0 && LOG.isWarnEnabled()) {
        LOG.warn("Subtask {} has failed to find any shards for the following subscribed streams: {}", indexOfThisConsumerSubtask, streamsWithNoShardsFound.toString());
    }
    if (!hasShards) {
        throw new RuntimeException("No shards can be found for all subscribed streams: " + streams);
    }
    //  3. start consuming any shard states that this fetcher was seeded with; a seeded shard may already have been
    //     completely read, which can happen when the seeded state comes from starting the
    //     consumer using a restored state checkpoint
    for (int seededStateIndex = 0; seededStateIndex < subscribedShardsState.size(); seededStateIndex++) {
        KinesisStreamShardState seededShardState = subscribedShardsState.get(seededStateIndex);
        // only start a consuming thread if the seeded subscribed shard has not been completely read already
        if (!seededShardState.getLastProcessedSequenceNum().equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get())) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Subtask {} will start consuming seeded shard {} from sequence number {} with ShardConsumer {}", indexOfThisConsumerSubtask, seededShardState.getKinesisStreamShard().toString(), seededShardState.getLastProcessedSequenceNum(), seededStateIndex);
            }
            shardConsumersExecutor.submit(new ShardConsumer<>(this, seededStateIndex, subscribedShardsState.get(seededStateIndex).getKinesisStreamShard(), subscribedShardsState.get(seededStateIndex).getLastProcessedSequenceNum()));
        }
    }
    // ------------------------------------------------------------------------
    // finally, start the infinite shard discovery and consumer launching loop;
    // we will escape from this loop only when shutdownFetcher() or stopWithError() is called
    final long discoveryIntervalMillis = Long.valueOf(configProps.getProperty(ConsumerConfigConstants.SHARD_DISCOVERY_INTERVAL_MILLIS, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_DISCOVERY_INTERVAL_MILLIS)));
    // FLINK-4341:
    // For downstream operators that work on time (ex. window operators), we are required to emit a max value watermark
    // for subtasks that won't continue to have shards to read from unless resharding happens in the future, otherwise
    // the downstream watermarks would not advance, leading to unbounded accumulating state.
    //
    // The side-effect of this limitation is that on resharding, we must fail hard if the newly discovered shard
    // is to be subscribed by a subtask that has previously emitted a max value watermark, otherwise the watermarks
    // will be messed up.
    //
    // There are 2 cases where we need to either emit a max value watermark, or deliberately fail hard:
    //  (a) if this subtask has no more shards to read from unless resharding happens in the future, we emit a max
    //      value watermark. This case is encountered when 1) all previously read shards by this subtask were closed
    //      due to resharding, 2) this subtask was initially only subscribed to closed shards while the consumer
    //      was told to start from TRIM_HORIZON, or 3) there were initially no shards for this subtask to read on startup.
    //  (b) this subtask has discovered new shards to read from due to a reshard; if this subtask has already emitted
    //      a max value watermark, we must deliberately fail hard to avoid messing up the watermarks. The new shards
    //      will be subscribed by this subtask after restore as initial shards on startup.
    //
    // TODO: This is a temporary workaround until a min-watermark information service is available in the JobManager
    // Please see FLINK-4341 for more detail
    boolean emittedMaxValueWatermark = false;
    if (this.numberOfActiveShards.get() == 0) {
        // FLINK-4341 workaround case (a) - please see the above for details on this case
        LOG.info("Subtask {} has no initial shards to read on startup; emitting max value watermark ...", indexOfThisConsumerSubtask);
        sourceContext.emitWatermark(new Watermark(Long.MAX_VALUE));
        emittedMaxValueWatermark = true;
    }
    while (running) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Subtask {} is trying to discover new shards that were created due to resharding ...", indexOfThisConsumerSubtask);
        }
        List<KinesisStreamShard> newShardsDueToResharding = discoverNewShardsToSubscribe();
        if (newShardsDueToResharding.size() == 0 && this.numberOfActiveShards.get() == 0 && !emittedMaxValueWatermark) {
            // FLINK-4341 workaround case (a) - please see the above for details on this case
            LOG.info("Subtask {} has completed reading all shards; emitting max value watermark ...", indexOfThisConsumerSubtask);
            sourceContext.emitWatermark(new Watermark(Long.MAX_VALUE));
            emittedMaxValueWatermark = true;
        } else if (newShardsDueToResharding.size() > 0 && emittedMaxValueWatermark) {
            // FLINK-4341 workaround case (b) - please see the above for details on this case
            //
            // Note that in the case where, on resharding, this subtask ceased to read all of its previous shards
            // but new shards are also to be subscribed by this subtask immediately after, emittedMaxValueWatermark
            // will be false; this allows the fetcher to continue reading the new shards without failing in such cases.
            // However, due to the race condition mentioned above, we might still fall into case (a) first, and
            // then (b) on the next discovery attempt. Although the failure is ideally unnecessary, emitting the max
            // value watermark still remains correct.
            LOG.warn("Subtask {} has discovered {} new shards to subscribe, but is failing hard to avoid messing" + " up watermarks; the new shards will be subscribed by this subtask after restore ...", indexOfThisConsumerSubtask, newShardsDueToResharding.size());
            throw new RuntimeException("Deliberate failure to avoid messing up watermarks");
        }
        for (KinesisStreamShard shard : newShardsDueToResharding) {
            // since there may be delay in discovering a new shard, all new shards due to
            // resharding should be read starting from the earliest record possible
            KinesisStreamShardState newShardState = new KinesisStreamShardState(shard, SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get());
            int newStateIndex = registerNewSubscribedShardState(newShardState);
            if (LOG.isInfoEnabled()) {
                LOG.info("Subtask {} has discovered a new shard {} due to resharding, and will start consuming " + "the shard from sequence number {} with ShardConsumer {}", indexOfThisConsumerSubtask, newShardState.getKinesisStreamShard().toString(), newShardState.getLastProcessedSequenceNum(), newStateIndex);
            }
            shardConsumersExecutor.submit(new ShardConsumer<>(this, newStateIndex, newShardState.getKinesisStreamShard(), newShardState.getLastProcessedSequenceNum()));
        }
        // we also check the running flag here so that we won't start the discovery sleep
        // interval if the running flag was set to false during the middle of the while loop
        if (running && discoveryIntervalMillis != 0) {
            try {
                Thread.sleep(discoveryIntervalMillis);
            } catch (InterruptedException iex) {
            // the sleep may be interrupted by shutdownFetcher()
            }
        }
    }
    // make sure all resources have been terminated before leaving
    awaitTermination();
    // any error thrown in the shard consumer threads will be thrown to the main thread
    Throwable throwable = this.error.get();
    if (throwable != null) {
        if (throwable instanceof Exception) {
            throw (Exception) throwable;
        } else if (throwable instanceof Error) {
            throw (Error) throwable;
        } else {
            throw new Exception(throwable);
        }
    }
}
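
For illustration, here is a minimal standalone sketch (not the connector's own code) of the lookup that runFetcher performs when seeding shards at startup: the configured InitialPosition decides the sentinel starting sequence number, except when restoring from a checkpoint, in which case new shards are always read from the earliest record.

import java.util.Properties;

import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants;
import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.InitialPosition;
import org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber;

public class InitialPositionResolution {

    // Mirrors the resolution in runFetcher: resolve the configured InitialPosition, falling back
    // to the earliest sentinel when the shard state is being seeded after a restore from failure.
    static SentinelSequenceNumber startingStateForNewShard(Properties configProps, boolean isRestoredFromFailure) {
        InitialPosition initialPosition = InitialPosition.valueOf(configProps.getProperty(
            ConsumerConfigConstants.STREAM_INITIAL_POSITION,
            ConsumerConfigConstants.DEFAULT_STREAM_INITIAL_POSITION));
        return isRestoredFromFailure
            ? SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM
            : initialPosition.toSentinelSequenceNumber();
    }

    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "TRIM_HORIZON");

        // fresh start: the configured TRIM_HORIZON position maps to its sentinel sequence number
        System.out.println(startingStateForNewShard(props, false));
        // restored run: new shards discovered after restore are read from the earliest record
        System.out.println(startingStateForNewShard(props, true));
    }
}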
Also used: KinesisStreamShard (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard), InitialPosition (org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.InitialPosition), IOException (java.io.IOException), SentinelSequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber), KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState), HashMap (java.util.HashMap), Map (java.util.Map), Watermark (org.apache.flink.streaming.api.watermark.Watermark)
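
For context, the snippet below sketches how an application would configure the properties that runFetcher reads, assuming the same era of the Flink Kinesis connector API as the example above; the stream name, region, and credentials provider value are placeholders, not values taken from this example.

import java.util.Properties;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer;
import org.apache.flink.streaming.connectors.kinesis.config.AWSConfigConstants;
import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;

public class KinesisInitialPositionJob {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties consumerConfig = new Properties();
        // region and credentials are required by the connector (placeholder values)
        consumerConfig.setProperty(AWSConfigConstants.AWS_REGION, "us-east-1");
        consumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");

        // the property that runFetcher resolves into an InitialPosition; valid values are the
        // InitialPosition enum constants, e.g. LATEST or TRIM_HORIZON
        consumerConfig.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION,
            ConsumerConfigConstants.InitialPosition.TRIM_HORIZON.name());

        // how often the fetcher's discovery loop looks for new shards, in milliseconds
        consumerConfig.setProperty(ConsumerConfigConstants.SHARD_DISCOVERY_INTERVAL_MILLIS, "10000");

        DataStream<String> records = env.addSource(
            new FlinkKinesisConsumer<>("my-stream", new SimpleStringSchema(), consumerConfig));

        records.print();
        env.execute("Kinesis initial position example");
    }
}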
