Search in sources :

Example 71 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class StreamOperatorTask method process.

/**
 * Routes an incoming message envelope to the {@link org.apache.samza.operators.impl.RootOperatorImpl}
 * registered for the envelope's {@link SystemStream}.
 * <p>
 * The envelope's key and message are first converted with the input stream's message builder, and the
 * result is handed to the root operator. An envelope whose system stream has no root operator is ignored.
 *
 * @param ime incoming message envelope to process
 * @param collector the collector to send messages with
 * @param coordinator the coordinator to request commits or shutdown
 */
@Override
public final void process(IncomingMessageEnvelope ime, MessageCollector collector, TaskCoordinator coordinator) {
    SystemStream source = ime.getSystemStreamPartition().getSystemStream();
    InputStreamInternal sourceStream = inputSystemStreamToInputStream.get(source);
    RootOperatorImpl root = operatorImplGraph.getRootOperator(source);
    if (root == null) {
        // no operator chain is registered for this stream, so there is nothing to do
        return;
    }
    // TODO: SAMZA-1148 - Cast to appropriate input (key, msg) types based on the serde
    // before applying the msgBuilder.
    Object payload = sourceStream.getMsgBuilder().apply(ime.getKey(), ime.getMessage());
    root.onMessage(payload, collector, coordinator);
}
Also used : SystemStream(org.apache.samza.system.SystemStream) RootOperatorImpl(org.apache.samza.operators.impl.RootOperatorImpl) InputStreamInternal(org.apache.samza.operators.stream.InputStreamInternal)

Example 72 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TransactionalStateTaskRestoreManager method registerStartingOffsets.

/**
 * Registers a starting offset with the changelog {@link SystemConsumer} of every store.
 * <p>
 * Works in two passes. The first pass registers the current upcoming offset for every store changelog SSP.
 * The second pass, which only covers the stores in {@code storeActions.storesToRestore}, validates the
 * restore offsets and registers the offset that restoration should actually begin from.
 *
 * @param taskModel supplies the changelog partition used to build each changelog SSP
 * @param storeActions holds the {@code storesToRestore} map of store name to restore offsets
 * @param storeChangelogs store name to changelog system stream
 * @param systemAdmins source of the {@link SystemAdmin} for each changelog system
 * @param storeConsumers store name to the consumer that reads that store's changelog
 * @param currentChangelogOffsets changelog SSP to its current oldest / upcoming offset metadata
 */
@VisibleForTesting
static void registerStartingOffsets(TaskModel taskModel, StoreActions storeActions, Map<String, SystemStream> storeChangelogs, SystemAdmins systemAdmins, Map<String, SystemConsumer> storeConsumers, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets) {
    Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;

    // Pass 1: every changelog system consumer needs at least one registered SSP or its start will throw,
    // so register the upcoming offset as a placeholder. With the upcoming offset nothing is replayed
    // unless pass 2 overrides it.
    for (Map.Entry<String, SystemStream> changelogEntry : storeChangelogs.entrySet()) {
        SystemStreamPartition ssp = new SystemStreamPartition(changelogEntry.getValue(), taskModel.getChangelogPartition());
        SystemConsumer consumer = storeConsumers.get(changelogEntry.getKey());
        SystemStreamPartitionMetadata sspMetadata = currentChangelogOffsets.get(ssp);
        String upcomingOffset = sspMetadata.getUpcomingOffset();
        LOG.info("Temporarily registering upcoming offset: {} as the starting offest for changelog ssp: {}. " + "This might be overridden later for stores that need restoring.", upcomingOffset, ssp);
        consumer.register(ssp, upcomingOffset);
    }

    // Pass 2: register the real starting offset for stores that need restoring. The system consumer
    // will ensure that the lower of the two registered offsets is used as the starting offset.
    for (Map.Entry<String, RestoreOffsets> restoreEntry : storesToRestore.entrySet()) {
        String storeName = restoreEntry.getKey();
        RestoreOffsets restoreOffsets = restoreEntry.getValue();

        SystemStream changelog = storeChangelogs.get(storeName);
        SystemStreamPartition ssp = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
        SystemAdmin admin = systemAdmins.getSystemAdmin(changelog.getSystem());
        validateRestoreOffsets(restoreOffsets, admin);

        SystemConsumer consumer = storeConsumers.get(storeName);
        SystemStreamPartitionMetadata sspMetadata = currentChangelogOffsets.get(ssp);
        String oldestOffset = sspMetadata.getOldestOffset();

        // A restore that begins at the oldest offset (e.g. a full restore) includes that offset;
        // any other restore begins at the offset following the recorded starting offset.
        boolean startsAtOldest = systemAdmin(admin).offsetComparator(restoreOffsets.startingOffset, oldestOffset) == 0;
        String startingOffset = startsAtOldest
            ? oldestOffset
            : admin.getOffsetsAfter(ImmutableMap.of(ssp, restoreOffsets.startingOffset)).get(ssp);

        LOG.info("Registering starting offset: {} for changelog ssp: {}", startingOffset, ssp);
        consumer.register(ssp, startingOffset);
    }
}

/** Identity helper that keeps the comparator call site readable. */
private static SystemAdmin systemAdmin(SystemAdmin admin) {
    return admin;
}
Also used : SystemConsumer(org.apache.samza.system.SystemConsumer) SystemStream(org.apache.samza.system.SystemStream) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 73 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TransactionalStateTaskRestoreManager method getCheckpointedChangelogOffsets.

/**
 * Extracts the checkpointed changelog offset of each store from the given checkpoint.
 * <p>
 * For a {@link CheckpointV2} the markers stored under the Kafka state backend factory name are
 * deserialized. For a {@link CheckpointV1} the offsets are looked up by each store's changelog SSP.
 *
 * @param checkpoint the checkpoint to read; may be {@code null}
 * @return store name to checkpoint marker; empty when {@code checkpoint} is {@code null}
 * @throws SamzaException if the checkpoint is neither a {@link CheckpointV1} nor a {@link CheckpointV2}
 */
private Map<String, KafkaStateCheckpointMarker> getCheckpointedChangelogOffsets(Checkpoint checkpoint) {
    Map<String, KafkaStateCheckpointMarker> markersByStore = new HashMap<>();
    if (checkpoint == null) {
        return markersByStore;
    }

    if (checkpoint instanceof CheckpointV2) {
        Map<String, Map<String, String>> markersByFactory = ((CheckpointV2) checkpoint).getStateCheckpointMarkers();
        // only the Kafka state backend's markers are relevant here; skip the non-KafkaStateCheckpointMarkers
        if (markersByFactory.containsKey(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME)) {
            Map<String, String> serializedMarkers = markersByFactory.get(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME);
            for (Map.Entry<String, String> serialized : serializedMarkers.entrySet()) {
                markersByStore.put(serialized.getKey(), KafkaStateCheckpointMarker.deserialize(serialized.getValue()));
            }
        }
    } else if (checkpoint instanceof CheckpointV1) {
        // With checkpoint v1 the store offsets live among the input offsets, keyed by the changelog SSP.
        Map<SystemStreamPartition, String> offsetsBySsp = checkpoint.getOffsets();
        for (Map.Entry<String, SystemStream> changelogEntry : storeChangelogs.entrySet()) {
            Partition partition = taskModel.getChangelogPartition();
            SystemStreamPartition changelogSsp = new SystemStreamPartition(changelogEntry.getValue(), partition);
            String rawOffset = offsetsBySsp.get(changelogSsp);
            if (StringUtils.isBlank(rawOffset)) {
                // nothing was checkpointed for this store's changelog
                continue;
            }
            KafkaChangelogSSPOffset parsedOffset = KafkaChangelogSSPOffset.fromString(rawOffset);
            markersByStore.put(changelogEntry.getKey(), new KafkaStateCheckpointMarker(changelogSsp, parsedOffset.getChangelogOffset()));
        }
    } else {
        throw new SamzaException("Unsupported checkpoint version: " + checkpoint.getVersion());
    }
    return markersByStore;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) ListMultimap(com.google.common.collect.ListMultimap) SSPMetadataCache(org.apache.samza.system.SSPMetadataCache) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) FileUtil(org.apache.samza.util.FileUtil) SystemConsumer(org.apache.samza.system.SystemConsumer) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) ExecutorService(java.util.concurrent.ExecutorService) StorageConfig(org.apache.samza.config.StorageConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) Partition(org.apache.samza.Partition) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Clock(org.apache.samza.util.Clock) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) Optional(java.util.Optional) 
Preconditions(com.google.common.base.Preconditions) KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Config(org.apache.samza.config.Config) Collections(java.util.Collections) SystemAdmins(org.apache.samza.system.SystemAdmins) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 74 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TransactionalStateTaskRestoreManager method restore.

/**
 * Restores every store listed in {@code storeActions.storesToRestore} from its changelog, running
 * on {@code restoreExecutor}.
 * <p>
 * The stores are restored one after another inside a single asynchronous task. If the restoring thread is
 * interrupted, its interrupt status is set again and a {@link SamzaException} wrapping the
 * {@link InterruptedException} is thrown, so the returned future completes exceptionally.
 *
 * @return a future that completes once all stores have been restored
 */
@Override
public CompletableFuture<Void> restore() {
    return CompletableFuture.runAsync(() -> {
        Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;
        for (Map.Entry<String, RestoreOffsets> entry : storesToRestore.entrySet()) {
            String storeName = entry.getKey();
            String endOffset = entry.getValue().endingOffset;
            SystemStream systemStream = storeChangelogs.get(storeName);
            SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(systemStream.getSystem());
            SystemConsumer systemConsumer = storeConsumers.get(storeName);
            SystemStreamPartition changelogSSP = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());
            ChangelogSSPIterator changelogSSPIterator = new ChangelogSSPIterator(systemConsumer, changelogSSP, endOffset, systemAdmin, true, currentChangelogOffsets.get(changelogSSP).getNewestOffset());
            StorageEngine taskStore = storeEngines.get(storeName);
            LOG.info("Restoring store: {} for task: {}", storeName, taskModel.getTaskName());
            try {
                taskStore.restore(changelogSSPIterator);
            } catch (InterruptedException e) {
                // catching InterruptedException clears the interrupt status; set it again so the
                // executor and any code further up the stack can still observe the interruption
                Thread.currentThread().interrupt();
                String msg = String.format("Interrupted while restoring store: %s for task: %s", storeName, taskModel.getTaskName().getTaskName());
                // wrap in unchecked exception to throw from lambda
                throw new SamzaException(msg, e);
            }
        }
    }, restoreExecutor);
}
Also used : SystemConsumer(org.apache.samza.system.SystemConsumer) SystemStream(org.apache.samza.system.SystemStream) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SamzaException(org.apache.samza.SamzaException) SystemAdmin(org.apache.samza.system.SystemAdmin) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 75 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class NonTransactionalStateTaskRestoreManager method registerStartingOffsets.

/**
 * Determines the starting offset for each store SSP (based on {@link #getStartingOffset(SystemStreamPartition, SystemAdmin)}) and
 * registers it with the respective SystemConsumer for starting consumption.
 * <p>
 * A store whose starting offset is {@code null} is not registered; it is removed from
 * {@code taskStoresToRestore} instead.
 */
private void registerStartingOffsets() {
    for (Map.Entry<String, SystemStream> changelogSystemStreamEntry : storeChangelogs.entrySet()) {
        String storeName = changelogSystemStreamEntry.getKey();
        SystemStream changelog = changelogSystemStreamEntry.getValue();
        SystemStreamPartition systemStreamPartition = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
        SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(changelog.getSystem());
        SystemConsumer systemConsumer = storeConsumers.get(storeName);
        String offset = getStartingOffset(systemStreamPartition, systemAdmin);
        if (offset != null) {
            // parameterized logging; the previous concatenated message carried a stray '%' before the SSP
            LOG.info("Registering change log consumer with offset {} for {}", offset, systemStreamPartition);
            systemConsumer.register(systemStreamPartition, offset);
        } else {
            LOG.info("Skipping change log restoration for {} because stream appears to be empty (offset was null).", systemStreamPartition);
            taskStoresToRestore.remove(storeName);
        }
    }
}
Also used : SystemConsumer(org.apache.samza.system.SystemConsumer) SystemStream(org.apache.samza.system.SystemStream) SystemAdmin(org.apache.samza.system.SystemAdmin) HashMap(java.util.HashMap) Map(java.util.Map) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

SystemStream (org.apache.samza.system.SystemStream)143 HashMap (java.util.HashMap)75 Test (org.junit.Test)74 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)72 Partition (org.apache.samza.Partition)58 Map (java.util.Map)55 TaskName (org.apache.samza.container.TaskName)52 MapConfig (org.apache.samza.config.MapConfig)49 Config (org.apache.samza.config.Config)46 SystemAdmin (org.apache.samza.system.SystemAdmin)42 SystemAdmins (org.apache.samza.system.SystemAdmins)40 TaskModel (org.apache.samza.job.model.TaskModel)39 Collections (java.util.Collections)37 Set (java.util.Set)37 TaskConfig (org.apache.samza.config.TaskConfig)37 Clock (org.apache.samza.util.Clock)36 File (java.io.File)35 ImmutableMap (com.google.common.collect.ImmutableMap)34 SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)33 TaskMode (org.apache.samza.job.model.TaskMode)32