Search in sources :

Example 11 with Checkpoint

use of org.apache.samza.checkpoint.Checkpoint in project samza by apache.

the class ContainerStorageManager method getBackendFactoryStoreNames.

/**
 * Groups the given stores by the state backend factory that should be used to restore them.
 *
 * <p>For a version 1 checkpoint only stores with a configured changelog stream are returned, all of
 * them mapped to {@link StorageConfig#KAFKA_STATE_BACKEND_FACTORY}. For a version 2 checkpoint, or
 * when there is no checkpoint at all, each store is mapped to the first of its configured restore
 * factories that has a state checkpoint marker for the store; if none has one, the first configured
 * restore factory is used. Stores with no applicable restore mechanism are omitted from the result.
 *
 * @param checkpoint the last checkpoint, or {@code null} if none exists
 * @param storeNames names of the stores to classify
 * @param storageConfig config used to look up changelog streams and restore factories per store
 * @return a map of backend factory name to the set of store names to restore with that factory
 * @throws SamzaException if the checkpoint version is neither 1 nor 2
 */
@VisibleForTesting
Map<String, Set<String>> getBackendFactoryStoreNames(Checkpoint checkpoint, Set<String> storeNames, StorageConfig storageConfig) {
    // backendFactoryName -> set(storeNames)
    Map<String, Set<String>> backendFactoryStoreNames = new HashMap<>();
    if (checkpoint != null && checkpoint.getVersion() == 1) {
        // Only restore stores with changelog streams configured
        Set<String> changelogStores = storeNames.stream().filter(storeName -> storageConfig.getChangelogStream(storeName).isPresent()).collect(Collectors.toSet());
        // Default to changelog backend factory when using checkpoint v1 for backwards compatibility
        if (!changelogStores.isEmpty()) {
            backendFactoryStoreNames.put(StorageConfig.KAFKA_STATE_BACKEND_FACTORY, changelogStores);
        }
        if (storeNames.size() > changelogStores.size()) {
            Set<String> nonChangelogStores = storeNames.stream().filter(storeName -> !changelogStores.contains(storeName)).collect(Collectors.toSet());
            LOG.info("non-Side input stores: {}, do not have a configured store changelogs for checkpoint V1, " + "restore for the store will be skipped", nonChangelogStores);
        }
    } else if (checkpoint == null || checkpoint.getVersion() == 2) {
        // Extract the state checkpoint markers if checkpoint exists
        Map<String, Map<String, String>> stateCheckpointMarkers = checkpoint == null ? Collections.emptyMap() : ((CheckpointV2) checkpoint).getStateCheckpointMarkers();
        // Find stores associated to each state backend factory
        storeNames.forEach(storeName -> {
            List<String> storeFactories = storageConfig.getStoreRestoreFactories(storeName);
            if (storeFactories.isEmpty()) {
                // No restore factory is configured for the store, so there is nothing to restore it with
                LOG.info("non-Side input store: {}, does not have a configured restore factories nor store changelogs, " + "restore for the store will be skipped", storeName);
                return;
            }
            // Search the ordered list for the first state backend factory that has a marker for this store
            // in the checkpoint. A single null-safe lookup replaces the containsKey + get pair.
            Optional<String> checkpointedFactory = storeFactories.stream().filter(candidate -> {
                Map<String, String> storeMarkers = stateCheckpointMarkers.get(candidate);
                return storeMarkers != null && storeMarkers.containsKey(storeName);
            }).findFirst();
            String factoryName;
            if (checkpointedFactory.isPresent()) {
                factoryName = checkpointedFactory.get();
            } else {
                // Restore factories configured but no checkpoints found: use first configured restore factory
                factoryName = storeFactories.get(0);
                LOG.warn("No matching checkpoints found for store: {} with configured factories: {}, " + "defaulting to using the first configured factory with no checkpoints", storeName, storeFactories);
            }
            backendFactoryStoreNames.computeIfAbsent(factoryName, key -> new HashSet<>()).add(storeName);
        });
    } else {
        throw new SamzaException(String.format("Unsupported checkpoint version %s", checkpoint.getVersion()));
    }
    return backendFactoryStoreNames;
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SerdeUtils(org.apache.samza.table.utils.SerdeUtils) LoggerFactory(org.slf4j.LoggerFactory) TaskModel(org.apache.samza.job.model.TaskModel) Future(java.util.concurrent.Future) SystemConsumer(org.apache.samza.system.SystemConsumer) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics) Map(java.util.Map) TaskInstanceCollector(org.apache.samza.task.TaskInstanceCollector) RoundRobinChooserFactory(org.apache.samza.system.chooser.RoundRobinChooserFactory) Path(java.nio.file.Path) StorageConfig(org.apache.samza.config.StorageConfig) RunLoopTask(org.apache.samza.container.RunLoopTask) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) DefaultChooser(org.apache.samza.system.chooser.DefaultChooser) Checkpoint(org.apache.samza.checkpoint.Checkpoint) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Optional(java.util.Optional) Config(org.apache.samza.config.Config) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) SystemAdmins(org.apache.samza.system.SystemAdmins) ScalaJavaUtil(org.apache.samza.util.ScalaJavaUtil) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) MessageChooser(org.apache.samza.system.chooser.MessageChooser) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Function(java.util.function.Function) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) ArrayList(java.util.ArrayList) 
HashSet(java.util.HashSet) Gauge(org.apache.samza.metrics.Gauge) ImmutableList(com.google.common.collect.ImmutableList) SerdeManager(org.apache.samza.serializers.SerdeManager) MessageCollector(org.apache.samza.task.MessageCollector) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStream(org.apache.samza.system.SystemStream) RunLoop(org.apache.samza.container.RunLoop) SystemConsumersMetrics(org.apache.samza.system.SystemConsumersMetrics) ExecutorService(java.util.concurrent.ExecutorService) MapUtils(org.apache.commons.collections4.MapUtils) JavaConversions(scala.collection.JavaConversions) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) Logger(org.slf4j.Logger) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SystemFactory(org.apache.samza.system.SystemFactory) Clock(org.apache.samza.util.Clock) SystemConsumers(org.apache.samza.system.SystemConsumers) File(java.io.File) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) TaskMode(org.apache.samza.job.model.TaskMode) Entry(org.apache.samza.storage.kv.Entry) ReflectionUtil(org.apache.samza.util.ReflectionUtil) ContainerModel(org.apache.samza.job.model.ContainerModel) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) Set(java.util.Set) HashSet(java.util.HashSet) Optional(java.util.Optional) HashMap(java.util.HashMap) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException) HashSet(java.util.HashSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 12 with Checkpoint

use of org.apache.samza.checkpoint.Checkpoint in project samza by apache.

the class TransactionalStateTaskRestoreManager method getStoreActions.

/**
 * Marks each persistent but non-logged store for deletion.
 *
 * For each logged store, based on the current, checkpointed and local changelog offsets,
 * 1. decides which directories (current and checkpoints) to delete for persistent stores.
 * 2. decides which directories (checkpoints) to retain for persistent stores.
 * 3. decides which stores (persistent or not) need to be restored, and the beginning and end offsets for the restore.
 *
 * When this method returns, in StoreActions,
 * 1. all persistent store current directories will be present in storeDirsToDelete
 * 2. each persistent store checkpoint directory will be present in either storeDirToRetain or storeDirsToDelete.
 * 3. there will be at most one storeDirToRetain per persistent store, which will be a checkpoint directory.
 * 4. any stores (persistent or not) that need to be restored from changelogs will be present in
 *    storesToRestore with appropriate offsets.
 *
 * @param taskModel supplies the task name, task mode and changelog partition used below
 * @param storeEngines store name to storage engine; every entry is examined
 * @param storeChangelogs store name to changelog system stream
 * @param kafkaStateCheckpointMarkers store name to checkpoint marker; a non-blank changelog offset in the
 *                                    marker is used as the store's checkpointed offset
 * @param checkpointId id of the last checkpoint; may be null, in which case the time since the last
 *                     checkpoint is treated as {@code Long.MAX_VALUE}
 * @param currentChangelogOffsets changelog SSP to metadata providing the oldest and newest offsets
 * @param systemAdmins used to look up the admin whose offset comparator orders changelog offsets
 * @param storageManagerUtil used to resolve store and checkpoint directories, validate checkpoint
 *                           directories and read their offset files
 * @param loggedStoreBaseDirectory base directory passed when resolving directories of logged stores
 * @param nonLoggedStoreBaseDirectory base directory passed when resolving directories of non-logged stores
 * @param config wrapped in StorageConfig and TaskConfig to read the settings consulted below
 * @param clock passed through to the checkpoint directory validity check
 * @return the StoreActions built from the directories to retain, directories to delete and stores to restore
 */
@VisibleForTesting
static StoreActions getStoreActions(TaskModel taskModel, Map<String, StorageEngine> storeEngines, Map<String, SystemStream> storeChangelogs, Map<String, KafkaStateCheckpointMarker> kafkaStateCheckpointMarkers, CheckpointId checkpointId, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets, SystemAdmins systemAdmins, StorageManagerUtil storageManagerUtil, File loggedStoreBaseDirectory, File nonLoggedStoreBaseDirectory, Config config, Clock clock) {
    TaskName taskName = taskModel.getTaskName();
    TaskMode taskMode = taskModel.getTaskMode();
    // the three accumulators below are populated per store and returned wrapped in StoreActions
    Map<String, File> storeDirToRetain = new HashMap<>();
    ListMultimap<String, File> storeDirsToDelete = ArrayListMultimap.create();
    Map<String, RestoreOffsets> storesToRestore = new HashMap<>();
    storeEngines.forEach((storeName, storageEngine) -> {
        // do nothing if store is non persistent and not logged (e.g. in memory cache only)
        if (!storageEngine.getStoreProperties().isPersistedToDisk() && !storageEngine.getStoreProperties().isLoggedStore()) {
            return;
        }
        // persistent but non-logged stores are always deleted
        if (storageEngine.getStoreProperties().isPersistedToDisk() && !storageEngine.getStoreProperties().isLoggedStore()) {
            File currentDir = storageManagerUtil.getTaskStoreDir(nonLoggedStoreBaseDirectory, storeName, taskName, taskMode);
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion since it is not a logged store.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            // persistent but non-logged stores should not have checkpoint dirs
            return;
        }
        // from here on the store is a logged store (persistent or not)
        // get the oldest and newest current changelog SSP offsets as well as the checkpointed changelog SSP offset
        SystemStream changelog = storeChangelogs.get(storeName);
        SystemStreamPartition changelogSSP = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
        SystemAdmin admin = systemAdmins.getSystemAdmin(changelogSSP.getSystem());
        // NOTE(review): the metadata is dereferenced without a null check; this assumes currentChangelogOffsets
        // has an entry for every logged store's changelog SSP - confirm against callers
        SystemStreamPartitionMetadata changelogSSPMetadata = currentChangelogOffsets.get(changelogSSP);
        String oldestOffset = changelogSSPMetadata.getOldestOffset();
        String newestOffset = changelogSSPMetadata.getNewestOffset();
        // can be null if no message, or message has null offset
        String checkpointedOffset = null;
        if (kafkaStateCheckpointMarkers.containsKey(storeName) && StringUtils.isNotBlank(kafkaStateCheckpointMarkers.get(storeName).getChangelogOffset())) {
            checkpointedOffset = kafkaStateCheckpointMarkers.get(storeName).getChangelogOffset();
        }
        // NOTE(review): wall-clock time is read from System.currentTimeMillis() here rather than from the
        // clock parameter - confirm whether that is intentional
        long timeSinceLastCheckpointInMs = checkpointId == null ? Long.MAX_VALUE : System.currentTimeMillis() - checkpointId.getMillis();
        // if the clean.store.start config is set, delete current and checkpoint dirs, restore from oldest offset to checkpointed
        if (storageEngine.getStoreProperties().isPersistedToDisk() && new StorageConfig(config).cleanLoggedStoreDirsOnStart(storeName)) {
            File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode).forEach(checkpointDir -> {
                LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", checkpointDir, storeName, taskName);
                storeDirsToDelete.put(storeName, checkpointDir);
            });
            LOG.info("Marking restore offsets for store: {} in task: {} to {}, {} ", storeName, taskName, oldestOffset, checkpointedOffset);
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
            return;
        }
        // directories only exist for stores persisted to disk; both optionals stay empty otherwise
        Optional<File> currentDirOptional;
        Optional<List<File>> checkpointDirsOptional;
        if (!storageEngine.getStoreProperties().isPersistedToDisk()) {
            currentDirOptional = Optional.empty();
            checkpointDirsOptional = Optional.empty();
        } else {
            currentDirOptional = Optional.of(storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode));
            checkpointDirsOptional = Optional.of(storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode));
        }
        LOG.info("For store: {} in task: {} got current dir: {}, checkpoint dirs: {}, checkpointed changelog offset: {}", storeName, taskName, currentDirOptional, checkpointDirsOptional, checkpointedOffset);
        // the current directory of a persistent logged store is always marked for deletion
        currentDirOptional.ifPresent(currentDir -> {
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
        });
        if (checkpointedOffset == null && oldestOffset != null) {
            // this can mean that either this is the initial migration for this feature and there are no previously
            // checkpointed changelog offsets, or that this is a new store or changelog topic after the initial migration.
            // if this is the first time migration, it might be desirable to retain existing data.
            // if this is new store or topic, it is possible that the container previously died after writing some data to
            // the changelog but before a commit, so it is desirable to delete the store, not restore anything and
            // trim the changelog
            // since we can't tell the difference b/w the two scenarios by just looking at the store and changelogs,
            // we'll request users to indicate whether to retain existing data using a config flag. this flag should only
            // be set during migrations, and turned off after the first successful commit of the new container (i.e. next
            // deploy). for simplicity, we'll always delete the local store, and restore from changelog if necessary.
            // the former scenario should not be common. the recommended way to opt-in to the transactional state feature
            // is to first upgrade to the latest samza version but keep the transactional state restore config off.
            // this will create the store checkpoint directories and write the changelog offset to the checkpoint, but
            // will not use them during restore. once this is done (i.e. at least one commit after upgrade), the
            // transactional state restore feature can be turned on on subsequent deploys. this code path exists as a
            // fail-safe against clearing changelogs in case users do not follow upgrade instructions and enable the
            // feature directly.
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> {
                LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion since checkpointed " + "offset is null and oldest offset: {} is not.", checkpointDir, storeName, taskName, oldestOffset);
                storeDirsToDelete.put(storeName, checkpointDir);
            }));
            if (new TaskConfig(config).getTransactionalStateRetainExistingState()) {
                // mark for restore from (oldest, newest) to recreate local state.
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is true, " + "local state will be fully restored from current changelog contents. " + "There is no transactional local state guarantee.", storeName, taskName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
            } else {
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is false, " + "any local state and changelog topic contents will be deleted.", storeName, taskName);
                // mark for restore from (oldest, null) to trim entire changelog.
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, null));
            }
        } else if (// check if the checkpointed offset is out of range of current oldest and newest offsets
        // NOTE(review): this branch is also reached when both checkpointedOffset and oldestOffset are null;
        // presumably the comparator tolerates null offsets - verify
        admin.offsetComparator(oldestOffset, checkpointedOffset) > 0 || admin.offsetComparator(checkpointedOffset, newestOffset) > 0) {
            // checkpointed offset is out of range. this could mean that this is a TTL topic and the checkpointed
            // offset was TTLd, or that the changelog topic was manually deleted and then recreated.
            // we cannot guarantee transactional state for TTL stores, so delete everything and do a full restore
            // for local store. if the topic was deleted and recreated, this will have the side effect of
            // clearing the store as well.
            LOG.warn("Checkpointed offset: {} for store: {} in task: {} is out of range of oldest: {} or newest: {} offset." + "Deleting existing store and fully restoring from changelog topic from oldest to newest offset. If the topic " + "has time-based retention, there is no transactional local state guarantees. If the topic was changed," + "local state will be cleaned up and fully restored to match the new topic contents.", checkpointedOffset, storeName, taskName, oldestOffset, newestOffset);
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> storeDirsToDelete.put(storeName, checkpointDir)));
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
        } else {
            // happy path. checkpointed offset is in range of current oldest and newest offsets
            if (!checkpointDirsOptional.isPresent()) {
                // non-persistent logged store
                LOG.info("Did not find any checkpoint directories for logged (maybe non-persistent) store: {}. Local state " + "will be fully restored from current changelog contents.", storeName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
            } else {
                // persistent logged store
                String targetOffset;
                // check checkpoint time against min.compaction.lag.ms. if older, restore from checkpointed offset to newest
                // with no trim. be conservative. allow 10% safety margin to avoid deletions when the downtime is close
                // to min.compaction.lag.ms
                long minCompactionLagMs = new StorageConfig(config).getChangelogMinCompactionLagMs(storeName);
                if (timeSinceLastCheckpointInMs > .9 * minCompactionLagMs) {
                    LOG.warn("Checkpointed offset for store: {} in task: {} is: {}. It is in range of oldest: {} and " + "newest: {} changelog offset. However, time since last checkpoint is: {}, which is greater than " + "0.9 * min.compaction.lag.ms: {} for the changelog topic. Since there is a chance that" + "the changelog topic has been compacted, restoring store to the end of the current changelog contents." + "There is no transactional local state guarantee.", storeName, taskName, checkpointedOffset, oldestOffset, newestOffset, timeSinceLastCheckpointInMs, minCompactionLagMs);
                    targetOffset = newestOffset;
                } else {
                    targetOffset = checkpointedOffset;
                }
                // if there exists a valid store checkpoint directory with oldest offset <= local offset <= target offset,
                // retain it and restore the delta. delete all other checkpoint directories for the store. if more than one such
                // checkpoint directory exists, retain the one with the highest local offset and delete the rest.
                boolean hasValidCheckpointDir = false;
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (storageManagerUtil.isLoggedStoreValid(storeName, checkpointDir, config, storeChangelogs, taskModel, clock, storeEngines)) {
                        String localOffset = storageManagerUtil.readOffsetFile(checkpointDir, Collections.singleton(changelogSSP), false).get(changelogSSP);
                        LOG.info("Read local offset: {} for store: {} checkpoint dir: {} in task: {}", localOffset, storeName, checkpointDir, taskName);
                        // the last clause compares against the starting offset recorded by an earlier iteration, so a
                        // later directory only replaces the retained one when its local offset is strictly higher
                        if (admin.offsetComparator(localOffset, oldestOffset) >= 0 && admin.offsetComparator(localOffset, targetOffset) <= 0 && (storesToRestore.get(storeName) == null || admin.offsetComparator(localOffset, storesToRestore.get(storeName).startingOffset) > 0)) {
                            hasValidCheckpointDir = true;
                            LOG.info("Temporarily marking checkpoint dir: {} for store: {} in task: {} for retention. " + "May be overridden later.", checkpointDir, storeName, taskName);
                            storeDirToRetain.put(storeName, checkpointDir);
                            // mark for restore even if local == checkpointed, so that the changelog gets trimmed.
                            LOG.info("Temporarily marking store: {} in task: {} for restore from beginning offset: {} to " + "ending offset: {}. May be overridden later", storeName, taskName, localOffset, targetOffset);
                            storesToRestore.put(storeName, new RestoreOffsets(localOffset, targetOffset));
                        }
                    }
                }
                // delete all non-retained checkpoint directories
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (storeDirToRetain.get(storeName) == null || !storeDirToRetain.get(storeName).equals(checkpointDir)) {
                        LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion since it is not " + "marked for retention.", checkpointDir, storeName, taskName);
                        storeDirsToDelete.put(storeName, checkpointDir);
                    }
                }
                // if the store had not valid checkpoint dirs to retain, restore from changelog
                if (!hasValidCheckpointDir) {
                    storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, targetOffset));
                }
            }
        }
    });
    LOG.info("Store directories to be retained in Task: {} are: {}", taskName, storeDirToRetain);
    LOG.info("Store directories to be deleted in Task: {} are: {}", taskName, storeDirsToDelete);
    LOG.info("Stores to be restored in Task: {} are: {}", taskName, storesToRestore);
    return new StoreActions(storeDirToRetain, storeDirsToDelete, storesToRestore);
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) ListMultimap(com.google.common.collect.ListMultimap) SSPMetadataCache(org.apache.samza.system.SSPMetadataCache) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) FileUtil(org.apache.samza.util.FileUtil) SystemConsumer(org.apache.samza.system.SystemConsumer) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) ExecutorService(java.util.concurrent.ExecutorService) StorageConfig(org.apache.samza.config.StorageConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) Partition(org.apache.samza.Partition) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Clock(org.apache.samza.util.Clock) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) Optional(java.util.Optional) 
Preconditions(com.google.common.base.Preconditions) KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Config(org.apache.samza.config.Config) Collections(java.util.Collections) SystemAdmins(org.apache.samza.system.SystemAdmins) HashMap(java.util.HashMap) StorageConfig(org.apache.samza.config.StorageConfig) SystemStream(org.apache.samza.system.SystemStream) TaskConfig(org.apache.samza.config.TaskConfig) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) List(java.util.List) SystemAdmin(org.apache.samza.system.SystemAdmin) File(java.io.File) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 13 with Checkpoint

use of org.apache.samza.checkpoint.Checkpoint in project samza by apache.

the class BlobStoreRestoreManager method deleteUnusedStoresFromBlobStore.

/**
 * Cleans up blob store data for every store found in the initial snapshot indexes that is configured
 * neither for backup nor for restore with the blob store state backend factory (for example because the
 * store was removed from the job, or moved to a different backend, since the last deployment).
 *
 * For each such store the previously marked files and sub-dirs are cleaned up, then the store directory
 * is deleted, then the snapshot index blob is deleted. The later two steps run on the given executor.
 *
 * This method blocks until the deletion chains of all affected stores have completed.
 */
@VisibleForTesting
static void deleteUnusedStoresFromBlobStore(String jobName, String jobId, String taskName, StorageConfig storageConfig, BlobStoreConfig blobStoreConfig, Map<String, Pair<String, SnapshotIndex>> initialStoreSnapshotIndexes, BlobStoreUtil blobStoreUtil, ExecutorService executor) {
    String blobStoreFactoryName = BlobStoreStateBackendFactory.class.getName();
    List<String> backedUpStores = storageConfig.getStoresWithBackupFactory(blobStoreFactoryName);
    List<String> restoredStores = storageConfig.getStoresWithRestoreFactory(blobStoreFactoryName);
    List<CompletionStage<Void>> pendingDeletions = new ArrayList<>();
    for (Map.Entry<String, Pair<String, SnapshotIndex>> indexEntry : initialStoreSnapshotIndexes.entrySet()) {
        String storeName = indexEntry.getKey();
        // stores still backed up or restored through the blob store are left untouched
        if (backedUpStores.contains(storeName) || restoredStores.contains(storeName)) {
            continue;
        }
        Pair<String, SnapshotIndex> scmAndSnapshotIndex = indexEntry.getValue();
        LOG.debug("Removing task: {} store: {} from blob store. It is either no longer used, " + "or is no longer configured to be backed up or restored with blob store.", taskName, storeName);
        DirIndex storeDirIndex = scmAndSnapshotIndex.getRight().getDirIndex();
        Metadata deletionMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
        // step 1: delete files and sub-dirs previously marked for removal
        // step 2: delete the files and dirs still present
        // step 3: delete the snapshot index blob
        CompletionStage<Void> storeDeletion = blobStoreUtil.cleanUpDir(storeDirIndex, deletionMetadata)
            .thenComposeAsync(ignored -> blobStoreUtil.deleteDir(storeDirIndex, deletionMetadata), executor)
            .thenComposeAsync(ignored -> blobStoreUtil.deleteSnapshotIndexBlob(scmAndSnapshotIndex.getLeft(), deletionMetadata), executor);
        pendingDeletions.add(storeDeletion);
    }
    FutureUtil.allOf(pendingDeletions).join();
}
Also used : BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) ArrayList(java.util.ArrayList) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) ExecutorService(java.util.concurrent.ExecutorService) FutureUtil(org.apache.samza.util.FutureUtil) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Files(java.nio.file.Files) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) Set(java.util.Set) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Checkpoint(org.apache.samza.checkpoint.Checkpoint) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) CompletionStage(java.util.concurrent.CompletionStage) TaskRestoreManager(org.apache.samza.storage.TaskRestoreManager) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Paths(java.nio.file.Paths) Optional(java.util.Optional) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) BlobStoreConfig(org.apache.samza.config.BlobStoreConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) CompletionStage(java.util.concurrent.CompletionStage) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 14 with Checkpoint

use of org.apache.samza.checkpoint.Checkpoint in project samza by apache.

the class TaskStorageCommitManager method init.

/**
 * Loads the task's stores from the container storage manager and initializes every backup manager
 * with the task's last checkpoint, or with null when no checkpoint manager is available.
 */
public void init() {
    // Assumes the container storage manager has already been started and has created the stores
    storageEngines = containerStorageManager.getAllStores(taskName);
    final Checkpoint lastCheckpoint;
    if (checkpointManager == null) {
        lastCheckpoint = null;
    } else {
        lastCheckpoint = checkpointManager.readLastCheckpoint(taskName);
        LOG.debug("Last checkpoint on start for task: {} is: {}", taskName, lastCheckpoint);
    }
    stateBackendToBackupManager.values().forEach(backupManager -> backupManager.init(lastCheckpoint));
}
Also used : Checkpoint(org.apache.samza.checkpoint.Checkpoint)

Example 15 with Checkpoint

use of org.apache.samza.checkpoint.Checkpoint in project samza by apache.

the class TestBlobStoreUtil method testGetSSISkipsStoresWithSnapshotIndexAlreadyDeleted.

/**
 * Verifies that getStoreSnapshotIndexes omits a store whose snapshot index lookup fails with a
 * DeletedException while still returning the snapshot index of the other store.
 */
@Test
public void testGetSSISkipsStoresWithSnapshotIndexAlreadyDeleted() {
    // arrange: a V2 checkpoint referencing snapshot index blobs for two stores
    String liveStore = "storeName1";
    String deletedStore = "storeName2";
    String liveBlobId = "snapshotIndexBlobId1";
    String deletedBlobId = "snapshotIndexBlobId2";
    Checkpoint checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(liveStore, liveBlobId, deletedStore, deletedBlobId));
    Set<String> requestedStores = new HashSet<>();
    requestedStores.add(liveStore);
    requestedStores.add(deletedStore);

    // the first blob resolves to a snapshot index, the second fails as already deleted
    SnapshotIndex liveSnapshotIndex = mock(SnapshotIndex.class);
    BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
    CompletableFuture<SnapshotIndex> deletedLookup = FutureUtil.failedFuture(new DeletedException());
    when(blobStoreUtil.getSnapshotIndex(eq(liveBlobId), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(liveSnapshotIndex));
    when(blobStoreUtil.getSnapshotIndex(eq(deletedBlobId), any(Metadata.class))).thenReturn(deletedLookup);
    when(blobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();

    // act
    Map<String, Pair<String, SnapshotIndex>> result = blobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, requestedStores);

    // assert: only the store whose snapshot index still exists is returned
    assertEquals(1, result.size());
    Pair<String, SnapshotIndex> liveEntry = result.get(liveStore);
    assertEquals(liveBlobId, liveEntry.getLeft());
    assertEquals(liveSnapshotIndex, result.get(liveStore).getRight());
}
Also used : BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) HashSet(java.util.HashSet) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Aggregations

Checkpoint (org.apache.samza.checkpoint.Checkpoint)35 Test (org.junit.Test)22 TaskName (org.apache.samza.container.TaskName)21 HashMap (java.util.HashMap)20 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)18 Map (java.util.Map)16 Partition (org.apache.samza.Partition)16 CheckpointV1 (org.apache.samza.checkpoint.CheckpointV1)16 CheckpointV2 (org.apache.samza.checkpoint.CheckpointV2)16 File (java.io.File)15 SamzaException (org.apache.samza.SamzaException)15 CheckpointId (org.apache.samza.checkpoint.CheckpointId)14 CompletableFuture (java.util.concurrent.CompletableFuture)13 MapConfig (org.apache.samza.config.MapConfig)13 ImmutableMap (com.google.common.collect.ImmutableMap)12 Collections (java.util.Collections)12 Optional (java.util.Optional)12 CheckpointManager (org.apache.samza.checkpoint.CheckpointManager)12 TaskMode (org.apache.samza.job.model.TaskMode)12 TaskInstanceMetrics (org.apache.samza.container.TaskInstanceMetrics)11