Search in sources :

Example 66 with SamzaException

use of org.apache.samza.SamzaException in project samza by apache.

the class TransactionalStateTaskRestoreManager method setupStoreDirs.

/**
 * Prepares the on-disk store directories of a task based on the given {@link StoreActions}.
 *
 * For each store for this task,
 * a. Deletes every directory listed in {@code storeActions.storeDirsToDelete}.
 * b. Restores the files of every checkpoint directory listed in {@code storeActions.storeDirsToRetain}
 *    into the store's current directory under {@code loggedStoreBaseDirectory}.
 * c. Creates the current directory of every store that is persisted to disk if it does not exist yet
 *    (under {@code loggedStoreBaseDirectory} for logged stores, {@code nonLoggedStoreBaseDirectory} otherwise).
 *
 * When this method returns,
 * a. There will be a current dir for each store that is persisted to disk. For logged stores this dir
 *    may or may not be empty.
 * b. All directories that were marked for deletion are removed.
 * c. A retained checkpoint dir is intentionally NOT removed here, so that the move can be retried
 *    if the container restarts before the next commit (see the comment in the method body).
 *
 * @param taskModel provides the task name and task mode used to resolve store directories
 * @param storeEngines store name to storage engine; only engines persisted to disk are considered in step c
 * @param storeActions the directories to delete and to retain, per store
 * @param storageManagerUtil used to resolve task store directories and to restore checkpoint files
 * @param fileUtil used for delete, existence check and directory creation
 * @param loggedStoreBaseDirectory base directory for logged stores
 * @param nonLoggedStoreBaseDirectory base directory for non-logged stores
 * @throws SamzaException if the current directory of a store cannot be checked or created
 */
@VisibleForTesting
static void setupStoreDirs(TaskModel taskModel, Map<String, StorageEngine> storeEngines, StoreActions storeActions, StorageManagerUtil storageManagerUtil, FileUtil fileUtil, File loggedStoreBaseDirectory, File nonLoggedStoreBaseDirectory) {
    TaskName taskName = taskModel.getTaskName();
    TaskMode taskMode = taskModel.getTaskMode();
    ListMultimap<String, File> storeDirsToDelete = storeActions.storeDirsToDelete;
    Map<String, File> storeDirsToRetain = storeActions.storeDirsToRetain;
    // delete all persistent store directories marked for deletion
    storeDirsToDelete.entries().forEach(entry -> {
        String storeName = entry.getKey();
        File storeDirToDelete = entry.getValue();
        LOG.info("Deleting persistent store directory: {} for store: {} in task: {}", storeDirToDelete, storeName, taskName);
        fileUtil.rm(storeDirToDelete);
    });
    // rename all retained persistent logged store checkpoint directories to current directory
    storeDirsToRetain.forEach((storeName, storeDirToRetain) -> {
        File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
        // log the single directory being moved for this store, not the whole retain map
        LOG.info("Moving logged store checkpoint directory: {} for store: {} in task: {} to current directory: {}", storeDirToRetain, storeName, taskName, currentDir);
        storageManagerUtil.restoreCheckpointFiles(storeDirToRetain, currentDir);
        // do not remove the checkpoint directory yet. in case commit fails and container restarts,
        // we can retry the move. if we delete the checkpoint, the current dir will be deleted as well on
        // restart, and we will have to do a full restore.
    });
    // create any missing (not retained) current directories for persistent stores
    storeEngines.forEach((storeName, storageEngine) -> {
        if (storageEngine.getStoreProperties().isPersistedToDisk()) {
            File currentDir;
            if (storageEngine.getStoreProperties().isLoggedStore()) {
                currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
            } else {
                currentDir = storageManagerUtil.getTaskStoreDir(nonLoggedStoreBaseDirectory, storeName, taskName, taskMode);
            }
            try {
                if (!fileUtil.exists(currentDir.toPath())) {
                    LOG.info("Creating missing persistent store current directory: {} for store: {} in task: {}", currentDir, storeName, taskName);
                    fileUtil.createDirectories(currentDir.toPath());
                }
            } catch (Exception e) {
                // surface the store name; the original cause is preserved
                throw new SamzaException(String.format("Error setting up current directory for store: %s", storeName), e);
            }
        }
    });
}
Also used : TaskName(org.apache.samza.container.TaskName) TaskMode(org.apache.samza.job.model.TaskMode) File(java.io.File) SamzaException(org.apache.samza.SamzaException) SamzaException(org.apache.samza.SamzaException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 67 with SamzaException

use of org.apache.samza.SamzaException in project samza by apache.

the class BlobStoreBackupManager method upload.

/**
 * Starts an asynchronous upload of the checkpoint directory of every store in {@code storesToBackup}
 * to the blob store for the given checkpoint id.
 *
 * For each store, the diff between the local checkpoint directory and the previous snapshot's
 * {@link DirIndex} is computed synchronously; the upload of the diff, the creation of the new
 * {@link SnapshotIndex} and the upload of that index are chained asynchronously. The aggregate upload
 * gauges are reset at the start of the call, and {@code prevStoreSnapshotIndexesFuture} is replaced with
 * the future of the new store-to-snapshot-index mapping before returning.
 *
 * @param checkpointId id of the checkpoint whose local store checkpoint directories are uploaded
 * @param storeSCMs not read by this method body
 * @return a future of store name to the blob id of the uploaded snapshot index for that store
 * @throws SamzaException if setting up the upload for any store fails synchronously
 */
@Override
public CompletableFuture<Map<String, String>> upload(CheckpointId checkpointId, Map<String, String> storeSCMs) {
    long uploadStartTime = System.nanoTime();
    // reset gauges for each upload
    metrics.filesToUpload.getValue().set(0L);
    metrics.bytesToUpload.getValue().set(0L);
    metrics.filesUploaded.getValue().set(0L);
    metrics.bytesUploaded.getValue().set(0L);
    metrics.filesRemaining.getValue().set(0L);
    metrics.bytesRemaining.getValue().set(0L);
    metrics.filesToRetain.getValue().set(0L);
    metrics.bytesToRetain.getValue().set(0L);
    // This map is used to atomically replace the prevStoreSnapshotIndexesFuture map at the end of the task commit
    Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> storeToSCMAndSnapshotIndexPairFutures = new HashMap<>();
    // This map is used to return serialized State Checkpoint Markers to the caller
    Map<String, CompletableFuture<String>> storeToSerializedSCMFuture = new HashMap<>();
    storesToBackup.forEach((storeName) -> {
        // per-store start time; used for the storeUploadNs metric once this store's futures complete
        long storeUploadStartTime = System.nanoTime();
        try {
            // metadata for the current store snapshot to upload
            SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
            // get the local store dir corresponding to the current checkpointId
            File storeDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDir, storeName, taskModel.getTaskName(), taskModel.getTaskMode());
            String checkpointDirPath = storageManagerUtil.getStoreCheckpointDir(storeDir, checkpointId);
            File checkpointDir = new File(checkpointDirPath);
            LOG.debug("Got task: {} store: {} storeDir: {} and checkpointDir: {}", taskName, storeName, storeDir, checkpointDir);
            // guaranteed to be available since a new task commit may not start until the previous one is complete
            // (zero timeout: if the previous future is not complete, get() throws and the catch below wraps it)
            Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = prevStoreSnapshotIndexesFuture.get(0, TimeUnit.MILLISECONDS);
            // get the previous store directory contents
            DirIndex prevDirIndex;
            if (prevStoreSnapshotIndexes.containsKey(storeName)) {
                prevDirIndex = prevStoreSnapshotIndexes.get(storeName).getRight().getDirIndex();
            } else {
                // no previous SnapshotIndex means that this is the first commit for this store. Create an empty DirIndex.
                prevDirIndex = new DirIndex(checkpointDir.getName(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
            }
            long dirDiffStartTime = System.nanoTime();
            // get the diff between previous and current store directories
            // NOTE(review): meaning of the 'false' argument to areSameFile is not visible here - confirm against DirDiffUtil
            DirDiff dirDiff = DirDiffUtil.getDirDiff(checkpointDir, prevDirIndex, DirDiffUtil.areSameFile(false));
            metrics.storeDirDiffNs.get(storeName).update(System.nanoTime() - dirDiffStartTime);
            DirDiff.Stats stats = DirDiff.getStats(dirDiff);
            updateStoreDiffMetrics(storeName, stats);
            // the aggregate gauges were reset above, so these accumulate across all stores of this upload
            metrics.filesToUpload.getValue().addAndGet(stats.filesAdded);
            metrics.bytesToUpload.getValue().addAndGet(stats.bytesAdded);
            // Note: FilesRemaining metric is set to FilesAdded in the beginning of the current upload and then counted down
            // for each upload.
            metrics.filesRemaining.getValue().addAndGet(stats.filesAdded);
            metrics.bytesRemaining.getValue().addAndGet(stats.bytesAdded);
            metrics.filesToRetain.getValue().addAndGet(stats.filesRetained);
            metrics.bytesToRetain.getValue().addAndGet(stats.bytesRetained);
            // upload the diff to the blob store and get the new directory index
            CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
            // once the dir upload completes, build the new SnapshotIndex, linking it to the previous
            // snapshot index blob id for this store when one exists
            CompletionStage<SnapshotIndex> snapshotIndexFuture = dirIndexFuture.thenApplyAsync(dirIndex -> {
                LOG.trace("Dir upload complete. Returning new SnapshotIndex for task: {} store: {}.", taskName, storeName);
                Optional<String> prevSnapshotIndexBlobId = Optional.ofNullable(prevStoreSnapshotIndexes.get(storeName)).map(Pair::getLeft);
                return new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, dirIndex, prevSnapshotIndexBlobId);
            }, executor);
            // upload the new snapshot index to the blob store and get its blob id
            CompletionStage<String> snapshotIndexBlobIdFuture = snapshotIndexFuture.thenComposeAsync(si -> {
                LOG.trace("Uploading Snapshot index for task: {} store: {}", taskName, storeName);
                return blobStoreUtil.putSnapshotIndex(si);
            }, executor);
            // save store name and its SnapshotIndex blob id and SnapshotIndex pair. At the end of the upload, atomically
            // update previous snapshot index map with this.
            CompletableFuture<Pair<String, SnapshotIndex>> scmAndSnapshotIndexPairFuture = FutureUtil.toFutureOfPair(Pair.of(snapshotIndexBlobIdFuture.toCompletableFuture(), snapshotIndexFuture.toCompletableFuture()));
            // record the per-store upload duration whether the pair future completed normally or exceptionally
            scmAndSnapshotIndexPairFuture.whenComplete((res, ex) -> {
                long uploadTimeNs = System.nanoTime() - storeUploadStartTime;
                metrics.storeUploadNs.get(storeName).update(uploadTimeNs);
            });
            storeToSCMAndSnapshotIndexPairFutures.put(storeName, scmAndSnapshotIndexPairFuture);
            storeToSerializedSCMFuture.put(storeName, snapshotIndexBlobIdFuture.toCompletableFuture());
        } catch (Exception e) {
            // only synchronous failures land here; failures of the async stages surface through the returned future
            throw new SamzaException(String.format("Error uploading store snapshot to blob store for task: %s, store: %s, checkpointId: %s", taskName, storeName, checkpointId), e);
        }
    });
    // replace the previous storeName to snapshot index mapping with the new mapping.
    this.prevStoreSnapshotIndexesFuture = FutureUtil.toFutureOfMap(storeToSCMAndSnapshotIndexPairFutures);
    // record the total upload duration when the combined future completes (normally or exceptionally)
    return FutureUtil.toFutureOfMap(storeToSerializedSCMFuture).whenComplete((res, ex) -> metrics.uploadNs.update(System.nanoTime() - uploadStartTime));
}
Also used : SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) SamzaException(org.apache.samza.SamzaException) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)

Example 68 with SamzaException

use of org.apache.samza.SamzaException in project samza by apache.

the class ContainerLaunchUtil method createContainerHeartbeatMonitor.

/**
 * Builds a {@link ContainerHeartbeatMonitor} for the given container when the feature is enabled in the
 * job config and the execution environment container id is present in the process environment.
 *
 * The callback handed to the monitor shuts the container down and records a {@link SamzaException} in
 * {@code containerRunnerException}; if the shutdown itself throws, the process exits with status 1.
 *
 * @param container the container to monitor
 * @param coordinatorStreamStore the metadata store to fetch coordinator url from
 * @param config the job configuration
 * @return a new {@link ContainerHeartbeatMonitor} instance, or null if the monitor is disabled or the
 *         execution environment container id is not set
 */
private static ContainerHeartbeatMonitor createContainerHeartbeatMonitor(SamzaContainer container, MetadataStore coordinatorStreamStore, Config config) {
    JobConfig jobConfig = new JobConfig(config);
    if (!jobConfig.getContainerHeartbeatMonitorEnabled()) {
        log.info("Container heartbeat monitor is disabled");
        return null;
    }

    String coordinatorUrl = System.getenv(ShellCommandConfig.ENV_COORDINATOR_URL);
    String executionEnvContainerId = System.getenv(ShellCommandConfig.ENV_EXECUTION_ENV_CONTAINER_ID);
    if (executionEnvContainerId == null) {
        log.warn("Container heartbeat monitor is enabled, but execution environment container id is not set. " + "Container heartbeat monitor will not be created");
        return null;
    }

    log.info("Got execution environment container id for container heartbeat monitor: {}", executionEnvContainerId);
    return new ContainerHeartbeatMonitor(() -> {
        try {
            container.shutdown();
            containerRunnerException = new SamzaException("Container shutdown due to expired heartbeat");
        } catch (Exception e) {
            // graceful shutdown failed; terminate the process
            log.error("Heartbeat monitor failed to shutdown the container gracefully. Exiting process.", e);
            System.exit(1);
        }
    }, coordinatorUrl, executionEnvContainerId, coordinatorStreamStore, config);
}
Also used : ContainerHeartbeatMonitor(org.apache.samza.container.ContainerHeartbeatMonitor) SamzaException(org.apache.samza.SamzaException) JobConfig(org.apache.samza.config.JobConfig) SamzaException(org.apache.samza.SamzaException)

Example 69 with SamzaException

use of org.apache.samza.SamzaException in project samza by apache.

the class ChangelogStreamManager method createChangelogStreams.

/**
 * Creates and validates the changelog streams of a samza job.
 *
 * For every store that has a non-blank changelog stream configured, the system admin of the changelog's
 * system is obtained and started, the changelog stream is created (if it does not already exist) and
 * validated, and, when access logging is enabled for the store, the access log stream is created and
 * validated as well. The system admin is always stopped afterwards, even when creation or validation fails.
 *
 * @param config the configuration with changelog info.
 * @param maxChangeLogStreamPartitions the maximum number of changelog stream partitions to create.
 * @throws SamzaException if the system of a changelog stream has no system factory in the configuration,
 *         or its factory does not provide a system admin.
 */
public static void createChangelogStreams(Config config, int maxChangeLogStreamPartitions) {
    // Get changelog store config
    StorageConfig storageConfig = new StorageConfig(config);
    ImmutableMap.Builder<String, SystemStream> storeNameSystemStreamMapBuilder = new ImmutableMap.Builder<>();
    storageConfig.getStoreNames().forEach(storeName -> {
        Optional<String> changelogStream = storageConfig.getChangelogStream(storeName);
        if (changelogStream.isPresent() && StringUtils.isNotBlank(changelogStream.get())) {
            storeNameSystemStreamMapBuilder.put(storeName, StreamUtil.getSystemStreamFromNames(changelogStream.get()));
        }
    });
    Map<String, SystemStream> storeNameSystemStreamMapping = storeNameSystemStreamMapBuilder.build();
    // Get SystemAdmin for changelog store's system and attempt to create the stream
    SystemConfig systemConfig = new SystemConfig(config);
    storeNameSystemStreamMapping.forEach((storeName, systemStream) -> {
        String systemName = systemStream.getSystem();
        // Load system admin for this system. A system without a configured factory yields a null lookup;
        // map it to a null admin so that the descriptive exception below is thrown instead of an NPE.
        SystemAdmin systemAdmin = Optional.ofNullable(systemConfig.getSystemFactories().get(systemName))
            .map(factory -> factory.getAdmin(systemName, config, ChangelogStreamManager.class.getSimpleName()))
            .orElse(null);
        if (systemAdmin == null) {
            throw new SamzaException(String.format("Error creating changelog. Changelog on store %s uses system %s, which is missing from the configuration.", storeName, systemName));
        }
        StreamSpec changelogSpec = StreamSpec.createChangeLogStreamSpec(systemStream.getStream(), systemName, maxChangeLogStreamPartitions);
        systemAdmin.start();
        try {
            if (systemAdmin.createStream(changelogSpec)) {
                LOG.info(String.format("created changelog stream %s.", systemStream.getStream()));
            } else {
                LOG.info(String.format("changelog stream %s already exists.", systemStream.getStream()));
            }
            systemAdmin.validateStream(changelogSpec);
            if (storageConfig.getAccessLogEnabled(storeName)) {
                String accesslogStream = storageConfig.getAccessLogStream(systemStream.getStream());
                StreamSpec accesslogSpec = new StreamSpec(accesslogStream, accesslogStream, systemName, maxChangeLogStreamPartitions);
                systemAdmin.createStream(accesslogSpec);
                systemAdmin.validateStream(accesslogSpec);
            }
        } finally {
            // always release the started admin, even if stream creation or validation throws
            systemAdmin.stop();
        }
    });
}
Also used : StreamSpec(org.apache.samza.system.StreamSpec) SystemConfig(org.apache.samza.config.SystemConfig) StorageConfig(org.apache.samza.config.StorageConfig) SystemStream(org.apache.samza.system.SystemStream) SamzaException(org.apache.samza.SamzaException) ImmutableMap(com.google.common.collect.ImmutableMap) SystemAdmin(org.apache.samza.system.SystemAdmin)

Example 70 with SamzaException

use of org.apache.samza.SamzaException in project samza by apache.

the class StartpointManager method readStartpoint.

/**
 * Looks up and deserializes the {@link Startpoint} stored for a {@link SystemStreamPartition} and
 * {@link TaskName} in the supplied map.
 * @param startpointMap serialized startpoints, looked up with the key built from {@code ssp} and {@code taskName}.
 * @param ssp The {@link SystemStreamPartition} to fetch the {@link Startpoint} for. Must not be null.
 * @param taskName The {@link TaskName} to fetch the {@link Startpoint} for.
 * @return {@link Optional} of {@link Startpoint} for the {@link SystemStreamPartition} and {@link TaskName}.
 *         It is empty if no non-empty entry exists, or if the startpoint's creation timestamp is older than
 *         {@code DEFAULT_EXPIRATION_DURATION}.
 * @throws IllegalStateException if this manager has been stopped.
 * @throws SamzaException if the stored bytes cannot be deserialized.
 */
public Optional<Startpoint> readStartpoint(Map<String, byte[]> startpointMap, SystemStreamPartition ssp, TaskName taskName) {
    Preconditions.checkState(!stopped, "Underlying metadata store not available");
    Preconditions.checkNotNull(ssp, "SystemStreamPartition cannot be null");

    byte[] serialized = startpointMap.get(toReadWriteStoreKey(ssp, taskName));
    if (!ArrayUtils.isNotEmpty(serialized)) {
        // nothing stored for this key
        return Optional.empty();
    }

    try {
        Startpoint candidate = objectMapper.readValue(serialized, Startpoint.class);
        Instant expirationCutoff = Instant.now().minus(DEFAULT_EXPIRATION_DURATION);
        Instant createdAt = Instant.ofEpochMilli(candidate.getCreationTimestamp());
        boolean fresh = expirationCutoff.isBefore(createdAt);
        if (!fresh) {
            // deserializable but stale: report and fall through to the empty result
            LOG.warn("Creation timestamp: {} of startpoint: {} has crossed the expiration duration: {}. Ignoring it", candidate.getCreationTimestamp(), candidate, DEFAULT_EXPIRATION_DURATION);
            return Optional.empty();
        }
        return Optional.of(candidate);
    } catch (IOException ex) {
        throw new SamzaException(ex);
    }
}
Also used : IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException)

Aggregations

SamzaException (org.apache.samza.SamzaException)256 IOException (java.io.IOException)61 HashMap (java.util.HashMap)57 Test (org.junit.Test)40 Map (java.util.Map)38 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)34 ArrayList (java.util.ArrayList)30 List (java.util.List)27 File (java.io.File)26 JobConfig (org.apache.samza.config.JobConfig)26 Config (org.apache.samza.config.Config)25 VisibleForTesting (com.google.common.annotations.VisibleForTesting)24 SystemStream (org.apache.samza.system.SystemStream)24 CompletableFuture (java.util.concurrent.CompletableFuture)23 Logger (org.slf4j.Logger)21 LoggerFactory (org.slf4j.LoggerFactory)21 Set (java.util.Set)20 Collections (java.util.Collections)19 MapConfig (org.apache.samza.config.MapConfig)18 TaskName (org.apache.samza.container.TaskName)18