use of org.apache.samza.SamzaException in project samza by apache.
the class TransactionalStateTaskRestoreManager method setupStoreDirs.
/**
* For each store for this task,
* a. Deletes the current directory if the store is persistent but non-logged.
* b. Deletes the current and checkpoint directories if the store is a persistent logged store and the directory is marked for deletion.
* c. Moves the valid persistent logged store checkpoint directory to the current directory if it is marked for retention.
* d. Creates all missing (i.e., not retained in step c) persistent logged store dirs.
*
* When this method returns,
* a. There will be an empty current dir for each persistent but non-logged store.
* b. There will be a current dir for each persistent logged store. This dir may or may not be empty.
* c. There will be no remaining checkpoint dirs for persistent logged stores.
*/
@VisibleForTesting
static void setupStoreDirs(TaskModel taskModel, Map<String, StorageEngine> storeEngines, StoreActions storeActions, StorageManagerUtil storageManagerUtil, FileUtil fileUtil, File loggedStoreBaseDirectory, File nonLoggedStoreBaseDirectory) {
TaskName taskName = taskModel.getTaskName();
TaskMode taskMode = taskModel.getTaskMode();
ListMultimap<String, File> storeDirsToDelete = storeActions.storeDirsToDelete;
Map<String, File> storeDirsToRetain = storeActions.storeDirsToRetain;
// delete all persistent store directories marked for deletion
storeDirsToDelete.entries().forEach(entry -> {
String storeName = entry.getKey();
File storeDirToDelete = entry.getValue();
LOG.info("Deleting persistent store directory: {} for store: {} in task: {}", storeDirToDelete, storeName, taskName);
fileUtil.rm(storeDirToDelete);
});
// rename all retained persistent logged store checkpoint directories to current directory
storeDirsToRetain.forEach((storeName, storeDirToRetain) -> {
File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
LOG.info("Moving logged store checkpoint directory: {} for store: {} in task: {} to current directory: {}", storeDirsToRetain.toString(), storeName, taskName, currentDir);
storageManagerUtil.restoreCheckpointFiles(storeDirToRetain, currentDir);
// do not remove the checkpoint directory yet. in case commit fails and container restarts,
// we can retry the move. if we delete the checkpoint, the current dir will be deleted as well on
// restart, and we will have to do a full restore.
});
// create any missing (not retained) current directories for persistent stores
storeEngines.forEach((storeName, storageEngine) -> {
if (storageEngine.getStoreProperties().isPersistedToDisk()) {
File currentDir;
if (storageEngine.getStoreProperties().isLoggedStore()) {
currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
} else {
currentDir = storageManagerUtil.getTaskStoreDir(nonLoggedStoreBaseDirectory, storeName, taskName, taskMode);
}
try {
if (!fileUtil.exists(currentDir.toPath())) {
LOG.info("Creating missing persistent store current directory: {} for store: {} in task: {}", currentDir, storeName, taskName);
fileUtil.createDirectories(currentDir.toPath());
}
} catch (Exception e) {
throw new SamzaException(String.format("Error setting up current directory for store: %s", storeName), e);
}
}
});
}
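Every directory that setupStoreDirs deletes, retains, or creates is derived through StorageManagerUtil. Below is a minimal sketch of that derivation, using only the methods visible in these snippets; the base directory path, store name, and task name are illustrative, and CheckpointId.create() is assumed as the factory for commit ids.
StorageManagerUtil storageManagerUtil = new StorageManagerUtil();
File loggedStoreBaseDir = new File("/tmp/samza/logged-store-base"); // illustrative path
TaskName taskName = new TaskName("Partition 0"); // illustrative task name
// Current directory for a logged store, as resolved in setupStoreDirs above.
File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDir, "my-store", taskName, TaskMode.Active);
// Checkpoint directory for a given commit, as resolved in BlobStoreBackupManager.upload below.
CheckpointId checkpointId = CheckpointId.create();
String checkpointDirPath = storageManagerUtil.getStoreCheckpointDir(currentDir, checkpointId);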
use of org.apache.samza.SamzaException in project samza by apache.
the class BlobStoreBackupManager method upload.
@Override
public CompletableFuture<Map<String, String>> upload(CheckpointId checkpointId, Map<String, String> storeSCMs) {
long uploadStartTime = System.nanoTime();
// reset gauges for each upload
metrics.filesToUpload.getValue().set(0L);
metrics.bytesToUpload.getValue().set(0L);
metrics.filesUploaded.getValue().set(0L);
metrics.bytesUploaded.getValue().set(0L);
metrics.filesRemaining.getValue().set(0L);
metrics.bytesRemaining.getValue().set(0L);
metrics.filesToRetain.getValue().set(0L);
metrics.bytesToRetain.getValue().set(0L);
// This map is used to atomically replace the prevStoreSnapshotIndexesFuture map at the end of the task commit
Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> storeToSCMAndSnapshotIndexPairFutures = new HashMap<>();
// This map is used to return serialized State Checkpoint Markers to the caller
Map<String, CompletableFuture<String>> storeToSerializedSCMFuture = new HashMap<>();
storesToBackup.forEach((storeName) -> {
long storeUploadStartTime = System.nanoTime();
try {
// metadata for the current store snapshot to upload
SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
// get the local store dir corresponding to the current checkpointId
File storeDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDir, storeName, taskModel.getTaskName(), taskModel.getTaskMode());
String checkpointDirPath = storageManagerUtil.getStoreCheckpointDir(storeDir, checkpointId);
File checkpointDir = new File(checkpointDirPath);
LOG.debug("Got task: {} store: {} storeDir: {} and checkpointDir: {}", taskName, storeName, storeDir, checkpointDir);
// guaranteed to be available since a new task commit may not start until the previous one is complete
Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = prevStoreSnapshotIndexesFuture.get(0, TimeUnit.MILLISECONDS);
// get the previous store directory contents
DirIndex prevDirIndex;
if (prevStoreSnapshotIndexes.containsKey(storeName)) {
prevDirIndex = prevStoreSnapshotIndexes.get(storeName).getRight().getDirIndex();
} else {
// no previous SnapshotIndex means that this is the first commit for this store. Create an empty DirIndex.
prevDirIndex = new DirIndex(checkpointDir.getName(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
}
long dirDiffStartTime = System.nanoTime();
// get the diff between previous and current store directories
DirDiff dirDiff = DirDiffUtil.getDirDiff(checkpointDir, prevDirIndex, DirDiffUtil.areSameFile(false));
metrics.storeDirDiffNs.get(storeName).update(System.nanoTime() - dirDiffStartTime);
DirDiff.Stats stats = DirDiff.getStats(dirDiff);
updateStoreDiffMetrics(storeName, stats);
metrics.filesToUpload.getValue().addAndGet(stats.filesAdded);
metrics.bytesToUpload.getValue().addAndGet(stats.bytesAdded);
// Note: the FilesRemaining metric is initialized to FilesAdded at the start of the current upload and
// then decremented as each file upload completes.
metrics.filesRemaining.getValue().addAndGet(stats.filesAdded);
metrics.bytesRemaining.getValue().addAndGet(stats.bytesAdded);
metrics.filesToRetain.getValue().addAndGet(stats.filesRetained);
metrics.bytesToRetain.getValue().addAndGet(stats.bytesRetained);
// upload the diff to the blob store and get the new directory index
CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
CompletionStage<SnapshotIndex> snapshotIndexFuture = dirIndexFuture.thenApplyAsync(dirIndex -> {
LOG.trace("Dir upload complete. Returning new SnapshotIndex for task: {} store: {}.", taskName, storeName);
Optional<String> prevSnapshotIndexBlobId = Optional.ofNullable(prevStoreSnapshotIndexes.get(storeName)).map(Pair::getLeft);
return new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, dirIndex, prevSnapshotIndexBlobId);
}, executor);
// upload the new snapshot index to the blob store and get its blob id
CompletionStage<String> snapshotIndexBlobIdFuture = snapshotIndexFuture.thenComposeAsync(si -> {
LOG.trace("Uploading Snapshot index for task: {} store: {}", taskName, storeName);
return blobStoreUtil.putSnapshotIndex(si);
}, executor);
// save the store name and its (SnapshotIndex blob id, SnapshotIndex) pair. At the end of the upload, atomically
// update the previous snapshot index map with this.
CompletableFuture<Pair<String, SnapshotIndex>> scmAndSnapshotIndexPairFuture = FutureUtil.toFutureOfPair(Pair.of(snapshotIndexBlobIdFuture.toCompletableFuture(), snapshotIndexFuture.toCompletableFuture()));
scmAndSnapshotIndexPairFuture.whenComplete((res, ex) -> {
long uploadTimeNs = System.nanoTime() - storeUploadStartTime;
metrics.storeUploadNs.get(storeName).update(uploadTimeNs);
});
storeToSCMAndSnapshotIndexPairFutures.put(storeName, scmAndSnapshotIndexPairFuture);
storeToSerializedSCMFuture.put(storeName, snapshotIndexBlobIdFuture.toCompletableFuture());
} catch (Exception e) {
throw new SamzaException(String.format("Error uploading store snapshot to blob store for task: %s, store: %s, checkpointId: %s", taskName, storeName, checkpointId), e);
}
});
// replace the previous storeName to snapshot index mapping with the new mapping.
this.prevStoreSnapshotIndexesFuture = FutureUtil.toFutureOfMap(storeToSCMAndSnapshotIndexPairFutures);
return FutureUtil.toFutureOfMap(storeToSerializedSCMFuture).whenComplete((res, ex) -> metrics.uploadNs.update(System.nanoTime() - uploadStartTime));
}
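A minimal sketch of how a caller in the commit flow might consume the future returned by upload(). The backupManager instance and the contents of the storeSCMs map are assumed to come from the surrounding commit logic; the names here are illustrative.
CheckpointId checkpointId = CheckpointId.create();
Map<String, String> storeSCMs = new HashMap<>(); // local state checkpoint markers per store
CompletableFuture<Map<String, String>> uploadFuture = backupManager.upload(checkpointId, storeSCMs);
// Block until every per-store upload completes. Each value is the blob id of the
// uploaded SnapshotIndex, which serves as the serialized state checkpoint marker.
Map<String, String> storeToSCM = uploadFuture.join();
storeToSCM.forEach((store, scm) -> LOG.info("Uploaded store: {} with SCM: {}", store, scm));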
use of org.apache.samza.SamzaException in project samza by apache.
the class ContainerLaunchUtil method createContainerHeartbeatMonitor.
/**
* Creates a new container heartbeat monitor if possible.
* @param container the container to monitor
* @param coordinatorStreamStore the metadata store to fetch coordinator url from
* @param config the job configuration
* @return a new {@link ContainerHeartbeatMonitor} instance, or null if one could not be created
*/
private static ContainerHeartbeatMonitor createContainerHeartbeatMonitor(SamzaContainer container, MetadataStore coordinatorStreamStore, Config config) {
if (new JobConfig(config).getContainerHeartbeatMonitorEnabled()) {
String coordinatorUrl = System.getenv(ShellCommandConfig.ENV_COORDINATOR_URL);
String executionEnvContainerId = System.getenv(ShellCommandConfig.ENV_EXECUTION_ENV_CONTAINER_ID);
if (executionEnvContainerId != null) {
log.info("Got execution environment container id for container heartbeat monitor: {}", executionEnvContainerId);
return new ContainerHeartbeatMonitor(() -> {
try {
container.shutdown();
containerRunnerException = new SamzaException("Container shutdown due to expired heartbeat");
} catch (Exception e) {
log.error("Heartbeat monitor failed to shutdown the container gracefully. Exiting process.", e);
System.exit(1);
}
}, coordinatorUrl, executionEnvContainerId, coordinatorStreamStore, config);
} else {
log.warn("Container heartbeat monitor is enabled, but execution environment container id is not set. " + "Container heartbeat monitor will not be created");
return null;
}
} else {
log.info("Container heartbeat monitor is disabled");
return null;
}
}
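A minimal sketch of how the returned monitor is typically wired around the container lifecycle, assuming the start() and stop() methods that ContainerHeartbeatMonitor exposes; the surrounding launch code is elided.
ContainerHeartbeatMonitor heartbeatMonitor = createContainerHeartbeatMonitor(container, coordinatorStreamStore, config);
if (heartbeatMonitor != null) {
  // begin polling the coordinator for liveness; an expired heartbeat triggers the shutdown callback above
  heartbeatMonitor.start();
}
try {
  container.run();
} finally {
  if (heartbeatMonitor != null) {
    heartbeatMonitor.stop();
  }
}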
use of org.apache.samza.SamzaException in project samza by apache.
the class ChangelogStreamManager method createChangelogStreams.
/**
* Creates and validates the changelog streams of a samza job.
*
* @param config the configuration with changelog info.
* @param maxChangeLogStreamPartitions the maximum number of changelog stream partitions to create.
*/
public static void createChangelogStreams(Config config, int maxChangeLogStreamPartitions) {
// Get changelog store config
StorageConfig storageConfig = new StorageConfig(config);
ImmutableMap.Builder<String, SystemStream> storeNameSystemStreamMapBuilder = new ImmutableMap.Builder<>();
storageConfig.getStoreNames().forEach(storeName -> {
Optional<String> changelogStream = storageConfig.getChangelogStream(storeName);
if (changelogStream.isPresent() && StringUtils.isNotBlank(changelogStream.get())) {
storeNameSystemStreamMapBuilder.put(storeName, StreamUtil.getSystemStreamFromNames(changelogStream.get()));
}
});
Map<String, SystemStream> storeNameSystemStreamMapping = storeNameSystemStreamMapBuilder.build();
// Get SystemAdmin for changelog store's system and attempt to create the stream
SystemConfig systemConfig = new SystemConfig(config);
storeNameSystemStreamMapping.forEach((storeName, systemStream) -> {
// Load system admin for this system.
SystemAdmin systemAdmin = systemConfig.getSystemFactories().get(systemStream.getSystem()).getAdmin(systemStream.getSystem(), config, ChangelogStreamManager.class.getSimpleName());
if (systemAdmin == null) {
throw new SamzaException(String.format("Error creating changelog. Changelog on store %s uses system %s, which is missing from the configuration.", storeName, systemStream.getSystem()));
}
StreamSpec changelogSpec = StreamSpec.createChangeLogStreamSpec(systemStream.getStream(), systemStream.getSystem(), maxChangeLogStreamPartitions);
systemAdmin.start();
if (systemAdmin.createStream(changelogSpec)) {
LOG.info(String.format("created changelog stream %s.", systemStream.getStream()));
} else {
LOG.info(String.format("changelog stream %s already exists.", systemStream.getStream()));
}
systemAdmin.validateStream(changelogSpec);
if (storageConfig.getAccessLogEnabled(storeName)) {
String accesslogStream = storageConfig.getAccessLogStream(systemStream.getStream());
StreamSpec accesslogSpec = new StreamSpec(accesslogStream, accesslogStream, systemStream.getSystem(), maxChangeLogStreamPartitions);
systemAdmin.createStream(accesslogSpec);
systemAdmin.validateStream(accesslogSpec);
}
systemAdmin.stop();
});
}
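A minimal sketch of invoking createChangelogStreams with an in-memory config. The keys follow Samza's stores.<store-name>.* and systems.<system-name>.* conventions; the store name, changelog stream, and partition count are illustrative.
Map<String, String> configMap = new HashMap<>();
configMap.put("stores.my-store.factory", "org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory");
configMap.put("stores.my-store.changelog", "kafka.my-store-changelog");
configMap.put("systems.kafka.samza.factory", "org.apache.samza.system.kafka.KafkaSystemFactory");
Config config = new MapConfig(configMap);
// Creates the changelog stream if missing, then validates it, for every store
// that declares a changelog in the config.
ChangelogStreamManager.createChangelogStreams(config, 8);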
use of org.apache.samza.SamzaException in project samza by apache.
the class StartpointManager method readStartpoint.
/**
* Returns the {@link Startpoint} for a {@link SystemStreamPartition} and {@link TaskName}.
* @param startpointMap The serialized startpoint entries previously read from the metadata store, keyed by SSP and task.
* @param ssp The {@link SystemStreamPartition} to fetch the {@link Startpoint} for.
* @param taskName The {@link TaskName} to fetch the {@link Startpoint} for.
* @return {@link Optional} of {@link Startpoint} for the {@link SystemStreamPartition} and {@link TaskName}.
* It is empty if it does not exist or if it is too stale.
*/
public Optional<Startpoint> readStartpoint(Map<String, byte[]> startpointMap, SystemStreamPartition ssp, TaskName taskName) {
Preconditions.checkState(!stopped, "Underlying metadata store not available");
Preconditions.checkNotNull(ssp, "SystemStreamPartition cannot be null");
byte[] startpointBytes = startpointMap.get(toReadWriteStoreKey(ssp, taskName));
if (ArrayUtils.isNotEmpty(startpointBytes)) {
try {
Startpoint startpoint = objectMapper.readValue(startpointBytes, Startpoint.class);
if (Instant.now().minus(DEFAULT_EXPIRATION_DURATION).isBefore(Instant.ofEpochMilli(startpoint.getCreationTimestamp()))) {
// return if deserializable and if not stale
return Optional.of(startpoint);
}
LOG.warn("Creation timestamp: {} of startpoint: {} has crossed the expiration duration: {}. Ignoring it", startpoint.getCreationTimestamp(), startpoint, DEFAULT_EXPIRATION_DURATION);
} catch (IOException ex) {
throw new SamzaException(ex);
}
}
return Optional.empty();
}
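A minimal sketch of resolving a startpoint for a single task and partition. The startpointMap is assumed to have been fetched from the metadata store beforehand; the system, stream, and task names are illustrative.
SystemStreamPartition ssp = new SystemStreamPartition("kafka", "input-stream", new Partition(0));
TaskName taskName = new TaskName("Partition 0");
Optional<Startpoint> startpoint = startpointManager.readStartpoint(startpointMap, ssp, taskName);
// empty if no startpoint was written for this SSP/task, or if it has expired
startpoint.ifPresent(sp -> LOG.info("Resolved startpoint: {} for ssp: {} and task: {}", sp, ssp, taskName));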