Use of org.apache.samza.job.model.ContainerModel in project samza by apache.
In class ContainerStorageManager, method restoreStores:
// Restoration of all stores, in parallel across tasks
private void restoreStores() throws InterruptedException {
LOG.info("Store Restore started");
Set<TaskName> activeTasks = getTasks(containerModel, TaskMode.Active).keySet();
// TODO HIGH dchen verify davinci lifecycle
// Find all non-side input stores
Set<String> nonSideInputStoreNames = storageEngineFactories.keySet().stream().filter(storeName -> !sideInputStoreNames.contains(storeName)).collect(Collectors.toSet());
// Obtain the checkpoints for each task
Map<TaskName, Map<String, TaskRestoreManager>> taskRestoreManagers = new HashMap<>();
Map<TaskName, Checkpoint> taskCheckpoints = new HashMap<>();
containerModel.getTasks().forEach((taskName, taskModel) -> {
Checkpoint taskCheckpoint = null;
if (checkpointManager != null && activeTasks.contains(taskName)) {
// only pass in checkpoints for active tasks
taskCheckpoint = checkpointManager.readLastCheckpoint(taskName);
LOG.info("Obtained checkpoint: {} for state restore for taskName: {}", taskCheckpoint, taskName);
}
taskCheckpoints.put(taskName, taskCheckpoint);
Map<String, Set<String>> backendFactoryStoreNames = getBackendFactoryStoreNames(taskCheckpoint, nonSideInputStoreNames, new StorageConfig(config));
Map<String, TaskRestoreManager> taskStoreRestoreManagers = createTaskRestoreManagers(restoreStateBackendFactories, backendFactoryStoreNames, clock, samzaContainerMetrics, taskName, taskModel);
taskRestoreManagers.put(taskName, taskStoreRestoreManagers);
});
// Initialize each TaskStorageManager
taskRestoreManagers.forEach((taskName, restoreManagers) -> restoreManagers.forEach((factoryName, taskRestoreManager) -> taskRestoreManager.init(taskCheckpoints.get(taskName))));
// Start each store consumer once.
// Note: These consumers are per system and only changelog system store consumers will be started.
// Some TaskRestoreManagers may not require the consumer to be started, but due to the agnostic nature of
// ContainerStorageManager we always start the changelog consumer here in case it is required.
this.storeConsumers.values().stream().distinct().forEach(SystemConsumer::start);
List<Future<Void>> taskRestoreFutures = new ArrayList<>();
// Submit restore callable for each taskInstance
taskRestoreManagers.forEach((taskInstance, restoreManagersMap) -> {
// Submit for each restore factory
restoreManagersMap.forEach((factoryName, taskRestoreManager) -> {
long startTime = System.currentTimeMillis();
String taskName = taskInstance.getTaskName();
LOG.info("Starting restore for state for task: {}", taskName);
CompletableFuture<Void> restoreFuture = taskRestoreManager.restore().handle((res, ex) -> {
// Stop all persistent stores after restoring. Certain persistent stores opened in BulkLoad mode are compacted
// on stop, so parallelizing stop() also parallelizes their compaction (a time-intensive operation).
try {
taskRestoreManager.close();
} catch (Exception e) {
LOG.error("Error closing restore manager for task: {} after {} restore", taskName, ex != null ? "unsuccessful" : "successful", e);
// ignore exceptions from close; the container may still be able to continue processing/backups
// if restore manager close fails.
}
long timeToRestore = System.currentTimeMillis() - startTime;
if (samzaContainerMetrics != null) {
Gauge taskGauge = samzaContainerMetrics.taskStoreRestorationMetrics().getOrDefault(taskInstance, null);
if (taskGauge != null) {
taskGauge.set(timeToRestore);
}
}
if (ex != null) {
// log and rethrow exception to communicate restore failure
String msg = String.format("Error restoring state for task: %s", taskName);
LOG.error(msg, ex);
// wrap in unchecked exception to throw from lambda
throw new SamzaException(msg, ex);
} else {
return null;
}
});
taskRestoreFutures.add(restoreFuture);
});
});
// Wait for all restore futures to complete; any exceptions encountered during restore are rethrown
// as samza exceptions
for (Future<Void> future : taskRestoreFutures) {
try {
future.get();
} catch (InterruptedException e) {
LOG.warn("Received an interrupt during store restoration. Interrupting the restore executor to exit " + "prematurely without restoring full state.");
restoreExecutor.shutdownNow();
throw e;
} catch (Exception e) {
LOG.error("Exception when restoring state.", e);
throw new SamzaException("Exception when restoring state.", e);
}
}
// Stop each store consumer once
this.storeConsumers.values().stream().distinct().forEach(SystemConsumer::stop);
// Now create persistent non-side-input stores in read-write mode; leave non-persistent stores as-is
this.taskStores = createTaskStores(nonSideInputStoreNames, this.containerModel, jobContext, containerContext, storageEngineFactories, serdes, taskInstanceMetrics, taskInstanceCollectors);
// Add in memory stores
this.inMemoryStores.forEach((taskName, stores) -> {
if (!this.taskStores.containsKey(taskName)) {
taskStores.put(taskName, new HashMap<>());
}
taskStores.get(taskName).putAll(stores);
});
// Add side input stores
this.sideInputStores.forEach((taskName, stores) -> {
if (!this.taskStores.containsKey(taskName)) {
taskStores.put(taskName, new HashMap<>());
}
taskStores.get(taskName).putAll(stores);
});
LOG.info("Store Restore complete");
}
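The restore flow above is driven entirely by the ContainerModel: containerModel.getTasks() yields one TaskModel per task, and only active tasks read checkpoints. Below is a minimal, illustrative sketch of how a mode-based filter like the getTasks(containerModel, TaskMode.Active) call on the first line could be implemented; the real helper lives in ContainerStorageManager and may differ, so treat this as an approximation.

import java.util.Map;
import java.util.stream.Collectors;
import org.apache.samza.container.TaskName;
import org.apache.samza.job.model.ContainerModel;
import org.apache.samza.job.model.TaskMode;
import org.apache.samza.job.model.TaskModel;

public class TaskModeFilter {
  // Keep only the tasks in the container whose TaskMode matches the requested mode,
  // e.g. TaskMode.Active for the checkpoint lookup in restoreStores().
  static Map<TaskName, TaskModel> getTasks(ContainerModel containerModel, TaskMode taskMode) {
    return containerModel.getTasks().entrySet().stream()
        .filter(entry -> entry.getValue().getTaskMode().equals(taskMode))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }
}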
Use of org.apache.samza.job.model.ContainerModel in project samza by apache.
In class ZkJobCoordinator, method generateNewJobModel:
/**
* Generates a new JobModel when this coordinator becomes the leader or when the list of processors changes.
*/
@VisibleForTesting
JobModel generateNewJobModel(List<ProcessorNode> processorNodes) {
String zkJobModelVersion = zkUtils.getJobModelVersion();
// If a JobModel exists in zookeeper and the cached JobModel version differs from the version stored in zookeeper.
if (zkJobModelVersion != null && !Objects.equals(cachedJobModelVersion, zkJobModelVersion)) {
JobModel jobModel = readJobModelFromMetadataStore(zkJobModelVersion);
for (ContainerModel containerModel : jobModel.getContainers().values()) {
containerModel.getTasks().forEach((taskName, taskModel) -> changeLogPartitionMap.put(taskName, taskModel.getChangelogPartition().getPartitionId()));
}
cachedJobModelVersion = zkJobModelVersion;
}
GrouperMetadata grouperMetadata = getGrouperMetadata(zkJobModelVersion, processorNodes);
JobModel model = JobModelCalculator.INSTANCE.calculateJobModel(config, changeLogPartitionMap, streamMetadataCache, grouperMetadata);
return new JobModel(new MapConfig(), model.getContainers());
}
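The for-loop above flattens every ContainerModel in the cached JobModel into the task-to-changelog-partition map used by JobModelCalculator. A self-contained sketch of just that extraction step (the class and method names here are illustrative, and the ZK and calculator plumbing is omitted):

import java.util.HashMap;
import java.util.Map;
import org.apache.samza.container.TaskName;
import org.apache.samza.job.model.ContainerModel;
import org.apache.samza.job.model.JobModel;

public class ChangelogPartitionExtractor {
  // Build a taskName -> changelog partition id map from every ContainerModel in the JobModel,
  // mirroring the loop in generateNewJobModel above.
  static Map<TaskName, Integer> extractChangelogPartitions(JobModel jobModel) {
    Map<TaskName, Integer> changeLogPartitionMap = new HashMap<>();
    for (ContainerModel containerModel : jobModel.getContainers().values()) {
      containerModel.getTasks().forEach((taskName, taskModel) ->
          changeLogPartitionMap.put(taskName, taskModel.getChangelogPartition().getPartitionId()));
    }
    return changeLogPartitionMap;
  }
}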
Use of org.apache.samza.job.model.ContainerModel in project samza by apache.
In class DiagnosticsStreamMessage, method deserializeContainerModelMap:
/**
* Helper method to use {@link SamzaObjectMapper} to deserialize {@link ContainerModel}s.
* {@link SamzaObjectMapper} provides several conventions and optimizations for deserializing containerModels.
* @param serializedContainerModel JSON-serialized map of container models
* @return the deserialized map of container id to {@link ContainerModel}, or null if the input is null or could not be deserialized
*/
private static Map<String, ContainerModel> deserializeContainerModelMap(String serializedContainerModel) {
Map<String, ContainerModel> containerModelMap = null;
ObjectMapper samzaObjectMapper = SamzaObjectMapper.getObjectMapper();
try {
if (serializedContainerModel != null) {
containerModelMap = samzaObjectMapper.readValue(serializedContainerModel, new TypeReference<Map<String, ContainerModel>>() {
});
}
} catch (IOException e) {
LOG.error("Exception in deserializing container model ", e);
}
return containerModelMap;
}
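For context, here is a hedged round-trip example of the serialization this helper reverses; the container id, task, and stream names are made up for illustration, and error handling is omitted:

import java.util.Map;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.samza.Partition;
import org.apache.samza.container.TaskName;
import org.apache.samza.job.model.ContainerModel;
import org.apache.samza.job.model.TaskModel;
import org.apache.samza.serializers.model.SamzaObjectMapper;
import org.apache.samza.system.SystemStreamPartition;

public class ContainerModelRoundTrip {
  public static void main(String[] args) throws Exception {
    // Illustrative container with a single task reading one input partition.
    TaskName taskName = new TaskName("task0");
    TaskModel taskModel = new TaskModel(taskName,
        ImmutableSet.of(new SystemStreamPartition("kafka", "input", new Partition(0))),
        new Partition(0));
    Map<String, ContainerModel> original =
        ImmutableMap.of("0", new ContainerModel("0", ImmutableMap.of(taskName, taskModel)));

    ObjectMapper mapper = SamzaObjectMapper.getObjectMapper();
    String json = mapper.writeValueAsString(original);
    // Same deserialization call as deserializeContainerModelMap above.
    Map<String, ContainerModel> roundTripped =
        mapper.readValue(json, new TypeReference<Map<String, ContainerModel>>() { });
    System.out.println(roundTripped.keySet());
  }
}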
Use of org.apache.samza.job.model.ContainerModel in project samza by apache.
In class TestKafkaChangelogStateBackendFactory, method testGetChangelogSSP:
@Test
public void testGetChangelogSSP() {
KafkaChangelogStateBackendFactory factory = new KafkaChangelogStateBackendFactory();
TaskName taskName0 = new TaskName("task0");
TaskName taskName1 = new TaskName("task1");
TaskModel taskModel0 = new TaskModel(taskName0, ImmutableSet.of(new SystemStreamPartition("input", "stream", new Partition(0))), new Partition(10));
TaskModel taskModel1 = new TaskModel(taskName1, ImmutableSet.of(new SystemStreamPartition("input", "stream", new Partition(1))), new Partition(11));
ContainerModel containerModel = new ContainerModel("processorId", ImmutableMap.of(taskName0, taskModel0, taskName1, taskModel1));
Map<String, SystemStream> changeLogSystemStreams = ImmutableMap.of("store0", new SystemStream("changelogSystem0", "store0-changelog"), "store1", new SystemStream("changelogSystem1", "store1-changelog"));
Set<SystemStreamPartition> expected = ImmutableSet.of(new SystemStreamPartition("changelogSystem0", "store0-changelog", new Partition(10)), new SystemStreamPartition("changelogSystem1", "store1-changelog", new Partition(10)), new SystemStreamPartition("changelogSystem0", "store0-changelog", new Partition(11)), new SystemStreamPartition("changelogSystem1", "store1-changelog", new Partition(11)));
Assert.assertEquals(expected, factory.getChangelogSSPForContainer(changeLogSystemStreams, new ContainerContextImpl(containerModel, null)));
}
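The expected set in this test is just the cross product of the container's task changelog partitions with the configured changelog streams. Below is a sketch of that expansion, written as an approximation of what getChangelogSSPForContainer computes rather than the factory's actual code:

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.samza.job.model.ContainerModel;
import org.apache.samza.system.SystemStream;
import org.apache.samza.system.SystemStreamPartition;

public class ChangelogSspExpansion {
  // For every task in the container and every changelog stream, emit the SSP at the
  // task's changelog partition -- the cross product asserted in testGetChangelogSSP.
  static Set<SystemStreamPartition> changelogSsps(ContainerModel containerModel,
      Map<String, SystemStream> changelogSystemStreams) {
    Set<SystemStreamPartition> ssps = new HashSet<>();
    containerModel.getTasks().values().forEach(taskModel ->
        changelogSystemStreams.values().forEach(systemStream ->
            ssps.add(new SystemStreamPartition(systemStream, taskModel.getChangelogPartition()))));
    return ssps;
  }
}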
Use of org.apache.samza.job.model.ContainerModel in project samza by apache.
In class TestSamzaObjectMapper, method getPreEleasticObjectMapper:
public static ObjectMapper getPreEleasticObjectMapper() {
ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.WRAP_EXCEPTIONS, false);
mapper.configure(SerializationFeature.WRAP_EXCEPTIONS, false);
SimpleModule module = new SimpleModule("SamzaModule", new Version(1, 0, 0, ""));
// Setup custom serdes for simple data types.
module.addSerializer(Partition.class, new SamzaObjectMapper.PartitionSerializer());
module.addSerializer(SystemStreamPartition.class, new PreElasticitySystemStreamPartitionSerializer());
module.addKeySerializer(SystemStreamPartition.class, new SamzaObjectMapper.SystemStreamPartitionKeySerializer());
module.addSerializer(TaskName.class, new SamzaObjectMapper.TaskNameSerializer());
module.addSerializer(TaskMode.class, new SamzaObjectMapper.TaskModeSerializer());
module.addDeserializer(TaskName.class, new SamzaObjectMapper.TaskNameDeserializer());
module.addDeserializer(Partition.class, new SamzaObjectMapper.PartitionDeserializer());
module.addDeserializer(SystemStreamPartition.class, new PreElasticitySystemStreamPartitionDeserializer());
module.addKeyDeserializer(SystemStreamPartition.class, new SamzaObjectMapper.SystemStreamPartitionKeyDeserializer());
module.addDeserializer(Config.class, new SamzaObjectMapper.ConfigDeserializer());
module.addDeserializer(TaskMode.class, new SamzaObjectMapper.TaskModeDeserializer());
module.addSerializer(CheckpointId.class, new SamzaObjectMapper.CheckpointIdSerializer());
module.addDeserializer(CheckpointId.class, new SamzaObjectMapper.CheckpointIdDeserializer());
// Setup mixins for data models.
mapper.addMixIn(TaskModel.class, JsonTaskModelMixIn.class);
mapper.addMixIn(ContainerModel.class, JsonContainerModelMixIn.class);
mapper.addMixIn(JobModel.class, JsonJobModelMixIn.class);
mapper.addMixIn(CheckpointV2.class, JsonCheckpointV2Mixin.class);
mapper.addMixIn(KafkaStateCheckpointMarker.class, KafkaStateCheckpointMarkerMixin.class);
module.addDeserializer(ContainerModel.class, new JsonDeserializer<ContainerModel>() {
@Override
public ContainerModel deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
ObjectCodec oc = jp.getCodec();
JsonNode node = oc.readTree(jp);
/*
* Before Samza 0.13, "container-id" was used.
* In Samza 0.13, "processor-id" was added to be the id to use and "container-id" was deprecated. However,
* "container-id" still needed to be checked for backwards compatibility in case "processor-id" was missing
* (i.e. from a job model corresponding to a version of the job that was on a pre Samza 0.13 version).
* In Samza 1.0, "container-id" was further cleaned up from ContainerModel. This logic is still being left here
* as a fallback for backwards compatibility with pre Samza 0.13. ContainerModel.getProcessorId was changed to
* ContainerModel.getId in the Java API, but "processor-id" still needs to be used as the JSON key for backwards
* compatibility with Samza 0.13 and Samza 0.14.
*/
String id;
if (node.get(JsonContainerModelMixIn.PROCESSOR_ID_KEY) == null) {
if (node.get(JsonContainerModelMixIn.CONTAINER_ID_KEY) == null) {
throw new SamzaException(String.format("JobModel was missing %s and %s. This should never happen. JobModel corrupt!", JsonContainerModelMixIn.PROCESSOR_ID_KEY, JsonContainerModelMixIn.CONTAINER_ID_KEY));
}
id = String.valueOf(node.get(JsonContainerModelMixIn.CONTAINER_ID_KEY).intValue());
} else {
id = node.get(JsonContainerModelMixIn.PROCESSOR_ID_KEY).textValue();
}
Map<TaskName, TaskModel> tasksMapping = OBJECT_MAPPER.readValue(OBJECT_MAPPER.treeAsTokens(node.get(JsonContainerModelMixIn.TASKS_KEY)), new TypeReference<Map<TaskName, TaskModel>>() {
});
return new ContainerModel(id, tasksMapping);
}
});
mapper.addMixIn(LocalityModel.class, JsonLocalityModelMixIn.class);
mapper.addMixIn(ProcessorLocality.class, JsonProcessorLocalityMixIn.class);
// Register mixins for job coordinator metadata model
mapper.addMixIn(JobCoordinatorMetadata.class, JsonJobCoordinatorMetadataMixIn.class);
// Convert camel case to hyphenated field names, and register the module.
mapper.setPropertyNamingStrategy(new SamzaObjectMapper.CamelCaseToDashesStrategy());
mapper.registerModule(module);
return mapper;
}
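A hedged usage example of the backwards-compatibility branch in the custom deserializer above: a pre-0.13 style payload that only carries an integer "container-id" still deserializes into a ContainerModel. The JSON key names assume the mixin constants are "container-id" and "tasks", and the enclosing test class is assumed to be TestSamzaObjectMapper as the header suggests.

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.samza.job.model.ContainerModel;

public class PreElasticityDeserializeExample {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = TestSamzaObjectMapper.getPreEleasticObjectMapper();
    // Old-style payload: "container-id" instead of "processor-id"; the deserializer above
    // converts the integer id to a String via the fallback branch.
    String json = "{\"container-id\": 1, \"tasks\": {}}";
    ContainerModel containerModel = mapper.readValue(json, ContainerModel.class);
    System.out.println(containerModel.getId()); // expected to print "1"
  }
}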