use of org.apache.samza.diagnostics.DiagnosticsManager in project samza by apache.
the class DiagnosticsUtil method buildDiagnosticsManager.
/**
 * Builds the {@link DiagnosticsManager} for a container, provided diagnostics is enabled in the job config.
 *
 * <p>The manager receives its own {@link SystemProducer}, created from the system factory configured for the
 * system of the diagnostics reporter stream.
 *
 * @param jobName name of the job, passed through to the manager
 * @param jobId ID of the job, passed through to the manager
 * @param jobModel job model whose containers are handed to the manager
 * @param containerId ID of the container the manager is created for
 * @param executionEnvContainerId ID assigned to the container by the cluster manager (e.g. YARN); an empty string
 *                                is used when absent
 * @param samzaEpochId ID assigned to the job deployment attempt by the cluster manager; an empty string is used
 *                     when absent
 * @param config configuration used to derive all remaining settings
 * @return the created manager, or {@link Optional#empty()} when diagnostics is disabled
 * @throws ConfigException if diagnostics is enabled but no stream is configured for the diagnostics reporter
 * @throws SamzaException if no system factory is configured for the system of the diagnostics stream
 */
public static Optional<DiagnosticsManager> buildDiagnosticsManager(String jobName, String jobId, JobModel jobModel, String containerId, Optional<String> executionEnvContainerId, Optional<String> samzaEpochId, Config config) {
  JobConfig jobConfig = new JobConfig(config);
  MetricsConfig metricsConfig = new MetricsConfig(config);
  if (!jobConfig.getDiagnosticsEnabled()) {
    return Optional.empty();
  }

  // Container sizing and version details reported through the manager.
  ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(config);
  int memoryMb = clusterManagerConfig.getContainerMemoryMb();
  int numCores = clusterManagerConfig.getNumCores();
  long maxHeapBytes = Runtime.getRuntime().maxMemory();
  int threadPoolSize = jobConfig.getThreadPoolSize();
  String taskClassVersion = Util.getTaskClassVersion(config);
  String samzaVersion = Util.getSamzaVersion();
  String hostName = Util.getLocalHost().getHostName();

  String reporterStreamName = metricsConfig
      .getMetricsSnapshotReporterStream(MetricsConfig.METRICS_SNAPSHOT_REPORTER_NAME_FOR_DIAGNOSTICS)
      .orElseThrow(() -> new ConfigException("Missing required config: " + String.format(MetricsConfig.METRICS_SNAPSHOT_REPORTER_STREAM, MetricsConfig.METRICS_SNAPSHOT_REPORTER_NAME_FOR_DIAGNOSTICS)));
  SystemStream diagnosticsStream = StreamUtil.getSystemStreamFromNames(reporterStreamName);
  String diagnosticsSystem = diagnosticsStream.getSystem();

  // The DiagnosticsManager gets its own SystemProducer, which writes to the same stream as the
  // MetricsSnapshotReporter called `diagnosticsreporter`.
  String systemFactoryName = new SystemConfig(config)
      .getSystemFactory(diagnosticsSystem)
      .orElseThrow(() -> new SamzaException("Missing factory in config for system " + diagnosticsSystem));
  SystemFactory systemFactory = ReflectionUtil.getObj(systemFactoryName, SystemFactory.class);
  SystemProducer producer = systemFactory.getProducer(diagnosticsSystem, config, new MetricsRegistryMap(), DiagnosticsUtil.class.getSimpleName());

  return Optional.of(new DiagnosticsManager(
      jobName,
      jobId,
      jobModel.getContainers(),
      memoryMb,
      numCores,
      new StorageConfig(config).getNumPersistentStores(),
      maxHeapBytes,
      threadPoolSize,
      containerId,
      executionEnvContainerId.orElse(""),
      samzaEpochId.orElse(""),
      taskClassVersion,
      samzaVersion,
      hostName,
      diagnosticsStream,
      producer,
      Duration.ofMillis(new TaskConfig(config).getShutdownMs()),
      jobConfig.getAutosizingEnabled(),
      config));
}
use of org.apache.samza.diagnostics.DiagnosticsManager in project samza by apache.
the class ContainerLaunchUtil method run.
/**
 * Builds a {@link SamzaContainer} for the given job model, wires in its listener, heartbeat monitor and
 * diagnostics manager, and runs it.
 *
 * <p>The coordinator stream store is always closed on the way out. The process is exited (via
 * {@code exitProcess}) only when a failure was recorded; on success the method simply returns.
 */
@VisibleForTesting
static void run(ApplicationDescriptorImpl<? extends ApplicationDescriptor> appDesc, String jobName, String jobId, String containerId, Optional<String> executionEnvContainerId, Optional<String> samzaEpochId, JobModel jobModel, Config config, Optional<ExternalContext> externalContextOptional) {
  CoordinatorStreamStore coordinatorStreamStore = buildCoordinatorStreamStore(config, new MetricsRegistryMap());
  coordinatorStreamStore.init();

  // The exit code is only recorded here and acted upon in the finally block, so that every clean up step runs.
  // An earlier implementation exited right away, which skipped some of the clean up.
  int exitCode = 0;
  try {
    TaskFactory taskFactory = TaskFactoryUtil.getTaskFactory(appDesc);
    LocalityManager localityManager = new LocalityManager(
        new NamespaceAwareCoordinatorStreamStore(coordinatorStreamStore, SetContainerHostMapping.TYPE));

    JobConfig jobConfig = new JobConfig(config);
    // StartpointManager applies the coordinator stream namespaces itself, so it takes the raw store.
    StartpointManager startpointManager =
        jobConfig.getStartpointEnabled() ? new StartpointManager(coordinatorStreamStore) : null;

    Map<String, MetricsReporter> metricsReporters = loadMetricsReporters(appDesc, containerId, config);

    // Build the diagnostics manager (if enabled) so it can be wired into the container below.
    Optional<DiagnosticsManager> diagnosticsManager = DiagnosticsUtil.buildDiagnosticsManager(
        jobName, jobId, jobModel, containerId, executionEnvContainerId, samzaEpochId, config);
    MetricsRegistryMap metricsRegistryMap = new MetricsRegistryMap();

    SamzaContainer container = SamzaContainer$.MODULE$.apply(
        containerId,
        jobModel,
        ScalaJavaUtil.toScalaMap(metricsReporters),
        metricsRegistryMap,
        taskFactory,
        JobContextImpl.fromConfigWithDefaults(config, jobModel),
        Option.apply(appDesc.getApplicationContainerContextFactory().orElse(null)),
        Option.apply(appDesc.getApplicationTaskContextFactory().orElse(null)),
        Option.apply(externalContextOptional.orElse(null)),
        localityManager,
        startpointManager,
        Option.apply(diagnosticsManager.orElse(null)));

    ProcessorLifecycleListener processorLifecycleListener =
        appDesc.getProcessorLifecycleListenerFactory().createInstance(new ProcessorContext() { }, config);
    ClusterBasedProcessorLifecycleListener listener =
        new ClusterBasedProcessorLifecycleListener(config, processorLifecycleListener, container::shutdown);
    container.setContainerListener(listener);

    // The monitor may be null, so starting and stopping it are both guarded.
    ContainerHeartbeatMonitor heartbeatMonitor = createContainerHeartbeatMonitor(
        container, new NamespaceAwareCoordinatorStreamStore(coordinatorStreamStore, SetConfig.TYPE), config);
    if (heartbeatMonitor != null) {
      heartbeatMonitor.start();
    }

    // With AM high availability enabled, record which execution environment container hosts this container.
    if (jobConfig.getApplicationMasterHighAvailabilityEnabled() && executionEnvContainerId.isPresent()) {
      ExecutionContainerIdManager executionContainerIdManager = new ExecutionContainerIdManager(
          new NamespaceAwareCoordinatorStreamStore(coordinatorStreamStore, SetExecutionEnvContainerIdMapping.TYPE));
      executionContainerIdManager.writeExecutionEnvironmentContainerIdMapping(containerId, executionEnvContainerId.get());
    }

    container.run();

    if (heartbeatMonitor != null) {
      heartbeatMonitor.stop();
    }

    // Fall back to the exception reported by the listener when none has been recorded yet.
    if (containerRunnerException == null) {
      containerRunnerException = listener.getContainerException();
    }
    if (containerRunnerException != null) {
      log.error("Container stopped with Exception. Exiting process now.", containerRunnerException);
      exitCode = 1;
    }
  } catch (Throwable e) {
    // Two log statements are used on purpose so that both stack traces are printed in full; a single statement
    // with a custom format would need explicit stack trace extraction and null checks, which reads worse.
    log.error("Exiting the process due to", e);
    log.error("Container runner exception: ", containerRunnerException);
    exitCode = 1;
  } finally {
    coordinatorStreamStore.close();
    // Exit only on failure, keeping parity with the existing behavior where the method returns normally when
    // no errors are encountered.
    if (exitCode != 0) {
      exitProcess(exitCode);
    }
  }
}
use of org.apache.samza.diagnostics.DiagnosticsManager in project samza by apache.
the class TestDiagnosticsUtil method testBuildDiagnosticsManager.
/**
 * Verifies that a {@link DiagnosticsManager} is returned for the test configs when the system factory lookup
 * is stubbed to hand back a mocked factory and producer.
 */
@Test
public void testBuildDiagnosticsManager() {
  // Arrange: test configs, plus a mocked factory that yields a mocked producer.
  Config testConfig = new MapConfig(buildTestConfigs());
  JobModel jobModel = mock(JobModel.class);
  SystemFactory mockSystemFactory = mock(SystemFactory.class);
  SystemProducer producer = mock(SystemProducer.class);
  when(mockSystemFactory.getProducer(anyString(), any(Config.class), any(MetricsRegistry.class), anyString()))
      .thenReturn(producer);

  // Route the reflective factory lookup to the mocked factory.
  PowerMockito.mockStatic(ReflectionUtil.class);
  when(ReflectionUtil.getObj(SYSTEM_FACTORY, SystemFactory.class)).thenReturn(mockSystemFactory);

  // Act
  Optional<DiagnosticsManager> result = DiagnosticsUtil.buildDiagnosticsManager(
      JOB_NAME, JOB_ID, jobModel, CONTAINER_ID, Optional.of(ENV_ID), Optional.of(SAMZA_EPOCH_ID), testConfig);

  // Assert
  Assert.assertTrue(result.isPresent());
}
use of org.apache.samza.diagnostics.DiagnosticsManager in project samza by apache.
the class StaticResourceJobCoordinator method start.
/**
 * Starts the job coordinator: computes the job model, checks for job coordinator metadata changes, and then
 * either triggers a job restart or prepares worker execution and starts the components that depend on the
 * job model.
 *
 * @throws SamzaException wrapping any exception raised once the system admins and startpoint manager have
 *                        been started
 */
@Override
public void start() {
  LOG.info("Starting job coordinator");
  this.systemAdmins.start();
  this.startpointManager.ifPresent(StartpointManager::start);
  try {
    JobModel jobModel = newJobModel();
    doSetLoggingContextConfig(jobModel.getConfig());
    // monitors should be created right after job model is calculated (see jobModelMonitors() for more details)
    JobModelMonitors monitors = jobModelMonitors(jobModel);
    Optional<DiagnosticsManager> newDiagnosticsManager = diagnosticsManager(jobModel);
    JobCoordinatorMetadata newMetadata =
        this.jobCoordinatorMetadataManager.generateJobCoordinatorMetadata(jobModel, jobModel.getConfig());
    Set<JobMetadataChange> metadataChanges = checkForMetadataChanges(newMetadata);

    /*
     * Workers may proceed when nothing changed (the current workers simply keep running) or when this is a new
     * deployment, which includes the case where the coordinator requested a restart: the external resource
     * manager is then relied upon to have restarted the previous workers, so no further restart is needed.
     * The remaining case is a coordinator that came up without a new deployment but with changed metadata,
     * e.g. it died and the job model needed to change while it was down; that requires a restart.
     */
    boolean workersMayProceed =
        metadataChanges.isEmpty() || metadataChanges.contains(JobMetadataChange.NEW_DEPLOYMENT);
    if (workersMayProceed) {
      prepareWorkerExecution(jobModel, newMetadata, metadataChanges);

      // keep hold of the job-model-dependent components for lifecycle management and later access
      this.currentDiagnosticsManager = newDiagnosticsManager;
      this.currentJobModelMonitors = Optional.of(monitors);
      this.currentJobModel = Optional.of(jobModel);

      // lifecycle: start components
      this.coordinatorCommunication.start();
      this.jobCoordinatorListener.ifPresent(listener -> listener.onNewJobModel(this.processorId, jobModel));
      this.currentDiagnosticsManager.ifPresent(DiagnosticsManager::start);
      monitors.start();
      this.jobPreparationComplete.set(true);
    } else {
      LOG.info("Triggering job restart");
      this.jobRestartSignal.restartJob();
    }
  } catch (Exception e) {
    LOG.error("Error while running job coordinator; exiting", e);
    throw new SamzaException("Error while running job coordinator", e);
  }
}
use of org.apache.samza.diagnostics.DiagnosticsManager in project samza by apache.
the class StreamProcessor method createSamzaContainer.
/**
 * Creates the {@link SamzaContainer} for this stream processor, wiring in a diagnostics manager (when one is
 * built) and a {@link StartpointManager} (when startpoints are enabled and a metadata store is available).
 *
 * @param processorId ID of the processor; used both as the container ID and as the ID handed to diagnostics
 * @param jobModel job model the container is created for
 * @return the newly created container
 */
@VisibleForTesting
SamzaContainer createSamzaContainer(String processorId, JobModel jobModel) {
  JobConfig jobConfig = new JobConfig(config);

  // Build the diagnostics manager first so it can be wired into the container.
  String jobName = jobConfig.getName().get();
  String jobId = jobConfig.getJobId();
  Optional<DiagnosticsManager> diagnosticsManager = DiagnosticsUtil.buildDiagnosticsManager(
      jobName, jobId, jobModel, processorId, Optional.empty(), Optional.empty(), config);

  // The metadata store's lifecycle is managed outside of the SamzaContainer, whereas all manager lifecycles,
  // including the startpointManager's, are managed inside it.
  StartpointManager startpointManager = null;
  if (!jobConfig.getStartpointEnabled()) {
    LOGGER.warn("StartpointManager not instantiated because startpoints is not enabled");
  } else if (metadataStore == null) {
    LOGGER.warn("StartpointManager cannot be instantiated because no metadata store defined for this stream processor");
  } else {
    startpointManager = new StartpointManager(metadataStore);
  }

  /*
   * A fresh registry is used on purpose. StreamProcessor registers the metrics of its own metricsRegistry with
   * the reporters itself; since SamzaContainer registers the registry it is given as well, reusing
   * StreamProcessor.metricsRegistry here would produce unnecessary duplicate metrics.
   */
  MetricsRegistryMap containerMetricsRegistry = new MetricsRegistryMap();

  return SamzaContainer.apply(
      processorId,
      jobModel,
      ScalaJavaUtil.toScalaMap(this.customMetricsReporter),
      containerMetricsRegistry,
      this.taskFactory,
      JobContextImpl.fromConfigWithDefaults(this.config, jobModel),
      Option.apply(this.applicationDefinedContainerContextFactoryOptional.orElse(null)),
      Option.apply(this.applicationDefinedTaskContextFactoryOptional.orElse(null)),
      Option.apply(this.externalContextOptional.orElse(null)),
      null,
      startpointManager,
      Option.apply(diagnosticsManager.orElse(null)));
}
Aggregations