Search in sources :

Example 1 with DiagnosticsManager

Use of org.apache.samza.diagnostics.DiagnosticsManager in the Apache Samza project.

Class DiagnosticsUtil, method buildDiagnosticsManager.

/**
 * Builds the {@link DiagnosticsManager} for the given jobName, jobId, containerId, and execEnvContainerId, provided
 * that diagnostics is enabled in the job config.
 *
 * <p>The manager gets its own {@link SystemProducer}, created from the system factory configured for the system
 * that hosts the diagnostics metrics-snapshot-reporter stream.
 *
 * @param jobName name of the job
 * @param jobId ID of the job
 * @param jobModel job model whose containers are passed to the manager
 * @param containerId ID of the container
 * @param executionEnvContainerId ID assigned to the container by the cluster manager (e.g. YARN)
 * @param samzaEpochId ID assigned to the job deployment attempt by the cluster manager
 * @param config configuration from which all job/metrics/system settings are read
 * @return the created manager, or {@link Optional#empty()} when diagnostics is disabled
 * @throws ConfigException if no stream is configured for the diagnostics metrics snapshot reporter
 * @throws SamzaException if no system factory is configured for the diagnostics stream's system
 */
public static Optional<DiagnosticsManager> buildDiagnosticsManager(String jobName, String jobId, JobModel jobModel, String containerId, Optional<String> executionEnvContainerId, Optional<String> samzaEpochId, Config config) {
    JobConfig jobCfg = new JobConfig(config);
    MetricsConfig metricsCfg = new MetricsConfig(config);
    if (!jobCfg.getDiagnosticsEnabled()) {
        // Diagnostics is switched off: nothing to build.
        return Optional.empty();
    }

    // Container/runtime facts that are reported through the diagnostics manager.
    ClusterManagerConfig clusterCfg = new ClusterManagerConfig(config);
    int memoryMb = clusterCfg.getContainerMemoryMb();
    int numCores = clusterCfg.getNumCores();
    long maxHeapBytes = Runtime.getRuntime().maxMemory();
    int threadPoolSize = jobCfg.getThreadPoolSize();
    String taskVersion = Util.getTaskClassVersion(config);
    String frameworkVersion = Util.getSamzaVersion();
    String host = Util.getLocalHost().getHostName();

    // The diagnostics stream is the one configured for the diagnostics metrics snapshot reporter.
    String reporterStreamName = metricsCfg
        .getMetricsSnapshotReporterStream(MetricsConfig.METRICS_SNAPSHOT_REPORTER_NAME_FOR_DIAGNOSTICS)
        .orElseThrow(() -> new ConfigException("Missing required config: " + String.format(MetricsConfig.METRICS_SNAPSHOT_REPORTER_STREAM, MetricsConfig.METRICS_SNAPSHOT_REPORTER_NAME_FOR_DIAGNOSTICS)));
    SystemStream diagnosticsStream = StreamUtil.getSystemStreamFromNames(reporterStreamName);

    // Create a SystemProducer for DiagnosticsManager. This producer is used by the DiagnosticsManager
    // to write to the same stream as the MetricsSnapshotReporter called `diagnosticsreporter`.
    String factoryClassName = new SystemConfig(config)
        .getSystemFactory(diagnosticsStream.getSystem())
        .orElseThrow(() -> new SamzaException("Missing factory in config for system " + diagnosticsStream.getSystem()));
    SystemFactory factory = ReflectionUtil.getObj(factoryClassName, SystemFactory.class);
    SystemProducer producer = factory.getProducer(diagnosticsStream.getSystem(), config, new MetricsRegistryMap(), DiagnosticsUtil.class.getSimpleName());

    return Optional.of(new DiagnosticsManager(
        jobName,
        jobId,
        jobModel.getContainers(),
        memoryMb,
        numCores,
        new StorageConfig(config).getNumPersistentStores(),
        maxHeapBytes,
        threadPoolSize,
        containerId,
        executionEnvContainerId.orElse(""),
        samzaEpochId.orElse(""),
        taskVersion,
        frameworkVersion,
        host,
        diagnosticsStream,
        producer,
        Duration.ofMillis(new TaskConfig(config).getShutdownMs()),
        jobCfg.getAutosizingEnabled(),
        config));
}
Also used : SystemConfig(org.apache.samza.config.SystemConfig) SystemFactory(org.apache.samza.system.SystemFactory) DiagnosticsManager(org.apache.samza.diagnostics.DiagnosticsManager) StorageConfig(org.apache.samza.config.StorageConfig) SystemStream(org.apache.samza.system.SystemStream) SystemProducer(org.apache.samza.system.SystemProducer) ConfigException(org.apache.samza.config.ConfigException) TaskConfig(org.apache.samza.config.TaskConfig) SamzaException(org.apache.samza.SamzaException) JobConfig(org.apache.samza.config.JobConfig) MetricsConfig(org.apache.samza.config.MetricsConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap)

Example 2 with DiagnosticsManager

Use of org.apache.samza.diagnostics.DiagnosticsManager in the Apache Samza project.

Class ContainerLaunchUtil, method run.

/**
 * Builds a {@link SamzaContainer} for the given container ID and job model, wires its supporting components
 * (locality manager, optional startpoint manager, metrics reporters, optional diagnostics manager, lifecycle
 * listener, heartbeat monitor) and runs it.
 *
 * <p>The coordinator stream store is created and initialized before the try block and is always closed in the
 * finally block. If the container stops with an exception, or anything in the try block throws, the exit code is
 * set to 1 and {@code exitProcess} is invoked from the finally block; when the exit code stays 0 the method simply
 * returns.
 *
 * @param appDesc application descriptor supplying the task factory, context factories and lifecycle listener factory
 * @param jobName name of the job, passed on to the diagnostics manager builder
 * @param jobId ID of the job, passed on to the diagnostics manager builder
 * @param containerId ID of the container to run
 * @param executionEnvContainerId optional execution-environment container ID; when present and AM high availability
 *                                is enabled, its mapping to {@code containerId} is written to the coordinator stream
 * @param samzaEpochId optional epoch ID, passed on to the diagnostics manager builder
 * @param jobModel job model handed to the container
 * @param config configuration used for all components built here
 * @param externalContextOptional optional external context handed to the container
 */
@VisibleForTesting
static void run(ApplicationDescriptorImpl<? extends ApplicationDescriptor> appDesc, String jobName, String jobId, String containerId, Optional<String> executionEnvContainerId, Optional<String> samzaEpochId, JobModel jobModel, Config config, Optional<ExternalContext> externalContextOptional) {
    // The store is created and initialized outside the try block; it is closed in the finally block below.
    CoordinatorStreamStore coordinatorStreamStore = buildCoordinatorStreamStore(config, new MetricsRegistryMap());
    coordinatorStreamStore.init();
    /*
     * We track the exit code and only trigger exit in the finally block to make sure we are able to execute all the
     * clean up steps. Prior implementation had short circuited exit causing some of the clean up steps to be missed.
     */
    int exitCode = 0;
    try {
        TaskFactory taskFactory = TaskFactoryUtil.getTaskFactory(appDesc);
        // Locality information goes through the coordinator stream under the SetContainerHostMapping namespace.
        LocalityManager localityManager = new LocalityManager(new NamespaceAwareCoordinatorStreamStore(coordinatorStreamStore, SetContainerHostMapping.TYPE));
        // StartpointManager wraps the coordinatorStreamStore in the namespaces internally
        // It stays null (and is passed to the container as null) when startpoints are disabled.
        StartpointManager startpointManager = null;
        if (new JobConfig(config).getStartpointEnabled()) {
            startpointManager = new StartpointManager(coordinatorStreamStore);
        }
        Map<String, MetricsReporter> metricsReporters = loadMetricsReporters(appDesc, containerId, config);
        // Creating diagnostics manager and reporter, and wiring it respectively
        Optional<DiagnosticsManager> diagnosticsManager = DiagnosticsUtil.buildDiagnosticsManager(jobName, jobId, jobModel, containerId, executionEnvContainerId, samzaEpochId, config);
        MetricsRegistryMap metricsRegistryMap = new MetricsRegistryMap();
        // SamzaContainer is created through its Scala companion object; absent Java Optionals are converted to
        // Scala Options via Option.apply(null).
        SamzaContainer container = SamzaContainer$.MODULE$.apply(containerId, jobModel, ScalaJavaUtil.toScalaMap(metricsReporters), metricsRegistryMap, taskFactory, JobContextImpl.fromConfigWithDefaults(config, jobModel), Option.apply(appDesc.getApplicationContainerContextFactory().orElse(null)), Option.apply(appDesc.getApplicationTaskContextFactory().orElse(null)), Option.apply(externalContextOptional.orElse(null)), localityManager, startpointManager, Option.apply(diagnosticsManager.orElse(null)));
        // The application's lifecycle listener is created with an empty anonymous ProcessorContext.
        ProcessorLifecycleListener processorLifecycleListener = appDesc.getProcessorLifecycleListenerFactory().createInstance(new ProcessorContext() {
        }, config);
        // The cluster-based listener wraps the application listener and is given container::shutdown as a callback.
        ClusterBasedProcessorLifecycleListener listener = new ClusterBasedProcessorLifecycleListener(config, processorLifecycleListener, container::shutdown);
        container.setContainerListener(listener);
        // createContainerHeartbeatMonitor may return null, hence the null checks around start/stop.
        ContainerHeartbeatMonitor heartbeatMonitor = createContainerHeartbeatMonitor(container, new NamespaceAwareCoordinatorStreamStore(coordinatorStreamStore, SetConfig.TYPE), config);
        if (heartbeatMonitor != null) {
            heartbeatMonitor.start();
        }
        // With AM high availability enabled, record the containerId -> execution environment container ID mapping
        // in the coordinator stream (only when an execution environment container ID was supplied).
        if (new JobConfig(config).getApplicationMasterHighAvailabilityEnabled()) {
            executionEnvContainerId.ifPresent(execEnvContainerId -> {
                ExecutionContainerIdManager executionContainerIdManager = new ExecutionContainerIdManager(new NamespaceAwareCoordinatorStreamStore(coordinatorStreamStore, SetExecutionEnvContainerIdMapping.TYPE));
                executionContainerIdManager.writeExecutionEnvironmentContainerIdMapping(containerId, execEnvContainerId);
            });
        }
        container.run();
        // Reached only when container.run() returns without throwing.
        if (heartbeatMonitor != null) {
            heartbeatMonitor.stop();
        }
        // NOTE(review): containerRunnerException is declared outside this method and is not visible here;
        // presumably it is set by some other failure-handling path -- confirm against the enclosing class.
        // overriding the value with what the listener returns
        if (containerRunnerException == null) {
            containerRunnerException = listener.getContainerException();
        }
        if (containerRunnerException != null) {
            log.error("Container stopped with Exception. Exiting process now.", containerRunnerException);
            exitCode = 1;
        }
    } catch (Throwable e) {
        /*
       * Two separate log statements are intended to print the entire stack trace as part of the logs. Using
       * single log statement with custom format requires explicitly fetching stack trace and null checks which makes
       * the code slightly hard to read in comparison with the current choice.
       */
        log.error("Exiting the process due to", e);
        log.error("Container runner exception: ", containerRunnerException);
        exitCode = 1;
    } finally {
        // Always release the coordinator stream store, whether the run succeeded or failed.
        coordinatorStreamStore.close();
        /*
       * Only exit in the scenario of non-zero exit code in order to maintain parity with current implementation where
       * the method completes when no errors are encountered.
       */
        if (exitCode != 0) {
            exitProcess(exitCode);
        }
    }
}
Also used : DiagnosticsManager(org.apache.samza.diagnostics.DiagnosticsManager) ContainerHeartbeatMonitor(org.apache.samza.container.ContainerHeartbeatMonitor) JobConfig(org.apache.samza.config.JobConfig) SamzaContainer(org.apache.samza.container.SamzaContainer) NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) ExecutionContainerIdManager(org.apache.samza.container.ExecutionContainerIdManager) CoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.CoordinatorStreamStore) NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) MetricsReporter(org.apache.samza.metrics.MetricsReporter) TaskFactory(org.apache.samza.task.TaskFactory) StartpointManager(org.apache.samza.startpoint.StartpointManager) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) LocalityManager(org.apache.samza.container.LocalityManager) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with DiagnosticsManager

Use of org.apache.samza.diagnostics.DiagnosticsManager in the Apache Samza project.

Class TestDiagnosticsUtil, method testBuildDiagnosticsManager.

/**
 * Verifies that {@code DiagnosticsUtil.buildDiagnosticsManager} yields a present {@link DiagnosticsManager} when it
 * is called with the test configs and the system factory lookup is stubbed out.
 */
@Test
public void testBuildDiagnosticsManager() {
    Config testConfig = new MapConfig(buildTestConfigs());

    // Stub factory: any producer request is answered with a mock producer.
    SystemProducer producerStub = mock(SystemProducer.class);
    SystemFactory factoryStub = mock(SystemFactory.class);
    when(factoryStub.getProducer(anyString(), any(Config.class), any(MetricsRegistry.class), anyString()))
        .thenReturn(producerStub);

    // Redirect the reflective factory instantiation to the stub factory.
    PowerMockito.mockStatic(ReflectionUtil.class);
    when(ReflectionUtil.getObj(SYSTEM_FACTORY, SystemFactory.class)).thenReturn(factoryStub);

    JobModel jobModelStub = mock(JobModel.class);
    Optional<DiagnosticsManager> built = DiagnosticsUtil.buildDiagnosticsManager(
        JOB_NAME, JOB_ID, jobModelStub, CONTAINER_ID, Optional.of(ENV_ID), Optional.of(SAMZA_EPOCH_ID), testConfig);

    Assert.assertTrue(built.isPresent());
}
Also used : SystemFactory(org.apache.samza.system.SystemFactory) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) DiagnosticsManager(org.apache.samza.diagnostics.DiagnosticsManager) MetricsConfig(org.apache.samza.config.MetricsConfig) JobConfig(org.apache.samza.config.JobConfig) SystemConfig(org.apache.samza.config.SystemConfig) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) SystemProducer(org.apache.samza.system.SystemProducer) JobModel(org.apache.samza.job.model.JobModel) MapConfig(org.apache.samza.config.MapConfig) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 4 with DiagnosticsManager

Use of org.apache.samza.diagnostics.DiagnosticsManager in the Apache Samza project.

Class StaticResourceJobCoordinator, method start.

/**
 * Starts the job coordinator: starts the system admins and (if present) the startpoint manager, computes a new job
 * model, and then either signals a job restart or prepares worker execution and starts the coordinator's components.
 *
 * @throws SamzaException wrapping any exception raised after the system admins and startpoint manager were started
 */
@Override
public void start() {
    LOG.info("Starting job coordinator");
    this.systemAdmins.start();
    this.startpointManager.ifPresent(StartpointManager::start);
    try {
        JobModel jobModel = newJobModel();
        doSetLoggingContextConfig(jobModel.getConfig());
        // monitors should be created right after job model is calculated (see jobModelMonitors() for more details)
        JobModelMonitors monitors = jobModelMonitors(jobModel);
        Optional<DiagnosticsManager> newDiagnosticsManager = diagnosticsManager(jobModel);
        JobCoordinatorMetadata freshMetadata =
            this.jobCoordinatorMetadataManager.generateJobCoordinatorMetadata(jobModel, jobModel.getConfig());
        Set<JobMetadataChange> changes = checkForMetadataChanges(freshMetadata);

        /*
         * If the job coordinator comes up, but not due to a new deployment, and the metadata changed, then trigger a
         * restart. This case applies if the job coordinator died and the job model needed to change while it was down.
         * If there were no metadata changes, then just let the current workers continue to run.
         * If there was a new deployment (which includes the case where the coordinator requested a restart), then we
         * rely on the external resource manager to make sure the previous workers restarted, so we don't need to
         * restart again.
         */
        boolean restartRequired = !changes.isEmpty() && !changes.contains(JobMetadataChange.NEW_DEPLOYMENT);
        if (restartRequired) {
            LOG.info("Triggering job restart");
            this.jobRestartSignal.restartJob();
            return;
        }

        prepareWorkerExecution(jobModel, freshMetadata, changes);

        // save components that depend on job model in order to manage lifecycle or access later
        this.currentDiagnosticsManager = newDiagnosticsManager;
        this.currentJobModelMonitors = Optional.of(monitors);
        this.currentJobModel = Optional.of(jobModel);

        // lifecycle: start components
        this.coordinatorCommunication.start();
        this.jobCoordinatorListener.ifPresent(l -> l.onNewJobModel(this.processorId, jobModel));
        this.currentDiagnosticsManager.ifPresent(DiagnosticsManager::start);
        monitors.start();
        this.jobPreparationComplete.set(true);
    } catch (Exception e) {
        LOG.error("Error while running job coordinator; exiting", e);
        throw new SamzaException("Error while running job coordinator", e);
    }
}
Also used : DiagnosticsManager(org.apache.samza.diagnostics.DiagnosticsManager) JobCoordinatorMetadata(org.apache.samza.job.JobCoordinatorMetadata) JobModelMonitors(org.apache.samza.coordinator.JobModelMonitors) JobMetadataChange(org.apache.samza.job.JobMetadataChange) JobModel(org.apache.samza.job.model.JobModel) StartpointManager(org.apache.samza.startpoint.StartpointManager) SamzaException(org.apache.samza.SamzaException) ConfigException(org.apache.samza.config.ConfigException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException)

Example 5 with DiagnosticsManager

Use of org.apache.samza.diagnostics.DiagnosticsManager in the Apache Samza project.

Class StreamProcessor, method createSamzaContainer.

/**
 * Creates the {@link SamzaContainer} for this stream processor.
 *
 * <p>A diagnostics manager is built through {@code DiagnosticsUtil} (without execution-environment container ID or
 * epoch ID), and a {@link StartpointManager} is created only when startpoints are enabled and a metadata store is
 * available; otherwise a warning is logged and {@code null} is passed to the container.
 *
 * @param processorId ID of this processor, used as the container ID
 * @param jobModel job model the container is created for
 * @return the newly created container
 */
@VisibleForTesting
SamzaContainer createSamzaContainer(String processorId, JobModel jobModel) {
    JobConfig jobConfig = new JobConfig(this.config);

    // Creating diagnostics manager and wiring it respectively
    String jobName = jobConfig.getName().get();
    String jobId = jobConfig.getJobId();
    Optional<DiagnosticsManager> diagnosticsManager = DiagnosticsUtil.buildDiagnosticsManager(
        jobName, jobId, jobModel, processorId, Optional.empty(), Optional.empty(), this.config);

    // Metadata store lifecycle managed outside of the SamzaContainer.
    // All manager lifecycles are managed in the SamzaContainer including startpointManager
    StartpointManager startpointManager = null;
    if (!jobConfig.getStartpointEnabled()) {
        LOGGER.warn("StartpointManager not instantiated because startpoints is not enabled");
    } else if (metadataStore == null) {
        LOGGER.warn("StartpointManager cannot be instantiated because no metadata store defined for this stream processor");
    } else {
        startpointManager = new StartpointManager(metadataStore);
    }

    /*
     * StreamProcessor has a metricsRegistry instance variable, but StreamProcessor registers its metrics on its own
     * with the reporters. Therefore, don't reuse the StreamProcessor.metricsRegistry, because SamzaContainer also
     * registers the registry, and that will result in unnecessary duplicate metrics.
     */
    MetricsRegistryMap containerRegistry = new MetricsRegistryMap();

    return SamzaContainer.apply(
        processorId,
        jobModel,
        ScalaJavaUtil.toScalaMap(this.customMetricsReporter),
        containerRegistry,
        this.taskFactory,
        JobContextImpl.fromConfigWithDefaults(this.config, jobModel),
        Option.apply(this.applicationDefinedContainerContextFactoryOptional.orElse(null)),
        Option.apply(this.applicationDefinedTaskContextFactoryOptional.orElse(null)),
        Option.apply(this.externalContextOptional.orElse(null)),
        null,
        startpointManager,
        Option.apply(diagnosticsManager.orElse(null)));
}
Also used : DiagnosticsManager(org.apache.samza.diagnostics.DiagnosticsManager) StartpointManager(org.apache.samza.startpoint.StartpointManager) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) JobConfig(org.apache.samza.config.JobConfig) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

DiagnosticsManager (org.apache.samza.diagnostics.DiagnosticsManager)5 JobConfig (org.apache.samza.config.JobConfig)4 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)3 StartpointManager (org.apache.samza.startpoint.StartpointManager)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 SamzaException (org.apache.samza.SamzaException)2 ConfigException (org.apache.samza.config.ConfigException)2 MetricsConfig (org.apache.samza.config.MetricsConfig)2 SystemConfig (org.apache.samza.config.SystemConfig)2 JobModel (org.apache.samza.job.model.JobModel)2 SystemFactory (org.apache.samza.system.SystemFactory)2 SystemProducer (org.apache.samza.system.SystemProducer)2 IOException (java.io.IOException)1 ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig)1 Config (org.apache.samza.config.Config)1 MapConfig (org.apache.samza.config.MapConfig)1 StorageConfig (org.apache.samza.config.StorageConfig)1 TaskConfig (org.apache.samza.config.TaskConfig)1 ContainerHeartbeatMonitor (org.apache.samza.container.ContainerHeartbeatMonitor)1 ExecutionContainerIdManager (org.apache.samza.container.ExecutionContainerIdManager)1