Search in sources :

Example 16 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class JobModelCalculator method calculateJobModel.

/**
 * Builds the {@link JobModel} of a job in two phases:
 * <ol>
 *   <li>Resolves the input stream partitions declared in the configuration using {@code streamMetadataCache}.</li>
 *   <li>Runs the configured SSP grouper and then the configured task name grouper over those partitions.</li>
 * </ol>
 * Tasks that already appear in {@code changeLogPartitionMapping} keep their changelog partition; tasks that do
 * not are handed the next partition id above the highest one currently in the mapping.
 *
 * @param originalConfig the configuration of the job.
 * @param changeLogPartitionMapping the task to changelog partition mapping of the job.
 * @param streamMetadataCache the cache that holds the partition metadata of the input streams.
 * @param grouperMetadata provides the historical metadata of the application.
 * @return the built {@link JobModel}.
 */
public JobModel calculateJobModel(Config originalConfig, Map<TaskName, Integer> changeLogPartitionMapping, StreamMetadataCache streamMetadataCache, GrouperMetadata grouperMetadata) {
    // Pick up whatever the regex topic rewriter (when enabled) changes in the config.
    Config effectiveConfig = refreshConfigByRegexTopicRewriter(originalConfig);
    TaskConfig taskConfig = new TaskConfig(effectiveConfig);

    // Phase 1: resolve every input partition that has to be assigned to a task.
    Set<SystemStreamPartition> inputPartitions = getMatchedInputStreamPartitions(effectiveConfig, streamMetadataCache);

    // Some groupers need the processor list, so it is handed over through a copy of the config.
    // TODO: It is non-ideal to have config as a medium to transmit the locality information; especially, if the locality information evolves. Evaluate options on using context objects to pass dependent components.
    Map<String, String> grouperConfigEntries = new HashMap<>(effectiveConfig);
    String processorList = String.join(",", grouperMetadata.getProcessorLocality().keySet());
    grouperConfigEntries.put(JobConfig.PROCESSOR_LIST, processorList);
    SystemStreamPartitionGrouper grouper = getSystemStreamPartitionGrouper(new MapConfig(grouperConfigEntries));

    // Phase 2a: TaskName -> SSP grouping, through the proxy unless it has been switched off.
    JobConfig jobConfig = new JobConfig(effectiveConfig);
    Map<TaskName, Set<SystemStreamPartition>> partitionsByTask;
    if (!jobConfig.isSSPGrouperProxyEnabled()) {
        String proxyDisabledWarning = String.format("SSPGrouperProxy is disabled (%s = false). Stateful jobs may produce erroneous results if this is not enabled.", JobConfig.SSP_INPUT_EXPANSION_ENABLED);
        LOG.warn(proxyDisabledWarning);
        partitionsByTask = grouper.group(inputPartitions);
    } else {
        partitionsByTask = new SSPGrouperProxy(effectiveConfig, grouper).group(inputPartitions, grouperMetadata);
    }
    String groupingSummary = String.format("SystemStreamPartitionGrouper %s has grouped the SystemStreamPartitions into %d tasks with the following taskNames: %s", grouper, partitionsByTask.size(), partitionsByTask);
    LOG.info(groupingSummary);

    // Start from -1 when no mapping exists yet (first run of the job) so that the first assigned partition is 0.
    int highestChangelogPartitionId = changeLogPartitionMapping.values()
        .stream()
        .max(Comparator.naturalOrder())
        .orElse(-1);

    // Walk the tasks in sorted order so that newly assigned changelog partitions are reproducible and intuitive.
    Map<TaskName, Set<SystemStreamPartition>> orderedPartitionsByTask = new TreeMap<>(partitionsByTask);
    Set<TaskModel> taskModels = new HashSet<>();
    for (Map.Entry<TaskName, Set<SystemStreamPartition>> entry : orderedPartitionsByTask.entrySet()) {
        TaskName taskName = entry.getKey();
        Integer knownPartitionId = changeLogPartitionMapping.get(taskName);
        int partitionId;
        if (knownPartitionId != null) {
            partitionId = knownPartitionId;
        } else {
            // A TaskName that has never been seen before gets a brand new changelog partition.
            highestChangelogPartitionId += 1;
            LOG.info(String.format("New task %s is being assigned changelog partition %s.", taskName, highestChangelogPartitionId));
            partitionId = highestChangelogPartitionId;
        }
        taskModels.add(new TaskModel(taskName, entry.getValue(), new Partition(partitionId)));
    }

    // Phase 2b: task -> container grouping.
    // Here is where we should put in a pluggable option for the SSPTaskNameGrouper for locality, load-balancing, etc.
    TaskNameGrouperFactory taskNameGrouperFactory = ReflectionUtil.getObj(taskConfig.getTaskNameGrouperFactory(), TaskNameGrouperFactory.class);
    boolean standbyEnabled = jobConfig.getStandbyTasksEnabled();
    int standbyReplicationFactor = jobConfig.getStandbyTaskReplicationFactor();
    TaskNameGrouperProxy containerGrouper = new TaskNameGrouperProxy(taskNameGrouperFactory.build(effectiveConfig), standbyEnabled, standbyReplicationFactor);

    // With host affinity the grouper gets the full grouper metadata; otherwise only the processor ids.
    boolean hostAffinityEnabled = new ClusterManagerConfig(effectiveConfig).getHostAffinityEnabled();
    Set<ContainerModel> containerModels = hostAffinityEnabled
        ? containerGrouper.group(taskModels, grouperMetadata)
        : containerGrouper.group(taskModels, new ArrayList<>(grouperMetadata.getProcessorLocality().keySet()));

    Map<String, ContainerModel> containersById = containerModels.stream()
        .collect(Collectors.toMap(ContainerModel::getId, containerModel -> containerModel));
    return new JobModel(effectiveConfig, containersById);
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) SSPGrouperProxy(org.apache.samza.container.grouper.stream.SSPGrouperProxy) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) TaskConfig(org.apache.samza.config.TaskConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) TaskConfig(org.apache.samza.config.TaskConfig) JobConfig(org.apache.samza.config.JobConfig) ContainerModel(org.apache.samza.job.model.ContainerModel) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) JobModel(org.apache.samza.job.model.JobModel) MapConfig(org.apache.samza.config.MapConfig) TaskNameGrouperFactory(org.apache.samza.container.grouper.task.TaskNameGrouperFactory) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) TreeMap(java.util.TreeMap) SystemStreamPartitionGrouper(org.apache.samza.container.grouper.stream.SystemStreamPartitionGrouper) TaskNameGrouperProxy(org.apache.samza.container.grouper.task.TaskNameGrouperProxy) TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 17 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class ExecutionPlanner method validateConfig.

/**
 * Rejects configurations that combine batch application mode with host affinity.
 *
 * @throws SamzaException if the application mode is {@code BATCH} and host affinity is enabled.
 */
private void validateConfig() {
    ApplicationConfig applicationConfig = new ApplicationConfig(config);
    ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(config);

    // Only batch mode is restricted; every other mode is accepted as-is.
    if (ApplicationConfig.ApplicationMode.BATCH != applicationConfig.getAppMode()) {
        return;
    }
    if (!clusterManagerConfig.getHostAffinityEnabled()) {
        return;
    }

    // currently we don't support host-affinity in batch mode
    String message = String.format("Host affinity is not supported in batch mode. Please configure %s=false.", ClusterManagerConfig.JOB_HOST_AFFINITY_ENABLED);
    throw new SamzaException(message);
}
Also used : ApplicationConfig(org.apache.samza.config.ApplicationConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) SamzaException(org.apache.samza.SamzaException)

Example 18 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class TestContainerProcessManager method testNewContainerRequestedOnFailureWithKnownCode.

/**
 * Test AM requests a new container when a task fails.
 * Three consecutive container failures are exercised: an application error (exit status 1),
 * a preemption ({@code SamzaResourceStatus.PREEMPTED}) and an abort ({@code SamzaResourceStatus.ABORTED}).
 * Only the application error is expected to go through {@code onResourceCompletedWithUnknownStatus}.
 * After each failure the test expects exactly one pending request, targeted at
 * {@code ResourceRequestState.ANY_HOST}, and expects that the job is reported as unhealthy.
 */
@Test
public void testNewContainerRequestedOnFailureWithKnownCode() throws Exception {
    Config conf = getConfig();
    Map<String, String> config = new HashMap<>();
    config.putAll(getConfig());
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    // Spied so that the same config instance feeds both the container manager and the process manager.
    ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(new MapConfig(config)));
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
    MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
    // The process manager is spied so that calls to onResourceCompletedWithUnknownStatus can be verified below.
    ContainerProcessManager cpm = spy(buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator)));
    // Start the container process manager
    cpm.start();
    assertFalse(cpm.shouldShutdown());
    // Starting up should have produced the request for the single container of the job.
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    SamzaResource container1 = new SamzaResource(1, 1000, "host1", "id1");
    cpm.onResourceAllocated(container1);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
    cpm.onStreamProcessorLaunchSuccess(container1);
    // Create container failure - with an application error (exit status 1), which is expected to be
    // handled through onResourceCompletedWithUnknownStatus
    SamzaResourceStatus resourceStatusOnAppError = new SamzaResourceStatus(container1.getContainerId(), "App error", 1);
    cpm.onResourceCompleted(resourceStatusOnAppError);
    verify(cpm).onResourceCompletedWithUnknownStatus(eq(resourceStatusOnAppError), anyString(), anyString(), anyInt());
    // The above failure should trigger a container request
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertFalse(cpm.shouldShutdown());
    assertFalse(state.jobHealthy.get());
    // One request from start-up plus one for the replacement; nothing has been released.
    assertEquals(2, clusterResourceManager.resourceRequests.size());
    assertEquals(0, clusterResourceManager.releasedResources.size());
    assertEquals(ResourceRequestState.ANY_HOST, allocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
    SamzaResource container2 = new SamzaResource(1, 1000, "host1", "id2");
    cpm.onResourceAllocated(container2);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container2);
    // Create container failure - with SamzaResourceStatus.PREEMPTED
    SamzaResourceStatus resourceStatusOnPreemption = new SamzaResourceStatus(container2.getContainerId(), "Preemption", SamzaResourceStatus.PREEMPTED);
    cpm.onResourceCompleted(resourceStatusOnPreemption);
    // A preemption must not be handled through the unknown-status path.
    verify(cpm, never()).onResourceCompletedWithUnknownStatus(eq(resourceStatusOnPreemption), anyString(), anyString(), anyInt());
    assertEquals(3, clusterResourceManager.resourceRequests.size());
    // The above failure should trigger a container request
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertFalse(cpm.shouldShutdown());
    assertFalse(state.jobHealthy.get());
    assertEquals(ResourceRequestState.ANY_HOST, allocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
    SamzaResource container3 = new SamzaResource(1, 1000, "host1", "id3");
    cpm.onResourceAllocated(container3);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container3);
    // Create container failure - with SamzaResourceStatus.ABORTED
    SamzaResourceStatus resourceStatusOnAborted = new SamzaResourceStatus(container3.getContainerId(), "Aborted", SamzaResourceStatus.ABORTED);
    cpm.onResourceCompleted(resourceStatusOnAborted);
    // An abort must not be handled through the unknown-status path either.
    verify(cpm, never()).onResourceCompletedWithUnknownStatus(eq(resourceStatusOnAborted), anyString(), anyString(), anyInt());
    // The above failure should trigger a container request
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(4, clusterResourceManager.resourceRequests.size());
    assertEquals(0, clusterResourceManager.releasedResources.size());
    assertFalse(cpm.shouldShutdown());
    assertFalse(state.jobHealthy.get());
    assertEquals(ResourceRequestState.ANY_HOST, allocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) Matchers.anyString(org.mockito.Matchers.anyString) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Test(org.junit.Test)

Example 19 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class TestContainerProcessManager method testAllBufferedResourcesAreUtilized.

/**
 * Exercises a two-container job with host affinity (processor "0" on host1, processor "1" on host2)
 * in which two resources are allocated on host1 up front. After the container started on the first
 * host1 resource fails, only host2 receives a further allocation, yet the test expects two containers
 * to start and the job to become healthy, with no resource released along the way.
 */
@Test
public void testAllBufferedResourcesAreUtilized() throws Exception {
    Map<String, String> config = new HashMap<>();
    config.putAll(getConfigWithHostAffinity());
    config.put("job.container.count", "2");
    config.put("cluster-manager.container.retry.count", "2");
    config.put("cluster-manager.container.request.timeout.ms", "10000");
    Config cfg = new MapConfig(config);
    // 1. Request two containers on hosts - host1 and host2
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(2));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
    LocalityManager mockLocalityManager = mock(LocalityManager.class);
    // The mocked locality pins processor "0" to host1 and processor "1" to host2.
    when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"), "1", new ProcessorLocality("1", "host2"))));
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, Boolean.parseBoolean(config.get(ClusterManagerConfig.HOST_AFFINITY_ENABLED)), false, mockLocalityManager, faultDomainManager);
    MockContainerAllocatorWithHostAffinity allocator = new MockContainerAllocatorWithHostAffinity(clusterResourceManager, cfg, state, containerManager);
    // The process manager is spied so that calls to onResourceCompletedWithUnknownStatus can be verified below.
    ContainerProcessManager cpm = spy(buildContainerProcessManager(new ClusterManagerConfig(cfg), state, clusterResourceManager, Optional.of(allocator), mockLocalityManager, false, faultDomainManager));
    cpm.start();
    assertFalse(cpm.shouldShutdown());
    // 2. When the task manager starts, there should have been a pending request on host1 and host2
    assertEquals(2, allocator.getContainerRequestState().numPendingRequests());
    // 3. Allocate an extra resource on host1 and no resource on host2 yet.
    SamzaResource resource1 = new SamzaResource(1, 1000, "host1", "id1");
    SamzaResource resource2 = new SamzaResource(1, 1000, "host1", "id2");
    cpm.onResourceAllocated(resource1);
    cpm.onResourceAllocated(resource2);
    // 4. Wait for the container to start on host1 and immediately fail
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(resource1);
    // Only the host2 request is still outstanding at this point.
    assertEquals("host2", allocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    // Fail the container on resource1 with exit status 1, which is expected to take the unknown-status path.
    cpm.onResourceCompleted(new SamzaResourceStatus(resource1.getContainerId(), "App Error", 1));
    verify(cpm).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
    assertEquals(2, allocator.getContainerRequestState().numPendingRequests());
    assertFalse(cpm.shouldShutdown());
    assertFalse(state.jobHealthy.get());
    // Two requests from start-up plus one for the failed container; the extra host1 resource was not released.
    assertEquals(3, clusterResourceManager.resourceRequests.size());
    assertEquals(0, clusterResourceManager.releasedResources.size());
    // 5. Do not allocate any further resource on host1; only host2 receives a new allocation here.
    // NOTE(review): the re-run of the host1 container is presumably served by the previously allocated
    // extra resource (resource2) - the test reports launch success on it below but does not assert
    // the placement directly; confirm against the allocator.
    SamzaResource resource3 = new SamzaResource(1, 1000, "host2", "id3");
    cpm.onResourceAllocated(resource3);
    if (!allocator.awaitContainersStart(2, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(resource2);
    cpm.onStreamProcessorLaunchSuccess(resource3);
    assertTrue(state.jobHealthy.get());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) Matchers.anyString(org.mockito.Matchers.anyString) LocalityModel(org.apache.samza.job.model.LocalityModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) LocalityManager(org.apache.samza.container.LocalityManager) Test(org.junit.Test)

Example 20 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class TestContainerProcessManager method testDuplicateNotificationsDoNotAffectJobHealth.

/**
 * Verifies that a second completion notification for a container that has already failed and been
 * replaced is counted as redundant: it must not trigger another resource request, must not release
 * any resource and must leave the job healthy.
 */
@Test
public void testDuplicateNotificationsDoNotAffectJobHealth() throws Exception {
    Config conf = getConfig();
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(new MapConfig(conf)));
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
    MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
    // The process manager is spied so that calls to onResourceCompletedWithUnknownStatus can be verified below.
    ContainerProcessManager cpm = spy(buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator)));
    // Start the container process manager
    cpm.start();
    assertFalse(cpm.shouldShutdown());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    SamzaResource container1 = new SamzaResource(1, 1000, "host1", "id1");
    cpm.onResourceAllocated(container1);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container1);
    assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
    // Create container failure - with SamzaResourceStatus.DISK_FAIL
    cpm.onResourceCompleted(new SamzaResourceStatus(container1.getContainerId(), "Disk failure", SamzaResourceStatus.DISK_FAIL));
    verify(cpm, never()).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
    // The above failure should trigger a container request
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertFalse(cpm.shouldShutdown());
    assertFalse(state.jobHealthy.get());
    assertEquals(2, clusterResourceManager.resourceRequests.size());
    assertEquals(0, clusterResourceManager.releasedResources.size());
    assertEquals(ResourceRequestState.ANY_HOST, allocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
    SamzaResource container2 = new SamzaResource(1, 1000, "host1", "id2");
    cpm.onResourceAllocated(container2);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container2);
    assertTrue(state.jobHealthy.get());
    // Simulate a duplicate notification for container 1 with a different exit code
    cpm.onResourceCompleted(new SamzaResourceStatus(container1.getContainerId(), "Disk failure", SamzaResourceStatus.PREEMPTED));
    verify(cpm, never()).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
    // assert that a duplicate notification does not change metrics (including job health)
    // Expected value goes first so that a failure message reports expected/actual the right way round.
    assertEquals(1, state.redundantNotifications.get());
    assertEquals(2, clusterResourceManager.resourceRequests.size());
    assertEquals(0, clusterResourceManager.releasedResources.size());
    assertTrue(state.jobHealthy.get());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) Matchers.anyString(org.mockito.Matchers.anyString) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Test(org.junit.Test)

Aggregations

ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig): 23, MapConfig (org.apache.samza.config.MapConfig): 17, Test (org.junit.Test): 15, JobConfig (org.apache.samza.config.JobConfig): 12, Config (org.apache.samza.config.Config): 11, HashMap (java.util.HashMap): 10, Matchers.anyString (org.mockito.Matchers.anyString): 9, MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap): 8, LocalityManager (org.apache.samza.container.LocalityManager): 6, LocalityModel (org.apache.samza.job.model.LocalityModel): 6, ProcessorLocality (org.apache.samza.job.model.ProcessorLocality): 6, CountDownLatch (java.util.concurrent.CountDownLatch): 3, InvocationOnMock (org.mockito.invocation.InvocationOnMock): 3, ImmutableList (com.google.common.collect.ImmutableList): 2, List (java.util.List): 2, SamzaException (org.apache.samza.SamzaException): 2, ContainerPlacementMetadata (org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata): 2, TaskConfig (org.apache.samza.config.TaskConfig): 2, ContainerPlacementRequestMessage (org.apache.samza.container.placement.ContainerPlacementRequestMessage): 2, ServletHolder (org.eclipse.jetty.servlet.ServletHolder): 2