Search in sources :

Example 11 with ProcessorLocality

use of org.apache.samza.job.model.ProcessorLocality in project samza by apache.

the class YarnJobValidationTool method validateJmxMetrics.

/**
 * Validates the JMX metrics of every container whose locality entry carries a non-blank JMX
 * tunneling URL.
 *
 * <p>The locality mapping is read through a {@link LocalityManager} backed by the coordinator stream
 * store. For each container with a JMX URL, a {@link JmxMetricsAccessor} is connected, handed to the
 * validator and then closed. The accessor is closed even when validation throws, so a failing
 * container does not leak its JMX connection. The coordinator stream store is always closed before
 * this method returns.
 *
 * @throws Exception if reading the locality, connecting to a container or validating its metrics fails.
 */
public void validateJmxMetrics() throws Exception {
    MetricsRegistry metricsRegistry = new MetricsRegistryMap();
    CoordinatorStreamStore coordinatorStreamStore = new CoordinatorStreamStore(config, metricsRegistry);
    coordinatorStreamStore.init();
    try {
        LocalityManager localityManager = new LocalityManager(new NamespaceAwareCoordinatorStreamStore(coordinatorStreamStore, SetConfig.TYPE));
        validator.init(config);
        LocalityModel localityModel = localityManager.readLocality();
        for (ProcessorLocality processorLocality : localityModel.getProcessorLocalities().values()) {
            String containerId = processorLocality.id();
            String jmxUrl = processorLocality.jmxTunnelingUrl();
            // Containers without a recorded JMX URL cannot be validated and are skipped.
            if (StringUtils.isNotBlank(jmxUrl)) {
                log.info("validate container " + containerId + " metrics with JMX: " + jmxUrl);
                JmxMetricsAccessor jmxMetrics = new JmxMetricsAccessor(jmxUrl);
                // Connect outside the try so close() is only attempted on an established connection.
                jmxMetrics.connect();
                try {
                    validator.validate(jmxMetrics);
                } finally {
                    // Release the JMX connection even when validation fails.
                    jmxMetrics.close();
                }
                log.info("validate container " + containerId + " successfully");
            }
        }
        validator.complete();
    } finally {
        coordinatorStreamStore.close();
    }
}
Also used : NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) JmxMetricsAccessor(org.apache.samza.metrics.JmxMetricsAccessor) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) CoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.CoordinatorStreamStore) NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) LocalityManager(org.apache.samza.container.LocalityManager) LocalityModel(org.apache.samza.job.model.LocalityModel)

Example 12 with ProcessorLocality

use of org.apache.samza.job.model.ProcessorLocality in project samza by apache.

the class LocalityServlet method doGet.

/**
 * Writes locality information to the response as JSON with an HTTP 200 status.
 *
 * <p>When the request carries exactly one parameter, the value of {@code PROCESSOR_ID_PARAM} is used
 * to look up a single processor's locality; if the model has no entry for it, a
 * {@link ProcessorLocality} with that id and an empty host is written instead. For any other number
 * of request parameters the whole {@link LocalityModel} is written.
 *
 * @param request the incoming HTTP request.
 * @param response the HTTP response the JSON document is written to.
 * @throws IOException if writing the response fails.
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    response.setContentType("application/json");
    response.setStatus(HttpServletResponse.SC_OK);
    LocalityModel localityModel = localityManager.readLocality();

    // Anything other than a single query parameter is answered with the full locality model.
    if (request.getParameterMap().size() != 1) {
        mapper.writeValue(response.getWriter(), localityModel);
        return;
    }

    String processorId = request.getParameter(PROCESSOR_ID_PARAM);
    ProcessorLocality knownLocality = localityModel.getProcessorLocality(processorId);
    // Placeholder returned for processors the model has no entry for.
    ProcessorLocality emptyLocality = new ProcessorLocality(processorId, "");
    ProcessorLocality processorLocality = Optional.ofNullable(knownLocality).orElse(emptyLocality);
    mapper.writeValue(response.getWriter(), processorLocality);
}
Also used : ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) LocalityModel(org.apache.samza.job.model.LocalityModel)

Example 13 with ProcessorLocality

use of org.apache.samza.job.model.ProcessorLocality in project samza by apache.

the class JobModelHelper method getGrouperMetadata.

/**
 * Assembles the {@link GrouperMetadata} from the processor locality, the task assignments and the
 * task-to-partition assignments.
 *
 * <p>Only tasks for which {@code isActiveTask} returns true are included in the task-level mappings.
 *
 * @param config the job configuration, forwarded to {@code getProcessorLocality}.
 * @param localityManager source of the processor locality.
 * @param taskAssignmentManager source of the task modes and task-to-container assignments.
 * @param taskPartitionAssignmentManager source of the partition-to-task assignments.
 * @return the grouper metadata built from the four mappings computed here.
 */
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
    Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
    Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();

    Map<TaskName, String> processorIdByTask = new HashMap<>();
    Map<TaskName, LocationId> locationByTask = new HashMap<>();
    // The task assignment is read only for active tasks, i.e. tasks that have no task mode or an
    // active task mode.
    taskAssignmentManager.readTaskAssignment().forEach((rawTaskName, processorId) -> {
        TaskName task = new TaskName(rawTaskName);
        if (!isActiveTask(task, taskModes)) {
            return;
        }
        processorIdByTask.put(task, processorId);
        if (processorLocality.containsKey(processorId)) {
            locationByTask.put(task, processorLocality.get(processorId));
        }
    });

    // Task to partition assignments are stored as SystemStreamPartition to list of task names in the
    // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic. The mapping
    // is inverted here to task name to SystemStreamPartitions so it can be wired into the JobModel.
    Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
    Map<TaskName, List<SystemStreamPartition>> partitionsByTask = new HashMap<>();
    for (Map.Entry<SystemStreamPartition, List<String>> assignment : sspToTaskMapping.entrySet()) {
        SystemStreamPartition partition = assignment.getKey();
        for (String rawTaskName : assignment.getValue()) {
            TaskName task = new TaskName(rawTaskName);
            if (isActiveTask(task, taskModes)) {
                partitionsByTask.computeIfAbsent(task, unused -> new ArrayList<>()).add(partition);
            }
        }
    }

    return new GrouperMetadataImpl(processorLocality, locationByTask, partitionsByTask, processorIdByTask);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config) HashMap(java.util.HashMap) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) LocationId(org.apache.samza.runtime.LocationId) ArrayList(java.util.ArrayList) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 14 with ProcessorLocality

use of org.apache.samza.job.model.ProcessorLocality in project samza by apache.

the class JobModelHelper method getProcessorLocality.

/**
 * Retrieves the processor locality of a samza job using the provided {@link Config} and
 * {@link LocalityManager}.
 *
 * <p>One entry is produced for every container id in {@code [0, containerCount)}, where the count is
 * read from the {@link JobConfig} built on {@code config}. A container whose persisted locality is
 * missing or has an empty host is mapped to the {@code "ANY_HOST"} location.
 *
 * @param config provides the configurations defined by the user; used here to read the container count.
 * @param localityManager provides the processor to host mapping persisted to the metadata store.
 * @return a map from container id to its {@link LocationId}.
 */
private static Map<String, LocationId> getProcessorLocality(Config config, LocalityManager localityManager) {
    Map<String, LocationId> containerToLocationId = new HashMap<>();
    Map<String, ProcessorLocality> existingContainerLocality = localityManager.readLocality().getProcessorLocalities();
    // The container count is loop-invariant: read it once instead of rebuilding JobConfig per iteration.
    int containerCount = new JobConfig(config).getContainerCount();
    for (int i = 0; i < containerCount; i++) {
        String containerId = Integer.toString(i);
        LocationId locationId = Optional.ofNullable(existingContainerLocality.get(containerId))
            .map(ProcessorLocality::host)
            .filter(StringUtils::isNotEmpty)
            .map(LocationId::new)
            // Build the fallback lazily so it is only allocated when no usable host is known.
            .orElseGet(() -> new LocationId("ANY_HOST"));
        containerToLocationId.put(containerId, locationId);
    }
    return containerToLocationId;
}
Also used : ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) HashMap(java.util.HashMap) StringUtils(org.apache.commons.lang3.StringUtils) LocationId(org.apache.samza.runtime.LocationId) JobConfig(org.apache.samza.config.JobConfig)

Example 15 with ProcessorLocality

use of org.apache.samza.job.model.ProcessorLocality in project samza by apache.

the class TestContainerProcessManager method testAllBufferedResourcesAreUtilized.

/**
 * Exercises a {@link ContainerProcessManager} with host affinity and two containers whose locality
 * is host1 and host2.
 *
 * <p>Two resources are allocated on host1 before any arrives on host2. After the container launched
 * on the first host1 resource completes with a failure status, the test asserts that no resource was
 * released and that the job becomes healthy again once launches on the second host1 resource and on
 * a host2 resource are reported successful.
 */
@Test
public void testAllBufferedResourcesAreUtilized() throws Exception {
    // Start from the host-affinity config and override container count, retry count and request timeout.
    Map<String, String> config = new HashMap<>();
    config.putAll(getConfigWithHostAffinity());
    config.put("job.container.count", "2");
    config.put("cluster-manager.container.retry.count", "2");
    config.put("cluster-manager.container.request.timeout.ms", "10000");
    Config cfg = new MapConfig(config);
    // 1. Request two containers on hosts - host1 and host2
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(2));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
    LocalityManager mockLocalityManager = mock(LocalityManager.class);
    // The mocked locality places processor "0" on host1 and processor "1" on host2.
    when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"), "1", new ProcessorLocality("1", "host2"))));
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, Boolean.parseBoolean(config.get(ClusterManagerConfig.HOST_AFFINITY_ENABLED)), false, mockLocalityManager, faultDomainManager);
    MockContainerAllocatorWithHostAffinity allocator = new MockContainerAllocatorWithHostAffinity(clusterResourceManager, cfg, state, containerManager);
    // The manager is wrapped in a spy so the failure callback can be verified further down.
    ContainerProcessManager cpm = spy(buildContainerProcessManager(new ClusterManagerConfig(cfg), state, clusterResourceManager, Optional.of(allocator), mockLocalityManager, false, faultDomainManager));
    cpm.start();
    assertFalse(cpm.shouldShutdown());
    // 2. When the task manager starts, there should have been a pending request on host1 and host2
    assertEquals(2, allocator.getContainerRequestState().numPendingRequests());
    // 3. Allocate an extra resource on host1 and no resource on host2 yet.
    SamzaResource resource1 = new SamzaResource(1, 1000, "host1", "id1");
    SamzaResource resource2 = new SamzaResource(1, 1000, "host1", "id2");
    cpm.onResourceAllocated(resource1);
    cpm.onResourceAllocated(resource2);
    // 4. Wait for the container to start on host1 and immediately fail
    // NOTE(review): the arguments look like (expected container count, timeout, unit) - confirm
    // against MockContainerAllocatorWithHostAffinity.
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(resource1);
    // Only the host2 request should still be pending at this point.
    assertEquals("host2", allocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    // Report the container running on resource1 as completed with an "App Error" diagnostic.
    cpm.onResourceCompleted(new SamzaResourceStatus(resource1.getContainerId(), "App Error", 1));
    verify(cpm).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
    // The failure puts a second request back in the pending state and marks the job unhealthy.
    assertEquals(2, allocator.getContainerRequestState().numPendingRequests());
    assertFalse(cpm.shouldShutdown());
    assertFalse(state.jobHealthy.get());
    // Presumably the two initial requests plus one re-request for the failed container - TODO confirm.
    assertEquals(3, clusterResourceManager.resourceRequests.size());
    // Nothing has been released, so the extra host1 resource (resource2) is still held.
    assertEquals(0, clusterResourceManager.releasedResources.size());
    // 5. Do not allocate any further resource on host1, and verify that the re-run of the container on host1 uses the
    // previously allocated extra resource
    SamzaResource resource3 = new SamzaResource(1, 1000, "host2", "id3");
    cpm.onResourceAllocated(resource3);
    if (!allocator.awaitContainersStart(2, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    // Report successful launches on the extra host1 resource and on the host2 resource.
    cpm.onStreamProcessorLaunchSuccess(resource2);
    cpm.onStreamProcessorLaunchSuccess(resource3);
    assertTrue(state.jobHealthy.get());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) Matchers.anyString(org.mockito.Matchers.anyString) LocalityModel(org.apache.samza.job.model.LocalityModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) LocalityManager(org.apache.samza.container.LocalityManager) Test(org.junit.Test)

Aggregations

ProcessorLocality (org.apache.samza.job.model.ProcessorLocality)18 LocalityModel (org.apache.samza.job.model.LocalityModel)12 LocalityManager (org.apache.samza.container.LocalityManager)10 HashMap (java.util.HashMap)8 ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig)6 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)6 MapConfig (org.apache.samza.config.MapConfig)5 Test (org.junit.Test)5 JobConfig (org.apache.samza.config.JobConfig)4 LocationId (org.apache.samza.runtime.LocationId)4 Matchers.anyString (org.mockito.Matchers.anyString)4 Map (java.util.Map)3 Config (org.apache.samza.config.Config)3 CoordinatorStreamStore (org.apache.samza.coordinator.metadatastore.CoordinatorStreamStore)3 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Optional (java.util.Optional)2 Collectors (java.util.stream.Collectors)2 StringUtils (org.apache.commons.lang3.StringUtils)2 ContainerPlacementMetadataStore (org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadataStore)2