use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.
the class TestContainerProcessManager method testOnShutdown.
@Test
public void testOnShutdown() throws Exception {
Config conf = getConfig();
SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(conf));
ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.empty());
cpm.start();
Thread allocatorThread = (Thread) getPrivateFieldFromCpm("allocatorThread", cpm).get(cpm);
assertTrue(allocatorThread.isAlive());
cpm.stop();
assertFalse(allocatorThread.isAlive());
}
use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.
the class TestContainerProcessManager method testOnInitAMHighAvailability.
@Test
public void testOnInitAMHighAvailability() throws Exception {
Map<String, String> configMap = new HashMap<>(configVals);
configMap.put(JobConfig.YARN_AM_HIGH_AVAILABILITY_ENABLED, "true");
Config conf = new MapConfig(configMap);
SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(2));
state.runningProcessors.put("0", new SamzaResource(1, 1024, "host", "0"));
MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
ClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(conf));
ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.empty());
MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
getPrivateFieldFromCpm("containerAllocator", cpm).set(cpm, allocator);
CountDownLatch latch = new CountDownLatch(1);
getPrivateFieldFromCpm("allocatorThread", cpm).set(cpm, new Thread() {
public void run() {
isRunning = true;
latch.countDown();
}
});
cpm.start();
if (!latch.await(2, TimeUnit.SECONDS)) {
Assert.fail("timed out waiting for the latch to expire");
}
// Verify Allocator thread has started running
assertTrue(isRunning);
// Verify only 1 was requested with allocator
assertEquals(1, allocator.requestedContainers);
assertTrue("Ensure no processors were forcefully restarted", callback.resourceStatuses.isEmpty());
cpm.stop();
}
use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.
the class TestContainerProcessManager method testCpmShouldStopWhenContainersFinish.
/**
* Test Container Process Manager should stop when all containers finish
*/
@Test
public void testCpmShouldStopWhenContainersFinish() throws Exception {
Config conf = getConfig();
SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(conf));
ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
ContainerProcessManager cpm = spy(buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator)));
// start triggers a request
cpm.start();
assertFalse(cpm.shouldShutdown());
assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
SamzaResource container = new SamzaResource(1, 1024, "host1", "id0");
cpm.onResourceAllocated(container);
if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
fail("timed out waiting for the containers to start");
}
cpm.onStreamProcessorLaunchSuccess(container);
assertFalse(cpm.shouldShutdown());
cpm.onResourceCompleted(new SamzaResourceStatus("id0", "diagnostics", SamzaResourceStatus.SUCCESS));
verify(cpm, never()).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
assertTrue(cpm.shouldShutdown());
cpm.stop();
}
use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.
the class DiagnosticsUtil method buildDiagnosticsManager.
/**
* Create a {@link DiagnosticsManager} for the given jobName, jobId, containerId, and execEnvContainerId, if
* diagnostics is enabled.
* @param executionEnvContainerId ID assigned to the container by the cluster manager (e.g. YARN)
* @param samzaEpochId ID assigned to the job deployment attempt by the cluster manager
*/
public static Optional<DiagnosticsManager> buildDiagnosticsManager(String jobName, String jobId, JobModel jobModel, String containerId, Optional<String> executionEnvContainerId, Optional<String> samzaEpochId, Config config) {
JobConfig jobConfig = new JobConfig(config);
MetricsConfig metricsConfig = new MetricsConfig(config);
Optional<DiagnosticsManager> diagnosticsManagerOptional = Optional.empty();
if (jobConfig.getDiagnosticsEnabled()) {
ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(config);
int containerMemoryMb = clusterManagerConfig.getContainerMemoryMb();
int containerNumCores = clusterManagerConfig.getNumCores();
long maxHeapSizeBytes = Runtime.getRuntime().maxMemory();
int containerThreadPoolSize = jobConfig.getThreadPoolSize();
String taskClassVersion = Util.getTaskClassVersion(config);
String samzaVersion = Util.getSamzaVersion();
String hostName = Util.getLocalHost().getHostName();
Optional<String> diagnosticsReporterStreamName = metricsConfig.getMetricsSnapshotReporterStream(MetricsConfig.METRICS_SNAPSHOT_REPORTER_NAME_FOR_DIAGNOSTICS);
if (!diagnosticsReporterStreamName.isPresent()) {
throw new ConfigException("Missing required config: " + String.format(MetricsConfig.METRICS_SNAPSHOT_REPORTER_STREAM, MetricsConfig.METRICS_SNAPSHOT_REPORTER_NAME_FOR_DIAGNOSTICS));
}
SystemStream diagnosticsSystemStream = StreamUtil.getSystemStreamFromNames(diagnosticsReporterStreamName.get());
// Create a SystemProducer for DiagnosticsManager. This producer is used by the DiagnosticsManager
// to write to the same stream as the MetricsSnapshotReporter called `diagnosticsreporter`.
Optional<String> diagnosticsSystemFactoryName = new SystemConfig(config).getSystemFactory(diagnosticsSystemStream.getSystem());
if (!diagnosticsSystemFactoryName.isPresent()) {
throw new SamzaException("Missing factory in config for system " + diagnosticsSystemStream.getSystem());
}
SystemFactory systemFactory = ReflectionUtil.getObj(diagnosticsSystemFactoryName.get(), SystemFactory.class);
SystemProducer systemProducer = systemFactory.getProducer(diagnosticsSystemStream.getSystem(), config, new MetricsRegistryMap(), DiagnosticsUtil.class.getSimpleName());
DiagnosticsManager diagnosticsManager = new DiagnosticsManager(jobName, jobId, jobModel.getContainers(), containerMemoryMb, containerNumCores, new StorageConfig(config).getNumPersistentStores(), maxHeapSizeBytes, containerThreadPoolSize, containerId, executionEnvContainerId.orElse(""), samzaEpochId.orElse(""), taskClassVersion, samzaVersion, hostName, diagnosticsSystemStream, systemProducer, Duration.ofMillis(new TaskConfig(config).getShutdownMs()), jobConfig.getAutosizingEnabled(), config);
diagnosticsManagerOptional = Optional.of(diagnosticsManager);
}
return diagnosticsManagerOptional;
}
use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.
the class TestContainerPlacementActions method testContainerPlacementsForJobRunningInDegradedState.
@Test(timeout = 20000)
public void testContainerPlacementsForJobRunningInDegradedState() throws Exception {
// Set failure after retries to false to enable job running in degraded state
config = new MapConfig(configVals, getConfigWithHostAffinityAndRetries(true, 1, false));
state = new SamzaApplicationState(JobModelManagerTestUtil.getJobModelManager(getConfig(), 2, this.server));
callback = mock(ClusterResourceManager.Callback.class);
MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(config);
containerManager = spy(new ContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, true, false, localityManager, faultDomainManager, config));
allocatorWithHostAffinity = new MockContainerAllocatorWithHostAffinity(clusterResourceManager, config, state, containerManager);
cpm = new ContainerProcessManager(clusterManagerConfig, state, new MetricsRegistryMap(), clusterResourceManager, Optional.of(allocatorWithHostAffinity), containerManager, localityManager, false);
doAnswer(new Answer<Void>() {
public Void answer(InvocationOnMock invocation) {
new Thread(() -> {
Object[] args = invocation.getArguments();
cpm.onResourcesAvailable((List<SamzaResource>) args[0]);
}, "AMRMClientAsync").start();
return null;
}
}).when(callback).onResourcesAvailable(anyList());
// Mimic stream processor launch failure only on host-2,
doAnswer(new Answer<Void>() {
public Void answer(InvocationOnMock invocation) {
new Thread(() -> {
Object[] args = invocation.getArguments();
cpm.onStreamProcessorLaunchSuccess((SamzaResource) args[0]);
}, "AMRMClientAsync").start();
return null;
}
}).when(callback).onStreamProcessorLaunchSuccess(any());
doAnswer(new Answer<Void>() {
public Void answer(InvocationOnMock invocation) {
new Thread(() -> {
Object[] args = invocation.getArguments();
cpm.onResourcesCompleted((List<SamzaResourceStatus>) args[0]);
}, "AMRMClientAsync").start();
return null;
}
}).when(callback).onResourcesCompleted(anyList());
cpm.start();
if (!allocatorWithHostAffinity.awaitContainersStart(2, 5, TimeUnit.SECONDS)) {
fail("timed out waiting for the containers to start");
}
while (state.runningProcessors.size() != 2) {
Thread.sleep(100);
}
// App is in running state with two containers running
assertEquals(state.runningProcessors.size(), 2);
assertEquals(state.runningProcessors.get("0").getHost(), "host-1");
assertEquals(state.runningProcessors.get("1").getHost(), "host-2");
assertEquals(state.preferredHostRequests.get(), 2);
assertEquals(state.anyHostRequests.get(), 0);
// Trigger a container failure
clusterResourceManager.stopStreamProcessor(state.runningProcessors.get("1"), -103);
// Wait for container to start
if (!allocatorWithHostAffinity.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
fail("timed out waiting for the containers to start");
}
while (state.runningProcessors.size() != 2) {
Thread.sleep(100);
}
// Trigger a container failure again
clusterResourceManager.stopStreamProcessor(state.runningProcessors.get("1"), -103);
// Ensure that this container has exhausted all retires
while (state.failedProcessors.size() != 1 && state.runningProcessors.size() != 1) {
Thread.sleep(100);
}
// At this point the application should only have one container running
assertEquals(state.runningProcessors.get("0").getHost(), "host-1");
assertEquals(state.runningProcessors.size(), 1);
assertEquals(state.pendingProcessors.size(), 0);
assertTrue(state.failedProcessors.containsKey("1"));
ContainerPlacementRequestMessage requestMessage = new ContainerPlacementRequestMessage(UUID.randomUUID(), "app-attempt-001", "1", "host-3", System.currentTimeMillis());
ContainerPlacementMetadata metadata = containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithHostAffinity);
// Wait for the ControlAction to complete
if (!allocatorWithHostAffinity.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
fail("timed out waiting for the containers to start");
}
// Wait for both the containers to be in running state & control action metadata to succeed
while (state.runningProcessors.size() != 2 && metadata.getActionStatus() != ContainerPlacementMessage.StatusCode.SUCCEEDED) {
Thread.sleep(100);
}
assertEquals(state.preferredHostRequests.get(), 4);
assertEquals(state.runningProcessors.size(), 2);
// Container 1 should not go to host-3
assertEquals(state.runningProcessors.get("0").getHost(), "host-1");
assertEquals(state.runningProcessors.get("1").getHost(), "host-3");
assertEquals(state.anyHostRequests.get(), 0);
// Failed processors must be empty
assertEquals(state.failedProcessors.size(), 0);
}
Aggregations