use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.
the class SlotTest method testReschedule.
/**
 * Drives the slot state machine through a full reschedule: a slot running the "CURRENT"
 * topology is handed an assignment for the "NEW" topology, and the test checks every
 * intermediate state until the new worker is reported as RUNNING.
 */
@Test
public void testReschedule() throws Exception {
    try (SimulatedTime simulatedTime = new SimulatedTime(1010)) {
        int port = 8080;

        // --- Worker that currently occupies the slot ---
        String currentTopoId = "CURRENT";
        List<ExecutorInfo> currentExecutors = mkExecutorInfoList(1, 2, 3, 4, 5);
        LocalAssignment currentAssignment =
            mkLocalAssignment(currentTopoId, currentExecutors, mkWorkerResources(100.0, 100.0, 100.0));
        Container currentContainer = mock(Container.class);
        LSWorkerHeartbeat currentHeartbeat =
            mkWorkerHB(currentTopoId, port, currentExecutors, Time.currentTimeSecs());
        when(currentContainer.readHeartbeat()).thenReturn(currentHeartbeat);
        // Consecutive stubbing: not dead on the first query, dead on every later one.
        when(currentContainer.areAllProcessesDead()).thenReturn(false, true);

        // --- Worker that should replace it ---
        String newTopoId = "NEW";
        List<ExecutorInfo> newExecutors = mkExecutorInfoList(1, 2, 3, 4, 5);
        LocalAssignment newAssignment =
            mkLocalAssignment(newTopoId, newExecutors, mkWorkerResources(100.0, 100.0, 100.0));
        ILocalizer localizer = mock(ILocalizer.class);
        Container newContainer = mock(Container.class);
        LocalState localState = mock(LocalState.class);
        ContainerLauncher containerLauncher = mock(ContainerLauncher.class);
        when(containerLauncher.launchContainer(port, newAssignment, localState)).thenReturn(newContainer);
        // NOTE(review): this heartbeat uses port 100 rather than `port`; the assertions below
        // still pass, so the value does not appear to matter here - confirm it is intentional.
        LSWorkerHeartbeat newHeartbeat = mkWorkerHB(newTopoId, 100, newExecutors, Time.currentTimeSecs());
        when(newContainer.readHeartbeat()).thenReturn(newHeartbeat, newHeartbeat);

        // --- Localizer downloads: one future for the base blobs, one for the remaining blobs ---
        @SuppressWarnings("unchecked")
        Future<Void> baseFuture = mock(Future.class);
        when(localizer.requestDownloadBaseTopologyBlobs(newAssignment, port)).thenReturn(baseFuture);
        @SuppressWarnings("unchecked")
        Future<Void> blobFuture = mock(Future.class);
        when(localizer.requestDownloadTopologyBlobs(newAssignment, port)).thenReturn(blobFuture);

        ISupervisor supervisor = mock(ISupervisor.class);
        StaticState staticState = new StaticState(localizer, 5000, 120000, 1000, 1000,
            containerLauncher, "localhost", port, supervisor, localState);
        DynamicState initialState = new DynamicState(currentAssignment, currentContainer, newAssignment);

        // Step 1: assignment changed -> kill the current container and start the base download.
        DynamicState slotState = Slot.stateMachineStep(initialState, staticState);
        assertEquals(MachineState.KILL, slotState.state);
        verify(currentContainer).kill();
        verify(localizer).requestDownloadBaseTopologyBlobs(newAssignment, port);
        assertSame("pendingDownload not set properly", baseFuture, slotState.pendingDownload);
        assertEquals(newAssignment, slotState.pendingLocalization);
        assertTrue(Time.currentTimeMillis() > 1000);

        // Step 2: processes are still alive -> force kill, download stays pending.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.KILL, slotState.state);
        verify(currentContainer).forceKill();
        assertSame("pendingDownload not set properly", baseFuture, slotState.pendingDownload);
        assertEquals(newAssignment, slotState.pendingLocalization);
        assertTrue(Time.currentTimeMillis() > 2000);

        // Step 3: processes are dead -> clean up and release the old assignment's slot.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.WAITING_FOR_BASIC_LOCALIZATION, slotState.state);
        verify(currentContainer).cleanUp();
        verify(localizer).releaseSlotFor(currentAssignment, port);
        assertTrue(Time.currentTimeMillis() > 2000);

        // Step 4: base download is awaited, then the remaining blob download is requested.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.WAITING_FOR_BLOB_LOCALIZATION, slotState.state);
        verify(baseFuture).get(1000, TimeUnit.MILLISECONDS);
        verify(localizer).requestDownloadTopologyBlobs(newAssignment, port);
        assertSame("pendingDownload not set properly", blobFuture, slotState.pendingDownload);
        assertEquals(newAssignment, slotState.pendingLocalization);
        assertTrue(Time.currentTimeMillis() > 2000);

        // Step 5: blob download is awaited and the new container is launched.
        slotState = Slot.stateMachineStep(slotState, staticState);
        verify(blobFuture).get(1000, TimeUnit.MILLISECONDS);
        verify(containerLauncher).launchContainer(port, newAssignment, localState);
        assertEquals(MachineState.WAITING_FOR_WORKER_START, slotState.state);
        assertSame("pendingDownload is not null", null, slotState.pendingDownload);
        assertSame(null, slotState.pendingLocalization);
        assertSame(newAssignment, slotState.currentAssignment);
        assertSame(newContainer, slotState.container);
        assertTrue(Time.currentTimeMillis() > 2000);

        // Step 6: the new worker's heartbeat is seen -> RUNNING.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.RUNNING, slotState.state);
        assertSame("pendingDownload is not null", null, slotState.pendingDownload);
        assertSame(null, slotState.pendingLocalization);
        assertSame(newAssignment, slotState.currentAssignment);
        assertSame(newContainer, slotState.container);
        assertTrue(Time.currentTimeMillis() > 2000);

        // Step 7: still RUNNING; simulated time has moved past 3000 ms.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.RUNNING, slotState.state);
        assertSame("pendingDownload is not null", null, slotState.pendingDownload);
        assertSame(null, slotState.pendingLocalization);
        assertSame(newAssignment, slotState.currentAssignment);
        assertSame(newContainer, slotState.container);
        assertTrue(Time.currentTimeMillis() > 3000);

        // Step 8: still RUNNING; simulated time has moved past 4000 ms.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.RUNNING, slotState.state);
        assertSame("pendingDownload is not null", null, slotState.pendingDownload);
        assertSame(null, slotState.pendingLocalization);
        assertSame(newAssignment, slotState.currentAssignment);
        assertSame(newContainer, slotState.container);
        assertTrue(Time.currentTimeMillis() > 4000);
    }
}
use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.
the class SlotTest method testRelaunch.
/**
 * Checks that a slot whose assignment is unchanged but whose worker heartbeat is stale
 * kills the container, relaunches it, and returns to RUNNING once a fresh heartbeat is read.
 */
@Test
public void testRelaunch() throws Exception {
    try (SimulatedTime simulatedTime = new SimulatedTime(1010)) {
        int port = 8080;
        String topologyId = "CURRENT";
        List<ExecutorInfo> executors = mkExecutorInfoList(1, 2, 3, 4, 5);
        LocalAssignment assignment =
            mkLocalAssignment(topologyId, executors, mkWorkerResources(100.0, 100.0, 100.0));

        ILocalizer localizer = mock(ILocalizer.class);
        Container workerContainer = mock(Container.class);
        ContainerLauncher containerLauncher = mock(ContainerLauncher.class);

        // A heartbeat stamped 10 s before the simulated "now" and one stamped at "now".
        LSWorkerHeartbeat staleHeartbeat =
            mkWorkerHB(topologyId, port, executors, Time.currentTimeSecs() - 10);
        LSWorkerHeartbeat freshHeartbeat =
            mkWorkerHB(topologyId, port, executors, Time.currentTimeSecs());
        // Consecutive stubbing: the first two reads are stale, later reads are fresh.
        when(workerContainer.readHeartbeat())
            .thenReturn(staleHeartbeat, staleHeartbeat, freshHeartbeat, freshHeartbeat);
        // Not dead on the first query, dead on every later one.
        when(workerContainer.areAllProcessesDead()).thenReturn(false, true);

        ISupervisor supervisor = mock(ISupervisor.class);
        LocalState localState = mock(LocalState.class);
        StaticState staticState = new StaticState(localizer, 5000, 120000, 1000, 1000,
            containerLauncher, "localhost", port, supervisor, localState);
        // Current and new assignment are the same object: nothing was rescheduled.
        DynamicState initialState = new DynamicState(assignment, workerContainer, assignment);

        // Step 1: stale heartbeat -> kill and plan a relaunch.
        DynamicState slotState = Slot.stateMachineStep(initialState, staticState);
        assertEquals(MachineState.KILL_AND_RELAUNCH, slotState.state);
        verify(workerContainer).kill();
        assertTrue(Time.currentTimeMillis() > 1000);

        // Step 2: processes are still alive -> force kill.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.KILL_AND_RELAUNCH, slotState.state);
        verify(workerContainer).forceKill();
        assertTrue(Time.currentTimeMillis() > 2000);

        // Step 3: processes are dead -> relaunch the same container.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.WAITING_FOR_WORKER_START, slotState.state);
        verify(workerContainer).relaunch();

        // Step 4: still waiting for the worker to start.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.WAITING_FOR_WORKER_START, slotState.state);
        assertTrue(Time.currentTimeMillis() > 3000);

        // Step 5: worker is considered started -> RUNNING.
        slotState = Slot.stateMachineStep(slotState, staticState);
        assertEquals(MachineState.RUNNING, slotState.state);
    }
}
use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.
the class SlotTest method mkWorkerHB.
/**
 * Builds a worker heartbeat populated with the given values.
 *
 * @param id       topology id stored in the heartbeat
 * @param port     port stored in the heartbeat
 * @param exec     executors stored in the heartbeat
 * @param timeSecs heartbeat timestamp, in seconds
 * @return a new heartbeat carrying exactly the four values above
 */
static LSWorkerHeartbeat mkWorkerHB(String id, int port, List<ExecutorInfo> exec, Integer timeSecs) {
    final LSWorkerHeartbeat heartbeat = new LSWorkerHeartbeat();
    heartbeat.set_time_secs(timeSecs);
    heartbeat.set_topology_id(id);
    heartbeat.set_executors(exec);
    heartbeat.set_port(port);
    return heartbeat;
}
use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.
the class Worker method doHeartBeat.
/**
 * Records a heartbeat for this worker in its local state, stamped with the current time
 * in seconds, and then asks the local state to clean itself up.
 *
 * @throws IOException declared for the local-state operations performed here
 */
public void doHeartBeat() throws IOException {
    LocalState localState = ConfigUtils.workerState(workerState.conf, workerState.workerId);

    // Each executor is read as a two-element list and converted to an ExecutorInfo
    // built from its first and second entries.
    LSWorkerHeartbeat heartbeat = new LSWorkerHeartbeat(
        Time.currentTimeSecs(),
        workerState.topologyId,
        workerState.executors.stream()
            .map(ids -> new ExecutorInfo(ids.get(0).intValue(), ids.get(1).intValue()))
            .collect(Collectors.toList()),
        workerState.port);
    localState.setWorkerHeartBeat(heartbeat);

    // Clean up here as well, just in case the supervisor is down, so the disk doesn't fill up.
    // It shouldn't take the supervisor 120 seconds between listing the dir and reading it.
    localState.cleanup(60);
}
use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.
the class Slot method handleRunning.
/**
 * State Transitions for RUNNING state.
 * PRECONDITION: container != null && currentAssignment != null
 *
 * <p>Checks, in order: whether the assignment changed (kill for changed assignment),
 * whether the main process exited, whether the heartbeat is missing, and whether the
 * heartbeat is older than {@code staticState.hbTimeoutMs} (each of the last three kills
 * and relaunches the container). If the worker is healthy, pending profiling requests are
 * processed, and finally the method sleeps for {@code staticState.monitorFreqMs}.
 *
 * @param dynamicState current state
 * @param staticState static data
 * @return the next state
 * @throws Exception on any error
 */
static DynamicState handleRunning(DynamicState dynamicState, StaticState staticState) throws Exception {
    assert (dynamicState.container != null);
    assert (dynamicState.currentAssignment != null);

    if (!equivalent(dynamicState.newAssignment, dynamicState.currentAssignment)) {
        LOG.warn("SLOT {}: Assignment Changed from {} to {}", staticState.port, dynamicState.currentAssignment, dynamicState.newAssignment);
        //Scheduling changed while running...
        return killContainerForChangedAssignment(dynamicState, staticState);
    }

    if (dynamicState.container.didMainProcessExit()) {
        LOG.warn("SLOT {}: main process has exited", staticState.port);
        return killAndRelaunchContainer(dynamicState, staticState);
    }

    LSWorkerHeartbeat hb = dynamicState.container.readHeartbeat();
    if (hb == null) {
        LOG.warn("SLOT {}: HB returned as null", staticState.port);
        // worker that never came up.
        return killAndRelaunchContainer(dynamicState, staticState);
    }

    // Multiply as long (1000L): an int multiplication would wrap once the heartbeat is more
    // than ~2.1 million seconds old, which could make a very stale heartbeat look fresh.
    long timeDiffMs = (Time.currentTimeSecs() - hb.get_time_secs()) * 1000L;
    if (timeDiffMs > staticState.hbTimeoutMs) {
        LOG.warn("SLOT {}: HB is too old {} > {}", staticState.port, timeDiffMs, staticState.hbTimeoutMs);
        return killAndRelaunchContainer(dynamicState, staticState);
    }

    //The worker is up and running check for profiling requests
    if (!dynamicState.profileActions.isEmpty()) {
        // Work on copies; the updated sets are installed through withProfileActions below.
        HashSet<TopoProfileAction> mod = new HashSet<>(dynamicState.profileActions);
        HashSet<TopoProfileAction> modPending = new HashSet<>(dynamicState.pendingStopProfileActions);
        Iterator<TopoProfileAction> iter = mod.iterator();
        while (iter.hasNext()) {
            TopoProfileAction action = iter.next();
            if (!action.topoId.equals(dynamicState.currentAssignment.get_topology_id())) {
                //Not for this topology so skip it
                iter.remove();
                LOG.warn("Dropping {} wrong topology is running", action);
            } else if (modPending.contains(action)) {
                // Already started earlier; stop it once its timestamp has passed.
                boolean isTimeForStop = Time.currentTimeMillis() > action.request.get_time_stamp();
                if (isTimeForStop) {
                    if (dynamicState.container.runProfiling(action.request, true)) {
                        LOG.debug("Stopped {} action finished", action);
                        iter.remove();
                        modPending.remove(action);
                    } else {
                        LOG.warn("Stopping {} failed, will be retried", action);
                    }
                } else {
                    LOG.debug("Still pending {} now: {}", action, Time.currentTimeMillis());
                }
            } else if (action.request.get_action() == ProfileAction.JPROFILE_STOP) {
                // start profiling and save it away to stop when timeout happens
                if (dynamicState.container.runProfiling(action.request, false)) {
                    modPending.add(action);
                    LOG.debug("Started {} now: {}", action, Time.currentTimeMillis());
                } else {
                    LOG.warn("Starting {} failed, will be retried", action);
                }
            } else {
                // Any other action is run once and dropped when it succeeds.
                if (dynamicState.container.runProfiling(action.request, false)) {
                    LOG.debug("Started {} action finished", action);
                    iter.remove();
                } else {
                    LOG.warn("Starting {} failed, will be retried", action);
                }
            }
        }
        dynamicState = dynamicState.withProfileActions(mod, modPending);
    }

    Time.sleep(staticState.monitorFreqMs);
    return dynamicState;
}
Aggregations