Search in sources :

Example 6 with LSWorkerHeartbeat

use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.

the class SlotTest method testReschedule.

/**
 * Drives {@code Slot.stateMachineStep} repeatedly for a slot that starts with the
 * "CURRENT" assignment and container while the "NEW" assignment is supplied as the
 * third {@code DynamicState} argument, asserting the state reached after each step:
 * KILL, KILL, WAITING_FOR_BASIC_LOCALIZATION, WAITING_FOR_BLOB_LOCALIZATION,
 * WAITING_FOR_WORKER_START and then RUNNING three times.
 * All collaborators (containers, localizer, launcher, supervisor, local state) are mocks
 * and the clock is a {@code SimulatedTime} started at 1010.
 */
@Test
public void testReschedule() throws Exception {
    try (SimulatedTime t = new SimulatedTime(1010)) {
        int port = 8080;
        // --- Fixture for the currently running topology ("CURRENT") ---
        String cTopoId = "CURRENT";
        List<ExecutorInfo> cExecList = mkExecutorInfoList(1, 2, 3, 4, 5);
        LocalAssignment cAssignment = mkLocalAssignment(cTopoId, cExecList, mkWorkerResources(100.0, 100.0, 100.0));
        Container cContainer = mock(Container.class);
        LSWorkerHeartbeat chb = mkWorkerHB(cTopoId, port, cExecList, Time.currentTimeSecs());
        when(cContainer.readHeartbeat()).thenReturn(chb);
        // First query reports processes still alive, every later query reports them dead.
        when(cContainer.areAllProcessesDead()).thenReturn(false, true);
        // --- Fixture for the replacement topology ("NEW") ---
        String nTopoId = "NEW";
        List<ExecutorInfo> nExecList = mkExecutorInfoList(1, 2, 3, 4, 5);
        LocalAssignment nAssignment = mkLocalAssignment(nTopoId, nExecList, mkWorkerResources(100.0, 100.0, 100.0));
        ILocalizer localizer = mock(ILocalizer.class);
        Container nContainer = mock(Container.class);
        LocalState state = mock(LocalState.class);
        ContainerLauncher containerLauncher = mock(ContainerLauncher.class);
        // Launching the new assignment on this port hands back the mocked new container.
        when(containerLauncher.launchContainer(port, nAssignment, state)).thenReturn(nContainer);
        // NOTE(review): this heartbeat is built with port 100 rather than the slot port 8080 - confirm intent.
        LSWorkerHeartbeat nhb = mkWorkerHB(nTopoId, 100, nExecList, Time.currentTimeSecs());
        when(nContainer.readHeartbeat()).thenReturn(nhb, nhb);
        // Separate mocked futures for the base-blob and topology-blob downloads so each can be verified.
        @SuppressWarnings("unchecked") Future<Void> baseFuture = mock(Future.class);
        when(localizer.requestDownloadBaseTopologyBlobs(nAssignment, port)).thenReturn(baseFuture);
        @SuppressWarnings("unchecked") Future<Void> blobFuture = mock(Future.class);
        when(localizer.requestDownloadTopologyBlobs(nAssignment, port)).thenReturn(blobFuture);
        ISupervisor iSuper = mock(ISupervisor.class);
        StaticState staticState = new StaticState(localizer, 5000, 120000, 1000, 1000, containerLauncher, "localhost", port, iSuper, state);
        DynamicState dynamicState = new DynamicState(cAssignment, cContainer, nAssignment);
        // Step 1: expect KILL, kill() on the old container, and the base-blob download requested and recorded as pending.
        DynamicState nextState = Slot.stateMachineStep(dynamicState, staticState);
        assertEquals(MachineState.KILL, nextState.state);
        verify(cContainer).kill();
        verify(localizer).requestDownloadBaseTopologyBlobs(nAssignment, port);
        assertSame("pendingDownload not set properly", baseFuture, nextState.pendingDownload);
        assertEquals(nAssignment, nextState.pendingLocalization);
        assertTrue(Time.currentTimeMillis() > 1000);
        // Step 2: still KILL, forceKill() now issued; the pending download is unchanged.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.KILL, nextState.state);
        verify(cContainer).forceKill();
        assertSame("pendingDownload not set properly", baseFuture, nextState.pendingDownload);
        assertEquals(nAssignment, nextState.pendingLocalization);
        assertTrue(Time.currentTimeMillis() > 2000);
        // Step 3: old container is cleaned up and its slot released; state moves to waiting on base localization.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.WAITING_FOR_BASIC_LOCALIZATION, nextState.state);
        verify(cContainer).cleanUp();
        verify(localizer).releaseSlotFor(cAssignment, port);
        assertTrue(Time.currentTimeMillis() > 2000);
        // Step 4: base future is awaited with a 1000 ms timeout, then the topology-blob download becomes the pending one.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.WAITING_FOR_BLOB_LOCALIZATION, nextState.state);
        verify(baseFuture).get(1000, TimeUnit.MILLISECONDS);
        verify(localizer).requestDownloadTopologyBlobs(nAssignment, port);
        assertSame("pendingDownload not set properly", blobFuture, nextState.pendingDownload);
        assertEquals(nAssignment, nextState.pendingLocalization);
        assertTrue(Time.currentTimeMillis() > 2000);
        // Step 5: blob future is awaited, the new container is launched, and the pending fields are cleared.
        nextState = Slot.stateMachineStep(nextState, staticState);
        verify(blobFuture).get(1000, TimeUnit.MILLISECONDS);
        verify(containerLauncher).launchContainer(port, nAssignment, state);
        assertEquals(MachineState.WAITING_FOR_WORKER_START, nextState.state);
        assertSame("pendingDownload is not null", null, nextState.pendingDownload);
        assertSame(null, nextState.pendingLocalization);
        assertSame(nAssignment, nextState.currentAssignment);
        assertSame(nContainer, nextState.container);
        assertTrue(Time.currentTimeMillis() > 2000);
        // Step 6: the slot is expected to reach RUNNING with the new assignment and container.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.RUNNING, nextState.state);
        assertSame("pendingDownload is not null", null, nextState.pendingDownload);
        assertSame(null, nextState.pendingLocalization);
        assertSame(nAssignment, nextState.currentAssignment);
        assertSame(nContainer, nextState.container);
        assertTrue(Time.currentTimeMillis() > 2000);
        // Step 7: a further step keeps the slot in RUNNING; the simulated clock is expected to be past 3000 ms.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.RUNNING, nextState.state);
        assertSame("pendingDownload is not null", null, nextState.pendingDownload);
        assertSame(null, nextState.pendingLocalization);
        assertSame(nAssignment, nextState.currentAssignment);
        assertSame(nContainer, nextState.container);
        assertTrue(Time.currentTimeMillis() > 3000);
        // Step 8: still RUNNING; the simulated clock is expected to be past 4000 ms.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.RUNNING, nextState.state);
        assertSame("pendingDownload is not null", null, nextState.pendingDownload);
        assertSame(null, nextState.pendingLocalization);
        assertSame(nAssignment, nextState.currentAssignment);
        assertSame(nContainer, nextState.container);
        assertTrue(Time.currentTimeMillis() > 4000);
    }
}
Also used : SimulatedTime(org.apache.storm.utils.Time.SimulatedTime) StaticState(org.apache.storm.daemon.supervisor.Slot.StaticState) ISupervisor(org.apache.storm.scheduler.ISupervisor) LSWorkerHeartbeat(org.apache.storm.generated.LSWorkerHeartbeat) ExecutorInfo(org.apache.storm.generated.ExecutorInfo) ILocalizer(org.apache.storm.localizer.ILocalizer) LocalAssignment(org.apache.storm.generated.LocalAssignment) DynamicState(org.apache.storm.daemon.supervisor.Slot.DynamicState) LocalState(org.apache.storm.utils.LocalState) Test(org.junit.Test)

Example 7 with LSWorkerHeartbeat

use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.

the class SlotTest method testRelaunch.

/**
 * Drives {@code Slot.stateMachineStep} for a slot whose assignment does not change
 * (the same {@code LocalAssignment} is passed in both assignment positions of
 * {@code DynamicState}) but whose container first reports a heartbeat timestamped
 * 10 seconds before the simulated current time. Asserts the state after each step:
 * KILL_AND_RELAUNCH, KILL_AND_RELAUNCH, WAITING_FOR_WORKER_START,
 * WAITING_FOR_WORKER_START and finally RUNNING.
 */
@Test
public void testRelaunch() throws Exception {
    try (SimulatedTime t = new SimulatedTime(1010)) {
        int port = 8080;
        String topoId = "CURRENT";
        List<ExecutorInfo> execList = mkExecutorInfoList(1, 2, 3, 4, 5);
        LocalAssignment assignment = mkLocalAssignment(topoId, execList, mkWorkerResources(100.0, 100.0, 100.0));
        ILocalizer localizer = mock(ILocalizer.class);
        Container container = mock(Container.class);
        ContainerLauncher containerLauncher = mock(ContainerLauncher.class);
        // oldhb is stamped 10 seconds in the past; goodhb is stamped with the current simulated time.
        LSWorkerHeartbeat oldhb = mkWorkerHB(topoId, port, execList, Time.currentTimeSecs() - 10);
        LSWorkerHeartbeat goodhb = mkWorkerHB(topoId, port, execList, Time.currentTimeSecs());
        // The first two heartbeat reads return the old heartbeat, later reads return the good one.
        when(container.readHeartbeat()).thenReturn(oldhb, oldhb, goodhb, goodhb);
        // First query reports processes still alive, every later query reports them dead.
        when(container.areAllProcessesDead()).thenReturn(false, true);
        ISupervisor iSuper = mock(ISupervisor.class);
        LocalState state = mock(LocalState.class);
        StaticState staticState = new StaticState(localizer, 5000, 120000, 1000, 1000, containerLauncher, "localhost", port, iSuper, state);
        DynamicState dynamicState = new DynamicState(assignment, container, assignment);
        // Step 1: expect KILL_AND_RELAUNCH with kill() issued on the container.
        DynamicState nextState = Slot.stateMachineStep(dynamicState, staticState);
        assertEquals(MachineState.KILL_AND_RELAUNCH, nextState.state);
        verify(container).kill();
        assertTrue(Time.currentTimeMillis() > 1000);
        // Step 2: still KILL_AND_RELAUNCH, forceKill() now issued.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.KILL_AND_RELAUNCH, nextState.state);
        verify(container).forceKill();
        assertTrue(Time.currentTimeMillis() > 2000);
        // Step 3: the same container is relaunched and the slot waits for the worker to start.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.WAITING_FOR_WORKER_START, nextState.state);
        verify(container).relaunch();
        // Step 4: the slot is expected to remain in WAITING_FOR_WORKER_START for one more step.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.WAITING_FOR_WORKER_START, nextState.state);
        assertTrue(Time.currentTimeMillis() > 3000);
        // Step 5: the slot is expected to reach RUNNING.
        nextState = Slot.stateMachineStep(nextState, staticState);
        assertEquals(MachineState.RUNNING, nextState.state);
    }
}
Also used : SimulatedTime(org.apache.storm.utils.Time.SimulatedTime) StaticState(org.apache.storm.daemon.supervisor.Slot.StaticState) ISupervisor(org.apache.storm.scheduler.ISupervisor) LSWorkerHeartbeat(org.apache.storm.generated.LSWorkerHeartbeat) ExecutorInfo(org.apache.storm.generated.ExecutorInfo) ILocalizer(org.apache.storm.localizer.ILocalizer) LocalAssignment(org.apache.storm.generated.LocalAssignment) DynamicState(org.apache.storm.daemon.supervisor.Slot.DynamicState) LocalState(org.apache.storm.utils.LocalState) Test(org.junit.Test)

Example 8 with LSWorkerHeartbeat

use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.

the class SlotTest method mkWorkerHB.

/**
 * Builds an {@link LSWorkerHeartbeat} and fills in its four fields from the arguments.
 *
 * @param id topology id stored on the heartbeat
 * @param port port stored on the heartbeat
 * @param exec executors stored on the heartbeat
 * @param timeSecs heartbeat timestamp, in seconds
 * @return the populated heartbeat
 */
static LSWorkerHeartbeat mkWorkerHB(String id, int port, List<ExecutorInfo> exec, Integer timeSecs) {
    final LSWorkerHeartbeat heartbeat = new LSWorkerHeartbeat();

    // Identity of the worker the heartbeat belongs to.
    heartbeat.set_topology_id(id);
    heartbeat.set_port(port);

    // What the worker runs and when it last reported.
    heartbeat.set_executors(exec);
    heartbeat.set_time_secs(timeSecs);

    return heartbeat;
}
Also used : LSWorkerHeartbeat(org.apache.storm.generated.LSWorkerHeartbeat)

Example 9 with LSWorkerHeartbeat

use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.

the class Worker method doHeartBeat.

/**
 * Records a heartbeat for this worker in its local state, stamped with the current time,
 * and then asks the local state to clean up.
 *
 * @throws IOException declared by this method; presumably raised by the local-state I/O - confirm
 */
public void doHeartBeat() throws IOException {
    final LocalState localState = ConfigUtils.workerState(workerState.conf, workerState.workerId);

    // Capture the timestamp before converting the executors, matching the argument order of the heartbeat.
    final int nowSecs = Time.currentTimeSecs();
    final String topologyId = workerState.topologyId;
    // Each executor is read as a two-element list whose entries become the ExecutorInfo arguments.
    final List<ExecutorInfo> executorInfos = workerState.executors.stream()
        .map(ids -> new ExecutorInfo(ids.get(0).intValue(), ids.get(1).intValue()))
        .collect(Collectors.toList());

    final LSWorkerHeartbeat heartbeat = new LSWorkerHeartbeat(nowSecs, topologyId, executorInfos, workerState.port);
    localState.setWorkerHeartBeat(heartbeat);

    // Clean up in case the supervisor is down, so that the disk doesn't fill up.
    // It shouldn't take the supervisor 120 seconds between listing the dir and reading it.
    localState.cleanup(60);
}
Also used : LSWorkerHeartbeat(org.apache.storm.generated.LSWorkerHeartbeat) IRunningExecutor(org.apache.storm.executor.IRunningExecutor) LoggerFactory(org.slf4j.LoggerFactory) AuthUtils(org.apache.storm.security.auth.AuthUtils) DaemonType(org.apache.storm.cluster.DaemonType) Map(java.util.Map) LSWorkerHeartbeat(org.apache.storm.generated.LSWorkerHeartbeat) Executor(org.apache.storm.executor.Executor) IStateStorage(org.apache.storm.cluster.IStateStorage) Collection(java.util.Collection) ExecutorShutdown(org.apache.storm.executor.ExecutorShutdown) WorkerBackpressureCallback(org.apache.storm.utils.WorkerBackpressureCallback) LogConfig(org.apache.storm.generated.LogConfig) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) Collectors(java.util.stream.Collectors) ObjectUtils(org.apache.commons.lang.ObjectUtils) Credentials(org.apache.storm.generated.Credentials) Time(org.apache.storm.utils.Time) List(java.util.List) ConfigUtils(org.apache.storm.utils.ConfigUtils) SysOutOverSLF4J(uk.org.lidalia.sysoutslf4j.context.SysOutOverSLF4J) IContext(org.apache.storm.messaging.IContext) Config(org.apache.storm.Config) Shutdownable(org.apache.storm.daemon.Shutdownable) StormCommon(org.apache.storm.daemon.StormCommon) ClusterUtils(org.apache.storm.cluster.ClusterUtils) DisruptorBackpressureCallback(org.apache.storm.utils.DisruptorBackpressureCallback) LocalExecutor(org.apache.storm.executor.LocalExecutor) HashMap(java.util.HashMap) ACL(org.apache.zookeeper.data.ACL) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) ArrayList(java.util.ArrayList) IAutoCredentials(org.apache.storm.security.auth.IAutoCredentials) Charset(java.nio.charset.Charset) ExecutorInfo(org.apache.storm.generated.ExecutorInfo) WorkerBackpressureThread(org.apache.storm.utils.WorkerBackpressureThread) DaemonCommon(org.apache.storm.daemon.DaemonCommon) EventHandler(com.lmax.disruptor.EventHandler) 
ClusterStateContext(org.apache.storm.cluster.ClusterStateContext) LocalState(org.apache.storm.utils.LocalState) Logger(org.slf4j.Logger) ExecutorStats(org.apache.storm.generated.ExecutorStats) IConnection(org.apache.storm.messaging.IConnection) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) TaskMessage(org.apache.storm.messaging.TaskMessage) IStormClusterState(org.apache.storm.cluster.IStormClusterState) Utils(org.apache.storm.utils.Utils) File(java.io.File) Subject(javax.security.auth.Subject) Preconditions(com.google.common.base.Preconditions) StatsUtil(org.apache.storm.stats.StatsUtil) ExecutorInfo(org.apache.storm.generated.ExecutorInfo) LocalState(org.apache.storm.utils.LocalState)

Example 10 with LSWorkerHeartbeat

use of org.apache.storm.generated.LSWorkerHeartbeat in project storm by apache.

the class Slot method handleRunning.

/**
 * State Transitions for RUNNING state.
 * PRECONDITION: container != null && currentAssignment != null
 *
 * <p>Checks, in order: whether the assignment changed, whether the main process exited,
 * whether a heartbeat can be read, and whether that heartbeat is older than
 * {@code staticState.hbTimeoutMs}. Any of these hands off to the matching kill helper.
 * Otherwise pending profiling requests are processed and the method sleeps for
 * {@code staticState.monitorFreqMs} before returning.
 *
 * @param dynamicState current state
 * @param staticState static data
 * @return the next state
 * @throws Exception on any error
 */
static DynamicState handleRunning(DynamicState dynamicState, StaticState staticState) throws Exception {
    assert (dynamicState.container != null);
    assert (dynamicState.currentAssignment != null);
    if (!equivalent(dynamicState.newAssignment, dynamicState.currentAssignment)) {
        LOG.warn("SLOT {}: Assignment Changed from {} to {}", staticState.port, dynamicState.currentAssignment, dynamicState.newAssignment);
        //Scheduling changed while running...
        return killContainerForChangedAssignment(dynamicState, staticState);
    }
    if (dynamicState.container.didMainProcessExit()) {
        LOG.warn("SLOT {}: main process has exited", staticState.port);
        return killAndRelaunchContainer(dynamicState, staticState);
    }
    LSWorkerHeartbeat hb = dynamicState.container.readHeartbeat();
    if (hb == null) {
        LOG.warn("SLOT {}: HB returned as null", staticState.port);
        // worker that never came up.
        return killAndRelaunchContainer(dynamicState, staticState);
    }
    // Multiply by a long literal: an int product overflows once the heartbeat is more than
    // ~2.1 million seconds old (e.g. an unset timestamp of 0), which could make a stale
    // heartbeat look fresh.
    long timeDiffMs = (Time.currentTimeSecs() - hb.get_time_secs()) * 1000L;
    if (timeDiffMs > staticState.hbTimeoutMs) {
        LOG.warn("SLOT {}: HB is too old {} > {}", staticState.port, timeDiffMs, staticState.hbTimeoutMs);
        return killAndRelaunchContainer(dynamicState, staticState);
    }
    //The worker is up and running check for profiling requests
    if (!dynamicState.profileActions.isEmpty()) {
        // Work on copies so the incoming state object is not modified; the results are
        // attached to a new state via withProfileActions below.
        HashSet<TopoProfileAction> mod = new HashSet<>(dynamicState.profileActions);
        HashSet<TopoProfileAction> modPending = new HashSet<>(dynamicState.pendingStopProfileActions);
        Iterator<TopoProfileAction> iter = mod.iterator();
        while (iter.hasNext()) {
            TopoProfileAction action = iter.next();
            if (!action.topoId.equals(dynamicState.currentAssignment.get_topology_id())) {
                //Not for this topology so skip it
                iter.remove();
                LOG.warn("Dropping {} wrong topology is running", action);
            } else {
                if (modPending.contains(action)) {
                    // Already started earlier; stop it once the request's time stamp has passed.
                    boolean isTimeForStop = Time.currentTimeMillis() > action.request.get_time_stamp();
                    if (isTimeForStop) {
                        if (dynamicState.container.runProfiling(action.request, true)) {
                            LOG.debug("Stopped {} action finished", action);
                            iter.remove();
                            modPending.remove(action);
                        } else {
                            LOG.warn("Stopping {} failed, will be retried", action);
                        }
                    } else {
                        LOG.debug("Still pending {} now: {}", action, Time.currentTimeMillis());
                    }
                } else {
                    // start profiling and save it away to stop when timeout happens
                    if (action.request.get_action() == ProfileAction.JPROFILE_STOP) {
                        if (dynamicState.container.runProfiling(action.request, false)) {
                            modPending.add(action);
                            LOG.debug("Started {} now: {}", action, Time.currentTimeMillis());
                        } else {
                            LOG.warn("Starting {} failed, will be retried", action);
                        }
                    } else {
                        // Any other action is run once and dropped from the set when it succeeds.
                        if (dynamicState.container.runProfiling(action.request, false)) {
                            LOG.debug("Started {} action finished", action);
                            iter.remove();
                        } else {
                            LOG.warn("Starting {} failed, will be retried", action);
                        }
                    }
                }
            }
        }
        dynamicState = dynamicState.withProfileActions(mod, modPending);
    }
    Time.sleep(staticState.monitorFreqMs);
    return dynamicState;
}
Also used : LSWorkerHeartbeat(org.apache.storm.generated.LSWorkerHeartbeat) HashSet(java.util.HashSet)

Aggregations

LSWorkerHeartbeat (org.apache.storm.generated.LSWorkerHeartbeat)11 LocalState (org.apache.storm.utils.LocalState)7 ExecutorInfo (org.apache.storm.generated.ExecutorInfo)6 DynamicState (org.apache.storm.daemon.supervisor.Slot.DynamicState)5 StaticState (org.apache.storm.daemon.supervisor.Slot.StaticState)5 LocalAssignment (org.apache.storm.generated.LocalAssignment)5 ILocalizer (org.apache.storm.localizer.ILocalizer)4 ISupervisor (org.apache.storm.scheduler.ISupervisor)4 SimulatedTime (org.apache.storm.utils.Time.SimulatedTime)4 Test (org.junit.Test)4 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Preconditions (com.google.common.base.Preconditions)1 EventHandler (com.lmax.disruptor.EventHandler)1 File (java.io.File)1 IOException (java.io.IOException)1 Charset (java.nio.charset.Charset)1 PrivilegedExceptionAction (java.security.PrivilegedExceptionAction)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1