Search in sources :

Example 1 with PSAgentAttemptId

use of com.tencent.angel.psagent.PSAgentAttemptId in project angel by Tencent.

the class WorkerTest method testPsAgent.

/**
 * Checks the PSAgent owned by the worker looked up via {@code worker0Attempt0Id}: it must
 * exist, its attempt id must render as {@code PSAgentAttempt_0_0}, and it must report the
 * same master location as the worker.
 *
 * @throws Exception any exception raised while running the checks; it is logged and rethrown
 */
@Test
public void testPsAgent() throws Exception {
    try {
        localWorker = LocalClusterContext.get().getWorker(worker0Attempt0Id);
        worker = localWorker.getWorker();
        // test psAgent
        PSAgent psAgent = worker.getPSAgent();
        Assert.assertNotNull(psAgent);
        PSAgentAttemptId psAgentAttemptId = psAgent.getId();
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
        // failure message ("expected:<..> but was:<..>") point at the right value.
        Assert.assertEquals("PSAgentAttempt_0_0", psAgentAttemptId.toString());
        assertEquals(worker.getMasterLocation(), psAgent.getMasterLocation());
    } catch (Exception x) {
        LOG.error("run testPsAgent failed ", x);
        throw x;
    }
}
Also used : PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) PSAgent(com.tencent.angel.psagent.PSAgent) IOException(java.io.IOException) MasterServiceTest(com.tencent.angel.master.MasterServiceTest)

Example 2 with PSAgentAttemptId

use of com.tencent.angel.psagent.PSAgentAttemptId in project angel by Tencent.

the class MasterService method psAgentError.

/**
 * Handles an error report from a psagent attempt (the psagent run failed).
 *
 * <p>If the attempt is not tracked in {@code psAgentLastHeartbeatTS}, the reply carries
 * {@code PSAGENT_SHUTDOWN}. Otherwise the attempt is removed from that map, a diagnostics
 * update event and a {@code PSAGENT_ATTEMPT_FAILMSG} event are dispatched, and the reply
 * carries {@code PSAGENT_SUCCESS}.
 *
 * @param controller rpc controller of protobuf
 * @param request contains psagent attempt id, error message
 * @return response whose command tells the psagent whether the report was accepted
 * @throws ServiceException declared by the overridden RPC method
 */
@SuppressWarnings("unchecked")
@Override
public PSAgentErrorResponse psAgentError(RpcController controller, PSAgentErrorRequest request) throws ServiceException {
    PSAgentErrorResponse.Builder resBuilder = PSAgentErrorResponse.newBuilder();
    PSAgentAttemptId psAgentAttemptId = ProtobufUtil.convertToId(request.getPsAgentAttemptId());
    // A single remove() replaces containsKey() + remove(): the membership test and the
    // removal happen in one map operation, so two reports for the same attempt cannot both
    // take the "known attempt" branch and dispatch the failure events twice.
    // NOTE(review): relies on the map never holding null values — confirm.
    if (psAgentLastHeartbeatTS.remove(psAgentAttemptId) == null) {
        LOG.error("psagent attempt " + psAgentAttemptId + " is not in running worker attempt set now, shutdown it");
        resBuilder.setCommand(PSAgentCommandProto.PSAGENT_SHUTDOWN);
    } else {
        LOG.error("error happened in psagent " + psAgentAttemptId + ", error msg:" + request.getMsg());
        context.getEventHandler().handle(new PSAgentAttemptDiagnosticsUpdateEvent(psAgentAttemptId, request.getMsg()));
        context.getEventHandler().handle(new PSAgentAttemptEvent(PSAgentAttemptEventType.PSAGENT_ATTEMPT_FAILMSG, psAgentAttemptId));
        resBuilder.setCommand(PSAgentCommandProto.PSAGENT_SUCCESS);
    }
    return resBuilder.build();
}
Also used : PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId)

Example 3 with PSAgentAttemptId

use of com.tencent.angel.psagent.PSAgentAttemptId in project angel by Tencent.

the class MasterService method psAgentReport.

/**
 * Handles a state report (heartbeat) sent by a psagent attempt.
 *
 * <p>An attempt that is not tracked in {@code psAgentLastHeartbeatTS} is answered with
 * {@code PSAGENT_SHUTDOWN}. For a tracked attempt a state update event is dispatched, its
 * timestamp in the map is refreshed to the current time, and the answer is
 * {@code PSAGENT_SUCCESS}.
 *
 * @param controller rpc controller of protobuf
 * @param request the state report; carries the psagent attempt id
 * @return response whose command tells the psagent whether to continue or shut down
 * @throws ServiceException declared by the overridden RPC method
 */
@SuppressWarnings("unchecked")
@Override
public PSAgentReportResponse psAgentReport(RpcController controller, PSAgentReportRequest request) throws ServiceException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("receive ps agent state report, request=" + request);
    }
    final PSAgentAttemptId attemptId = ProtobufUtil.convertToId(request.getPsAgentAttemptId());
    final PSAgentReportResponse.Builder reply = PSAgentReportResponse.newBuilder();

    if (psAgentLastHeartbeatTS.containsKey(attemptId)) {
        // Known attempt: publish the reported state first, then refresh its heartbeat time.
        context.getEventHandler().handle(new PSAgentAttemptStateUpdateEvent(attemptId, request));
        psAgentLastHeartbeatTS.put(attemptId, System.currentTimeMillis());
        return reply.setCommand(PSAgentCommandProto.PSAGENT_SUCCESS).build();
    }

    // Unknown attempt: tell it to stop.
    LOG.error("psagent attempt " + attemptId + " is not in running worker attempt set now, shutdown it");
    return reply.setCommand(PSAgentCommandProto.PSAGENT_SHUTDOWN).build();
}
Also used : PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId)

Example 4 with PSAgentAttemptId

use of com.tencent.angel.psagent.PSAgentAttemptId in project angel by Tencent.

the class MasterService method psAgentDone.

/**
 * Handles the notification that a psagent attempt finished successfully.
 *
 * <p>If the attempt is not tracked in {@code psAgentLastHeartbeatTS}, the reply carries
 * {@code PSAGENT_SHUTDOWN}. Otherwise the attempt is removed from that map, a
 * {@code PSAGENT_ATTEMPT_SUCCESS} event is dispatched, and the reply carries
 * {@code PSAGENT_SUCCESS}.
 *
 * @param controller rpc controller of protobuf
 * @param request contains psagent attempt id
 * @return response whose command tells the psagent whether the notification was accepted
 * @throws ServiceException declared by the overridden RPC method
 */
@SuppressWarnings("unchecked")
@Override
public PSAgentDoneResponse psAgentDone(RpcController controller, PSAgentDoneRequest request) throws ServiceException {
    PSAgentAttemptId psAgentAttemptId = ProtobufUtil.convertToId(request.getPsAgentAttemptId());
    LOG.info("psagent " + psAgentAttemptId + " is done");
    PSAgentDoneResponse.Builder resBuilder = PSAgentDoneResponse.newBuilder();
    // A single remove() replaces containsKey() + remove(): the membership test and the
    // removal happen in one map operation, so a repeated "done" for the same attempt cannot
    // dispatch the success event a second time.
    // NOTE(review): relies on the map never holding null values — confirm.
    if (psAgentLastHeartbeatTS.remove(psAgentAttemptId) == null) {
        LOG.error("psagent attempt " + psAgentAttemptId + " is not in running worker attempt set now, shutdown it");
        resBuilder.setCommand(PSAgentCommandProto.PSAGENT_SHUTDOWN);
    } else {
        resBuilder.setCommand(PSAgentCommandProto.PSAGENT_SUCCESS);
        context.getEventHandler().handle(new PSAgentAttemptEvent(PSAgentAttemptEventType.PSAGENT_ATTEMPT_SUCCESS, psAgentAttemptId));
    }
    return resBuilder.build();
}
Also used : PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId)

Example 5 with PSAgentAttemptId

use of com.tencent.angel.psagent.PSAgentAttemptId in project angel by Tencent.

the class MasterService method psAgentRegister.

/**
 * Handles a registration request from a psagent attempt.
 *
 * <p>An attempt that is not tracked in {@code psAgentLastHeartbeatTS} is answered with
 * {@code PSAGENT_SHUTDOWN}. For a tracked attempt, {@code registerPSAgentAttemptId} is
 * called, a register event carrying the ip/port from the request is dispatched, and the
 * answer is {@code PSAGENT_SUCCESS}.
 *
 * @param controller rpc controller of protobuf
 * @param request contains psagent attempt id and the psagent location (ip, port)
 * @return response whose command tells the psagent whether registration was accepted
 * @throws ServiceException declared by the overridden RPC method
 */
@SuppressWarnings("unchecked")
@Override
public PSAgentRegisterResponse psAgentRegister(RpcController controller, PSAgentRegisterRequest request) throws ServiceException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("receive ps agent register, request=" + request);
    }
    final PSAgentAttemptId attemptId = ProtobufUtil.convertToId(request.getPsAgentAttemptId());

    final PSAgentCommandProto command;
    if (psAgentLastHeartbeatTS.containsKey(attemptId)) {
        registerPSAgentAttemptId(attemptId);
        Location agentLocation = new Location(request.getLocation().getIp(), request.getLocation().getPort());
        context.getEventHandler().handle(new PSAgentRegisterEvent(attemptId, agentLocation));
        command = PSAgentCommandProto.PSAGENT_SUCCESS;
    } else {
        LOG.error("psagent attempt " + attemptId + " is not in running worker attempt set now, shutdown it");
        command = PSAgentCommandProto.PSAGENT_SHUTDOWN;
    }

    // NOTE(review): this line is logged on the reject path as well — confirm that is intended.
    LOG.info("psagent " + attemptId + " register finished!");
    return PSAgentRegisterResponse.newBuilder().setCommand(command).build();
}
Also used : PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) PSLocation(com.tencent.angel.ml.matrix.transport.PSLocation) Location(com.tencent.angel.common.location.Location)

Aggregations

PSAgentAttemptId (com.tencent.angel.psagent.PSAgentAttemptId): 11; PSAttemptId (com.tencent.angel.ps.PSAttemptId): 4; WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId): 4; Id (com.tencent.angel.common.Id): 3; PSAttemptContainerAssignedEvent (com.tencent.angel.master.ps.attempt.PSAttemptContainerAssignedEvent): 2; PSAgentAttemptContainerAssignedEvent (com.tencent.angel.master.psagent.PSAgentAttemptContainerAssignedEvent): 2; WorkerAttemptContainerAssignedEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptContainerAssignedEvent): 2; Location (com.tencent.angel.common.location.Location): 1; LocalPS (com.tencent.angel.localcluster.LocalPS): 1; LocalWorker (com.tencent.angel.localcluster.LocalWorker): 1; MasterServiceTest (com.tencent.angel.master.MasterServiceTest): 1; PSAttemptDiagnosticsUpdateEvent (com.tencent.angel.master.ps.attempt.PSAttemptDiagnosticsUpdateEvent): 1; PSAttemptEvent (com.tencent.angel.master.ps.attempt.PSAttemptEvent): 1; PSAgentAttemptDiagnosticsUpdateEvent (com.tencent.angel.master.psagent.PSAgentAttemptDiagnosticsUpdateEvent): 1; PSAgentAttemptEvent (com.tencent.angel.master.psagent.PSAgentAttemptEvent): 1; WorkerAttemptDiagnosticsUpdateEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptDiagnosticsUpdateEvent): 1; WorkerAttemptEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptEvent): 1; PSLocation (com.tencent.angel.ml.matrix.transport.PSLocation): 1; PSAgent (com.tencent.angel.psagent.PSAgent): 1; IOException (java.io.IOException): 1