Search in sources :

Example 1 with Id

use of com.tencent.angel.common.Id in project angel by Tencent.

the class LocalContainerAllocator method requestContainer.

/**
 * Creates a new {@link LocalContainer} for the attempt named in the event and dispatches the
 * "container assigned" event that corresponds to the concrete type of the attempt id.
 *
 * <p>An id that is neither a PS, a PSAgent, nor a worker attempt id is ignored.
 *
 * @param event allocator event carrying the id of the attempt that asks for a container
 */
@SuppressWarnings("unchecked")
private void requestContainer(ContainerAllocatorEvent event) {
    final LocalContainer container = new LocalContainer();
    final Id requester = event.getTaskId();

    if (requester instanceof PSAttemptId) {
        final PSAttemptId psAttemptId = (PSAttemptId) requester;
        context.getEventHandler().handle(new PSAttemptContainerAssignedEvent(psAttemptId, container));
        return;
    }

    if (requester instanceof PSAgentAttemptId) {
        final PSAgentAttemptId psAgentAttemptId = (PSAgentAttemptId) requester;
        context.getEventHandler().handle(new PSAgentAttemptContainerAssignedEvent(psAgentAttemptId, container));
        return;
    }

    if (requester instanceof WorkerAttemptId) {
        final WorkerAttemptId workerAttemptId = (WorkerAttemptId) requester;
        context.getEventHandler().handle(new WorkerAttemptContainerAssignedEvent(workerAttemptId, container));
    }
}
Also used : PSAttemptContainerAssignedEvent(com.tencent.angel.master.ps.attempt.PSAttemptContainerAssignedEvent) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) PSAgentAttemptContainerAssignedEvent(com.tencent.angel.master.psagent.PSAgentAttemptContainerAssignedEvent) PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) Id(com.tencent.angel.common.Id) PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptContainerAssignedEvent(com.tencent.angel.master.worker.attempt.WorkerAttemptContainerAssignedEvent)

Example 2 with Id

use of com.tencent.angel.common.Id in project angel by Tencent.

the class LocalContainerLauncher method launch.

/**
 * Launches the component identified by the event's id and reports the result through the
 * event handler.
 *
 * <ul>
 *   <li>PS attempt: builds a {@link LocalPS}, dispatches {@code PA_CONTAINER_LAUNCHED}, then starts
 *       it and registers it with the {@link LocalClusterContext}. Any exception from start or
 *       registration is logged and answered with {@code PA_CONTAINER_LAUNCH_FAILED}.</li>
 *   <li>PSAgent attempt: only dispatches {@code PSAGENT_ATTEMPT_CONTAINER_LAUNCHED}.</li>
 *   <li>Worker attempt: builds a {@link LocalWorker}, dispatches {@code CONTAINER_LAUNCHED}, then
 *       starts and registers it. Any exception is logged and answered with
 *       {@code CONTAINER_LAUNCH_FAILED}.</li>
 * </ul>
 *
 * <p>Note that the "launched" event is dispatched before the start is attempted, so a failed
 * start produces a "launched" event followed by a "launch failed" event. Ids of any other type
 * are ignored.
 *
 * @param event launcher event carrying the id of the attempt to launch
 */
@SuppressWarnings("unchecked")
private void launch(ContainerLauncherEvent event) {
    final Id target = event.getId();

    if (target instanceof PSAttemptId) {
        final PSAttemptId psAttemptId = (PSAttemptId) target;
        final LocalPS localPS = new LocalPS(psAttemptId, context.getMasterService().getLocation(), context.getConf());
        context.getEventHandler().handle(new PSAttemptEvent(PSAttemptEventType.PA_CONTAINER_LAUNCHED, psAttemptId));
        try {
            localPS.start();
            LocalClusterContext.get().addPS(psAttemptId, localPS);
        } catch (Exception e) {
            LOG.error("launch ps failed.", e);
            context.getEventHandler().handle(new PSAttemptEvent(PSAttemptEventType.PA_CONTAINER_LAUNCH_FAILED, psAttemptId));
        }
        return;
    }

    if (target instanceof PSAgentAttemptId) {
        final PSAgentAttemptId psAgentAttemptId = (PSAgentAttemptId) target;
        context.getEventHandler().handle(new PSAgentAttemptEvent(PSAgentAttemptEventType.PSAGENT_ATTEMPT_CONTAINER_LAUNCHED, psAgentAttemptId));
        return;
    }

    if (target instanceof WorkerAttemptId) {
        final WorkerAttemptId workerAttemptId = (WorkerAttemptId) target;
        final LocalWorker localWorker = new LocalWorker(context.getConf(), context.getApplicationId(), context.getUser(), workerAttemptId, context.getMasterService().getLocation(), 0, false);
        context.getEventHandler().handle(new WorkerAttemptEvent(WorkerAttemptEventType.CONTAINER_LAUNCHED, workerAttemptId));
        try {
            localWorker.start();
            LocalClusterContext.get().addWorker(workerAttemptId, localWorker);
        } catch (Exception e) {
            LOG.error("launch worker failed.", e);
            context.getEventHandler().handle(new WorkerAttemptEvent(WorkerAttemptEventType.CONTAINER_LAUNCH_FAILED, workerAttemptId));
        }
    }
}
Also used : PSAttemptId(com.tencent.angel.ps.PSAttemptId) PSAgentAttemptEvent(com.tencent.angel.master.psagent.PSAgentAttemptEvent) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) LocalWorker(com.tencent.angel.localcluster.LocalWorker) LocalPS(com.tencent.angel.localcluster.LocalPS) PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) WorkerAttemptEvent(com.tencent.angel.master.worker.attempt.WorkerAttemptEvent) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) Id(com.tencent.angel.common.Id) PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) PSAttemptEvent(com.tencent.angel.master.ps.attempt.PSAttemptEvent)

Example 3 with Id

use of com.tencent.angel.common.Id in project angel by Tencent.

the class YarnContainerAllocator method assignContainers.

/**
 * Matches newly allocated containers to pending container requests in three passes: host-local
 * first, then rack-local, then any remaining request registered under the container's priority.
 *
 * <p>Every container that gets matched is handed to {@code containerAssigned} and removed from
 * {@code allocatedContainers} through the iterator, so on return the list holds only the
 * containers that could not be matched.
 *
 * @param allocatedContainers containers to assign; modified in place, so its iterator must
 *        support {@link Iterator#remove()}
 */
private void assignContainers(List<Container> allocatedContainers) {
    // try to assign to all nodes first to match node local
    Iterator<Container> it = allocatedContainers.iterator();
    while (it.hasNext()) {
        Container allocated = it.next();
        Map<String, LinkedList<Id>> hostToIDListMap = hostToIDListMaps.get(allocated.getPriority());
        Map<Id, ContainerRequest> idToRequestMap = idToRequestMaps.get(allocated.getPriority());
        if (hostToIDListMap == null || hostToIDListMap.isEmpty() || idToRequestMap == null || idToRequestMap.isEmpty()) {
            continue;
        }
        String host = allocated.getNodeId().getHost();
        LinkedList<Id> list = hostToIDListMap.get(host);
        // Ids are consumed from the head of the list; an id with no pending request left in
        // idToRequestMap is simply dropped and the next one is tried.
        while (list != null && list.size() > 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Host matched to the request list " + host);
            }
            Id tId = list.removeFirst();
            if (idToRequestMap.containsKey(tId)) {
                ContainerRequest assigned = idToRequestMap.remove(tId);
                containerAssigned(allocated, assigned);
                it.remove();
                hostLocalAssigned++;
                // Guarded like every other debug statement in this method so the message is
                // not concatenated when debug logging is off.
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Assigned based on host match " + host);
                }
                break;
            }
        }
    }
    // try to match all rack local
    it = allocatedContainers.iterator();
    while (it.hasNext()) {
        Container allocated = it.next();
        Map<String, LinkedList<Id>> rackToIDListMap = rackToIDListMaps.get(allocated.getPriority());
        Map<Id, ContainerRequest> idToRequestMap = idToRequestMaps.get(allocated.getPriority());
        if (rackToIDListMap == null || rackToIDListMap.isEmpty() || idToRequestMap == null || idToRequestMap.isEmpty()) {
            continue;
        }
        String host = allocated.getNodeId().getHost();
        String rack = RackResolver.resolve(host).getNetworkLocation();
        LinkedList<Id> list = rackToIDListMap.get(rack);
        while (list != null && list.size() > 0) {
            Id tId = list.removeFirst();
            if (idToRequestMap.containsKey(tId)) {
                ContainerRequest assigned = idToRequestMap.remove(tId);
                containerAssigned(allocated, assigned);
                it.remove();
                rackLocalAssigned++;
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Assigned based on rack match " + rack);
                }
                break;
            }
        }
    }
    // assign remaining
    it = allocatedContainers.iterator();
    while (it.hasNext()) {
        Container allocated = it.next();
        Map<Id, ContainerRequest> idToRequestMap = idToRequestMaps.get(allocated.getPriority());
        // Containers at the PSAgent priority are never assigned by this fallback pass.
        // The priorities are compared by value: the previous reference comparison (==) only
        // matched when both sides happened to be the very same object, whereas the maps above
        // already key on the priority's equals/hashCode.
        if ((context.getPSAgentManager() != null && java.util.Objects.equals(allocated.getPriority(), context.getPSAgentManager().getPsAgentPriority())) || idToRequestMap == null || idToRequestMap.isEmpty()) {
            continue;
        }
        // No locality constraint left: take whichever pending request the map yields first.
        Id tId = idToRequestMap.keySet().iterator().next();
        ContainerRequest assigned = idToRequestMap.remove(tId);
        containerAssigned(allocated, assigned);
        it.remove();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Assigned based on * match");
        }
    }
}
Also used : PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) Id(com.tencent.angel.common.Id)

Example 4 with Id

use of com.tencent.angel.common.Id in project angel by Tencent.

the class YarnContainerAllocator method handleFinishContainers.

@SuppressWarnings("unchecked")
private void handleFinishContainers(List<ContainerStatus> finishedContainers) {
    for (ContainerStatus cont : finishedContainers) {
        LOG.info("Received completed container:" + cont);
        Id id = assignedContainerToIDMap.get(cont.getContainerId());
        if (id == null) {
            LOG.error("Container complete event for unknown container id " + cont.getContainerId());
        } else {
            assignedContainerToIDMap.remove(cont.getContainerId());
            idToContainerMap.remove(id);
            // dispatch container exit message to corresponding components
            String diagnostics = StringInterner.weakIntern(cont.getDiagnostics());
            if (id instanceof PSAttemptId) {
                context.getEventHandler().handle(new PSAttemptDiagnosticsUpdateEvent(diagnostics, (PSAttemptId) id));
                context.getEventHandler().handle(createContainerFinishedEvent(cont, (PSAttemptId) id));
            } else if (id instanceof PSAgentAttemptId) {
                context.getEventHandler().handle(new PSAgentAttemptDiagnosticsUpdateEvent((PSAgentAttemptId) id, diagnostics));
                context.getEventHandler().handle(createContainerFinishedEvent(cont, (PSAgentAttemptId) id));
            } else if (id instanceof WorkerAttemptId) {
                context.getEventHandler().handle(new WorkerAttemptDiagnosticsUpdateEvent((WorkerAttemptId) id, diagnostics));
                context.getEventHandler().handle(createContainerFinishedEvent(cont, (WorkerAttemptId) id));
            }
        }
    }
}
Also used : PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) WorkerAttemptDiagnosticsUpdateEvent(com.tencent.angel.master.worker.attempt.WorkerAttemptDiagnosticsUpdateEvent) PSAttemptDiagnosticsUpdateEvent(com.tencent.angel.master.ps.attempt.PSAttemptDiagnosticsUpdateEvent) PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) PSAgentAttemptId(com.tencent.angel.psagent.PSAgentAttemptId) PSAttemptId(com.tencent.angel.ps.PSAttemptId) WorkerAttemptId(com.tencent.angel.worker.WorkerAttemptId) Id(com.tencent.angel.common.Id) PSAgentAttemptDiagnosticsUpdateEvent(com.tencent.angel.master.psagent.PSAgentAttemptDiagnosticsUpdateEvent)

Example 5 with Id

use of com.tencent.angel.common.Id in project angel by Tencent.

the class TaskCalPerfChecker method check.

/**
 * Finds slow workers by comparing each task's calculation rate with the average over all tasks.
 *
 * <p>The rate is the calculation time in milliseconds per 10000 samples. Every task that reports
 * both the sample counter and the calculation-time counter contributes to the average, but only
 * tasks with more than 5000000 samples are compared against it. A task counts as slow when
 * {@code averageRate < rate * slowestDiscount}; the worker returned by the worker manager for
 * that task, if any, is then reported.
 *
 * @param context application master context providing the configuration, the task manager and
 *        the worker manager
 * @return ids of the workers considered slow, without duplicates; empty if there are none
 */
@Override
public List<Id> check(AMContext context) {
    double slowestDiscount = context.getConf().getDouble(AngelConf.ANGEL_AM_TASK_SLOWEST_DISCOUNT, AngelConf.DEFAULT_ANGEL_AM_TASK_SLOWEST_DISCOUNT);
    LOG.info("start to check slow workers use TaskCalPerfChecker policy, slowestDiscount = " + slowestDiscount);
    // A set, because several slow tasks may map to the same worker.
    Set<Id> slowWorkers = new HashSet<Id>();
    AMTaskManager taskManage = context.getTaskManager();
    WorkerManager workerManager = context.getWorkerManager();
    Collection<AMTask> tasks = taskManage.getTasks();
    long totalSamples = 0;
    long totalCalTimeMs = 0;
    double averageRate = 0.0;
    Map<TaskId, Double> taskIdToRateMap = new HashMap<TaskId, Double>(tasks.size());
    for (AMTask task : tasks) {
        if (task.getMetrics().containsKey(TaskCounter.TOTAL_CALCULATE_SAMPLES) && task.getMetrics().containsKey(TaskCounter.TOTAL_CALCULATE_TIME_MS)) {
            long sampleNum = Long.valueOf(task.getMetrics().get(TaskCounter.TOTAL_CALCULATE_SAMPLES));
            // Kept as a long: the counter is an integral value and is summed into a long total.
            long calTimeMs = Long.valueOf(task.getMetrics().get(TaskCounter.TOTAL_CALCULATE_TIME_MS));
            LOG.info("for task " + task.getTaskId() + ", sampleNum = " + sampleNum + ", calTimeMs = " + calTimeMs);
            totalSamples += sampleNum;
            totalCalTimeMs += calTimeMs;
            // Tasks with few samples are excluded from the comparison (but not from the average).
            if (sampleNum > 5000000) {
                double rate = (double) calTimeMs * 10000 / sampleNum;
                LOG.info("task " + task.getTaskId() + " calculate rate = " + rate);
                taskIdToRateMap.put(task.getTaskId(), rate);
            }
        }
    }
    if (totalSamples != 0) {
        averageRate = (double) totalCalTimeMs * 10000 / totalSamples;
    }
    LOG.info("totalSamples = " + totalSamples + ", totalCalTimeMs = " + totalCalTimeMs + ", average calulate time for 10000 samples = " + averageRate + ", the maximum calulate time for 10000 sample = " + averageRate / slowestDiscount);
    for (Map.Entry<TaskId, Double> rateEntry : taskIdToRateMap.entrySet()) {
        if (averageRate < rateEntry.getValue() * slowestDiscount) {
            // The task is slow because its time per 10000 samples exceeds the allowed maximum
            // (averageRate / slowestDiscount); the message reports exactly that comparison.
            LOG.info("task " + rateEntry.getKey() + " rate = " + rateEntry.getValue() + " is > " + averageRate / slowestDiscount);
            AMWorker worker = workerManager.getWorker(rateEntry.getKey());
            if (worker != null) {
                LOG.info("put worker " + worker.getId() + " to slow worker list");
                slowWorkers.add(worker.getId());
            }
        }
    }
    return new ArrayList<>(slowWorkers);
}
Also used : TaskId(com.tencent.angel.worker.task.TaskId) WorkerManager(com.tencent.angel.master.worker.WorkerManager) AMTaskManager(com.tencent.angel.master.task.AMTaskManager) AMWorker(com.tencent.angel.master.worker.worker.AMWorker) Id(com.tencent.angel.common.Id) TaskId(com.tencent.angel.worker.task.TaskId) AMTask(com.tencent.angel.master.task.AMTask)

Aggregations

Id (com.tencent.angel.common.Id)5 PSAttemptId (com.tencent.angel.ps.PSAttemptId)4 PSAgentAttemptId (com.tencent.angel.psagent.PSAgentAttemptId)4 WorkerAttemptId (com.tencent.angel.worker.WorkerAttemptId)4 LocalPS (com.tencent.angel.localcluster.LocalPS)1 LocalWorker (com.tencent.angel.localcluster.LocalWorker)1 PSAttemptContainerAssignedEvent (com.tencent.angel.master.ps.attempt.PSAttemptContainerAssignedEvent)1 PSAttemptDiagnosticsUpdateEvent (com.tencent.angel.master.ps.attempt.PSAttemptDiagnosticsUpdateEvent)1 PSAttemptEvent (com.tencent.angel.master.ps.attempt.PSAttemptEvent)1 PSAgentAttemptContainerAssignedEvent (com.tencent.angel.master.psagent.PSAgentAttemptContainerAssignedEvent)1 PSAgentAttemptDiagnosticsUpdateEvent (com.tencent.angel.master.psagent.PSAgentAttemptDiagnosticsUpdateEvent)1 PSAgentAttemptEvent (com.tencent.angel.master.psagent.PSAgentAttemptEvent)1 AMTask (com.tencent.angel.master.task.AMTask)1 AMTaskManager (com.tencent.angel.master.task.AMTaskManager)1 WorkerManager (com.tencent.angel.master.worker.WorkerManager)1 WorkerAttemptContainerAssignedEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptContainerAssignedEvent)1 WorkerAttemptDiagnosticsUpdateEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptDiagnosticsUpdateEvent)1 WorkerAttemptEvent (com.tencent.angel.master.worker.attempt.WorkerAttemptEvent)1 AMWorker (com.tencent.angel.master.worker.worker.AMWorker)1 TaskId (com.tencent.angel.worker.task.TaskId)1