use of com.tencent.angel.common.Id in project angel by Tencent.
the class LocalContainerAllocator method requestContainer.
/**
 * Serves a container request by creating a fresh {@link LocalContainer} and announcing it to
 * the requester through the event handler.
 *
 * <p>The runtime type of the id carried by the event decides which "container assigned" event
 * is dispatched. An id that is none of the three handled attempt types produces no event.
 *
 * @param event allocation request carrying the id of the attempt that needs a container
 */
@SuppressWarnings("unchecked")
private void requestContainer(ContainerAllocatorEvent event) {
  LocalContainer container = new LocalContainer();
  Id requester = event.getTaskId();

  if (requester instanceof PSAttemptId) {
    PSAttemptId psAttemptId = (PSAttemptId) requester;
    context.getEventHandler().handle(new PSAttemptContainerAssignedEvent(psAttemptId, container));
    return;
  }

  if (requester instanceof PSAgentAttemptId) {
    PSAgentAttemptId agentAttemptId = (PSAgentAttemptId) requester;
    context.getEventHandler()
        .handle(new PSAgentAttemptContainerAssignedEvent(agentAttemptId, container));
    return;
  }

  if (requester instanceof WorkerAttemptId) {
    WorkerAttemptId workerAttemptId = (WorkerAttemptId) requester;
    context.getEventHandler()
        .handle(new WorkerAttemptContainerAssignedEvent(workerAttemptId, container));
  }
}
use of com.tencent.angel.common.Id in project angel by Tencent.
the class LocalContainerLauncher method launch.
/**
 * Launches the component identified by the id carried in the event.
 *
 * <ul>
 *   <li>PS attempt: builds a {@link LocalPS}, publishes the "container launched" event, then
 *       starts the PS and registers it with {@link LocalClusterContext}.</li>
 *   <li>PS agent attempt: only publishes the "container launched" event.</li>
 *   <li>Worker attempt: builds a {@link LocalWorker}, publishes the "container launched" event,
 *       then starts the worker and registers it with {@link LocalClusterContext}.</li>
 * </ul>
 *
 * <p>For PS and worker attempts the "launched" event is published before the start is
 * attempted; if starting or registering throws, the error is logged and a "launch failed"
 * event follows. Ids of any other type are ignored.
 *
 * @param event launch request carrying the attempt id
 */
@SuppressWarnings("unchecked")
private void launch(ContainerLauncherEvent event) {
  Id target = event.getId();

  if (target instanceof PSAttemptId) {
    PSAttemptId psAttemptId = (PSAttemptId) target;
    LocalPS localPs =
        new LocalPS(psAttemptId, context.getMasterService().getLocation(), context.getConf());
    context.getEventHandler()
        .handle(new PSAttemptEvent(PSAttemptEventType.PA_CONTAINER_LAUNCHED, psAttemptId));
    try {
      localPs.start();
      LocalClusterContext.get().addPS(psAttemptId, localPs);
    } catch (Exception startFailure) {
      LOG.error("launch ps failed.", startFailure);
      context.getEventHandler()
          .handle(new PSAttemptEvent(PSAttemptEventType.PA_CONTAINER_LAUNCH_FAILED, psAttemptId));
    }
    return;
  }

  if (target instanceof PSAgentAttemptId) {
    PSAgentAttemptId agentAttemptId = (PSAgentAttemptId) target;
    context.getEventHandler().handle(new PSAgentAttemptEvent(
        PSAgentAttemptEventType.PSAGENT_ATTEMPT_CONTAINER_LAUNCHED, agentAttemptId));
    return;
  }

  if (target instanceof WorkerAttemptId) {
    WorkerAttemptId workerAttemptId = (WorkerAttemptId) target;
    LocalWorker localWorker = new LocalWorker(
        context.getConf(),
        context.getApplicationId(),
        context.getUser(),
        workerAttemptId,
        context.getMasterService().getLocation(),
        0,
        false);
    context.getEventHandler()
        .handle(new WorkerAttemptEvent(WorkerAttemptEventType.CONTAINER_LAUNCHED, workerAttemptId));
    try {
      localWorker.start();
      LocalClusterContext.get().addWorker(workerAttemptId, localWorker);
    } catch (Exception startFailure) {
      LOG.error("launch worker failed.", startFailure);
      context.getEventHandler().handle(
          new WorkerAttemptEvent(WorkerAttemptEventType.CONTAINER_LAUNCH_FAILED, workerAttemptId));
    }
  }
}
use of com.tencent.angel.common.Id in project angel by Tencent.
the class YarnContainerAllocator method assignContainers.
/**
 * Matches newly allocated containers against the outstanding container requests.
 *
 * <p>Three passes are made over {@code allocatedContainers}, each looking up the request maps
 * registered for the container's priority:
 * <ol>
 *   <li>host match: the container's host has a pending id list in {@code hostToIDListMaps};</li>
 *   <li>rack match: the rack resolved for the container's host has a pending id list in
 *       {@code rackToIDListMaps};</li>
 *   <li>any match: the container is handed to an arbitrary pending request of the same
 *       priority, except for containers carrying the PS agent priority, which are skipped.</li>
 * </ol>
 *
 * <p>Every container that gets assigned is removed from {@code allocatedContainers} through
 * the iterator, so the list is mutated and on return holds only the containers that could not
 * be matched. Ids polled from a host/rack list that no longer have an entry in the
 * id-to-request map are discarded.
 *
 * @param allocatedContainers containers to assign; must support {@link Iterator#remove()}
 */
private void assignContainers(List<Container> allocatedContainers) {
  // try to assign to all nodes first to match node local
  Iterator<Container> it = allocatedContainers.iterator();
  while (it.hasNext()) {
    Container allocated = it.next();
    Map<String, LinkedList<Id>> hostToIDListMap = hostToIDListMaps.get(allocated.getPriority());
    Map<Id, ContainerRequest> idToRequestMap = idToRequestMaps.get(allocated.getPriority());
    if (hostToIDListMap == null || hostToIDListMap.isEmpty()
        || idToRequestMap == null || idToRequestMap.isEmpty()) {
      continue;
    }

    String host = allocated.getNodeId().getHost();
    LinkedList<Id> list = hostToIDListMap.get(host);
    while (list != null && list.size() > 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Host matched to the request list " + host);
      }
      Id tId = list.removeFirst();
      // The id may already have been served by an earlier pass or container; skip it if so.
      if (idToRequestMap.containsKey(tId)) {
        ContainerRequest assigned = idToRequestMap.remove(tId);
        containerAssigned(allocated, assigned);
        it.remove();
        hostLocalAssigned++;
        if (LOG.isDebugEnabled()) {
          LOG.debug("Assigned based on host match " + host);
        }
        break;
      }
    }
  }

  // try to match all rack local
  it = allocatedContainers.iterator();
  while (it.hasNext()) {
    Container allocated = it.next();
    Map<String, LinkedList<Id>> rackToIDListMap = rackToIDListMaps.get(allocated.getPriority());
    Map<Id, ContainerRequest> idToRequestMap = idToRequestMaps.get(allocated.getPriority());
    if (rackToIDListMap == null || rackToIDListMap.isEmpty()
        || idToRequestMap == null || idToRequestMap.isEmpty()) {
      continue;
    }

    String host = allocated.getNodeId().getHost();
    String rack = RackResolver.resolve(host).getNetworkLocation();
    LinkedList<Id> list = rackToIDListMap.get(rack);
    while (list != null && list.size() > 0) {
      Id tId = list.removeFirst();
      if (idToRequestMap.containsKey(tId)) {
        ContainerRequest assigned = idToRequestMap.remove(tId);
        containerAssigned(allocated, assigned);
        it.remove();
        rackLocalAssigned++;
        if (LOG.isDebugEnabled()) {
          LOG.debug("Assigned based on rack match " + rack);
        }
        break;
      }
    }
  }

  // assign remaining
  it = allocatedContainers.iterator();
  while (it.hasNext()) {
    Container allocated = it.next();
    Map<Id, ContainerRequest> idToRequestMap = idToRequestMaps.get(allocated.getPriority());

    // Priorities are value objects: compare with equals(), not ==, because the priority
    // attached to an allocated container is not the same instance as the one held by the
    // PS agent manager.
    boolean psAgentContainer = context.getPSAgentManager() != null
        && allocated.getPriority().equals(context.getPSAgentManager().getPsAgentPriority());
    if (psAgentContainer || idToRequestMap == null || idToRequestMap.isEmpty()) {
      continue;
    }

    Id tId = idToRequestMap.keySet().iterator().next();
    ContainerRequest assigned = idToRequestMap.remove(tId);
    containerAssigned(allocated, assigned);
    it.remove();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Assigned based on * match");
    }
  }
}
use of com.tencent.angel.common.Id in project angel by Tencent.
the class YarnContainerAllocator method handleFinishContainers.
/**
 * Processes the statuses of containers that have completed.
 *
 * <p>For each status whose container id is known in {@code assignedContainerToIDMap}, the
 * container is removed from both bookkeeping maps and two events are dispatched to the owner
 * of the container: a diagnostics update followed by a container-finished event. The event
 * classes depend on whether the owner is a PS attempt, a PS agent attempt or a worker attempt.
 * A status for an unknown container id is only logged as an error.
 *
 * @param finishedContainers statuses of the completed containers
 */
@SuppressWarnings("unchecked")
private void handleFinishContainers(List<ContainerStatus> finishedContainers) {
  for (ContainerStatus status : finishedContainers) {
    LOG.info("Received completed container:" + status);

    Id owner = assignedContainerToIDMap.get(status.getContainerId());
    if (owner == null) {
      LOG.error("Container complete event for unknown container id " + status.getContainerId());
      continue;
    }

    // forget the finished container before notifying its owner
    assignedContainerToIDMap.remove(status.getContainerId());
    idToContainerMap.remove(owner);

    // dispatch container exit message to corresponding components
    String diagnostics = StringInterner.weakIntern(status.getDiagnostics());
    if (owner instanceof PSAttemptId) {
      PSAttemptId psAttemptId = (PSAttemptId) owner;
      context.getEventHandler()
          .handle(new PSAttemptDiagnosticsUpdateEvent(diagnostics, psAttemptId));
      context.getEventHandler().handle(createContainerFinishedEvent(status, psAttemptId));
    } else if (owner instanceof PSAgentAttemptId) {
      PSAgentAttemptId agentAttemptId = (PSAgentAttemptId) owner;
      context.getEventHandler()
          .handle(new PSAgentAttemptDiagnosticsUpdateEvent(agentAttemptId, diagnostics));
      context.getEventHandler().handle(createContainerFinishedEvent(status, agentAttemptId));
    } else if (owner instanceof WorkerAttemptId) {
      WorkerAttemptId workerAttemptId = (WorkerAttemptId) owner;
      context.getEventHandler()
          .handle(new WorkerAttemptDiagnosticsUpdateEvent(workerAttemptId, diagnostics));
      context.getEventHandler().handle(createContainerFinishedEvent(status, workerAttemptId));
    }
  }
}
use of com.tencent.angel.common.Id in project angel by Tencent.
the class TaskCalPerfChecker method check.
/**
 * Finds the workers hosting tasks whose calculation speed is too far below the average.
 *
 * <p>Only tasks that report both {@link TaskCounter#TOTAL_CALCULATE_SAMPLES} and
 * {@link TaskCounter#TOTAL_CALCULATE_TIME_MS} contribute. The rate of a task is its
 * calculation time in milliseconds per 10000 samples and is only evaluated once the task has
 * processed more than 5000000 samples. The average rate is computed over all contributing
 * tasks. A task is slow when {@code averageRate < taskRate * slowestDiscount}, i.e. its rate
 * exceeds {@code averageRate / slowestDiscount}.
 *
 * @param context application master context providing configuration, tasks and workers
 * @return ids of the workers that run at least one slow task, without duplicates; empty if
 *         none were found
 */
@Override
public List<Id> check(AMContext context) {
  // Number of samples a rate is normalized to.
  final int samplesPerRateUnit = 10000;
  // A task must have processed more than this many samples before its rate is judged.
  final long minSamplesForCheck = 5000000;

  double slowestDiscount = context.getConf().getDouble(AngelConf.ANGEL_AM_TASK_SLOWEST_DISCOUNT,
      AngelConf.DEFAULT_ANGEL_AM_TASK_SLOWEST_DISCOUNT);
  LOG.info("start to check slow workers use TaskCalPerfChecker policy, slowestDiscount = "
      + slowestDiscount);

  Set<Id> slowWorkers = new HashSet<Id>();
  AMTaskManager taskManage = context.getTaskManager();
  WorkerManager workerManager = context.getWorkerManager();
  Collection<AMTask> tasks = taskManage.getTasks();

  long totalSamples = 0;
  long totalCalTimeMs = 0;
  double averageRate = 0.0;
  Map<TaskId, Double> taskIdToRateMap = new HashMap<TaskId, Double>(tasks.size());
  for (AMTask task : tasks) {
    if (task.getMetrics().containsKey(TaskCounter.TOTAL_CALCULATE_SAMPLES)
        && task.getMetrics().containsKey(TaskCounter.TOTAL_CALCULATE_TIME_MS)) {
      long sampleNum = Long.valueOf(task.getMetrics().get(TaskCounter.TOTAL_CALCULATE_SAMPLES));
      double calTimeMs = Long.valueOf(task.getMetrics().get(TaskCounter.TOTAL_CALCULATE_TIME_MS));
      LOG.info("for task " + task.getTaskId() + ", sampleNum = " + sampleNum + ", calTimeMs = "
          + calTimeMs);
      totalSamples += sampleNum;
      // calTimeMs holds an integral value, so the implicit narrowing back to long is exact
      totalCalTimeMs += calTimeMs;
      if (sampleNum > minSamplesForCheck) {
        double rate = calTimeMs * samplesPerRateUnit / sampleNum;
        LOG.info("task " + task.getTaskId() + " calculate rate = " + rate);
        taskIdToRateMap.put(task.getTaskId(), rate);
      }
    }
  }

  if (totalSamples != 0) {
    averageRate = (double) totalCalTimeMs * samplesPerRateUnit / totalSamples;
  }
  LOG.info("totalSamples = " + totalSamples + ", totalCalTimeMs = " + totalCalTimeMs
      + ", average calulate time for 10000 samples = " + averageRate
      + ", the maximum calulate time for 10000 sample = " + averageRate / slowestDiscount);

  for (Map.Entry<TaskId, Double> rateEntry : taskIdToRateMap.entrySet()) {
    if (averageRate < rateEntry.getValue() * slowestDiscount) {
      // Report the comparison that was actually made: the task rate exceeds the allowed
      // maximum (averageRate / slowestDiscount) logged above.
      LOG.info("task " + rateEntry.getKey() + " rate = " + rateEntry.getValue() + " is > "
          + averageRate / slowestDiscount);
      AMWorker worker = workerManager.getWorker(rateEntry.getKey());
      if (worker != null) {
        LOG.info("put worker " + worker.getId() + " to slow worker list");
        slowWorkers.add(worker.getId());
      }
    }
  }

  return new ArrayList<>(slowWorkers);
}
Aggregations