use of edu.iu.dsc.tws.rsched.bootstrap.WorkerNetworkInfo in project twister2 by DSC-SPIDAL.
the class TCPNetwork method initialize.
/**
 * Brings up the TCP network layer for this worker.
 *
 * <p>The steps run in a fixed order: the TCP channel's first phase is started, a
 * {@code ZKController} is created and initialized, the worker list is obtained from it,
 * the channel's second phase is started with the network details of every worker, and
 * finally the dataflow communication is created on top of the channel.
 *
 * @throws RuntimeException if the worker list returned by the ZooKeeper controller is null
 */
public void initialize() {
  // The channel's first phase has to be running before ZooKeeper is contacted.
  channel = new TCPChannel(config, new NetworkInfo(-1));
  channel.startFirstPhase();

  // Set up the ZooKeeper controller for this job.
  int expectedWorkers = job.getJobResources().getNoOfContainers();
  zkController = new ZKController(config, job.getJobName(), workerUniqueId, expectedWorkers);
  zkController.initialize();

  // Wait (up to the configured limit) until every worker has joined the job.
  int joinTimeout = ZKContext.maxWaitTimeForAllWorkersToJoin(config);
  List<WorkerNetworkInfo> joinedWorkers = zkController.waitForAllWorkersToJoin(joinTimeout);
  if (joinedWorkers == null) {
    throw new RuntimeException("Error getting the worker list from ZooKeeper");
  }

  // Translate each worker entry into the channel's NetworkInfo form and
  // remember the entry whose name matches this worker.
  // todo: we need to have tcp information from worker info
  NetworkInfo ownNetwork = null;
  List<NetworkInfo> allNetworks = new ArrayList<>();
  for (WorkerNetworkInfo worker : joinedWorkers) {
    NetworkInfo workerNetwork = new NetworkInfo(worker.getWorkerID());
    workerNetwork.addProperty(TCPContext.NETWORK_HOSTNAME, worker.getWorkerIP());
    workerNetwork.addProperty(TCPContext.NETWORK_PORT, worker.getWorkerPort());
    if (workerUniqueId.equals(worker.getWorkerName())) {
      ownNetwork = workerNetwork;
    }
    allNetworks.add(workerNetwork);
  }

  // Second phase: hand the channel the full worker list plus this worker's own entry.
  channel.startSecondPhase(allNetworks, ownNetwork);

  // Wrap the channel and build the dataflow communication on top of it.
  TWSTCPChannel wrappedChannel = new TWSTCPChannel(config, taskPlan.getThisExecutor(), channel);
  dataFlowCommunication = new MPIDataFlowCommunication();
  dataFlowCommunication.init(config, taskPlan, wrappedChannel);
}
use of edu.iu.dsc.tws.rsched.bootstrap.WorkerNetworkInfo in project twister2 by DSC-SPIDAL.
the class WorkerController method buildWorkerList.
/**
 * Rebuilds the worker list from the pod list reported by the Kubernetes master.
 *
 * <p>Pods are queried in the configured namespace using the job's service label. The
 * current worker list is cleared only after the query succeeds. Pods whose name does not
 * start with the job name are logged and skipped; every other pod contributes
 * {@code workersPerPod} entries, with ports counted upward from the configured base port.
 *
 * @throws RuntimeException wrapping the {@code ApiException} if the pod list cannot be fetched
 */
private void buildWorkerList() {
  String namespace = KubernetesContext.namespace(config);
  String servicelabel = KubernetesUtils.createServiceLabelWithApp(jobName);
  int basePort = KubernetesContext.workerBasePort(config);

  V1PodList pods;
  try {
    pods = coreApi.listNamespacedPod(
        namespace, null, null, null, null, servicelabel, null, null, null, null);
  } catch (ApiException e) {
    String logMessage = "Exception when getting the pod list for the job: " + jobName + "\n"
        + "exCode: " + e.getCode() + "\n"
        + "responseBody: " + e.getResponseBody();
    LOG.log(Level.SEVERE, logMessage, e);
    throw new RuntimeException(e);
  }

  // Discard the previous contents only once a fresh pod list is in hand.
  workerList.clear();

  for (V1Pod pod : pods.getItems()) {
    String podName = pod.getMetadata().getName();
    if (podName.startsWith(jobName)) {
      InetAddress podIP = convertStringToIP(pod.getStatus().getPodIP());
      // One worker per container slot in the pod; slot k uses port basePort + k.
      for (int containerIndex = 0; containerIndex < workersPerPod; containerIndex++) {
        int workerID = calculateWorkerID(podName, containerIndex);
        workerList.add(new WorkerNetworkInfo(podIP, basePort + containerIndex, workerID));
      }
    } else {
      LOG.warning("A pod received that does not belong to this job. PodName: " + podName);
    }
  }
}
use of edu.iu.dsc.tws.rsched.bootstrap.WorkerNetworkInfo in project twister2 by DSC-SPIDAL.
the class WorkerController method printWorkers.
/**
 * Logs a summary of the given workers at INFO level.
 *
 * <p>The message starts with the number of workers, followed by one line per worker
 * showing its position in the list, its worker ID and its worker name.
 *
 * @param workers the workers to report
 */
public static void printWorkers(ArrayList<WorkerNetworkInfo> workers) {
  StringBuilder report = new StringBuilder();
  report.append("Number of workers: ").append(workers.size()).append("\n");

  int position = 0;
  for (WorkerNetworkInfo worker : workers) {
    String line = String.format(
        "%d: workerID[%d] %s\n", position, worker.getWorkerID(), worker.getWorkerName());
    report.append(line);
    position++;
  }

  LOG.info(report.toString());
}
Aggregations