Search in sources :

Example 1 with UcpWorker

use of org.openucx.jucx.ucp.UcpWorker in project twister2 by DSC-SPIDAL.

the class TWSUCXChannel method createUXCWorker.

/**
 * Prepares the UCX worker used by this channel, waits on the worker barrier and then
 * opens an endpoint to every other joined worker.
 * <p>
 * If a stack of pre-created UCX objects is available under the shared key
 * {@code "ucxSocketsForFreePorts"} (see {@code findFreeUcxPorts}), the context, worker and
 * listeners found there are reused; otherwise a new context, worker and listener are created.
 * Every UCX object obtained here is pushed onto {@code closeables}.
 *
 * @param iWorkerController supplies this worker's IP/port, the barrier and the joined workers
 * @throws Twister2RuntimeException if a new listener can not be started
 */
private void createUXCWorker(IWorkerController iWorkerController) {
    UcpContext ucpContext = null;
    // if UCX socket is already created, use that
    // this happens in mpi clusters
    // The shared value is stored as a Stack<Closeable> by findFreeUcxPorts.
    @SuppressWarnings("unchecked")
    Stack<Closeable> ucxObjects = (Stack<Closeable>) WorkerEnvironment.getSharedValue("ucxSocketsForFreePorts");
    if (ucxObjects != null && ucxObjects.size() > 2) {
        // findFreeUcxPorts creates one listener per requested port name, so several
        // listeners may be present. Keep all of them: dropping a listener here would
        // leave it open forever because nothing else holds a reference to it.
        Stack<UcpListener> pooledListeners = new Stack<>();
        while (!ucxObjects.isEmpty()) {
            Closeable ucxObj = ucxObjects.pop();
            if (ucxObj instanceof UcpListener) {
                pooledListeners.push((UcpListener) ucxObj);
            } else if (ucxObj instanceof UcpContext) {
                ucpContext = (UcpContext) ucxObj;
            } else if (ucxObj instanceof UcpWorker) {
                ucpWorker = (UcpWorker) ucxObj;
            } else {
                LOG.warning("Unrecognized UCX object: " + ucxObj);
            }
        }
        // add them to closeables: context first, then worker, then the listeners,
        // mirroring the order used when the objects are created from scratch.
        // Never push null, it would only fail later when the closeables are processed.
        if (ucpContext != null) {
            closeables.push(ucpContext);
        }
        if (ucpWorker != null) {
            closeables.push(ucpWorker);
        }
        for (UcpListener pooledListener : pooledListeners) {
            closeables.push(pooledListener);
        }
    // create UCX objects
    } else {
        ucpContext = initUcpContext();
        this.closeables.push(ucpContext);
        this.ucpWorker = ucpContext.newWorker(new UcpWorkerParams().requestThreadSafety());
        this.closeables.push(ucpWorker);
        UcpListenerParams ucpListenerParams = new UcpListenerParams().setSockAddr(new InetSocketAddress(iWorkerController.getWorkerInfo().getWorkerIP(), iWorkerController.getWorkerInfo().getPort()));
        // start listener
        try {
            UcpListener ucpListener = ucpWorker.newListener(ucpListenerParams);
            closeables.push(ucpListener);
        } catch (org.openucx.jucx.UcxException ucxEx) {
            throw new Twister2RuntimeException("Can not start TWSUCXChannel.", ucxEx);
        }
    }
    try {
        // wait till everyone add listeners
        iWorkerController.waitOnBarrier();
    } catch (TimeoutException e) {
        // NOTE(review): execution continues after a barrier timeout, so the endpoints below
        // may target workers whose listeners are not up yet - confirm this is intended.
        LOG.log(Level.SEVERE, "Failed to wait on barrier", e);
    }
    // create end points
    for (JobMasterAPI.WorkerInfo worker : iWorkerController.getJoinedWorkers()) {
        // no endpoint to ourselves
        if (worker.getWorkerID() != workerId) {
            UcpEndpoint ucpEndpoint = ucpWorker.newEndpoint(new UcpEndpointParams().setSocketAddress(new InetSocketAddress(worker.getWorkerIP(), worker.getPort())));
            this.endpoints.put(worker.getWorkerID(), ucpEndpoint);
            this.closeables.push(ucpEndpoint);
        }
    }
}
Also used : UcpContext(org.openucx.jucx.ucp.UcpContext) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) InetSocketAddress(java.net.InetSocketAddress) Closeable(java.io.Closeable) UcpListener(org.openucx.jucx.ucp.UcpListener) UcxException(org.openucx.jucx.UcxException) UcpWorker(org.openucx.jucx.ucp.UcpWorker) Stack(java.util.Stack) UcpWorkerParams(org.openucx.jucx.ucp.UcpWorkerParams) JobMasterAPI(edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI) UcpListenerParams(org.openucx.jucx.ucp.UcpListenerParams) TimeoutException(edu.iu.dsc.tws.api.exceptions.TimeoutException) UcpEndpoint(org.openucx.jucx.ucp.UcpEndpoint) UcpEndpointParams(org.openucx.jucx.ucp.UcpEndpointParams)

Example 2 with UcpWorker

use of org.openucx.jucx.ucp.UcpWorker in project twister2 by DSC-SPIDAL.

the class TWSUCXChannel method findFreeUcxPorts.

/**
 * Creates one UCX listener per requested port name and reports the port of each listener.
 * <p>
 * The UCX context, the worker and every listener created here are pushed, in creation order,
 * onto a stack that is stored in {@code WorkerEnvironment} under the key
 * {@code "ucxSocketsForFreePorts"}, so that they can be reused when TWSUCXChannel is initialized.
 *
 * @param portNames names to obtain a port for; each name becomes a key of the returned map
 * @param wIP address passed to {@code createUcpListener} for every listener
 *            (presumably the worker IP the listeners bind to - confirm against that helper)
 * @return map from port name to the port of the listener created for that name
 */
public static Map<String, Integer> findFreeUcxPorts(List<String> portNames, InetAddress wIP) {
    Map<String, Integer> portsByName = new HashMap<>();
    Stack<Closeable> createdObjects = new Stack<>();

    UcpContext ucpContext = initUcpContext();
    createdObjects.push(ucpContext);

    UcpWorker worker = ucpContext.newWorker(new UcpWorkerParams().requestThreadSafety());
    createdObjects.push(worker);

    for (String name : portNames) {
        UcpListener listener = createUcpListener(worker, wIP);
        createdObjects.push(listener);

        int boundPort = listener.getAddress().getPort();
        LOG.fine("workerPort for ucx channel: " + boundPort);
        portsByName.put(name, boundPort);
    }

    // publish the live UCX objects so the channel can pick them up later
    WorkerEnvironment.putSharedValue("ucxSocketsForFreePorts", createdObjects);
    return portsByName;
}
Also used : UcpWorkerParams(org.openucx.jucx.ucp.UcpWorkerParams) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UcpContext(org.openucx.jucx.ucp.UcpContext) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Closeable(java.io.Closeable) UcpListener(org.openucx.jucx.ucp.UcpListener) UcpWorker(org.openucx.jucx.ucp.UcpWorker) UcpEndpoint(org.openucx.jucx.ucp.UcpEndpoint) Stack(java.util.Stack)

Aggregations

Closeable (java.io.Closeable)2 Stack (java.util.Stack)2 UcpContext (org.openucx.jucx.ucp.UcpContext)2 UcpEndpoint (org.openucx.jucx.ucp.UcpEndpoint)2 UcpListener (org.openucx.jucx.ucp.UcpListener)2 UcpWorker (org.openucx.jucx.ucp.UcpWorker)2 UcpWorkerParams (org.openucx.jucx.ucp.UcpWorkerParams)2 TimeoutException (edu.iu.dsc.tws.api.exceptions.TimeoutException)1 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)1 JobMasterAPI (edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI)1 InetSocketAddress (java.net.InetSocketAddress)1 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 UcxException (org.openucx.jucx.UcxException)1 UcpEndpointParams (org.openucx.jucx.ucp.UcpEndpointParams)1 UcpListenerParams (org.openucx.jucx.ucp.UcpListenerParams)1