Search in sources :

Example 1 with UcpListenerParams

use of org.openucx.jucx.ucp.UcpListenerParams in project twister2 by DSC-SPIDAL.

From the class TWSUCXChannel, method createUcpListener.

/**
 * Creates a {@link UcpListener} bound to the given address on a randomly chosen port.
 *
 * <p>Ports are drawn from 15000 (inclusive) to 65000 (exclusive). If creating the
 * listener on a chosen port fails with a {@link UcxException} (for example because
 * the port is already taken), another random port is tried, up to 10 attempts in total.
 *
 * @param ucpWorker the worker on which the listener is created
 * @param wIP the address the listener socket is bound to
 * @return the newly created listener; this method never returns {@code null}
 * @throws Twister2RuntimeException wrapping the last {@link UcxException} if every attempt fails
 */
private static UcpListener createUcpListener(UcpWorker ucpWorker, InetAddress wIP) {
    final int minPort = 15000;
    final int maxPortExclusive = 65000;
    final int maxTryCount = 10;

    Random rg = new Random();
    UcpListenerParams ucpListenerParams = new UcpListenerParams();
    UcxException lastFailure = null;
    for (int attempt = 0; attempt < maxTryCount; attempt++) {
        // pick a random port in [minPort, maxPortExclusive), i.e. 15k to 65k
        int port = minPort + rg.nextInt(maxPortExclusive - minPort);
        ucpListenerParams.setSockAddr(new InetSocketAddress(wIP.getHostAddress(), port));
        try {
            return ucpWorker.newListener(ucpListenerParams);
        } catch (UcxException ucxException) {
            // listener could not be created on this port; remember why and retry on another one
            lastFailure = ucxException;
        }
    }
    // every attempt failed: surface the most recent failure to the caller
    throw new Twister2RuntimeException(lastFailure);
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Random(java.util.Random) InetSocketAddress(java.net.InetSocketAddress) UcpListener(org.openucx.jucx.ucp.UcpListener) UcpListenerParams(org.openucx.jucx.ucp.UcpListenerParams) UcpEndpoint(org.openucx.jucx.ucp.UcpEndpoint) UcxException(org.openucx.jucx.UcxException)

Example 2 with UcpListenerParams

use of org.openucx.jucx.ucp.UcpListenerParams in project twister2 by DSC-SPIDAL.

From the class TWSUCXChannel, method createUXCWorker.

/**
 * Prepares the UCX context, worker and listener of this channel and then opens an
 * endpoint to every other joined worker.
 *
 * <p>If the shared value "ucxSocketsForFreePorts" holds a stack with more than two
 * objects, the UCX objects found in it are reused. Otherwise a new context, worker and
 * listener are created, with the listener bound to the IP and port reported by the
 * worker controller. All UCX objects are pushed onto {@code closeables}.
 *
 * <p>A timeout while waiting on the barrier is only logged; endpoint creation proceeds
 * regardless.
 *
 * @param iWorkerController supplies this worker's address, the barrier and the joined workers
 * @throws Twister2RuntimeException if a new listener can not be started
 */
private void createUXCWorker(IWorkerController iWorkerController) {
    // UCX objects may already have been created up front; this happens in mpi clusters
    Stack<Closeable> preCreated = (Stack<Closeable>) WorkerEnvironment.getSharedValue("ucxSocketsForFreePorts");
    boolean reuseExisting = preCreated != null && preCreated.size() > 2;

    if (!reuseExisting) {
        // nothing to reuse: create the UCX objects from scratch
        UcpContext freshContext = initUcpContext();
        this.closeables.push(freshContext);
        this.ucpWorker = freshContext.newWorker(new UcpWorkerParams().requestThreadSafety());
        this.closeables.push(this.ucpWorker);

        InetSocketAddress listenAddress = new InetSocketAddress(
            iWorkerController.getWorkerInfo().getWorkerIP(),
            iWorkerController.getWorkerInfo().getPort());
        UcpListenerParams listenerParams = new UcpListenerParams().setSockAddr(listenAddress);
        try {
            // start listener
            this.closeables.push(this.ucpWorker.newListener(listenerParams));
        } catch (org.openucx.jucx.UcxException ucxEx) {
            throw new Twister2RuntimeException("Can not start TWSUCXChannel.", ucxEx);
        }
    } else {
        // todo: handle the case when there are multiple ucp sockets
        UcpContext sharedContext = null;
        UcpListener sharedListener = null;
        while (!preCreated.isEmpty()) {
            Closeable candidate = preCreated.pop();
            if (candidate instanceof UcpListener) {
                sharedListener = (UcpListener) candidate;
            } else if (candidate instanceof UcpContext) {
                sharedContext = (UcpContext) candidate;
            } else if (candidate instanceof UcpWorker) {
                this.ucpWorker = (UcpWorker) candidate;
            } else {
                LOG.warning("Unrecognized UCX object: " + candidate);
            }
        }
        // register them as closeables, in the same order as freshly created objects
        this.closeables.push(sharedContext);
        this.closeables.push(this.ucpWorker);
        this.closeables.push(sharedListener);
    }

    try {
        // wait until every worker has added its listener
        iWorkerController.waitOnBarrier();
    } catch (TimeoutException timeout) {
        LOG.log(Level.SEVERE, "Failed to wait on barrier", timeout);
    }

    // create an endpoint to each joined worker except this one
    for (JobMasterAPI.WorkerInfo peer : iWorkerController.getJoinedWorkers()) {
        if (peer.getWorkerID() == workerId) {
            continue;
        }
        InetSocketAddress peerAddress = new InetSocketAddress(peer.getWorkerIP(), peer.getPort());
        UcpEndpointParams endpointParams = new UcpEndpointParams().setSocketAddress(peerAddress);
        UcpEndpoint peerEndpoint = this.ucpWorker.newEndpoint(endpointParams);
        this.endpoints.put(peer.getWorkerID(), peerEndpoint);
        this.closeables.push(peerEndpoint);
    }
}
Also used : UcpContext(org.openucx.jucx.ucp.UcpContext) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) InetSocketAddress(java.net.InetSocketAddress) Closeable(java.io.Closeable) UcpListener(org.openucx.jucx.ucp.UcpListener) UcxException(org.openucx.jucx.UcxException) UcpWorker(org.openucx.jucx.ucp.UcpWorker) Stack(java.util.Stack) UcpWorkerParams(org.openucx.jucx.ucp.UcpWorkerParams) JobMasterAPI(edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI) UcpListenerParams(org.openucx.jucx.ucp.UcpListenerParams) TimeoutException(edu.iu.dsc.tws.api.exceptions.TimeoutException) UcpEndpoint(org.openucx.jucx.ucp.UcpEndpoint) UcpEndpointParams(org.openucx.jucx.ucp.UcpEndpointParams)

Aggregations

Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) 2, InetSocketAddress (java.net.InetSocketAddress) 2, UcxException (org.openucx.jucx.UcxException) 2, UcpEndpoint (org.openucx.jucx.ucp.UcpEndpoint) 2, UcpListener (org.openucx.jucx.ucp.UcpListener) 2, UcpListenerParams (org.openucx.jucx.ucp.UcpListenerParams) 2, TimeoutException (edu.iu.dsc.tws.api.exceptions.TimeoutException) 1, JobMasterAPI (edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI) 1, Closeable (java.io.Closeable) 1, Random (java.util.Random) 1, Stack (java.util.Stack) 1, UcpContext (org.openucx.jucx.ucp.UcpContext) 1, UcpEndpointParams (org.openucx.jucx.ucp.UcpEndpointParams) 1, UcpWorker (org.openucx.jucx.ucp.UcpWorker) 1, UcpWorkerParams (org.openucx.jucx.ucp.UcpWorkerParams) 1