Search in sources :

Example 36 with Twister2RuntimeException

use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.

the class OperationUtils method progressReceivers.

/**
 * Drives one round of progress on the channel operation and on both receivers.
 *
 * <p>Each receiver is progressed only when its lock can be acquired without
 * blocking; a receiver whose lock is currently held elsewhere is skipped for
 * this round and contributes {@code false} to the result.
 *
 * @param delegate the channel dataflow operation to progress
 * @param lock lock guarding the final receiver
 * @param finalReceiver final receiver
 * @param partialLock lock guarding the partial receiver
 * @param partialReceiver partial receiver
 * @return true if either receiver reported that it needs further progress
 * @throws Twister2RuntimeException wrapping any throwable raised during the round
 */
public static boolean progressReceivers(ChannelDataFlowOperation delegate, Lock lock, MessageReceiver finalReceiver, Lock partialLock, MessageReceiver partialReceiver) {
    try {
        delegate.progress();

        // final receiver: only touched when its lock is free right now
        boolean finalPending = false;
        if (lock.tryLock()) {
            try {
                finalPending = finalReceiver.progress();
            } finally {
                lock.unlock();
            }
        }

        // partial receiver: same non-blocking treatment
        boolean partialPending = false;
        if (partialLock.tryLock()) {
            try {
                partialPending = partialReceiver.progress();
            } finally {
                partialLock.unlock();
            }
        }

        return finalPending || partialPending;
    } catch (Throwable t) {
        // log before rethrowing so the failure is recorded even if the caller drops it
        LOG.log(Level.SEVERE, "un-expected error", t);
        throw new Twister2RuntimeException(t);
    }
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)

Example 37 with Twister2RuntimeException

use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.

the class OperationUtils method areReceiversComplete.

/**
 * Checks whether both the final and the partial receiver report completion.
 *
 * <p>Each receiver is queried only when its lock can be acquired without
 * blocking; a receiver whose lock is currently held elsewhere is treated as
 * not complete for this call.
 *
 * @param finalLock lock guarding the final receiver
 * @param finalReceiver final receiver
 * @param partialLock lock guarding the partial receiver
 * @param partialReceiver partial receiver
 * @return true only if both receivers were queried and both reported completion
 * @throws Twister2RuntimeException wrapping any throwable raised during the check
 */
public static boolean areReceiversComplete(Lock finalLock, MessageReceiver finalReceiver, Lock partialLock, MessageReceiver partialReceiver) {
    try {
        boolean finalDone = false;
        if (finalLock.tryLock()) {
            try {
                finalDone = finalReceiver.isComplete();
            } finally {
                finalLock.unlock();
            }
        }

        // the partial receiver is queried regardless of the final receiver's answer
        boolean partialDone = false;
        if (partialLock.tryLock()) {
            try {
                partialDone = partialReceiver.isComplete();
            } finally {
                partialLock.unlock();
            }
        }

        return finalDone && partialDone;
    } catch (Throwable t) {
        // log before rethrowing so the failure is recorded even if the caller drops it
        LOG.log(Level.SEVERE, "un-expected error", t);
        throw new Twister2RuntimeException(t);
    }
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)

Example 38 with Twister2RuntimeException

use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.

the class TWSUCXChannel method createUcpListener.

/**
 * Creates a UcpListener for the given address on a randomly chosen port.
 *
 * <p>Ports are drawn uniformly from 15000 to 54999 (inclusive). If creating the
 * listener on a chosen port fails with a UcxException, another random port is
 * tried, up to 10 attempts in total.
 *
 * @param ucpWorker the worker on which the listener is created
 * @param wIP the address whose host address is used for the listener socket
 * @return the created listener; never null
 * @throws Twister2RuntimeException wrapping the UcxException of the last attempt
 *     when all attempts fail
 */
private static UcpListener createUcpListener(UcpWorker ucpWorker, InetAddress wIP) {
    final int maxTryCount = 10;
    Random rg = new Random();
    UcpListenerParams ucpListenerParams = new UcpListenerParams();
    UcxException lastFailure = null;
    for (int attempt = 0; attempt < maxTryCount; attempt++) {
        // nextInt(40000) yields 0..39999, so the port falls in 15000..54999
        int port = rg.nextInt(40000) + 15000;
        ucpListenerParams.setSockAddr(new InetSocketAddress(wIP.getHostAddress(), port));
        try {
            return ucpWorker.newListener(ucpListenerParams);
        } catch (UcxException ucxException) {
            // NOTE(review): presumably the port is already taken; remember the
            // failure and retry with a different random port
            lastFailure = ucxException;
        }
    }
    // every attempt failed: surface the most recent failure instead of returning null
    throw new Twister2RuntimeException(lastFailure);
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Random(java.util.Random) InetSocketAddress(java.net.InetSocketAddress) UcpListener(org.openucx.jucx.ucp.UcpListener) UcpListenerParams(org.openucx.jucx.ucp.UcpListenerParams) UcpEndpoint(org.openucx.jucx.ucp.UcpEndpoint) UcxException(org.openucx.jucx.UcxException)

Example 39 with Twister2RuntimeException

use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.

the class MPIWorkerStarter method startWorkerWithJM.

/**
 * Starts this MPI process as a worker or, at global rank 0 when the job master
 * does not run in the submitting client, as the job master.
 *
 * <p>When the job master runs in the client, the worker is started directly on
 * MPI.COMM_WORLD. Otherwise MPI.COMM_WORLD is split into a rank-0 group and a
 * group of all other ranks; rank 0 calls startMaster() and the other ranks
 * start workers on the split communicator.
 *
 * @throws Twister2RuntimeException if MPI.COMM_WORLD can not be split
 */
private void startWorkerWithJM() {
    if (JobMasterContext.jobMasterRunsInClient(config)) {
        wInfo = createWorkerInfo(config, globalRank);
        WorkerRuntime.init(config, job, wInfo, restartCount);
        startWorker(MPI.COMM_WORLD);
        return;
    }

    // JM is not running in the submitting client, so it runs at rank 0 of the
    // MPI world: split the JM (color 0) from the workers (color 1)
    Intracomm workerComm;
    int workerRank;
    try {
        workerComm = MPI.COMM_WORLD.split(globalRank == 0 ? 0 : 1, globalRank);
        workerRank = workerComm.getRank();
    } catch (MPIException e) {
        throw new Twister2RuntimeException("Can not split MPI.COMM_WORLD", e);
    }

    // rank 0 gets worker info created with -1; every other rank uses its rank
    // within the split communicator
    wInfo = createWorkerInfo(config, globalRank == 0 ? -1 : workerRank);

    // broadcast the job master information to all workers
    broadCastMasterInformation(globalRank);

    if (globalRank != 0) {
        // init WorkerRuntime before starting the worker on the split communicator
        WorkerRuntime.init(config, job, wInfo, restartCount);
        startWorker(workerComm);
    } else {
        startMaster();
    }
}
Also used : MPIException(mpi.MPIException) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Intracomm(mpi.Intracomm)

Example 40 with Twister2RuntimeException

use of edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException in project twister2 by DSC-SPIDAL.

the class NetworkUtils method releaseWorkerPorts.

/**
 * Closes the temporary ServerSockets registered under the shared value
 * "socketsForFreePorts" (those started with the findFreePorts method),
 * releasing the local ports they hold. Does nothing when no such shared
 * value exists.
 *
 * <p>This method must be called after the JMWorkerAgent is started and
 * before IWorker is started in MPIWorkerManager/WorkerManager.
 *
 * @throws Twister2RuntimeException if at least one socket could not be closed;
 *     closing is still attempted for every socket before this is thrown
 */
public static void releaseWorkerPorts() {
    List<ServerSocket> tempSockets = (List<ServerSocket>) WorkerEnvironment.removeSharedValue("socketsForFreePorts");
    if (tempSockets == null) {
        return;
    }

    int failedCloses = 0;
    for (ServerSocket tempSocket : tempSockets) {
        // read the port up front so it is available to both log messages
        int port = tempSocket.getLocalPort();
        try {
            tempSocket.close();
            LOG.fine("Temporary socket closed at the port: " + port);
        } catch (IOException ioException) {
            failedCloses++;
            LOG.log(Level.SEVERE, "Exception when closing the temporary socket at the port: " + port, ioException);
        }
    }

    if (failedCloses > 0) {
        throw new Twister2RuntimeException("Could not release one or more free TCP/IP ports");
    }
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) ServerSocket(java.net.ServerSocket) ArrayList(java.util.ArrayList) List(java.util.List) IOException(java.io.IOException)

Aggregations

Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) 65, Twister2Exception (edu.iu.dsc.tws.api.exceptions.Twister2Exception) 17, IOException (java.io.IOException) 16, ArrayList (java.util.ArrayList) 10, JobMasterAPI (edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI) 8, Path (edu.iu.dsc.tws.api.data.Path) 7, Config (edu.iu.dsc.tws.api.config.Config) 6, File (java.io.File) 5, TimeoutException (edu.iu.dsc.tws.api.exceptions.TimeoutException) 4, FileInputStream (java.io.FileInputStream) 4, InvocationTargetException (java.lang.reflect.InvocationTargetException) 4, InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException) 3, Twister2JobState (edu.iu.dsc.tws.api.scheduler.Twister2JobState) 3, ArrowTableBuilder (edu.iu.dsc.tws.common.table.ArrowTableBuilder) 3, TableRuntime (edu.iu.dsc.tws.common.table.arrow.TableRuntime) 3, List (java.util.List) 3, Map (java.util.Map) 3, Logger (java.util.logging.Logger) 3, MessageType (edu.iu.dsc.tws.api.comms.messaging.types.MessageType) 2, TaskSchedulerException (edu.iu.dsc.tws.api.compute.exceptions.TaskSchedulerException) 2