Search in sources:

Example 1 with MPIException

use of mpi.MPIException in project twister2 by DSC-SPIDAL.

From the class MPIProcess, the method createResourcePlan:

/**
 * Builds the {@link ResourcePlan} for this MPI job by exchanging the processor
 * name of every rank via MPI all-gather collectives, so each worker knows the
 * host name of every other worker before containers are added.
 *
 * @param config the job configuration (supplies the cluster type and container setup)
 * @return the resource plan populated with one container per MPI rank
 * @throws RuntimeException if any of the underlying MPI calls fail
 */
private static ResourcePlan createResourcePlan(Config config) {
    try {
        // query the rank once and reuse it below (previously getRank() was called twice)
        int rank = MPI.COMM_WORLD.getRank();
        ResourcePlan resourcePlan = new ResourcePlan(MPIContext.clusterType(config), rank);
        String processName = MPI.getProcessorName();
        char[] processNameChars = new char[processName.length()];
        int length = processNameChars.length;
        processName.getChars(0, length, processNameChars, 0);
        // step 1: all-gather the length of every rank's processor name
        IntBuffer countSend = MPI.newIntBuffer(1);
        int worldSize = MPI.COMM_WORLD.getSize();
        IntBuffer countReceive = MPI.newIntBuffer(worldSize);
        countSend.put(length);
        MPI.COMM_WORLD.allGather(countSend, 1, MPI.INT, countReceive, 1, MPI.INT);
        // step 2: compute per-rank receive sizes and displacements for the variable gather
        int[] receiveSizes = new int[worldSize];
        int[] displacements = new int[worldSize];
        int sum = 0;
        for (int i = 0; i < worldSize; i++) {
            receiveSizes[i] = countReceive.get(i);
            displacements[i] = sum;
            sum += receiveSizes[i];
        }
        // step 3: all-gather the actual processor-name characters from every rank
        CharBuffer sendBuffer = MPI.newCharBuffer(length);
        CharBuffer receiveBuffer = MPI.newCharBuffer(sum);
        sendBuffer.append(processName);
        MPI.COMM_WORLD.allGatherv(sendBuffer, length, MPI.CHAR, receiveBuffer, receiveSizes, displacements, MPI.CHAR);
        // step 4: decode the gathered characters back into one name per rank;
        // relies on sequential relative get() calls advancing the buffer position
        Map<Integer, String> processNames = new HashMap<>();
        for (int i = 0; i < receiveSizes.length; i++) {
            char[] c = new char[receiveSizes[i]];
            receiveBuffer.get(c);
            processNames.put(i, new String(c));
            LOG.log(Level.FINE, String.format("Process %d name: %s", i, processNames.get(i)));
        }
        // register one container per rank in the plan
        addContainers(config, resourcePlan, processNames);
        return resourcePlan;
    } catch (MPIException e) {
        throw new RuntimeException("Failed to communicate", e);
    }
}
Also used : MPIException(mpi.MPIException) HashMap(java.util.HashMap) ResourcePlan(edu.iu.dsc.tws.rsched.spi.resource.ResourcePlan) IntBuffer(java.nio.IntBuffer) CharBuffer(java.nio.CharBuffer)

Example 2 with MPIException

use of mpi.MPIException in project twister2 by DSC-SPIDAL.

From the class MPIProcess, the method worker:

/**
 * Runs a single worker: builds the resource plan, reflectively instantiates
 * the configured container class, waits on a barrier so every worker starts
 * together, and then initializes the container.
 *
 * @param config the job configuration
 * @param rank this worker's MPI rank
 * @throws RuntimeException if the container class cannot be loaded or the
 *     startup barrier fails
 */
private static void worker(Config config, int rank) {
    // lets create the resource plan
    ResourcePlan resourcePlan = createResourcePlan(config);
    String containerClass = MPIContext.containerClass(config);
    IContainer container;
    try {
        Object object = ReflectionUtils.newInstance(containerClass);
        container = (IContainer) object;
        LOG.log(Level.FINE, "loaded container class: " + containerClass);
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
        LOG.log(Level.SEVERE, String.format("failed to load the container class %s", containerClass), e);
        throw new RuntimeException(e);
    }
    // lets do a barrier here so everyone is synchronized at the start
    try {
        MPI.COMM_WORLD.barrier();
        LOG.log(Level.FINE, String.format("Worker %d: the cluster is ready...", rank));
    } catch (MPIException e) {
        // pass the exception to the logger so the stack trace reaches the log too
        LOG.log(Level.SEVERE, "Failed to synchronize the workers at the start", e);
        throw new RuntimeException(e);
    }
    // now initialize the container
    container.init(config, rank, resourcePlan);
}
Also used : MPIException(mpi.MPIException) ResourcePlan(edu.iu.dsc.tws.rsched.spi.resource.ResourcePlan) IContainer(edu.iu.dsc.tws.rsched.spi.container.IContainer)

Example 3 with MPIException

use of mpi.MPIException in project twister2 by DSC-SPIDAL.

From the class TWSMPIChannel, the method progress:

/**
 * Progress the communications that are pending.
 *
 * Drives the channel one step: (1) hands every queued send to MPI via
 * postMessage, (2) posts receive buffers for registered receives that have
 * buffers available, (3) tests outstanding sends for completion and fires the
 * send callback once every buffer of a message has completed, and (4) tests
 * outstanding receives and fires the receive callback per completed buffer.
 * Intended to be called repeatedly from the communication loop.
 */
public void progress() {
    // we should rate limit here
    // drain the queue of sends waiting to be handed to MPI
    while (pendingSends.size() > 0) {
        // post the message
        MPISendRequests sendRequests = pendingSends.poll();
        // post the send
        if (sendRequests != null) {
            postMessage(sendRequests);
            // track it so the completion loop below can test its requests
            waitForCompletionSends.add(sendRequests);
        }
    }
    // post receive buffers for every registered receive that has buffers free
    for (int i = 0; i < registeredReceives.size(); i++) {
        MPIReceiveRequests receiveRequests = registeredReceives.get(i);
        if (debug) {
            LOG.info(String.format("%d available receive %d %d %s", executor, receiveRequests.rank, receiveRequests.availableBuffers.size(), receiveRequests.availableBuffers.peek()));
        }
        // okay we have more buffers to be posted
        if (receiveRequests.availableBuffers.size() > 0) {
            postReceive(receiveRequests);
        }
    }
    // test outstanding sends; canProgress stops scanning once an incomplete
    // request is found, since later requests in the same message cannot be done first
    Iterator<MPISendRequests> sendRequestsIterator = waitForCompletionSends.iterator();
    boolean canProgress = true;
    while (sendRequestsIterator.hasNext() && canProgress) {
        MPISendRequests sendRequests = sendRequestsIterator.next();
        Iterator<MPIRequest> requestIterator = sendRequests.pendingSends.iterator();
        while (requestIterator.hasNext()) {
            MPIRequest r = requestIterator.next();
            try {
                // non-blocking test; returns null while the send is still in flight
                Status status = r.request.testStatus();
                // this request has finished
                if (status != null) {
                    completedSendCount++;
                    requestIterator.remove();
                } else {
                    canProgress = false;
                    break;
                }
            } catch (MPIException e) {
                throw new RuntimeException("Failed to complete the send to: " + sendRequests.rank, e);
            }
        }
        // ideally we should be able to call for each finish of the buffer
        // all buffers of this message are out: notify the sender and stop tracking it
        if (sendRequests.pendingSends.size() == 0) {
            sendRequests.callback.onSendComplete(sendRequests.rank, sendRequests.edge, sendRequests.message);
            sendRequestsIterator.remove();
        }
    }
    // NOTE(review): deliberately disabled debug dump of channel counters;
    // presumably flipped to true (or to the debug flag) when troubleshooting
    if (false) {
        LOG.info(String.format("%d sending - sent %d comp send %d receive %d pend recv %d pending sends %d waiting %d", executor, sendCount, completedSendCount, receiveCount, pendingReceiveCount, pendingSends.size(), waitForCompletionSends.size()));
    }
    // test outstanding receives and deliver completed buffers in posted order
    for (int i = 0; i < registeredReceives.size(); i++) {
        MPIReceiveRequests receiveRequests = registeredReceives.get(i);
        try {
            Iterator<MPIRequest> requestIterator = receiveRequests.pendingRequests.iterator();
            while (requestIterator.hasNext()) {
                MPIRequest r = requestIterator.next();
                Status status = r.request.testStatus();
                if (status != null) {
                    if (!status.isCancelled()) {
                        // LOG.info(String.format("%d Receive completed: from %d size %d %d",
                        // executor, receiveRequests.rank, status.getCount(MPI.BYTE), ++receiveCount));
                        ++receiveCount;
                        // lets call the callback about the receive complete
                        // record the actual received byte count on the buffer first
                        r.buffer.setSize(status.getCount(MPI.BYTE));
                        receiveRequests.callback.onReceiveComplete(receiveRequests.rank, receiveRequests.edge, r.buffer);
                        // LOG.info(String.format("%d finished calling the on complete method", executor));
                        requestIterator.remove();
                    } else {
                        throw new RuntimeException("MPI receive request cancelled");
                    }
                } else {
                    // requests complete in posted order; stop at the first unfinished one
                    break;
                }
            }
        // this request has completed
        } catch (MPIException e) {
            LOG.log(Level.SEVERE, "Twister2Network failure", e);
            throw new RuntimeException("Twister2Network failure", e);
        }
    }
}
Also used : Status(mpi.Status) MPIException(mpi.MPIException)

Example 4 with MPIException

use of mpi.MPIException in project twister2 by DSC-SPIDAL.

From the class TWSMPIChannel, the method postMessage:

/**
 * Posts every buffer of a message as a non-blocking MPI send to the target rank.
 * Each resulting MPI request is added to the pending-send list so the progress
 * loop can test it for completion.
 *
 * @param requests the send request holding the message, the target rank, and
 *     the list of pending per-buffer MPI requests to append to
 * @throws RuntimeException if an iSend fails (wraps the MPIException)
 */
private void postMessage(MPISendRequests requests) {
    MPIMessage message = requests.message;
    for (int i = 0; i < message.getBuffers().size(); i++) {
        try {
            sendCount++;
            MPIBuffer buffer = message.getBuffers().get(i);
            // non-blocking send, tagged with the message's edge id
            Request request = comm.iSend(buffer.getByteBuffer(), buffer.getSize(), MPI.BYTE, requests.rank, message.getHeader().getEdge());
            // register to the loop to make progress on the send
            requests.pendingSends.add(new MPIRequest(request, buffer));
        } catch (MPIException e) {
            // preserve the cause so the underlying MPI failure is not lost
            throw new RuntimeException("Failed to send message to rank: " + requests.rank, e);
        }
    }
}
Also used : MPIException(mpi.MPIException) Request(mpi.Request)

Example 5 with MPIException

use of mpi.MPIException in project twister2 by DSC-SPIDAL.

From the class MPIProcess, the method main:

/**
 * Entry point of an MPI worker process: initializes MPI, parses the command
 * line, loads the per-worker configuration, and hands control to worker().
 * MPI is always finalized in the finally block, even on failure.
 *
 * @param args command line arguments; also forwarded to MPI.Init
 * @throws RuntimeException on MPI failure or command-line parse errors
 */
public static void main(String[] args) {
    Options cmdOptions = null;
    try {
        MPI.Init(args);
        // the world size was previously queried here as well, but never used
        int rank = MPI.COMM_WORLD.getRank();
        cmdOptions = setupOptions();
        CommandLineParser parser = new DefaultParser();
        // parse the help options first.
        CommandLine cmd = parser.parse(cmdOptions, args);
        // load the configuration
        // we are loading the configuration for all the components
        Config config = loadConfigurations(cmd, rank);
        // normal worker
        LOG.log(Level.FINE, "A worker process is starting...");
        worker(config, rank);
    } catch (MPIException e) {
        LOG.log(Level.SEVERE, "Failed the MPI process", e);
        throw new RuntimeException(e);
    } catch (ParseException e) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("SubmitterMain", cmdOptions);
        throw new RuntimeException("Error parsing command line options: ", e);
    } finally {
        try {
            MPI.Finalize();
        } catch (MPIException ignore) {
            // nothing more can be done if finalization fails during shutdown
        }
    }
}
Also used : MPIException(mpi.MPIException) HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Config(edu.iu.dsc.tws.common.config.Config) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) DefaultParser(org.apache.commons.cli.DefaultParser)

Aggregations

MPIException (mpi.MPIException)5 ResourcePlan (edu.iu.dsc.tws.rsched.spi.resource.ResourcePlan)2 Config (edu.iu.dsc.tws.common.config.Config)1 IContainer (edu.iu.dsc.tws.rsched.spi.container.IContainer)1 CharBuffer (java.nio.CharBuffer)1 IntBuffer (java.nio.IntBuffer)1 HashMap (java.util.HashMap)1 Request (mpi.Request)1 Status (mpi.Status)1 CommandLine (org.apache.commons.cli.CommandLine)1 CommandLineParser (org.apache.commons.cli.CommandLineParser)1 DefaultParser (org.apache.commons.cli.DefaultParser)1 HelpFormatter (org.apache.commons.cli.HelpFormatter)1 Options (org.apache.commons.cli.Options)1 ParseException (org.apache.commons.cli.ParseException)1