Use of mpi.MPIException in project twister2 by DSC-SPIDAL: class MPIProcess, method createResourcePlan.
/**
 * Builds the {@link ResourcePlan} for this MPI job by exchanging the processor
 * name of every rank across MPI_COMM_WORLD.
 *
 * <p>Two collective steps: first an allGather of each rank's name length, then an
 * allGatherv of the name characters using the gathered lengths as displacements.
 *
 * @param config the job configuration (cluster type, container settings)
 * @return the resource plan populated with one container per rank
 * @throws RuntimeException if any MPI collective operation fails
 */
private static ResourcePlan createResourcePlan(Config config) {
  try {
    // Rank and world size of this process; query each exactly once.
    int rank = MPI.COMM_WORLD.getRank();
    int worldSize = MPI.COMM_WORLD.getSize();
    ResourcePlan resourcePlan = new ResourcePlan(MPIContext.clusterType(config), rank);

    String processName = MPI.getProcessorName();
    char[] processNameChars = new char[processName.length()];
    int length = processNameChars.length;
    processName.getChars(0, length, processNameChars, 0);

    // Step 1: every rank announces how many characters its name has.
    IntBuffer countSend = MPI.newIntBuffer(1);
    IntBuffer countReceive = MPI.newIntBuffer(worldSize);
    countSend.put(length);
    MPI.COMM_WORLD.allGather(countSend, 1, MPI.INT, countReceive, 1, MPI.INT);

    // Convert the per-rank lengths into receive counts and displacements
    // for the variable-length gather below; sum is the total character count.
    int[] receiveSizes = new int[worldSize];
    int[] displacements = new int[worldSize];
    int sum = 0;
    for (int i = 0; i < worldSize; i++) {
      receiveSizes[i] = countReceive.get(i);
      displacements[i] = sum;
      sum += receiveSizes[i];
    }

    // Step 2: gather the actual name characters from every rank.
    CharBuffer sendBuffer = MPI.newCharBuffer(length);
    CharBuffer receiveBuffer = MPI.newCharBuffer(sum);
    sendBuffer.append(processName);
    MPI.COMM_WORLD.allGatherv(sendBuffer, length, MPI.CHAR,
        receiveBuffer, receiveSizes, displacements, MPI.CHAR);

    // Slice the flat receive buffer back into one name per rank; the buffer
    // position advances with each get(c), matching the displacement layout.
    Map<Integer, String> processNames = new HashMap<>();
    for (int i = 0; i < receiveSizes.length; i++) {
      char[] c = new char[receiveSizes[i]];
      receiveBuffer.get(c);
      processNames.put(i, new String(c));
      LOG.log(Level.FINE, String.format("Process %d name: %s", i, processNames.get(i)));
    }

    // Register a container for each rank using the gathered host names.
    addContainers(config, resourcePlan, processNames);
    return resourcePlan;
  } catch (MPIException e) {
    throw new RuntimeException("Failed to communicate", e);
  }
}
Use of mpi.MPIException in project twister2 by DSC-SPIDAL: class MPIProcess, method worker.
/**
 * Runs the worker side of the job: builds the resource plan, reflectively loads
 * the user's {@link IContainer} implementation, synchronizes all ranks at a
 * barrier, and then hands control to the container.
 *
 * @param config the job configuration
 * @param rank   this process's MPI rank, used as the worker id
 * @throws RuntimeException if the container class cannot be loaded or the
 *                          startup barrier fails
 */
private static void worker(Config config, int rank) {
  // lets create the resource plan
  ResourcePlan resourcePlan = createResourcePlan(config);

  String containerClass = MPIContext.containerClass(config);
  IContainer container;
  try {
    Object object = ReflectionUtils.newInstance(containerClass);
    container = (IContainer) object;
    LOG.log(Level.FINE, "loaded container class: " + containerClass);
  } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
    LOG.log(Level.SEVERE, String.format("failed to load the container class %s", containerClass), e);
    throw new RuntimeException(e);
  }

  // lets do a barrier here so everyone is synchronized at the start
  try {
    MPI.COMM_WORLD.barrier();
    LOG.log(Level.FINE, String.format("Worker %d: the cluster is ready...", rank));
  } catch (MPIException e) {
    // include the exception so the stack trace is not lost
    LOG.log(Level.SEVERE, "Failed to synchronize the workers at the start", e);
    throw new RuntimeException(e);
  }

  // now initialize the container
  container.init(config, rank, resourcePlan);
}
Use of mpi.MPIException in project twister2 by DSC-SPIDAL: class TWSMPIChannel, method progress.
/**
 * Progress the communications that are pending.
 *
 * <p>Four phases: (1) drain the pending-send queue by posting iSend requests,
 * (2) re-post receives for any rank with free buffers, (3) test outstanding
 * sends for completion and fire their callbacks, (4) test outstanding receives
 * for completion and fire their callbacks.
 */
public void progress() {
  // Phase 1: post every queued send and move it to the completion-wait list.
  // we should rate limit here
  while (pendingSends.size() > 0) {
    // post the message
    MPISendRequests sendRequests = pendingSends.poll();
    // post the send
    if (sendRequests != null) {
      postMessage(sendRequests);
      waitForCompletionSends.add(sendRequests);
    }
  }

  // Phase 2: for each registered receive, post new irecv requests while
  // buffers are available.
  for (int i = 0; i < registeredReceives.size(); i++) {
    MPIReceiveRequests receiveRequests = registeredReceives.get(i);
    if (debug) {
      LOG.info(String.format("%d available receive %d %d %s", executor, receiveRequests.rank, receiveRequests.availableBuffers.size(), receiveRequests.availableBuffers.peek()));
    }
    // okay we have more buffers to be posted
    if (receiveRequests.availableBuffers.size() > 0) {
      postReceive(receiveRequests);
    }
  }

  // Phase 3: test outstanding sends in order; stop at the first incomplete
  // request (canProgress = false) since later requests of the same message
  // cannot have finished usefully out of order.
  Iterator<MPISendRequests> sendRequestsIterator = waitForCompletionSends.iterator();
  boolean canProgress = true;
  while (sendRequestsIterator.hasNext() && canProgress) {
    MPISendRequests sendRequests = sendRequestsIterator.next();
    Iterator<MPIRequest> requestIterator = sendRequests.pendingSends.iterator();
    while (requestIterator.hasNext()) {
      MPIRequest r = requestIterator.next();
      try {
        Status status = r.request.testStatus();
        // this request has finished
        if (status != null) {
          completedSendCount++;
          requestIterator.remove();
        } else {
          canProgress = false;
          break;
        }
      } catch (MPIException e) {
        throw new RuntimeException("Failed to complete the send to: " + sendRequests.rank, e);
      }
    }
    // ideally we should be able to call for each finish of the buffer
    if (sendRequests.pendingSends.size() == 0) {
      sendRequests.callback.onSendComplete(sendRequests.rank, sendRequests.edge, sendRequests.message);
      sendRequestsIterator.remove();
    }
  }

  // Diagnostic counters; gated on the same debug flag used above
  // (was a dead `if (false)` block).
  if (debug) {
    LOG.info(String.format("%d sending - sent %d comp send %d receive %d pend recv %d pending sends %d waiting %d", executor, sendCount, completedSendCount, receiveCount, pendingReceiveCount, pendingSends.size(), waitForCompletionSends.size()));
  }

  // Phase 4: test outstanding receives; completed buffers are handed to the
  // callback with their actual byte count, cancelled requests are fatal.
  for (int i = 0; i < registeredReceives.size(); i++) {
    MPIReceiveRequests receiveRequests = registeredReceives.get(i);
    try {
      Iterator<MPIRequest> requestIterator = receiveRequests.pendingRequests.iterator();
      while (requestIterator.hasNext()) {
        MPIRequest r = requestIterator.next();
        Status status = r.request.testStatus();
        if (status != null) {
          if (!status.isCancelled()) {
            ++receiveCount;
            // lets call the callback about the receive complete
            r.buffer.setSize(status.getCount(MPI.BYTE));
            receiveRequests.callback.onReceiveComplete(receiveRequests.rank, receiveRequests.edge, r.buffer);
            requestIterator.remove();
          } else {
            throw new RuntimeException("MPI receive request cancelled");
          }
        } else {
          // first incomplete request: later ones for this rank wait too
          break;
        }
      }
      // this request has completed
    } catch (MPIException e) {
      LOG.log(Level.SEVERE, "Twister2Network failure", e);
      throw new RuntimeException("Twister2Network failure", e);
    }
  }
}
Use of mpi.MPIException in project twister2 by DSC-SPIDAL: class TWSMPIChannel, method postMessage.
/**
 * Send a message to the given rank.
 *
 * <p>Posts one non-blocking iSend per buffer of the message and registers each
 * resulting request on {@code requests.pendingSends} so {@code progress()} can
 * test it for completion.
 *
 * @param requests the message
 * @throws RuntimeException if posting any iSend fails
 */
private void postMessage(MPISendRequests requests) {
  MPIMessage message = requests.message;
  for (int i = 0; i < message.getBuffers().size(); i++) {
    try {
      sendCount++;
      MPIBuffer buffer = message.getBuffers().get(i);
      Request request = comm.iSend(buffer.getByteBuffer(), buffer.getSize(), MPI.BYTE, requests.rank, message.getHeader().getEdge());
      // register to the loop to make progress on the send
      requests.pendingSends.add(new MPIRequest(request, buffer));
    } catch (MPIException e) {
      // preserve the cause so the MPI stack trace is not lost
      throw new RuntimeException("Failed to send message to rank: " + requests.rank, e);
    }
  }
}
Use of mpi.MPIException in project twister2 by DSC-SPIDAL: class MPIProcess, method main.
/**
 * Entry point for an MPI worker process: initializes MPI, parses the command
 * line, loads the configuration, and runs the worker. MPI is always finalized
 * in the finally block, even on failure.
 *
 * @param args command-line arguments, also passed to MPI.Init
 * @throws RuntimeException on MPI failure or unparseable command-line options
 */
public static void main(String[] args) {
  Options cmdOptions = null;
  try {
    MPI.Init(args);
    int rank = MPI.COMM_WORLD.getRank();

    cmdOptions = setupOptions();
    CommandLineParser parser = new DefaultParser();
    // parse the help options first.
    CommandLine cmd = parser.parse(cmdOptions, args);

    // load the configuration
    // we are loading the configuration for all the components
    Config config = loadConfigurations(cmd, rank);

    // normal worker
    LOG.log(Level.FINE, "A worker process is starting...");
    worker(config, rank);
  } catch (MPIException e) {
    LOG.log(Level.SEVERE, "Failed the MPI process", e);
    throw new RuntimeException(e);
  } catch (ParseException e) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp("SubmitterMain", cmdOptions);
    throw new RuntimeException("Error parsing command line options: ", e);
  } finally {
    try {
      MPI.Finalize();
    } catch (MPIException ignore) {
      // best effort: the process is exiting, a Finalize failure is not actionable
    }
  }
}
Aggregations