Search in sources :

Example 1 with Connection

use of es.bsc.comm.Connection in project compss by bsc-wdc.

the class NIOWorkerNode method stop.

/**
 * Stops this worker.
 *
 * <p>When the worker has been started, a connection to {@code node} is opened, registered with the
 * communication manager together with {@code sl}, and a {@code CommandShutdown} is sent through it.
 * When the worker has not been started, the worker starter is flagged to stop and {@code sl} is
 * notified of the end right away.
 *
 * <p>If the worker is marked as started but {@code node} is {@code null}, {@code sl} is notified of
 * the failure with an {@code UnstartedNodeException} and no connection is attempted.
 *
 * @param sl listener that receives the failure/end notification of the shutdown
 */
@Override
public void stop(ShutdownListener sl) {
    if (started) {
        LOGGER.debug("Shutting down " + this.getName());
        if (node == null) {
            sl.notifyFailure(new UnstartedNodeException());
            LOGGER.error("Shutdown has failed");
            // The failure has already been reported; do not open a connection to a null node.
            return;
        }
        Connection c = NIOAgent.getTransferManager().startConnection(node);
        // Register the pending shutdown before sending the command so the listener is in place
        // by the time anything comes back on this connection.
        commManager.shuttingDown(this, c, sl);
        CommandShutdown cmd = new CommandShutdown(null, null);
        c.sendCommand(cmd);
        c.receive();
        c.finishConnection();
    } else {
        LOGGER.debug("Worker " + this.getName() + " has not started. Setting this to be stopped");
        workerStarter.setToStop();
        sl.notifyEnd();
    }
}
Also used : UnstartedNodeException(es.bsc.compss.exceptions.UnstartedNodeException) Connection(es.bsc.comm.Connection) CommandShutdown(es.bsc.compss.nio.commands.CommandShutdown)

Example 2 with Connection

use of es.bsc.comm.Connection in project compss by bsc-wdc.

the class NIOWorkerNode method shutdownExecutionManager.

/**
 * Shuts down the execution manager of this worker.
 *
 * <p>When the worker has been started, a connection to {@code node} is opened, registered with the
 * communication manager together with {@code esl}, and a {@code CommandExecutorShutdown} is sent
 * through it. When the worker has not been started, the execution manager is considered stopped
 * and {@code esl} is notified of the end right away.
 *
 * <p>If the worker is marked as started but {@code node} is {@code null}, {@code esl} is notified
 * of the failure with an {@code UnstartedNodeException} and no connection is attempted.
 *
 * @param esl listener that receives the failure/end notification of the shutdown
 */
@Override
public void shutdownExecutionManager(ExecutorShutdownListener esl) {
    if (started) {
        LOGGER.debug("Shutting down execution manager " + this.getName());
        if (node == null) {
            esl.notifyFailure(new UnstartedNodeException());
            LOGGER.error("Shutdown execution manager has failed");
            // The failure has already been reported; do not open a connection to a null node.
            return;
        }
        Connection c = NIOAgent.getTransferManager().startConnection(node);
        // Register the pending shutdown before sending the command so the listener is in place
        // by the time anything comes back on this connection.
        commManager.shuttingDownEM(this, c, esl);
        CommandExecutorShutdown cmd = new CommandExecutorShutdown(null);
        c.sendCommand(cmd);
        c.receive();
        c.finishConnection();
    } else {
        LOGGER.debug("Worker " + this.getName() + " has not started. Considering execution manager stopped");
        esl.notifyEnd();
    }
}
Also used : CommandExecutorShutdown(es.bsc.compss.nio.commands.CommandExecutorShutdown) UnstartedNodeException(es.bsc.compss.exceptions.UnstartedNodeException) Connection(es.bsc.comm.Connection)

Example 3 with Connection

use of es.bsc.comm.Connection in project compss by bsc-wdc.

the class NIOWorkerNode method generateWorkersDebugInfo.

/**
 * Asks the worker to generate its debug files and waits until the communication manager reports
 * that they have been generated.
 *
 * @return {@code true} if the command was sent and the wait completed; {@code false} if the worker
 *         was not started or has no {@code node} to connect to
 */
@Override
public boolean generateWorkersDebugInfo() {
    if (started) {
        LOGGER.debug("Sending command to generate worker debug files for " + this.getHost());
        if (node == null) {
            LOGGER.error("Worker debug files generation has failed.");
            // Without a node there is nothing to connect to; report that nothing was generated.
            return false;
        }
        Connection c = NIOAgent.getTransferManager().startConnection(node);
        CommandGenerateWorkerDebugFiles cmd = new CommandGenerateWorkerDebugFiles();
        c.sendCommand(cmd);
        c.receive();
        c.finishConnection();
        // Blocks until the communication manager signals that the files are available.
        commManager.waitUntilWorkersDebugInfoGenerated();
        LOGGER.debug("Worker debug files generated");
        return true;
    } else {
        LOGGER.debug("Worker debug files not generated because worker was not started");
        return false;
    }
}
Also used : Connection(es.bsc.comm.Connection) CommandGenerateWorkerDebugFiles(es.bsc.compss.nio.commands.workerFiles.CommandGenerateWorkerDebugFiles)

Example 4 with Connection

use of es.bsc.comm.Connection in project compss by bsc-wdc.

the class WorkerStarter method checkWorker.

/**
 * Polls a worker with {@code CommandCheckWorker} commands until it is flagged as ready.
 *
 * <p>A check command is sent at least once. Polling ends when {@code workerIsReady} is set, when
 * the accumulated delay reaches {@code MAX_WAIT_FOR_INIT}, when {@code toStop} is set, or when the
 * calling thread is interrupted. The delay between checks starts at {@code WAIT_TIME_UNIT}, doubles
 * while it is below 3,900 and is fixed at 4,000 afterwards (values are passed to
 * {@link Thread#sleep(long)}, i.e. milliseconds).
 *
 * @param n node to which the check commands are sent
 * @param name worker name, used in the command and in the log messages
 */
private void checkWorker(NIONode n, String name) {
    long delay = WAIT_TIME_UNIT;
    long totalWait = 0;
    CommandCheckWorker cmd = new CommandCheckWorker(DEPLOYMENT_ID, name);
    do {
        if (DEBUG) {
            LOGGER.debug("[WorkerStarter] Sending check command to worker " + name);
        }
        // Send command check
        Connection c = NIOAdaptor.getTransferManager().startConnection(n);
        c.sendCommand(cmd);
        c.receive();
        c.finishConnection();
        // Sleep before next iteration
        try {
            LOGGER.debug("[WorkerStarter] Waiting to send next check worker command with delay " + delay);
            Thread.sleep(delay);
        } catch (InterruptedException ie) {
            // Restore the flag for the caller and stop polling: with the flag set every further
            // sleep would throw immediately and the checks would be sent with no delay at all.
            Thread.currentThread().interrupt();
            return;
        }
        totalWait += delay;
        // Exponential back-off capped at 4 seconds.
        delay = (delay < 3_900) ? delay * 2 : 4_000;
    } while (!this.workerIsReady && totalWait < MAX_WAIT_FOR_INIT && !this.toStop);
}
Also used : CommandCheckWorker(es.bsc.compss.nio.commands.CommandCheckWorker) Connection(es.bsc.comm.Connection)

Example 5 with Connection

use of es.bsc.comm.Connection in project compss by bsc-wdc.

the class NIOWorker method sendTaskDone.

/**
 * Notifies the master node that a task has finished.
 *
 * <p>Opens a connection to {@code masterNode} (up to {@code MAX_RETRIES} attempts with a short
 * random pause between them), sends a {@code CommandTaskDone} carrying the task result, makes sure
 * the job's {@code .out} and {@code .err} files exist, and sends both files when worker debug is
 * enabled or the task was not successful. If no connection can be obtained the error is logged and
 * nothing is sent.
 *
 * @param nt task whose completion is reported
 * @param successful whether the task ended successfully
 */
public void sendTaskDone(NIOTask nt, boolean successful) {
    int taskID = nt.getJobId();

    // Obtain a connection to the master, retrying on failure.
    Connection c = null;
    Exception lastError = null;
    int retries = 0;
    while (c == null && retries < MAX_RETRIES) {
        try {
            c = TM.startConnection(masterNode);
            if (c == null) {
                lastError = new IllegalStateException("Nullable connection");
            }
        } catch (Exception e) {
            lastError = e;
        }
        if (c == null) {
            retries++;
            if (retries < MAX_RETRIES) {
                // Short random pause (< 50 microseconds) before the next attempt.
                try {
                    Random randomGenerator = new Random(System.currentTimeMillis());
                    int waitNanos = (int) (50000 * randomGenerator.nextFloat());
                    Thread.sleep(0, waitNanos);
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }
    if (c == null) {
        // All attempts failed: report it instead of dereferencing a null connection below.
        WORKER_LOGGER.error("Exception sending Task notification", lastError);
        return;
    }

    // Notify task done
    NIOTaskResult tr = new NIOTaskResult(taskID, nt.getParams());
    if (WORKER_LOGGER.isDebugEnabled()) {
        WORKER_LOGGER.debug("TASK RESULT FOR TASK ID " + taskID);
        WORKER_LOGGER.debug(tr);
    }
    CommandTaskDone cmd = new CommandTaskDone(this, tr, successful);
    c.sendCommand(cmd);

    // Check that output files already exists. If not exists generate an empty one.
    String jobFilePrefix = workingDir + File.separator + "jobs" + File.separator + "job" + nt.getJobId() + "_" + nt.getHist();
    String taskFileOutName = jobFilePrefix + ".out";
    String taskFileErrName = jobFilePrefix + ".err";
    writePlaceholderIfMissing(new File(taskFileOutName), "stdout", "output");
    writePlaceholderIfMissing(new File(taskFileErrName), "stderr", "error");

    // The job logs are only shipped when debugging or when the task failed.
    if (isWorkerDebugEnabled || !successful) {
        WORKER_LOGGER.debug("Sending file " + taskFileOutName);
        c.sendDataFile(taskFileOutName);
        WORKER_LOGGER.debug("Sending file " + taskFileErrName);
        c.sendDataFile(taskFileErrName);
    }
    c.finishConnection();
    WORKER_LOGGER.debug("Task " + taskID + " send task done with " + retries + " retries");
}

/**
 * Creates {@code logFile} with a placeholder message when it does not exist yet. I/O errors are
 * logged and otherwise ignored.
 *
 * @param logFile job log file to check
 * @param streamName stream name used in the placeholder text ({@code "stdout"} or {@code "stderr"})
 * @param fileKind file kind used in the error log message ({@code "output"} or {@code "error"})
 */
private void writePlaceholderIfMissing(File logFile, String streamName, String fileKind) {
    if (logFile.exists()) {
        return;
    }
    String errorMessage = "Autogenerated Empty file. An error was produced before generating any log in the " + streamName;
    // try-with-resources closes the stream; no explicit close() is needed.
    try (FileOutputStream stream = new FileOutputStream(logFile)) {
        stream.write(errorMessage.getBytes());
    } catch (IOException ioe) {
        WORKER_LOGGER.error("IOException writing worker " + fileKind + " file: " + logFile, ioe);
    }
}
Also used : Random(java.util.Random) NIOTaskResult(es.bsc.compss.nio.NIOTaskResult) CommandTaskDone(es.bsc.compss.nio.commands.CommandTaskDone) FileOutputStream(java.io.FileOutputStream) Connection(es.bsc.comm.Connection) IOException(java.io.IOException) File(java.io.File) StorageException(storage.StorageException) SerializedObjectException(es.bsc.compss.nio.exceptions.SerializedObjectException) IOException(java.io.IOException) CommException(es.bsc.comm.exceptions.CommException) InitializationException(es.bsc.compss.nio.worker.exceptions.InitializationException) AtomicMoveNotSupportedException(java.nio.file.AtomicMoveNotSupportedException) InvalidMapException(es.bsc.compss.nio.worker.exceptions.InvalidMapException)

Aggregations

Connection (es.bsc.comm.Connection)9 UnstartedNodeException (es.bsc.compss.exceptions.UnstartedNodeException)3 SerializedObjectException (es.bsc.compss.nio.exceptions.SerializedObjectException)2 IOException (java.io.IOException)2 CommException (es.bsc.comm.exceptions.CommException)1 NIONode (es.bsc.comm.nio.NIONode)1 NIOTask (es.bsc.compss.nio.NIOTask)1 NIOTaskResult (es.bsc.compss.nio.NIOTaskResult)1 CommandCheckWorker (es.bsc.compss.nio.commands.CommandCheckWorker)1 CommandDataDemand (es.bsc.compss.nio.commands.CommandDataDemand)1 CommandExecutorShutdown (es.bsc.compss.nio.commands.CommandExecutorShutdown)1 CommandNewTask (es.bsc.compss.nio.commands.CommandNewTask)1 CommandShutdown (es.bsc.compss.nio.commands.CommandShutdown)1 CommandTaskDone (es.bsc.compss.nio.commands.CommandTaskDone)1 Data (es.bsc.compss.nio.commands.Data)1 CommandGeneratePackage (es.bsc.compss.nio.commands.tracing.CommandGeneratePackage)1 CommandGenerateWorkerDebugFiles (es.bsc.compss.nio.commands.workerFiles.CommandGenerateWorkerDebugFiles)1 DataRequest (es.bsc.compss.nio.dataRequest.DataRequest)1 InitializationException (es.bsc.compss.nio.worker.exceptions.InitializationException)1 InvalidMapException (es.bsc.compss.nio.worker.exceptions.InvalidMapException)1