Search in sources :

Example 1 with InitNodeException

use of es.bsc.compss.exceptions.InitNodeException in project compss by bsc-wdc.

the class WorkerStarter method killPreviousWorker.

/**
 * Runs the stop command for a worker process left over from an earlier start attempt.
 *
 * Does nothing when {@code pid} is -1.
 *
 * @param user user handed to the command executor
 * @param name resource name handed to the command executor and used in the error message
 * @param pid  process id used to build the stop command, or -1 when there is nothing to stop
 * @throws InitNodeException if executing the stop command yields no process output
 */
private void killPreviousWorker(String user, String name, int pid) throws InitNodeException {
    if (pid == -1) {
        // No previous process to stop
        return;
    }

    // Command was started but the worker could not be contacted: run the stop command
    String[] stopCommand = getStopCommand(pid);
    ProcessOut result = executeCommand(user, name, stopCommand);

    if (result == null) {
        // Queue System managed worker starter
        String msg = "[START_CMD_ERROR]: An Error has occurred when queue system started NIO worker in resource " + name + ". Retries not available in this option.";
        LOGGER.error(msg);
        throw new InitNodeException(msg);
    }

    if (result.getExitValue() != 0) {
        // Normal starting process: the stop command failed, only report it
        LOGGER.error(ERROR_SHUTTING_DOWN_RETRY);
    }
}
Also used : InitNodeException(es.bsc.compss.exceptions.InitNodeException) ProcessOut(es.bsc.compss.nio.master.handlers.ProcessOut)

Example 2 with InitNodeException

use of es.bsc.compss.exceptions.InitNodeException in project compss by bsc-wdc.

the class WorkerStarter method startWorker.

/**
 * Starts the current worker, trying each port of the configured range in turn.
 *
 * The starter first registers itself in {@code addressToWorkerStarter} under the worker name.
 * For every port from the configured minimum to the configured maximum (while no stop has been
 * requested) it stops the process of the previous attempt, launches a new worker process and
 * checks its connectivity. On the first port where the worker is ready, an {@code Ender}
 * shutdown hook is registered and the node is returned.
 *
 * @return the node (worker name and port) on which the worker became ready
 * @throws InitNodeException if the starter was asked to stop, if no port in the range produced a
 *         ready worker, or if stopping/launching a worker process fails
 */
public NIONode startWorker() throws InitNodeException {
    final String name = this.nw.getName();
    final String user = this.nw.getUser();
    final int minPort = this.nw.getConfiguration().getMinPort();
    final int maxPort = this.nw.getConfiguration().getMaxPort();

    // Solves exit error 143
    synchronized (addressToWorkerStarter) {
        addressToWorkerStarter.put(name, this);
        LOGGER.debug("[WorkerStarter] Worker starter for " + name + " registers in the hashmap");
    }

    int pid = -1;
    for (int port = minPort; port <= maxPort && !this.toStop; ++port) {
        // Kill previous worker processes if any
        killPreviousWorker(user, name, pid);

        // Instantiate the node and launch the worker on the candidate port
        NIONode candidate = new NIONode(name, port);
        pid = startWorker(user, name, port);

        // Check worker status
        LOGGER.info("[WorkerStarter] Worker process started. Checking connectivity...");
        checkWorker(candidate, name);

        // Check received ack
        LOGGER.debug("[WorkerStarter] Retries for " + name + " have finished.");
        if (this.workerIsReady) {
            // Success: make sure the worker is ended on JVM shutdown and hand back the node
            try {
                Runtime.getRuntime().addShutdownHook(new Ender(this, this.nw, pid));
            } catch (IllegalStateException e) {
                LOGGER.warn("Tried to shutdown vm while it was already being shutdown", e);
            }
            return candidate;
        }
        // Not ready: the loop update moves on to the next port
    }

    // This can be because node is stopping or because we reached the maximum available ports
    final String msg;
    if (this.toStop) {
        msg = "[STOP]: Worker " + name + " stopped during creation because application is stopped";
    } else if (!this.workerIsReady) {
        msg = "[TIMEOUT]: Could not start the NIO worker on resource " + name + " through user " + user + ".";
    } else {
        msg = "[UNKNOWN]: Could not start the NIO worker on resource " + name + " through user " + user + ".";
    }
    LOGGER.warn(msg);
    throw new InitNodeException(msg);
}
Also used : NIONode(es.bsc.comm.nio.NIONode) InitNodeException(es.bsc.compss.exceptions.InitNodeException) Ender(es.bsc.compss.nio.master.handlers.Ender)

Example 3 with InitNodeException

use of es.bsc.compss.exceptions.InitNodeException in project compss by bsc-wdc.

the class GATWorkerNode method initWorkingDir.

/**
 * Initializes the working directory of this node by executing the init script through a
 * {@code GATScriptExecutor}.
 *
 * The script location is built from the protocol schema, the optional {@code user@} prefix, the
 * host, the install directory and the script path/name constants; the working directory is passed
 * to the script as its only parameter.
 *
 * @throws InitNodeException if the init script location is not a valid URI or if the script
 *         execution reports failure
 */
private void initWorkingDir() throws InitNodeException {
    LinkedList<URI> traceScripts = new LinkedList<>();
    LinkedList<String> traceParams = new LinkedList<>();
    String host = getHost();
    String installDir = getInstallDir();
    String workingDir = getWorkingDir();

    // Build the "user@" prefix; it stays empty when no user is configured
    String user = getUser();
    if (user == null || user.isEmpty()) {
        user = "";
    } else {
        user += "@";
    }

    String initScriptPath = Protocol.ANY_URI.getSchema() + user + host + File.separator + installDir + GAT_SCRIPT_PATH + INIT_SCRIPT_NAME;
    try {
        traceScripts.add(new URI(initScriptPath));
    } catch (URISyntaxException e) {
        // The exception used to be created but never thrown, which silently dropped the error
        // and went on to run the executor with an empty script list.
        throw new InitNodeException("Error adding initScript " + initScriptPath + ": " + e.getMessage());
    }
    traceParams.add(workingDir);

    // Use cleaner to run the trace script and generate the package
    LOGGER.debug("Initializing working dir " + workingDir + "  in host " + getName());
    boolean result = new GATScriptExecutor(this).executeScript(traceScripts, traceParams, "init_" + host);
    if (!result) {
        throw new InitNodeException("Error executing init script for initializing working dir " + workingDir + " in host " + getName());
    }
}
Also used : InitNodeException(es.bsc.compss.exceptions.InitNodeException) URISyntaxException(java.net.URISyntaxException) URI(org.gridlab.gat.URI) MultiURI(es.bsc.compss.types.uri.MultiURI) SimpleURI(es.bsc.compss.types.uri.SimpleURI) GATScriptExecutor(es.bsc.compss.gat.master.utils.GATScriptExecutor) LinkedList(java.util.LinkedList)

Example 4 with InitNodeException

use of es.bsc.compss.exceptions.InitNodeException in project compss by bsc-wdc.

the class NIOWorkerNode method start.

/**
 * Starts this worker node through a new {@code WorkerStarter} and, once the worker is up,
 * records the resulting node, marks this node as started and starts NIO tracing when the tracer
 * is activated.
 *
 * @throws InitNodeException if the worker starter fails; the failure is reported through
 *         {@code ErrorManager.warn} and then rethrown unchanged
 */
@Override
public void start() throws InitNodeException {
    NIONode startedNode;
    try {
        this.workerStarter = new WorkerStarter(this);
        startedNode = this.workerStarter.startWorker();
    } catch (InitNodeException e) {
        ErrorManager.warn("There was an exception when initiating worker " + getName() + ".", e);
        throw e;
    }

    // Only reached when the worker started successfully
    this.node = startedNode;
    this.started = true;

    if (!NIOTracer.isActivated()) {
        return;
    }
    LOGGER.debug("Initializing NIO tracer " + this.getName());
    NIOTracer.startTracing(this.getName(), this.getUser(), this.getHost(), this.getLimitOfTasks());
}
Also used : NIONode(es.bsc.comm.nio.NIONode) InitNodeException(es.bsc.compss.exceptions.InitNodeException)

Example 5 with InitNodeException

use of es.bsc.compss.exceptions.InitNodeException in project compss by bsc-wdc.

the class WorkerStarter method startWorker.

/**
 * Launches the worker process on the given port, retrying until a PID is obtained or the
 * accumulated wait exceeds {@code MAX_WAIT_FOR_SSH}.
 *
 * After an initial wait, the start command is executed repeatedly with a pause of
 * {@code 4 * WAIT_TIME_UNIT} after every attempt. When the executor returns no process output
 * (queue system managed start) the PID is reported as 0. When the command exits with 0, the PID
 * is read from the last line of its output.
 *
 * @param user user handed to the command executor and used in error messages
 * @param name resource name handed to the command executor and used in log/error messages
 * @param port port used to build the start command
 * @return the PID parsed from the command output, or 0 when the executor returned no output
 * @throws InitNodeException if the command keeps failing past the maximum wait, or if it
 *         succeeds but the last line of its output is not a valid integer PID
 */
private int startWorker(String user, String name, int port) throws InitNodeException {
    // Initial wait
    try {
        Thread.sleep(START_WORKER_INITIAL_WAIT);
    } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
    }
    long timer = START_WORKER_INITIAL_WAIT;
    // Try to launch the worker until we receive the PID or we timeout
    int pid = -1;
    String[] command = getStartCommand(port);
    do {
        ProcessOut po = executeCommand(user, name, command);
        if (po == null) {
            // Queue System managed worker starter
            LOGGER.debug("Worker process started in resource " + name + " by queue system.");
            pid = 0;
        } else if (po.getExitValue() == 0) {
            // Success: the PID is expected on the last line of the command output
            String output = po.getOutput();
            String[] lines = (output == null) ? new String[0] : output.split("\n");
            // split() yields an empty array for output made only of newlines; trim() drops a
            // trailing '\r' or spaces that would otherwise make parseInt fail
            String lastLine = (lines.length == 0) ? "" : lines[lines.length - 1].trim();
            try {
                pid = Integer.parseInt(lastLine);
            } catch (NumberFormatException nfe) {
                // Report malformed output through the declared checked exception instead of
                // letting an unchecked NumberFormatException escape to the caller
                throw new InitNodeException("[START_CMD_ERROR]: Could not parse the PID of the NIO worker in resource " + name + " through user " + user + ".\n" + "OUTPUT:" + output + "\n" + "ERROR:" + po.getError() + "\n");
            }
        } else {
            if (timer > MAX_WAIT_FOR_SSH) {
                // Timeout
                throw new InitNodeException("[START_CMD_ERROR]: Could not start the NIO worker in resource " + name + " through user " + user + ".\n" + "OUTPUT:" + po.getOutput() + "\n" + "ERROR:" + po.getError() + "\n");
            }
            LOGGER.warn(" Worker process failed to start in resource " + name + ". Retrying...");
        }
        // Sleep between retries
        try {
            Thread.sleep(4 * WAIT_TIME_UNIT);
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
        }
        timer = timer + (4 * WAIT_TIME_UNIT);
    } while (pid < 0);
    return pid;
}
Also used : InitNodeException(es.bsc.compss.exceptions.InitNodeException) ProcessOut(es.bsc.compss.nio.master.handlers.ProcessOut)

Aggregations

InitNodeException (es.bsc.compss.exceptions.InitNodeException)5 NIONode (es.bsc.comm.nio.NIONode)2 ProcessOut (es.bsc.compss.nio.master.handlers.ProcessOut)2 GATScriptExecutor (es.bsc.compss.gat.master.utils.GATScriptExecutor)1 Ender (es.bsc.compss.nio.master.handlers.Ender)1 MultiURI (es.bsc.compss.types.uri.MultiURI)1 SimpleURI (es.bsc.compss.types.uri.SimpleURI)1 URISyntaxException (java.net.URISyntaxException)1 LinkedList (java.util.LinkedList)1 URI (org.gridlab.gat.URI)1