Search in sources :

Example 1 with Configurable

use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.

the class PluginDescriptor method packParameters.

/**
 * Packs the parameters entered by the user into the parameter set required by this plugin.
 * Performs some operations, such as file loading, on the user side.
 * <p>
 * Input values are matched by position with the configurable fields. For a field
 * flagged as a file browser, or a credential field whose value is a {@code String},
 * the value is interpreted as a file path and replaced by the content of that file.
 * A missing path ({@code null} or empty string) is propagated to the plugin as
 * {@code null}; the plugin then decides whether that is acceptable.
 *
 * @param parameters input parameters, one per configurable field, in field order
 * @return output parameters, in the same order as the input
 * @throws RMException when the number of parameters does not match the number of
 *                     configurable fields, or when a referenced file cannot be loaded
 */
public Object[] packParameters(Object[] parameters) throws RMException {
    int configurableFieldsSize = configurableFields.size();
    // validate before allocating anything
    if (parameters.length != configurableFieldsSize) {
        throw new RMException("Incorrect number of parameters: expected " + configurableFieldsSize + ", provided " + parameters.length);
    }
    List<Object> resultParams = new ArrayList<>(configurableFieldsSize);
    int counter = 0;
    for (ConfigurableField field : configurableFields) {
        Object value = parameters[counter++];
        Configurable configurable = field.getMeta();
        boolean credentialsFilePath = configurable.credential() && value instanceof String;
        if (configurable.fileBrowser() || credentialsFilePath) {
            // a null value means "no path specified", exactly like an empty string
            String filePath = value == null ? "" : value.toString();
            if (filePath.isEmpty()) {
                // in case the file path is not specified propagate null to the plugin,
                // it will decide then if it's acceptable or not
                value = null;
            } else {
                try {
                    value = FileToBytesConverter.convertFileToByteArray(new File(filePath));
                } catch (IOException e) {
                    throw new RMException("Cannot load file", e);
                }
            }
        }
        resultParams.add(value);
    }
    return resultParams.toArray();
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) File(java.io.File) RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Example 2 with Configurable

use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.

the class NodeSourceParameterHelper method getParametersWithDynamicParametersUpdatedOnly.

/**
 * Starts from a copy of the old parameters and, for every configurable field,
 * passes the string form of the new and the old value at the field's index to
 * {@code updateDynamicParameterIfNotEqual} together with the copy.
 *
 * @param configurableFields the configurable fields, iterated together with their index
 * @param newParameters      the newly provided parameter values
 * @param oldParameters      the previous parameter values; this list itself is not modified here
 * @return the copy of the old parameters after all fields have been processed
 */
public List<Serializable> getParametersWithDynamicParametersUpdatedOnly(Collection<ConfigurableField> configurableFields, Object[] newParameters, List<Serializable> oldParameters) {
    List<Serializable> merged = new LinkedList<>(oldParameters);
    Lambda.forEachWithIndex(configurableFields, (field, position) -> {
        Configurable fieldMeta = field.getMeta();
        String updated = getStringValue(newParameters, position, fieldMeta);
        Object[] previousValues = oldParameters.toArray();
        String previous = getStringValue(previousValues, position, fieldMeta);
        this.updateDynamicParameterIfNotEqual(merged, updated, previous, position, field);
    });
    return merged;
}
Also used : Serializable(java.io.Serializable) Configurable(org.ow2.proactive.resourcemanager.nodesource.common.Configurable) LinkedList(java.util.LinkedList)

Example 3 with Configurable

use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.

the class SchedulerTHelper method testJobSubmission.

/**
 * Creates and submits a job, and checks, with assertions,
 * the events related to this job submission:
 * 1/ job submitted event
 * 2/ job passing from pending to running (with state set to running).
 * 3/ job passing from running to finished (with state set to finished).
 * 4/ every task finished with or without error (configurable)
 * <p>
 * Then returns.
 * <p>
 * This is the simplest events sequence of a job submission. If you need to test
 * specific events or task states (failures, rescheduling, etc.), you must not use this
 * helper; check the events sequence with the waitForEvent**() functions instead.
 *
 * @param userInt         scheduler interface
 * @param jobToSubmit     job object to schedule.
 * @param acceptSkipped   if true then skipped task will not fail the test
 * @param failIfTaskError if true then the test will fail if a task was in error
 * @return JobId, the job's identifier.
 * @throws Exception if an error occurs at job submission, or during
 *                   verification of events sequence.
 */
public JobId testJobSubmission(Scheduler userInt, Job jobToSubmit, boolean acceptSkipped, boolean failIfTaskError) throws Exception {
    JobId id = userInt.submit(jobToSubmit);
    log("Job submitted, id " + id.toString());
    log("Waiting for jobSubmitted");
    JobState receivedState = waitForEventJobSubmitted(id);
    Assert.assertEquals(id, receivedState.getId());
    log("Waiting for job running");
    JobInfo jInfo = waitForEventJobRunning(id);
    // expected value first, so that a failure message reports the ids the right way round
    Assert.assertEquals(id, jInfo.getJobId());
    Assert.assertEquals("Job " + jInfo.getJobId(), JobStatus.RUNNING, jInfo.getStatus());
    log("Waiting for job finished");
    jInfo = waitForEventJobFinished(userInt, id);
    Assert.assertEquals("Job " + jInfo.getJobId(), JobStatus.FINISHED, jInfo.getStatus());
    log("Job finished");
    boolean taskError = false;
    String message = "";
    // per-task verification is only done for task flow jobs
    if (jobToSubmit instanceof TaskFlowJob) {
        JobState jobState = userInt.getJobState(id);
        for (TaskState t : jobState.getTasks()) {
            log("Looking at the result of task : " + t.getName());
            if (t.getStatus() == TaskStatus.FAULTY) {
                TaskResult tres = userInt.getTaskResult(jInfo.getJobId(), t.getName());
                if (tres == null) {
                    message = "Task result of " + t.getName() + " should not be null.";
                    taskError = true;
                    break;
                }
                if (tres.getOutput() != null) {
                    System.err.println("Output of failing task (" + t.getName() + ") :");
                    System.err.println(tres.getOutput().getAllLogs(true));
                }
                if (tres.hadException()) {
                    System.err.println("Exception occurred in task (" + t.getName() + ") :");
                    tres.getException().printStackTrace(System.err);
                    message = "Exception occurred in task (" + t.getName() + ")";
                    taskError = true;
                    break;
                }
            } else if (acceptSkipped && t.getStatus() == TaskStatus.SKIPPED) {
                // skipped tasks are tolerated when acceptSkipped is set: do nothing
            } else if (t.getStatus() != TaskStatus.FINISHED) {
                message = "Invalid task status for task " + t.getName() + " : " + t.getStatus();
                taskError = true;
                break;
            } else {
                TaskResult tres = userInt.getTaskResult(jInfo.getJobId(), t.getName());
                System.out.println("Output of task (" + t.getName() + ") :");
                // same null-safety as for faulty tasks: printing the logs is informational
                // only and must not abort the verification with a NullPointerException
                if (tres != null && tres.getOutput() != null) {
                    System.out.println(tres.getOutput().getAllLogs(true));
                }
            }
        }
    }
    // a task error only fails the test when the caller asked for it
    if (taskError && failIfTaskError) {
        fail(message);
    }
    return id;
}
Also used : TaskResult(org.ow2.proactive.scheduler.common.task.TaskResult) TaskState(org.ow2.proactive.scheduler.common.task.TaskState)

Example 4 with Configurable

use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.

the class PluginDescriptor method findConfigurableFileds.

/**
 * Looks through {@code cls}, which represents a plugin, and collects the configurable
 * skeleton of the plugin: one {@link ConfigurableField} per declared field annotated
 * with {@link Configurable}. Superclasses (other than {@code Object}) are visited first.
 * <p>
 * The recorded value of a field is the string form of its current value on
 * {@code instance}; when that value is unavailable or {@code null}, the entry of
 * {@code defaultValues} for the field name is used, or an empty string if there is none.
 *
 * @param cls      class to inspect
 * @param instance object the field values are read from
 */
private void findConfigurableFileds(Class<?> cls, Object instance) {
    Class<?> parent = cls.getSuperclass();
    if (parent != null && parent != Object.class) {
        findConfigurableFileds(parent, instance);
    }
    for (Field declared : cls.getDeclaredFields()) {
        Configurable annotation = declared.getAnnotation(Configurable.class);
        if (annotation == null) {
            // not a configurable field
            continue;
        }
        String fieldName = declared.getName();
        declared.setAccessible(true);
        Object current = null;
        try {
            current = declared.get(instance);
        } catch (Exception ignored) {
            // best effort: when the value cannot be read, fall back to the defaults below
        }
        String text;
        if (current != null) {
            text = current.toString();
        } else if (this.defaultValues.get(fieldName) != null) {
            text = this.defaultValues.get(fieldName);
        } else {
            text = "";
        }
        configurableFields.add(new ConfigurableField(fieldName, text, annotation));
    }
}
Also used : Field(java.lang.reflect.Field) IOException(java.io.IOException) RMException(org.ow2.proactive.resourcemanager.exception.RMException) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 5 with Configurable

use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.

the class BatchJobInfrastructure method startNode.

/**
 * Builds the command line to execute on the batch system frontend, runs it through
 * ssh and waits for the launched node to register. If the node doesn't register
 * (i.e. {@link #internalRegisterAcquiredNode(Node)} isn't called) before the
 * timeout (configurable) value, an exception is raised. If the submit command
 * sent to the frontend fails, the node supposed to be launched is
 * not expected anymore and will be discarded at registration time.
 * <p>
 * This method only returns normally once the node has been acquired; every other
 * way out of the monitoring loop ends with an {@link RMException}.
 *
 * @throws RMException if the ssh command cannot be executed, if too many errors occur
 *                     while monitoring the ssh subprocess, or if the monitoring loop
 *                     ends without the node having registered
 */
private void startNode() throws RMException {
    CommandLineBuilder clb = new CommandLineBuilder();
    // generate the node name
    // current rmcore shortID should be added to ensure uniqueness
    String nodeName = getBatchinJobSystemName() + "-" + nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
    clb.setNodeName(nodeName);
    clb.setJavaPath(this.javaPath);
    clb.setRmURL(getRmUrl());
    clb.setRmHome(this.schedulingPath);
    clb.setSourceName(this.nodeSource.getName());
    clb.setPaProperties(this.javaOptions);
    try {
        clb.setCredentialsValueAndNullOthers(new String(getCredentials().getBase64()));
    } catch (KeyException e) {
        this.handleFailedDeployment(clb, e);
    }
    InetAddress host = null;
    try {
        host = InetAddress.getByName(this.serverName);
    } catch (UnknownHostException e) {
        this.handleFailedDeployment(clb, e);
    }
    String deleteCmd = getDeleteJobCommand();
    String submitCmd = getSubmitJobCommand();
    // build the command: echo "script.sh params"|qsub params
    // two variants are built from buildCommandLine(true) and buildCommandLine(false):
    // the first is executed, the second is only attached to the deploying node
    String cmd = null;
    String obfuscatedCmd = null;
    try {
        cmd = "echo \\\"" + clb.buildCommandLine(true).replace("\"", "\\\"") + "\\\" | " + submitCmd + " " + this.submitJobOpt;
        obfuscatedCmd = "echo \\\"" + clb.buildCommandLine(false).replace("\"", "\\\"") + "\\\" | " + submitCmd + " " + this.submitJobOpt;
    } catch (IOException e) {
        this.handleFailedDeployment(clb, e);
    }
    // add a deploying node.
    final String dnURL = super.addDeployingNode(nodeName, obfuscatedCmd, "Deploying node on " + getBatchinJobSystemName() + " scheduler", this.nodeTimeOut);
    putPnTimeout(dnURL, Boolean.FALSE);
    // executing the command
    Process p;
    try {
        p = Utils.runSSHCommand(host, cmd, this.sshOptions);
    } catch (IOException e1) {
        throw new RMException("Cannot execute ssh command: " + cmd + " on host: " + this.serverName, e1);
    }
    // recover the Job ID through stdout
    StringBuilder submitOutput = new StringBuilder();
    InputStream in = p.getInputStream();
    int b;
    try {
        while ((b = in.read()) > -1) {
            submitOutput.append((char) b);
        }
    } catch (IOException e) {
        // keep going with whatever was read so far, but leave a trace of the failure
        logger.warn("Cannot fully read the output of the submit command for node " + nodeName, e);
    }
    String id = submitOutput.toString();
    // check for registration
    // at this point, the ssh process should have already exited because it
    // only handle the job submission, not the execution... furthermore
    // the "id" is defined
    String lf = System.lineSeparator();
    final long timeout = nodeTimeOut;
    long t1 = System.currentTimeMillis();
    // Hack. SSHClient fails but qSub succeeds.
    boolean isJobIDValid = false;
    // Tries to wait for this node
    // registration...
    int circuitBreakerThreshold = 5;
    while (!getPnTimeout(dnURL) && circuitBreakerThreshold > 0) {
        try {
            int exitCode = p.exitValue();
            if (exitCode != 0 && !isJobIDValid) {
                logger.warn("SSH subprocess at " + host.getHostName() + " exit code != 0 but IM tries to recover from this error...Current submit command's output: " + id + " and associated node's name: " + nodeName);
                String extractedID = this.extractSubmitOutput(id);
                String errput = this.extractProcessErrput(p);
                final String description = "SSH command failed to launch node on " + getBatchinJobSystemName() + " scheduler" + lf + "   >Error code: " + exitCode + lf + "   >Errput: " + errput + "   >Output: " + id;
                // a non-empty extracted id means the submission itself went through,
                // so keep waiting for the registration...
                if (extractedID != null && !extractedID.equals("")) {
                    isJobIDValid = true;
                }
                // defines how to recover from this state
                // throws a RMException if we can't
                handleWrongJobTermination(isJobIDValid, nodeName, dnURL, host, id, description, exitCode, submitCmd, deleteCmd);
            }
        } catch (IllegalThreadStateException e) {
            // process has not returned yet
            logger.trace("Waiting for ssh process to exit in BatchJobInfrastructure");
        }
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok
            p.destroy();
            addNodeAndDecrementDeployingNode(nodeName, this.extractSubmitOutput(id));
            return;
        }
        try {
            logger.debug("Waiting for node " + nodeName + " registration... time to timeout: " + (timeout - (System.currentTimeMillis() - t1)));
            Thread.sleep(BatchJobInfrastructure.NODE_ACQUISITION_CHECK_RATE);
        } catch (Exception e) {
            // NOTE(review): this also catches InterruptedException without restoring the
            // interrupt flag; left unchanged because restoring it would alter the loop's pacing
            circuitBreakerThreshold--;
            logger.error("While monitoring ssh subprocess.", e);
        }
    }
    // end of while loop, either deploying node timeout/removed or
    // threshold reached
    // the node is not expected anymore
    atomicRemovePnTimeoutAndJob(nodeName, dnURL, p, id);
    if (circuitBreakerThreshold <= 0) {
        logger.error("Circuit breaker threshold reached while monitoring ssh subprocess.");
        throw new RMException("Several exceptions occurred while monitoring ssh subprocess.");
    }
    // if we are here we reached an invalid state
    throw new RMException("Invalid state, exit from a control loop with threshold > 0 and expected deploying node");
}
Also used : UnknownHostException(java.net.UnknownHostException) InputStream(java.io.InputStream) CommandLineBuilder(org.ow2.proactive.resourcemanager.utils.CommandLineBuilder) Throwables.getStackTraceAsString(com.google.common.base.Throwables.getStackTraceAsString) IOException(java.io.IOException) KeyException(java.security.KeyException) RMException(org.ow2.proactive.resourcemanager.exception.RMException) KeyException(java.security.KeyException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) RMException(org.ow2.proactive.resourcemanager.exception.RMException) InetAddress(java.net.InetAddress)

Aggregations

IOException (java.io.IOException)3 RMException (org.ow2.proactive.resourcemanager.exception.RMException)3 Throwables.getStackTraceAsString (com.google.common.base.Throwables.getStackTraceAsString)1 File (java.io.File)1 InputStream (java.io.InputStream)1 Serializable (java.io.Serializable)1 Field (java.lang.reflect.Field)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 InetAddress (java.net.InetAddress)1 UnknownHostException (java.net.UnknownHostException)1 KeyException (java.security.KeyException)1 ArrayList (java.util.ArrayList)1 LinkedList (java.util.LinkedList)1 Configurable (org.ow2.proactive.resourcemanager.nodesource.common.Configurable)1 CommandLineBuilder (org.ow2.proactive.resourcemanager.utils.CommandLineBuilder)1 TaskResult (org.ow2.proactive.scheduler.common.task.TaskResult)1 TaskState (org.ow2.proactive.scheduler.common.task.TaskState)1