Use of org.ow2.proactive.resourcemanager.nodesource.infrastructure.SSHInfrastructureV2 in the project "scheduling" by ow2-proactive.
Class TestSSHInfrastructureV2, method testSSHInfrastructureV2.
/**
 * Creates a node source named "testSSHInfra" that combines {@link SSHInfrastructureV2} with
 * {@link StaticPolicy}, waits for its creation, and then verifies that the resource manager
 * reports exactly NB_NODES nodes in total and NB_NODES free nodes.
 *
 * @throws Exception if any call to the resource manager or to the test helper fails
 */
@Test
public void testSSHInfrastructureV2() throws Exception {
    nsname = "testSSHInfra";
    resourceManager = this.rmHelper.getResourceManager();

    RMTHelper.log("Test - Create SSH infrastructure on ssh://localhost on port " + this.port);

    final String infrastructureType = SSHInfrastructureV2.class.getName();
    final String policyType = StaticPolicy.class.getName();
    resourceManager.createNodeSource(nsname,
                                     infrastructureType,
                                     infraParams,
                                     policyType,
                                     policyParameters,
                                     NODES_NOT_RECOVERABLE);
    this.rmHelper.waitForNodeSourceCreation(nsname, NB_NODES, this.rmHelper.getMonitorsHandler());

    RMTHelper.log("Checking scheduler state after node source creation");
    RMState stateAfterCreation = resourceManager.getState();
    assertEquals(NB_NODES, stateAfterCreation.getTotalNodesNumber());
    assertEquals(NB_NODES, stateAfterCreation.getFreeNodesNumber());
}
Use of org.ow2.proactive.resourcemanager.nodesource.infrastructure.SSHInfrastructureV2 in the project "scheduling" by ow2-proactive.
Class TestSSHInfrastructureV2RestartDownNodesPolicy, method testSSHInfrastructureV2WithRestartDownNodes.
/**
 * Creates a node source that combines {@link SSHInfrastructureV2} with
 * {@link RestartDownNodesPolicy}, kills the runtime hosting the deployed nodes, and checks that
 * the nodes are reported DOWN, removed, and finally added again so that the resource manager
 * ends up with NB_NODES alive nodes.
 * <p>
 * The order in which events are awaited matters: each wait consumes events from the monitors
 * handler, so the statements below must stay in this sequence.
 *
 * @throws Exception if a resource manager call fails or an expected event does not arrive
 */
@Test
public void testSSHInfrastructureV2WithRestartDownNodes() throws Exception {
    nsname = "testSSHInfraRestart";
    resourceManager = this.rmHelper.getResourceManager();

    RMTHelper.log("Test - Create SSH infrastructure with RestartDownNodes policy on ssh://localhost on port " +
                  TestSSHInfrastructureV2.port);
    resourceManager.createNodeSource(nsname,
                                     SSHInfrastructureV2.class.getName(),
                                     TestSSHInfrastructureV2.infraParams,
                                     RestartDownNodesPolicy.class.getName(),
                                     TestSSHInfrastructureV2.policyParameters,
                                     NODES_NOT_RECOVERABLE);

    RMMonitorsHandler eventsHandler = this.rmHelper.getMonitorsHandler();
    this.rmHelper.waitForNodeSourceCreation(nsname, NB_NODES, eventsHandler);

    // Right after creation every deployed node must be known and free.
    RMState rmState = resourceManager.getState();
    assertEquals(NB_NODES, rmState.getTotalNodesNumber());
    assertEquals(NB_NODES, rmState.getFreeNodesNumber());

    // Reserve all the nodes; abort early when the deployment did not deliver them.
    NodeSet acquiredNodes = resourceManager.getNodes(new Criteria(NB_NODES));
    if (acquiredNodes.size() != NB_NODES) {
        final String failure = "Illegal state : the infrastructure could not deploy nodes or they died immediately. Ending test";
        RMTHelper.log(failure);
        throw new RuntimeException(failure);
    }

    // Consume the state-change event that follows the reservation of each node.
    for (Node acquired : acquiredNodes) {
        rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
                                  acquired.getNodeInformation().getURL(),
                                  60000,
                                  eventsHandler);
    }

    String firstNodeUrl = acquiredNodes.get(0).getNodeInformation().getURL();
    RMTHelper.log("Killing nodes");
    // Nodes will be redeployed only if we kill the whole runtime
    rmHelper.killRuntime(firstNodeUrl);

    RMTHelper.log("Wait for down nodes detection by the rm");
    for (Node killed : acquiredNodes) {
        RMNodeEvent downEvent = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
                                                          killed.getNodeInformation().getURL(),
                                                          120000,
                                                          eventsHandler);
        assertEquals(NodeState.DOWN, downEvent.getNodeState());
    }
    for (Node killed : acquiredNodes) {
        rmHelper.waitForNodeEvent(RMEventType.NODE_REMOVED,
                                  killed.getNodeInformation().getURL(),
                                  120000,
                                  eventsHandler);
    }

    RMTHelper.log("Dumping events not consumed yet");
    eventsHandler.dumpEvents();

    RMTHelper.log("Wait for nodes restart by the policy");
    rmHelper.waitForAnyMultipleNodeEvent(RMEventType.NODE_ADDED, NB_NODES, eventsHandler);
    // One removed/added/state-changed triple is awaited per node.
    int remainingNodes = NB_NODES;
    while (remainingNodes > 0) {
        rmHelper.waitForAnyNodeEvent(RMEventType.NODE_REMOVED, eventsHandler);
        rmHelper.waitForAnyNodeEvent(RMEventType.NODE_ADDED, eventsHandler);
        rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, eventsHandler);
        remainingNodes--;
    }

    RMTHelper.log("Final checks on the scheduler state");
    acquiredNodes = resourceManager.getNodes(new Criteria(NB_NODES));
    for (Node restarted : acquiredNodes) {
        System.out.println("NODE::" + restarted.getNodeInformation().getURL());
    }

    rmState = resourceManager.getState();
    assertEquals(NB_NODES, rmState.getTotalNodesNumber());
    // check amount of all nodes that are not down
    assertEquals(NB_NODES, rmState.getTotalAliveNodesNumber());
}
Use of org.ow2.proactive.resourcemanager.nodesource.infrastructure.SSHInfrastructureV2 in the project "scheduling" by ow2-proactive.
Class TestNodeSourceThreadPool, method testNodeSourceDeploymentThreadsDoNotLeakWithInfiniteRetry.
/**
 * Checks that node deployment threads are not leaked when the infrastructure retries forever.
 * <p>
 * This setup makes LOST nodes on one host and FREE nodes on another host. The infrastructure is
 * configured with an infinite number of deployment retries. After cycling two redeployment
 * phases thanks to the policy, the expected situation is:
 * <ul>
 * <li>one thread always taken by the infinite retry on the fake host;</li>
 * <li>one thread available to deploy with success/redeploy on the localhost.</li>
 * </ul>
 * A timeout is thrown if a thread leak prevents the redeployment from happening.
 * <p>
 * NOTE(review): the original description speaks of a two-thread deployment configuration while
 * the configuration constant is named SIX_THREADS_CONFIG_FILE_PATH - confirm which is accurate.
 *
 * @throws Exception if the RM cannot be started, a call to it fails, or the redeployment of the
 *             killed node is not observed within NODE_STATE_CHANGED_TIMEOUT
 */
@Test
public void testNodeSourceDeploymentThreadsDoNotLeakWithInfiniteRetry() throws Exception {
    File rmConfigFile = new File(RMTHelper.class.getResource(SIX_THREADS_CONFIG_FILE_PATH).toURI());
    this.rmHelper.startRM(rmConfigFile.getAbsolutePath());
    this.resourceManager = this.rmHelper.getResourceManager();

    this.resourceManager.defineNodeSource(NODE_SOURCE_NAME,
                                          SSHInfrastructureV2.class.getName(),
                                          getInfiniteRetryInfrastructureParameters(),
                                          RestartDownNodesPolicy.class.getName(),
                                          getRestartDownNodesPolicyParameters(),
                                          NODES_NOT_RECOVERABLE);
    this.resourceManager.deployNodeSource(NODE_SOURCE_NAME);

    RMTHelper.log("Waiting for the RM to have one free node");
    // Poll until the node deployed on the reachable host shows up as free (no upper bound here).
    while (this.resourceManager.getState().getFreeNodesNumber() != ONE_NODE_PER_HOST) {
        Thread.sleep(ACTIVE_WAITING_PERIOD);
    }
    assertEquals(1, this.resourceManager.getState().getAliveNodes().size());

    // Two kill/redeploy cycles: each one needs a deployment thread to be available.
    for (int cycle = 0; cycle < 2; cycle++) {
        String killedNodeUrl = this.resourceManager.getState().getFreeNodes().iterator().next();
        RMTHelper.killRuntime(killedNodeUrl);

        RMMonitorsHandler eventsHandler = this.rmHelper.getMonitorsHandler();
        eventsHandler.flushEvents();

        RMTHelper.log("Waiting for the RM to detect the down node");
        RMNodeEvent downEvent = RMTHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
                                                           killedNodeUrl,
                                                           NODE_STATE_CHANGED_TIMEOUT,
                                                           eventsHandler);
        assertEquals(NodeState.DOWN, downEvent.getNodeState());

        RMTHelper.log("Waiting for the RM to redeploy the down node");
        int elapsedWaitingTime = 0;
        while (this.resourceManager.getState().getFreeNodesNumber() != ONE_NODE_PER_HOST) {
            Thread.sleep(ACTIVE_WAITING_PERIOD);
            elapsedWaitingTime += ACTIVE_WAITING_PERIOD;
            if (elapsedWaitingTime > NODE_STATE_CHANGED_TIMEOUT) {
                throw new TestTimedOutException(NODE_STATE_CHANGED_TIMEOUT, TimeUnit.MILLISECONDS);
            }
        }
    }

    assertEquals(1, this.resourceManager.getState().getAliveNodes().size());
}
Use of org.ow2.proactive.resourcemanager.nodesource.infrastructure.SSHInfrastructureV2 in the project "scheduling" by ow2-proactive.
Class RecoverSSHInfrastructureV2Test, method startRmAndCheckInitialState.
/**
 * Starts the RM with START_CONFIG, checks that it starts empty with node preservation on
 * shutdown enabled, then creates a recoverable node source based on {@link SSHInfrastructureV2}
 * and verifies that it is the only node source known to the RM and that it holds
 * TestSSHInfrastructureV2.NB_NODES nodes.
 *
 * @throws Exception if the RM cannot be started or a call to it fails
 */
private void startRmAndCheckInitialState() throws Exception {
    // start RM
    startRmWithConfig(START_CONFIG);
    boolean preserveNodesOnShutdown = PAResourceManagerProperties.RM_PRESERVE_NODES_ON_SHUTDOWN.getValueAsBoolean();
    assertThat(preserveNodesOnShutdown).isTrue();
    assertThat(rmHelper.isRMStarted()).isTrue();

    // check the initial state of the RM
    int initialNodeCount = resourceManager.getState().getAllNodes().size();
    assertThat(initialNodeCount).isEqualTo(0);

    resourceManager.createNodeSource(NODE_SOURCE_NAME,
                                     SSHInfrastructureV2.class.getName(),
                                     TestSSHInfrastructureV2.infraParams,
                                     StaticPolicy.class.getName(),
                                     TestSSHInfrastructureV2.policyParameters,
                                     NODES_RECOVERABLE);
    RMTHelper.waitForNodeSourceCreation(NODE_SOURCE_NAME,
                                        TestSSHInfrastructureV2.NB_NODES,
                                        this.rmHelper.getMonitorsHandler());

    // the resource manager handle is also used as the monitoring event receiver
    RMMonitorEventReceiver monitor = (RMMonitorEventReceiver) resourceManager;
    List<RMNodeSourceEvent> nodeSourceEvents = monitor.getInitialState().getNodeSourceEvents();
    assertThat(nodeSourceEvents.size()).isEqualTo(1);
    assertThat(nodeSourceEvents.get(0).getSourceName()).isEqualTo(NODE_SOURCE_NAME);

    int deployedNodeCount = monitor.getState().getAllNodes().size();
    assertThat(deployedNodeCount).isEqualTo(TestSSHInfrastructureV2.NB_NODES);
}
Use of org.ow2.proactive.resourcemanager.nodesource.infrastructure.SSHInfrastructureV2 in the project "scheduling" by ow2-proactive.
Class SSHInfrastructureV2, method startNodeImpl.
/**
 * Internal node acquisition method.
 * <p>
 * Builds the node startup command line, registers the expected deploying nodes, then opens an
 * SSH session to the host, runs the command through an "exec" channel and polls until all the
 * expected nodes are acquired, the infrastructure is shut down, a timeout is reached or (when
 * nodes are not detached) the remote process exits.
 * <p>
 * Whatever the outcome, the timeouts registered for the deploying nodes are removed and the SSH
 * session, if it was opened, is disconnected before this method returns or throws.
 *
 * @param hostTracker the host on which the nodes will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list receiving the URLs of the deploying (or lost) nodes created
 * @throws RMException if the credentials cannot be retrieved, the command line cannot be built,
 *             or the SSH session or channel cannot be established; deployment failures are
 *             reported through declareLostAndThrow
 */
public void startNodeImpl(final HostTracker hostTracker, final int nbNodes, final List<String> depNodeURLs) throws RMException {
    String fs = getTargetOSObj().fs;

    // javaOptions is optional: treat a missing value as "no extra option" so that the
    // containment checks below cannot fail with a NullPointerException
    final String configuredJavaOptions = this.javaOptions == null ? "" : this.javaOptions;

    ArrayList<String> paProperties = new ArrayList<>();
    // a path containing spaces must be wrapped in double quotes
    final boolean containsSpace = schedulingPath.contains(" ");
    final String quote = containsSpace ? "\"" : "";
    // the node-jar startup script mode sets none of the path-based properties below
    final boolean nodeJarMode = deploymentMode.equals("useNodeJarStartupScript");

    if (!nodeJarMode) {
        paProperties.add("-Dproactive.home=" + quote + schedulingPath + quote);
    }

    // we set the java security policy file, unless the user options already define it
    String securityOption = CentralPAPropertyRepository.JAVA_SECURITY_POLICY.getCmdLine();
    if (!nodeJarMode && !configuredJavaOptions.contains(securityOption)) {
        paProperties.add(securityOption + quote + this.schedulingPath + fs + "config" + fs +
                         "security.java.policy-client" + quote);
    }

    // we set the log4j configuration file, unless the user options already define it
    String log4jOption = CentralPAPropertyRepository.LOG4J.getCmdLine();
    if (!nodeJarMode && !configuredJavaOptions.contains(log4jOption)) {
        // log4j only understands urls
        String leadingSlash = this.schedulingPath.startsWith("/") ? "" : "/";
        paProperties.add(log4jOption + quote + "file:" + leadingSlash + this.schedulingPath.replace("\\", "/") +
                         "/config/log/node.properties" + quote);
    }

    // we add extra java/PA configuration
    String trimmedJavaOptions = configuredJavaOptions.trim();
    if (!trimmedJavaOptions.isEmpty()) {
        paProperties.add(trimmedJavaOptions);
    }

    CommandLineBuilder clb = super.getDefaultCommandLineBuilder(getTargetOSObj());

    final boolean deployNodesInDetachedMode = PAResourceManagerProperties.RM_NODES_RECOVERY.getValueAsBoolean() ||
                                              PAResourceManagerProperties.RM_PRESERVE_NODES_ON_SHUTDOWN.getValueAsBoolean();
    if (deployNodesInDetachedMode) {
        // if we do not want to kill the nodes when the RM exits or
        // restarts, then we should launch the nodes in background and
        // ignore the RM termination signal
        clb.setDetached();
    }

    clb.setJavaPath(this.javaPath);
    clb.setRmHome(this.schedulingPath);
    clb.setPaProperties(paProperties);
    final String nodeName = nodeNameBuilder.generateNodeName(hostTracker);
    clb.setNodeName(nodeName);
    clb.setNumberOfNodes(nbNodes);

    // set the startup script retrieved from NodeCommandLine.properties
    if (!this.deploymentMode.equals("autoGenerated")) {
        clb.setDeploymentMode(deploymentMode);
        clb.setStartupScript((deploymentMode.equals("useStartupScript") ? startupScriptStandard
                                                                        : startupScriptWithNodeJarDownload));
    }
    if (nodeJarMode) {
        clb.setNodeJarUrl(nodeJarUrl);
    }

    // finally, the credential's value
    String credString;
    try {
        Client currentClient = super.nodeSource.getAdministrator();
        credString = new String(currentClient.getCredentials().getBase64());
    } catch (KeyException e) {
        throw new RMException("Could not get base64 credentials", e);
    }
    clb.setCredentialsValueAndNullOthers(credString);

    // two variants of the command line: the one actually executed, and an obfuscated one
    // that is used wherever the command is displayed or logged
    String cmdLine;
    String obfuscatedCmdLine;
    try {
        cmdLine = clb.buildCommandLine(true);
        obfuscatedCmdLine = clb.buildCommandLine(false);
    } catch (IOException e) {
        throw new RMException("Cannot build the " + RMNodeStarter.class.getSimpleName() + "'s command line.", e);
    }

    // escape the double quotes of the command to make it runnable through ssh
    final String finalCmdLine = cmdLine.replace("\"", "\\\"");

    // The final addDeployingNode() method will initiate a timeout that
    // will declare node as lost and set the description of the failure
    // with a simplistic message, since there is no way to override this
    // mechanism we consider only 90% of timeout to set custom description
    // in case of failure and still allow global timeout
    final int shorterTimeout = Math.round((90 * super.nodeTimeOut) / 100);

    JSch jsch = new JSch();

    final String msg = "deploy on " + hostTracker.getResolvedAddress();

    // add the expected nodes. every unexpected node will be discarded
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, obfuscatedCmdLine, msg, super.nodeTimeOut));
    addTimeouts(depNodeURLs);

    Session session;
    try {
        // Create ssh session to the hostname
        session = jsch.getSession(this.sshUsername, hostTracker.getResolvedAddress().getHostName(), this.sshPort);
        if (this.sshPassword == null) {
            jsch.addIdentity(this.sshUsername, this.sshPrivateKey, null, null);
        } else {
            session.setPassword(this.sshPassword);
        }
        session.setConfig(this.sshOptions);
        session.connect(shorterTimeout);
    } catch (JSchException e) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "unable to " + msg + "\n" + getStackTraceAsString(e));
        // this exit path is not covered by the finally block below: undo addTimeouts() here
        removeTimeouts(depNodeURLs);
        throw new RMException("unable to " + msg, e);
    }

    // log the obfuscated variant only, so that the credentials embedded in the executed
    // command line do not end up in the logs
    SSHInfrastructureV2.logger.info("Executing SSH command: '" + obfuscatedCmdLine + "'");

    ScheduledExecutorService deployService = Executors.newSingleThreadScheduledExecutor();
    try {
        // Create ssh channel to run the cmd; both stdout and stderr are captured in baos
        ByteArrayOutputStream baos = new ByteArrayOutputStream(DEFAULT_OUTPUT_BUFFER_LENGTH);
        ChannelExec channel;
        try {
            channel = (ChannelExec) session.openChannel("exec");
            channel.setCommand(finalCmdLine);
            channel.setOutputStream(baos);
            channel.setErrStream(baos);
            channel.connect();
        } catch (JSchException e) {
            multipleDeclareDeployingNodeLost(depNodeURLs, "unable to " + msg + "\n" + getStackTraceAsString(e));
            throw new RMException("unable to " + msg, e);
        }

        final ChannelExec chan = channel;
        // poll once per second until all the nodes are acquired or a failure is detected
        Future<Void> deployResult = deployService.submit(new Callable<Void>() {
            @Override
            public Void call() throws Exception {
                while (!shutDown.get() && !checkAllNodesAreAcquiredAndDo(createdNodeNames, null, null)) {
                    if (anyTimedOut(depNodeURLs)) {
                        throw new IllegalStateException("The upper infrastructure has issued a timeout");
                    }
                    // in detached mode the node processes live completely independently,
                    // so the exit status of the channel is only meaningful otherwise
                    if (!deployNodesInDetachedMode && chan.getExitStatus() != PROCESS_STILL_RUNNING_VALUE) {
                        throw new IllegalStateException("The jvm process of the node has exited prematurely");
                    }
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        // we know the cause of this interruption (the task was cancelled
                        // or the executor shut down): just exit
                        return null;
                    }
                }
                // Victory
                return null;
            }
        });

        try {
            deployResult.get(shorterTimeout, TimeUnit.MILLISECONDS);
        } catch (ExecutionException e) {
            declareLostAndThrow("Unable to " + msg + " due to " + e.getCause(), depNodeURLs, channel, baos, e);
        } catch (InterruptedException e) {
            deployResult.cancel(true);
            declareLostAndThrow("Unable to " + msg + " due to an interruption", depNodeURLs, channel, baos, e);
        } catch (TimeoutException e) {
            deployResult.cancel(true);
            declareLostAndThrow("Unable to " + msg + " due to timeout", depNodeURLs, channel, baos, e);
        } finally {
            channel.disconnect();
        }
    } finally {
        removeTimeouts(depNodeURLs);
        session.disconnect();
        deployService.shutdownNow();
    }
}
Aggregations