Use of com.yahoo.vespa.hosted.dockerapi.Container in project vespa by vespa-engine.
From the class DockerOperationsImpl, method executeCommandInNetworkNamespace.
/**
 * Runs the given command inside the network namespace of a running container.
 *
 * The command is wrapped as {@code sudo nsenter --net=<proc>/<pid>/ns/net -- <command...>}, where
 * {@code <proc>} is the "proc" directory resolved under the host root path and {@code <pid>} is the
 * PID reported for the container.
 *
 * @param containerName name of the container whose network namespace should be entered
 * @param command       the command to run, followed by its arguments
 * @throws RuntimeException if no running container with a PID is found for the name, if the command
 *                          exits with a non-zero code, or if executing it raises an IOException
 */
@Override
public void executeCommandInNetworkNamespace(ContainerName containerName, String... command) {
    final PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName);

    // Only consider the container if it is currently running.
    final Integer pid = docker.getContainer(containerName)
            .filter(c -> c.state.isRunning())
            .map(c -> c.pid)
            .orElseThrow(() -> new RuntimeException("PID not found for container with name: " + containerName.asString()));

    final Path hostProcDir = environment.getPathResolver().getPathToRootOfHost().resolve("proc");

    // Final argv = nsenter prefix followed by the caller's command, in order.
    final String[] nsenterPrefix = {"sudo", "nsenter", String.format("--net=%s/%d/ns/net", hostProcDir, pid), "--"};
    final String[] wrappedCommand = Arrays.copyOf(nsenterPrefix, nsenterPrefix.length + command.length);
    System.arraycopy(command, 0, wrappedCommand, nsenterPrefix.length, command.length);

    final Pair<Integer, String> result;
    try {
        result = processExecuter.exec(wrappedCommand);
    } catch (IOException e) {
        logger.warning(String.format("IOException while executing %s in network namespace for %s (PID = %d)", Arrays.toString(wrappedCommand), containerName.asString(), pid), e);
        throw new RuntimeException(e);
    }

    final Integer exitCode = result.getFirst();
    if (exitCode == 0) {
        return;
    }

    // Non-zero exit: log and surface the full context, including the command output.
    final String failure = String.format("Failed to execute %s in network namespace for %s (PID = %d), exit code: %d, output: %s", Arrays.toString(wrappedCommand), containerName.asString(), pid, exitCode, result.getSecond());
    logger.error(failure);
    throw new RuntimeException(failure);
}
Use of com.yahoo.vespa.hosted.dockerapi.Container in project vespa by vespa-engine.
From the class NodeAgentImpl, method converge.
// Runs one convergence pass: fetches this host's node spec from the node repository and performs
// the actions that belong to the spec's current node state. Non-private for testing.
void converge() {
    final Optional<ContainerNodeSpec> maybeNodeSpec = nodeRepository.getContainerNodeSpec(hostname);
    if (!maybeNodeSpec.isPresent()) {
        // We just removed the node from node repo, so this is expected until NodeAdmin stops this NodeAgent
        if (expectNodeNotInNodeRepo) {
            return;
        }
        throw new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname));
    }
    final ContainerNodeSpec nodeSpec = maybeNodeSpec.get();
    expectNodeNotInNodeRepo = false;

    final Optional<Container> container = getContainer();

    final boolean specChanged = !nodeSpec.equals(lastNodeSpec);
    if (specChanged) {
        // The spec differs from the last one seen: rewrite the metrics config, but only for a
        // container that is currently running.
        final boolean containerIsRunning = container.map(c -> c.state.isRunning()).orElse(false);
        if (containerIsRunning) {
            storageMaintainer.writeMetricsConfig(containerName, nodeSpec);
        }
        addDebugMessage("Loading new node spec: " + nodeSpec.toString());
        lastNodeSpec = nodeSpec;
    }

    switch (nodeSpec.nodeState) {
        case ready:
        case reserved:
        case parked:
        case failed:
        case inactive:
            removeContainerIfNeededUpdateContainerState(nodeSpec, container);
            updateNodeRepoWithCurrentAttributes(nodeSpec);
            break;

        case provisioned:
            nodeRepository.markAsDirty(hostname);
            break;

        case dirty:
            removeContainerIfNeededUpdateContainerState(nodeSpec, container);
            logger.info("State is " + nodeSpec.nodeState + ", will delete application storage and mark node as ready");
            storageMaintainer.cleanupNodeStorage(containerName, nodeSpec);
            updateNodeRepoWithCurrentAttributes(nodeSpec);
            nodeRepository.markNodeAvailableForNewAllocation(hostname);
            // The node has been handed back, so a missing spec on the next pass is expected.
            expectNodeNotInNodeRepo = true;
            break;

        case active: {
            storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);

            // Remove old files when disk usage divided by BYTES_IN_GB, relative to the spec's
            // minDiskAvailableGb, is at least 0.8.
            storageMaintainer.getDiskUsageFor(containerName)
                    .filter(diskUsage -> (double) diskUsage / BYTES_IN_GB / nodeSpec.minDiskAvailableGb >= 0.8)
                    .ifPresent(ignored -> storageMaintainer.removeOldFilesFromNode(containerName));

            scheduleDownLoadIfNeeded(nodeSpec);
            if (isDownloadingImage()) {
                addDebugMessage("Waiting for image to download " + imageBeingDownloaded.asString());
                return;
            }

            final Optional<Container> remainingContainer = removeContainerIfNeededUpdateContainerState(nodeSpec, container);
            if (!remainingContainer.isPresent()) {
                // No container left after the removal check: start a new one.
                storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
                containerState = STARTING;
                startContainer(nodeSpec);
                containerState = UNKNOWN;
            }

            runLocalResumeScriptIfNeeded(nodeSpec);

            // The resume call is deliberately last, because stopping a bad release from rolling
            // out in prod matters most. If we fail after updating the node repo attributes but
            // before resume, the app may go through the tenant pipeline but will halt in prod.
            //
            // This problem exists only because there are 2 different mechanisms that should
            // really be parts of a single mechanism:
            //  - The content of node repo is used to determine whether a new Vespa+application
            //    has been successfully rolled out.
            //  - Slobrok and internal orchestrator state is used to determine whether
            //    to allow upgrade (suspend).
            updateNodeRepoWithCurrentAttributes(nodeSpec);
            logger.info("Call resume against Orchestrator");
            orchestrator.resume(hostname);
            break;
        }

        default:
            throw new RuntimeException("UNKNOWN STATE " + nodeSpec.nodeState.name());
    }
}
Aggregations