Search in sources :

Example 6 with ResourceDescriptor

use of com.microsoft.frameworklauncher.common.model.ResourceDescriptor in project pai by Microsoft.

the class Node method compareTo.

/**
 * Orders nodes by their available resource, comparing Gpu number first,
 * then Cpu number, then memory in MB.
 *
 * @param other the node to compare against
 * @return 1 if this node has more of the first differing resource, -1 if it
 *         has less, or 0 when all three values are equal
 */
@Override
public int compareTo(Node other) {
    ResourceDescriptor mine = this.getAvailableResource();
    ResourceDescriptor theirs = other.getAvailableResource();

    // Most significant key: Gpu number.
    int gpuOrder = mine.getGpuNumber() > theirs.getGpuNumber() ? 1
            : (mine.getGpuNumber() < theirs.getGpuNumber() ? -1 : 0);
    if (gpuOrder != 0) {
        return gpuOrder;
    }

    // Tie on Gpu: fall back to Cpu number.
    int cpuOrder = mine.getCpuNumber() > theirs.getCpuNumber() ? 1
            : (mine.getCpuNumber() < theirs.getCpuNumber() ? -1 : 0);
    if (cpuOrder != 0) {
        return cpuOrder;
    }

    // Tie on Gpu and Cpu: memory decides, or the nodes compare as equal.
    return mine.getMemoryMB() > theirs.getMemoryMB() ? 1
            : (mine.getMemoryMB() < theirs.getMemoryMB() ? -1 : 0);
}
Also used : ResourceDescriptor(com.microsoft.frameworklauncher.common.model.ResourceDescriptor)

Example 7 with ResourceDescriptor

use of com.microsoft.frameworklauncher.common.model.ResourceDescriptor in project pai by Microsoft.

the class SelectionManager method selectCandidateGpuAttribute.

/**
 * Builds a Gpu attribute bit mask by taking, {@code requestGpuNumber} times,
 * the lowest bit still set in the node's available Gpu attribute.
 *
 * NOTE(review): presumably each set bit stands for one free Gpu on the node;
 * confirm against ResourceDescriptor.
 *
 * @param node             the node whose available resource is inspected
 * @param requestGpuNumber how many bits to take from the available attribute
 * @return the sum of the taken bits; 0 when no bit was taken
 */
@VisibleForTesting
public synchronized Long selectCandidateGpuAttribute(Node node, Integer requestGpuNumber) {
    ResourceDescriptor nodeAvailable = node.getAvailableResource();
    // Only verified when the JVM runs with assertions enabled.
    assert (requestGpuNumber <= nodeAvailable.getGpuNumber());
    Long remainingAttribute = nodeAvailable.getGpuAttribute();
    long pickedAttribute = 0L;
    for (int picked = 0; picked < requestGpuNumber; picked++) {
        // Isolate the lowest set bit, add it to the selection and clear it
        // from the remaining mask. Once the mask is empty this adds 0.
        long lowestBit = Long.lowestOneBit(remainingAttribute);
        pickedAttribute += lowestBit;
        remainingAttribute -= lowestBit;
    }
    return pickedAttribute;
}
Also used : ResourceDescriptor(com.microsoft.frameworklauncher.common.model.ResourceDescriptor) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 8 with ResourceDescriptor

use of com.microsoft.frameworklauncher.common.model.ResourceDescriptor in project pai by Microsoft.

the class SelectionManager method filterNodesForNoneGpuJob.

/**
 * When the job requests no Gpu at all, drops every node whose total resource
 * reports at least one Gpu from {@code filteredNodes}. Does nothing otherwise.
 *
 * @param jobTotalRequestGpu total number of Gpus requested by the job
 */
private void filterNodesForNoneGpuJob(int jobTotalRequestGpu) {
    if (jobTotalRequestGpu != 0) {
        return;
    }
    // Walk from the tail so that removing by index does not shift the
    // entries that are still to be visited.
    int idx = filteredNodes.size();
    while (--idx >= 0) {
        Node candidate = allNodes.get(filteredNodes.get(idx));
        ResourceDescriptor capacity = candidate.getTotalResource();
        if (capacity.getGpuNumber() > 0) {
            LOGGER.logDebug("skip gpu node for none gpu job: Node [%s], Node total resource: [%s]", candidate.getHost(), capacity);
            filteredNodes.remove(idx);
        }
    }
}
Also used : ResourceDescriptor(com.microsoft.frameworklauncher.common.model.ResourceDescriptor)

Example 9 with ResourceDescriptor

use of com.microsoft.frameworklauncher.common.model.ResourceDescriptor in project pai by Microsoft.

the class SelectionManager method select.

/**
 * Selects resources for one task of the given task role.
 *
 * Gathers the role's requested resource, node label, Gpu type, the configured
 * cluster nodes and the number of tasks in start states, then delegates to the
 * multi-argument {@code select}. When the role uses the same ports for all of
 * its tasks, ports to reuse are looked up before the selection and the ports
 * of the result may be cached afterwards for the next task.
 *
 * @param taskRoleName name of the task role to select for
 * @return the result of the delegated selection
 * @throws NotAvailableException declared by this method; presumably raised by
 *         the delegated selection when nothing fits (confirm against callee)
 */
public synchronized SelectionResult select(String taskRoleName) throws NotAvailableException {
    ResourceDescriptor roleResource = requestManager.getTaskResources().get(taskRoleName);
    LOGGER.logInfo("Select: TaskRole: [%s] Resource: [%s]", taskRoleName, roleResource);
    String nodeLabel = requestManager.getTaskPlatParams().get(taskRoleName).getTaskNodeLabel();
    String nodeGpuType = requestManager.getTaskPlatParams().get(taskRoleName).getTaskNodeGpuType();
    Map<String, NodeConfiguration> clusterNodes = requestManager.getClusterConfiguration().getNodes();
    int pendingTaskCount = statusManager.getStartStatesTaskCount(taskRoleName);

    // Ports from live associated containers are preferred; if there are none,
    // fall back to the ports cached from a previous request.
    List<ValueRange> portsToReuse = null;
    if (requestManager.getTaskRoles().get(taskRoleName).getUseTheSamePorts()) {
        portsToReuse = statusManager.getLiveAssociatedContainerPorts(taskRoleName);
        boolean noLivePorts = ValueRangeUtils.getValueNumber(portsToReuse) <= 0;
        if (noLivePorts && previousRequestedPorts.containsKey(taskRoleName)) {
            portsToReuse = previousRequestedPorts.get(taskRoleName);
            // The cached entry only guides the next task, so consume it.
            previousRequestedPorts.remove(taskRoleName);
        }
    }

    SelectionResult selection = select(roleResource, nodeLabel, nodeGpuType, pendingTaskCount, portsToReuse, clusterNodes);

    // Nothing to cache unless the role shares ports and more than one task is pending.
    if (!requestManager.getTaskRoles().get(taskRoleName).getUseTheSamePorts() || pendingTaskCount <= 1) {
        return selection;
    }

    // reusePortsTimes bounds how often ports are re-cached, so that timed-out
    // tasks returning to start states (which keep the count from decreasing)
    // do not keep the cache alive indefinitely.
    if (reusePortsTimes == 0) {
        reusePortsTimes = pendingTaskCount;
    }
    // Other tasks are still waiting: hand the current ports over to them.
    if (reusePortsTimes > 1) {
        previousRequestedPorts.put(taskRoleName, selection.getOptimizedResource().getPortRanges());
    }
    reusePortsTimes--;
    return selection;
}
Also used : ValueRange(com.microsoft.frameworklauncher.common.model.ValueRange) NodeConfiguration(com.microsoft.frameworklauncher.common.model.NodeConfiguration) ResourceDescriptor(com.microsoft.frameworklauncher.common.model.ResourceDescriptor)

Example 10 with ResourceDescriptor

use of com.microsoft.frameworklauncher.common.model.ResourceDescriptor in project pai by Microsoft.

the class SelectionManager method filterNodesByResource.

/**
 * Removes from {@code filteredNodes} every node whose available resource
 * cannot fit the requested resource. Does nothing when no resource is requested.
 *
 * @param requestResource        the resource that must fit on a node; may be null
 * @param skipLocalTriedResource when true, resources recorded in
 *                               {@code localTriedResource} for a node are
 *                               subtracted from its available resource first
 */
private void filterNodesByResource(ResourceDescriptor requestResource, Boolean skipLocalTriedResource) {
    if (requestResource == null) {
        return;
    }
    // Walk from the tail so that removing by index does not shift the
    // entries that are still to be visited.
    int idx = filteredNodes.size();
    while (--idx >= 0) {
        Node candidate = allNodes.get(filteredNodes.get(idx));
        // Work on a copy so the node's own descriptor is not the object
        // that gets adjusted below.
        ResourceDescriptor usable = YamlUtils.deepCopy(candidate.getAvailableResource(), ResourceDescriptor.class);
        if (skipLocalTriedResource && localTriedResource.containsKey(candidate.getHost())) {
            LOGGER.logDebug("Skip local tried resources: [%s] on Node : [%s]", localTriedResource.get(candidate.getHost()), candidate.getHost());
            usable = ResourceDescriptor.subtract(usable, localTriedResource.get(candidate.getHost()));
        }
        if (ResourceDescriptor.fitsIn(requestResource, usable)) {
            continue;
        }
        LOGGER.logDebug("Resource does not fit in: Node: [%s] Request Resource: [%s], Available Resource: [%s]", candidate.getHost(), requestResource, usable);
        filteredNodes.remove(idx);
    }
}
Also used : ResourceDescriptor(com.microsoft.frameworklauncher.common.model.ResourceDescriptor)

Aggregations

ResourceDescriptor (com.microsoft.frameworklauncher.common.model.ResourceDescriptor): 10, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 3, NotAvailableException (com.microsoft.frameworklauncher.common.exceptions.NotAvailableException): 2, NodeConfiguration (com.microsoft.frameworklauncher.common.model.NodeConfiguration): 2, ValueRange (com.microsoft.frameworklauncher.common.model.ValueRange): 2, Test (org.junit.Test): 2, Method (java.lang.reflect.Method): 1, Resource (org.apache.hadoop.yarn.api.records.Resource): 1