Search in sources :

Example 1 with IStrategy

use of org.apache.storm.scheduler.resource.strategies.scheduling.IStrategy in project storm by apache.

the class ResourceAwareScheduler method scheduleTopology.

/**
 * Attempts to schedule one topology, evicting other topologies from a private working copy of the
 * cluster when the strategy reports that there are not enough resources.
 *
 * <p>The flow is:
 * <ol>
 *   <li>Instantiate and prepare the strategy named by {@code Config.TOPOLOGY_SCHEDULER_STRATEGY}.</li>
 *   <li>Up to {@code maxSchedulingAttempts} times, run the strategy on a background executor, bounded by
 *       {@code schedulingTimeoutSeconds}.</li>
 *   <li>On success, commit the result into {@code cluster} and return.</li>
 *   <li>On {@code FAIL_NOT_ENOUGH_RESOURCES}, free the slots of topologies that come after {@code td} in
 *       {@code orderedTopologies} (walked from the end of the list) inside the working copy and retry.</li>
 *   <li>On any other failure, timeout or exception, mark the topology as failed and return.</li>
 * </ol>
 *
 * @param td the topology to schedule
 * @param cluster the cluster that receives the assignment only if scheduling succeeds
 * @param topologySubmitter the user that is notified when scheduling fails
 * @param orderedTopologies topologies in scheduling order; entries after {@code td} are eviction candidates
 * @param tmpEvictedTopologiesMap out-parameter: ids of topologies evicted in the working copy, keyed by
 *     the id of the topology they were evicted for
 */
private void scheduleTopology(TopologyDetails td, Cluster cluster, final User topologySubmitter, List<TopologyDetails> orderedTopologies, Map<String, Set<String>> tmpEvictedTopologiesMap) {
    // A copy of cluster that we can modify, but does not get committed back to cluster unless scheduling succeeds
    Cluster workingState = new Cluster(cluster);
    RasNodes nodes = new RasNodes(workingState);
    IStrategy rasStrategy = null;
    // Read once and keep untyped: the String cast happens inside the try so that a missing or
    // non-String value is reported as a failed topology instead of escaping this method.
    final Object strategyConf = td.getConf().get(Config.TOPOLOGY_SCHEDULER_STRATEGY);
    try {
        String strategy = (String) strategyConf;
        if (strategy.startsWith("backtype.storm")) {
            // Storm support to launch workers of older version.
            // If the config of TOPOLOGY_SCHEDULER_STRATEGY comes from the older version, replace the package name.
            strategy = strategy.replace("backtype.storm", "org.apache.storm");
            LOG.debug("Replaced backtype.storm with org.apache.storm for Config.TOPOLOGY_SCHEDULER_STRATEGY");
        }
        rasStrategy = ReflectionUtils.newSchedulerStrategyInstance(strategy, conf);
        rasStrategy.prepare(conf);
    } catch (DisallowedStrategyException e) {
        markFailedTopology(topologySubmitter, cluster, td, "Unsuccessful in scheduling - " + e.getAttemptedClass() + " is not an allowed strategy. Please make sure your " + Config.TOPOLOGY_SCHEDULER_STRATEGY + " config is one of the allowed strategies: " + e.getAllowedStrategies(), e);
        return;
    } catch (RuntimeException e) {
        // Also covers a null or non-String strategy value (NullPointerException / ClassCastException above).
        markFailedTopology(topologySubmitter, cluster, td, "Unsuccessful in scheduling - failed to create instance of topology strategy " + strategyConf + ". Please check logs for details", e);
        return;
    }
    // Log warning here to avoid duplicating / spamming in strategy / scheduling code.
    // The flags are only used for this warning, so an absent key is treated as "not set" rather than
    // failing with a NullPointerException on unboxing.
    boolean oneExecutorPerWorker = Boolean.TRUE.equals(td.getConf().get(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER));
    boolean oneComponentPerWorker = Boolean.TRUE.equals(td.getConf().get(Config.TOPOLOGY_RAS_ONE_COMPONENT_PER_WORKER));
    if (oneExecutorPerWorker && oneComponentPerWorker) {
        LOG.warn("Conflicting options: {} and {} are both set! Ignoring {} option.", Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, Config.TOPOLOGY_RAS_ONE_COMPONENT_PER_WORKER, Config.TOPOLOGY_RAS_ONE_COMPONENT_PER_WORKER);
    }
    TopologySchedulingResources topologySchedulingResources = new TopologySchedulingResources(workingState, td);
    // The lambda below needs an effectively final reference to the strategy.
    final IStrategy finalRasStrategy = rasStrategy;
    for (int i = 0; i < maxSchedulingAttempts; i++) {
        SingleTopologyCluster toSchedule = new SingleTopologyCluster(workingState, td.getId());
        try {
            SchedulingResult result = null;
            topologySchedulingResources.resetRemaining();
            if (topologySchedulingResources.canSchedule()) {
                Future<SchedulingResult> schedulingFuture = backgroundScheduling.submit(() -> finalRasStrategy.schedule(toSchedule, td));
                try {
                    result = schedulingFuture.get(schedulingTimeoutSeconds, TimeUnit.SECONDS);
                } catch (TimeoutException te) {
                    markFailedTopology(topologySubmitter, cluster, td, "Scheduling took too long for " + td.getId() + " using strategy " + rasStrategy.getClass().getName() + " timeout after " + schedulingTimeoutSeconds + " seconds using config " + DaemonConfig.SCHEDULING_TIMEOUT_SECONDS_PER_TOPOLOGY + ".");
                    schedulingTimeoutMeter.mark();
                    schedulingFuture.cancel(true);
                    return;
                } catch (InterruptedException ie) {
                    // Stop the background work and keep the interrupt visible to the caller; the
                    // outer handler below still reports the topology as failed.
                    schedulingFuture.cancel(true);
                    Thread.currentThread().interrupt();
                    throw ie;
                }
            } else {
                result = SchedulingResult.failure(SchedulingStatus.FAIL_NOT_ENOUGH_RESOURCES, "");
            }
            LOG.debug("scheduling result: {}", result);
            if (result == null) {
                markFailedTopology(topologySubmitter, cluster, td, "Internal scheduler error");
                return;
            } else {
                if (result.isSuccess()) {
                    // Commit the working copy for this topology back into the real cluster.
                    cluster.updateFrom(toSchedule);
                    cluster.setStatus(td.getId(), "Running - " + result.getMessage());
                    // DONE
                    return;
                } else if (result.getStatus() == SchedulingStatus.FAIL_NOT_ENOUGH_RESOURCES) {
                    LOG.debug("Not enough resources to schedule {}", td.getName());
                    List<TopologyDetails> reversedList = ImmutableList.copyOf(orderedTopologies).reverse();
                    LOG.debug("Attempting to make space for topo {} from user {}", td.getName(), td.getTopologySubmitter());
                    // Everything before td in the reversed list comes after it in orderedTopologies;
                    // if td is not in the list indexOf returns -1 and nothing is evicted.
                    int tdIndex = reversedList.indexOf(td);
                    topologySchedulingResources.setRemainingRequiredResources(toSchedule, td);
                    Set<String> tmpEvictedTopos = new HashSet<>();
                    for (int index = 0; index < tdIndex; index++) {
                        TopologyDetails topologyEvict = reversedList.get(index);
                        SchedulerAssignment evictAssignemnt = workingState.getAssignmentById(topologyEvict.getId());
                        if (evictAssignemnt != null && !evictAssignemnt.getSlots().isEmpty()) {
                            topologySchedulingResources.adjustResourcesForEvictedTopology(toSchedule, topologyEvict);
                            tmpEvictedTopos.add(topologyEvict.getId());
                            Collection<WorkerSlot> workersToEvict = workingState.getUsedSlotsByTopologyId(topologyEvict.getId());
                            nodes.freeSlots(workersToEvict);
                            if (topologySchedulingResources.canSchedule()) {
                                // Enough has been freed to retry scheduling; do not evict more
                                // than is needed
                                break;
                            }
                        }
                    }
                    if (!tmpEvictedTopos.isEmpty()) {
                        LOG.warn("Evicted Topologies {} when scheduling topology: {}", tmpEvictedTopos, td.getId());
                        tmpEvictedTopologiesMap.computeIfAbsent(td.getId(), k -> new HashSet<>()).addAll(tmpEvictedTopos);
                        // Only place we fall through to do the loop over again...
                    } else {
                        StringBuilder message = new StringBuilder();
                        message.append("Not enough resources to schedule after evicting lower priority topologies. ");
                        message.append(topologySchedulingResources.getRemainingRequiredResourcesMessage());
                        message.append(result.getErrorMessage());
                        markFailedTopology(topologySubmitter, cluster, td, message.toString());
                        return;
                    }
                } else {
                    // Any other failure result
                    topologySubmitter.markTopoUnsuccess(td, cluster, result.toString());
                    return;
                }
            }
        } catch (Exception ex) {
            internalErrorMeter.mark();
            markFailedTopology(topologySubmitter, cluster, td, "Internal Error - Exception thrown when scheduling. Please check logs for details", ex);
            return;
        }
    }
    // We can only reach here when we failed to free enough space by evicting current topologies after {maxSchedulingAttempts}
    // while that scheduler did evict something at each attempt.
    markFailedTopology(topologySubmitter, cluster, td, "Failed to make enough resources for " + td.getId() + " by evicting lower priority topologies within " + maxSchedulingAttempts + " attempts. " + topologySchedulingResources.getRemainingRequiredResourcesMessage());
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) DisallowedStrategyException(org.apache.storm.utils.DisallowedStrategyException) Cluster(org.apache.storm.scheduler.Cluster) SingleTopologyCluster(org.apache.storm.scheduler.SingleTopologyCluster) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) TimeoutException(java.util.concurrent.TimeoutException) DisallowedStrategyException(org.apache.storm.utils.DisallowedStrategyException) IStrategy(org.apache.storm.scheduler.resource.strategies.scheduling.IStrategy) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) SingleTopologyCluster(org.apache.storm.scheduler.SingleTopologyCluster) Collection(java.util.Collection) ImmutableList(org.apache.storm.shade.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) TimeoutException(java.util.concurrent.TimeoutException)

Example 2 with IStrategy

use of org.apache.storm.scheduler.resource.strategies.scheduling.IStrategy in project storm by apache.

the class ResourceAwareScheduler method scheduleTopology.

/**
 * Schedules a single topology with the strategy named in its configuration, invoking the configured
 * eviction strategy and retrying when the strategy reports that there are not enough resources.
 *
 * <p>A checkpoint of the scheduling state is taken before any attempt; every failure path that may
 * have modified state passes that checkpoint to {@code cleanup} before moving the topology to the
 * submitter's "invalid" or "attempted" queue. If the topology has no unassigned executors it is
 * simply moved to "running".
 *
 * @param td the topology to schedule
 */
public void scheduleTopology(TopologyDetails td) {
    User topologySubmitter = this.schedulingState.userMap.get(td.getTopologySubmitter());
    if (this.schedulingState.cluster.getUnassignedExecutors(td).size() > 0) {
        LOG.debug("/********Scheduling topology {} from User {}************/", td.getName(), topologySubmitter);
        // Snapshot handed to cleanup(...) on failure; named differently from the field
        // this.schedulingState, which holds the live state.
        SchedulingState checkpoint = checkpointSchedulingState();
        final Object strategyName = td.getConf().get(Config.TOPOLOGY_SCHEDULER_STRATEGY);
        IStrategy rasStrategy = null;
        try {
            rasStrategy = (IStrategy) Utils.newInstance((String) strategyName);
        } catch (RuntimeException e) {
            // Arguments follow the placeholder order (strategy, error, topology); the trailing
            // exception keeps the stack trace in the log.
            LOG.error("failed to create instance of IStrategy: {} with error: {}! Topology {} will not be scheduled.", strategyName, e.getMessage(), td.getName(), e);
            topologySubmitter = cleanup(checkpoint, td);
            topologySubmitter.moveTopoFromPendingToInvalid(td);
            this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - failed to create instance of topology strategy " + strategyName + ". Please check logs for details");
            return;
        }
        // Created lazily, the first time the strategy reports a lack of resources.
        IEvictionStrategy evictionStrategy = null;
        while (true) {
            SchedulingResult result = null;
            try {
                // Need to re prepare scheduling strategy with cluster and topologies in case scheduling state was restored
                // Pass in a copy of scheduling state since the scheduling strategy should not be able to be able to make modifications to
                // the state of cluster directly
                rasStrategy.prepare(new SchedulingState(this.schedulingState));
                result = rasStrategy.schedule(td);
            } catch (Exception ex) {
                LOG.error(String.format("Exception thrown when running strategy %s to schedule topology %s. Topology will not be scheduled!", rasStrategy.getClass().getName(), td.getName()), ex);
                topologySubmitter = cleanup(checkpoint, td);
                topologySubmitter.moveTopoFromPendingToInvalid(td);
                this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - Exception thrown when running strategy " + rasStrategy.getClass().getName() + ". Please check logs for details");
                // The failure is fully handled here. Without this break the null result would fall
                // into the "not valid" branch below and run cleanup and the invalid transition again.
                break;
            }
            LOG.debug("scheduling result: {}", result);
            if (result != null && result.isValid()) {
                if (result.isSuccess()) {
                    try {
                        if (mkAssignment(td, result.getSchedulingResultMap())) {
                            topologySubmitter.moveTopoFromPendingToRunning(td);
                            this.schedulingState.cluster.setStatus(td.getId(), "Running - " + result.getMessage());
                        } else {
                            topologySubmitter = this.cleanup(checkpoint, td);
                            topologySubmitter.moveTopoFromPendingToAttempted(td);
                            this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - Unable to assign executors to nodes. Please check logs for details");
                        }
                    } catch (IllegalStateException ex) {
                        LOG.error("Unsuccessful in scheduling - IllegalStateException thrown when attempting to assign executors to nodes.", ex);
                        topologySubmitter = cleanup(checkpoint, td);
                        topologySubmitter.moveTopoFromPendingToAttempted(td);
                        this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - IllegalStateException thrown when attempting to assign executors to nodes. Please check log for details.");
                    }
                    break;
                } else {
                    if (result.getStatus() == SchedulingStatus.FAIL_NOT_ENOUGH_RESOURCES) {
                        if (evictionStrategy == null) {
                            try {
                                evictionStrategy = (IEvictionStrategy) Utils.newInstance((String) this.conf.get(Config.RESOURCE_AWARE_SCHEDULER_EVICTION_STRATEGY));
                            } catch (RuntimeException e) {
                                LOG.error("failed to create instance of eviction strategy: {} with error: {}! No topology eviction will be done.", this.conf.get(Config.RESOURCE_AWARE_SCHEDULER_EVICTION_STRATEGY), e.getMessage(), e);
                                // NOTE(review): unlike the other failure paths this one neither calls
                                // cleanup(...) nor sets a cluster status - confirm that is intended.
                                topologySubmitter.moveTopoFromPendingToAttempted(td);
                                break;
                            }
                        }
                        boolean madeSpace = false;
                        try {
                            //need to re prepare since scheduling state might have been restored
                            evictionStrategy.prepare(this.schedulingState);
                            madeSpace = evictionStrategy.makeSpaceForTopo(td);
                        } catch (Exception ex) {
                            LOG.error(String.format("Exception thrown when running eviction strategy %s to schedule topology %s. No evictions will be done! Error: %s", evictionStrategy.getClass().getName(), td.getName(), ex.getClass().getName()), ex);
                            topologySubmitter = cleanup(checkpoint, td);
                            topologySubmitter.moveTopoFromPendingToAttempted(td);
                            break;
                        }
                        if (!madeSpace) {
                            LOG.debug("Could not make space for topo {} will move to attempted", td);
                            topologySubmitter = cleanup(checkpoint, td);
                            topologySubmitter.moveTopoFromPendingToAttempted(td);
                            this.schedulingState.cluster.setStatus(td.getId(), "Not enough resources to schedule - " + result.getErrorMessage());
                            break;
                        }
                        // Space was made: run the scheduling strategy again.
                        continue;
                    } else if (result.getStatus() == SchedulingStatus.FAIL_INVALID_TOPOLOGY) {
                        topologySubmitter = cleanup(checkpoint, td);
                        topologySubmitter.moveTopoFromPendingToInvalid(td, this.schedulingState.cluster);
                        break;
                    } else {
                        topologySubmitter = cleanup(checkpoint, td);
                        topologySubmitter.moveTopoFromPendingToAttempted(td, this.schedulingState.cluster);
                        break;
                    }
                }
            } else {
                // The strategy returned null or a result that fails its own validity check.
                LOG.warn("Scheduling results returned from topology {} is not vaild! Topology with be ignored.", td.getName());
                topologySubmitter = cleanup(checkpoint, td);
                topologySubmitter.moveTopoFromPendingToInvalid(td, this.schedulingState.cluster);
                break;
            }
        }
    } else {
        LOG.warn("Topology {} is already fully scheduled!", td.getName());
        topologySubmitter.moveTopoFromPendingToRunning(td);
        // Only set a status when none (or an empty one) is recorded yet.
        if (this.schedulingState.cluster.getStatusMap().get(td.getId()) == null || this.schedulingState.cluster.getStatusMap().get(td.getId()).equals("")) {
            this.schedulingState.cluster.setStatus(td.getId(), "Fully Scheduled");
        }
    }
}
Also used : IStrategy(org.apache.storm.scheduler.resource.strategies.scheduling.IStrategy) IEvictionStrategy(org.apache.storm.scheduler.resource.strategies.eviction.IEvictionStrategy)

Aggregations

IStrategy (org.apache.storm.scheduler.resource.strategies.scheduling.IStrategy)2 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Set (java.util.Set)1 TimeoutException (java.util.concurrent.TimeoutException)1 Cluster (org.apache.storm.scheduler.Cluster)1 SchedulerAssignment (org.apache.storm.scheduler.SchedulerAssignment)1 SingleTopologyCluster (org.apache.storm.scheduler.SingleTopologyCluster)1 TopologyDetails (org.apache.storm.scheduler.TopologyDetails)1 IEvictionStrategy (org.apache.storm.scheduler.resource.strategies.eviction.IEvictionStrategy)1 ImmutableList (org.apache.storm.shade.com.google.common.collect.ImmutableList)1 DisallowedStrategyException (org.apache.storm.utils.DisallowedStrategyException)1