Search in sources :

Example 1 with ExecuteStageRequest

use of io.mantisrx.server.core.ExecuteStageRequest in project mantis by Netflix.

the class VirtualMachineMasterServiceMesosImpl method createTaskInfo.

/**
 * Builds the Mesos task description used to launch one Mantis worker on the given agent.
 *
 * <p>The task carries the scalar resources (cpus, mem, disk, network) taken from the machine
 * definition of the schedule request, one single-port range resource per assigned port, the
 * worker executor, and the JSON-serialized {@link ExecuteStageRequest} as task data.
 *
 * @param slaveID           the Mesos agent the task is targeted at
 * @param launchTaskRequest the launch request holding the schedule request and the assigned ports
 * @return a mutable list containing exactly one {@link TaskInfo}
 * @throws LaunchTaskException if the execute-stage request cannot be serialized to JSON
 */
private Collection<TaskInfo> createTaskInfo(Protos.SlaveID slaveID, final LaunchTaskRequest launchTaskRequest) throws LaunchTaskException {
    final ScheduleRequest request = launchTaskRequest.getScheduleRequest();
    final String taskName = request.getWorkerId().getJobCluster()
            + " (stage: " + request.getStageNum()
            + " of " + request.getJobMetadata().getTotalStages() + ")";
    final TaskID mesosTaskId = TaskID.newBuilder().setValue(request.getWorkerId().getId()).build();
    final MachineDefinition machine = request.getMachineDefinition();
    // grab ports within range
    final List<Integer> assignedPorts = launchTaskRequest.getPorts().getAllPorts();
    try {
        final TaskInfo.Builder builder = TaskInfo.newBuilder();
        final ExecuteStageRequest stageRequest = new ExecuteStageRequest(
                request.getWorkerId().getJobCluster(),
                request.getWorkerId().getJobId(),
                request.getWorkerId().getWorkerIndex(),
                request.getWorkerId().getWorkerNum(),
                request.getJobMetadata().getJobJarUrl(),
                request.getStageNum(),
                request.getJobMetadata().getTotalStages(),
                assignedPorts,
                getTimeoutSecsToReportStart(),
                launchTaskRequest.getPorts().getMetricsPort(),
                request.getJobMetadata().getParameters(),
                request.getJobMetadata().getSchedulingInfo(),
                request.getDurationType(),
                request.getJobMetadata().getSubscriptionTimeoutSecs(),
                // NOTE(review): this subtracts (current epoch millis - min runtime secs) from min runtime secs,
                // which mixes units and both operands are the same getter — confirm the intended formula.
                request.getJobMetadata().getMinRuntimeSecs() - (System.currentTimeMillis() - request.getJobMetadata().getMinRuntimeSecs()),
                launchTaskRequest.getPorts());

        builder.setName(taskName);
        builder.setTaskId(mesosTaskId);
        builder.setSlaveId(slaveID);
        builder.addResources(newScalarResource("cpus", machine.getCpuCores()));
        builder.addResources(newScalarResource("mem", machine.getMemoryMB()));
        builder.addResources(newScalarResource("disk", machine.getDiskMB()));
        builder.addResources(newScalarResource("network", machine.getNetworkMbps()));
        builder.setExecutor(createMantisWorkerExecutor(stageRequest, launchTaskRequest, machine.getMemoryMB(), machine.getCpuCores()));
        builder.setData(ByteString.copyFrom(mapper.writeValueAsBytes(stageRequest)));

        // add ports: every port is declared as its own one-element range
        for (Integer assignedPort : assignedPorts) {
            final Value.Range.Builder singlePort = Value.Range.newBuilder().setBegin(assignedPort).setEnd(assignedPort);
            builder.addResources(Resource.newBuilder()
                    .setName("ports")
                    .setType(Value.Type.RANGES)
                    .setRanges(Value.Ranges.newBuilder().addRange(singlePort)));
        }

        final List<TaskInfo> launchable = new ArrayList<>(1);
        launchable.add(builder.build());
        return launchable;
    } catch (JsonProcessingException e) {
        throw new LaunchTaskException("Failed to build a TaskInfo instance: " + e.getMessage(), e);
    }
}

/** Creates a builder for a SCALAR resource with the given name and amount. */
private Resource.Builder newScalarResource(String resourceName, double amount) {
    return Resource.newBuilder()
            .setName(resourceName)
            .setType(Value.Type.SCALAR)
            .setScalar(Value.Scalar.newBuilder().setValue(amount));
}
Also used : TaskID(org.apache.mesos.Protos.TaskID) MachineDefinition(io.mantisrx.runtime.MachineDefinition) ScheduleRequest(io.mantisrx.server.master.scheduler.ScheduleRequest) ArrayList(java.util.ArrayList) LaunchTaskException(io.mantisrx.server.master.LaunchTaskException) ByteString(com.google.protobuf.ByteString) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) TaskInfo(org.apache.mesos.Protos.TaskInfo) JsonProcessingException(io.mantisrx.shaded.com.fasterxml.jackson.core.JsonProcessingException)

Example 2 with ExecuteStageRequest

use of io.mantisrx.server.core.ExecuteStageRequest in project mantis by Netflix.

the class WorkerExecutionOperationsNetworkStage method executeStage.

/**
 * Runs the stage described by {@code setup} on this worker and blocks until that stage finishes.
 *
 * <p>The method builds a {@code RunningWorker} from the execute-stage request, starts the job
 * lifecycle, creates the job context, starts heartbeats and then executes one of:
 * <ul>
 *   <li>stage 0: the Job Master service,</li>
 *   <li>stage 1 of a job with one stage: the entire job (source, stage and sink),</li>
 *   <li>otherwise: the source stage (stage 1) or a non-source stage of a multi-stage job.</li>
 * </ul>
 * A subscription state handler is set up only when the stage number equals the total number of
 * stages (the sink stage). Any {@link Throwable} raised while running is reported through
 * {@code rw.signalFailed(t)} followed by {@code shutdownStage()}; it is not rethrown.
 *
 * @param setup execution details providing the execute-stage request, the Mantis job, the status
 *              observer and the job parameters
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void executeStage(final ExecutionDetails setup) {
    ExecuteStageRequest executionRequest = setup.getExecuteStageRequest().getRequest();
    // Initialize the schedulingInfo observable for current job and mark it shareable to be reused by anyone interested in this data.
    Observable<JobSchedulingInfo> selfSchedulingInfo = mantisMasterApi.schedulingChanges(executionRequest.getJobId()).subscribeOn(Schedulers.io()).share();
    WorkerInfo workerInfo = generateWorkerInfo(executionRequest.getJobName(), executionRequest.getJobId(), executionRequest.getStage(), executionRequest.getWorkerIndex(), executionRequest.getWorkerNumber(), executionRequest.getDurationType(), "host", executionRequest.getWorkerPorts());
    final Observable<Integer> sourceStageTotalWorkersObs = createSourceStageTotalWorkersObservable(selfSchedulingInfo);
    RunningWorker.Builder rwBuilder = new RunningWorker.Builder()
            .job(setup.getMantisJob())
            .schedulingInfo(executionRequest.getSchedulingInfo())
            .stageTotalWorkersObservable(sourceStageTotalWorkersObs)
            .jobName(executionRequest.getJobName())
            .stageNum(executionRequest.getStage())
            .workerIndex(executionRequest.getWorkerIndex())
            .workerNum(executionRequest.getWorkerNumber())
            .totalStages(executionRequest.getTotalNumStages())
            .metricsPort(executionRequest.getMetricsPort())
            .ports(executionRequest.getPorts().iterator())
            .jobStatusObserver(setup.getStatus())
            .requestSubject(setup.getExecuteStageRequest().getRequestSubject())
            .workerInfo(workerInfo)
            .vmTaskStatusObservable(vmTaskStatusObserver)
            .hasJobMaster(executionRequest.getHasJobMaster())
            .jobId(executionRequest.getJobId());
    if (executionRequest.getStage() == 0) {
        // stage 0 is the job master, which has its own stage config
        rwBuilder = rwBuilder.stage(new JobMasterStageConfig("jobmasterconfig"));
    } else {
        // stage numbers are 1-based while the job's stage list is 0-based
        rwBuilder = rwBuilder.stage((StageConfig) setup.getMantisJob().getStages().get(executionRequest.getStage() - 1));
    }
    final RunningWorker rw = rwBuilder.build();
    AtomicReference<SubscriptionStateHandler> subscriptionStateHandlerRef = new AtomicReference<>();
    if (rw.getStageNum() == rw.getTotalStagesNet()) {
        // set up subscription state handler only for sink (last) stage
        subscriptionStateHandlerRef.set(setupSubscriptionStateHandler(setup.getExecuteStageRequest().getRequest().getJobId(), mantisMasterApi, setup.getExecuteStageRequest().getRequest().getSubscriptionTimeoutSecs(), setup.getExecuteStageRequest().getRequest().getMinRuntimeSecs()));
    }
    logger.info("Running worker info: " + rw);
    rw.signalStartedInitiated();
    try {
        logger.info(">>>>>>>>>>>>>>>>Calling lifecycle.startup()");
        Lifecycle lifecycle = rw.getJob().getLifecycle();
        lifecycle.startup();
        ServiceLocator serviceLocator = lifecycle.getServiceLocator();
        if (lookupSpectatorRegistry) {
            try {
                final Registry spectatorRegistry = serviceLocator.service(Registry.class);
                SpectatorRegistryFactory.setRegistry(spectatorRegistry);
            } catch (Throwable t) {
                // best effort: keep the currently configured registry, but record why the lookup failed
                logger.error("failed to init spectator registry using service locator, falling back to {}", SpectatorRegistryFactory.getRegistry().getClass().getCanonicalName(), t);
            }
        }
        // create job context
        Parameters parameters = ParameterUtils.createContextParameters(rw.getJob().getParameterDefinitions(), setup.getParameters());
        final Context context = generateContext(parameters, serviceLocator, workerInfo, MetricsRegistry.getInstance(), () -> {
            rw.signalCompleted();
            // wait for completion signal to go to the master and us getting killed. Upon timeout, exit.
            try {
                Thread.sleep(60000);
            } catch (InterruptedException ie) {
                logger.warn("Unexpected exception sleeping: " + ie.getMessage());
            }
            System.exit(0);
        }, createWorkerMapObservable(selfSchedulingInfo, executionRequest.getJobName(), executionRequest.getJobId(), executionRequest.getDurationType()));
        rw.setContext(context);
        // setup heartbeats
        heartbeatRef.set(new Heartbeat(rw.getJobId(), rw.getStageNum(), rw.getWorkerIndex(), rw.getWorkerNum()));
        final double networkMbps = executionRequest.getSchedulingInfo().forStage(rw.getStageNum()).getMachineDefinition().getNetworkMbps();
        startSendingHeartbeats(rw.getJobStatus(), new WorkerId(executionRequest.getJobId(), executionRequest.getWorkerIndex(), executionRequest.getWorkerNumber()).getId(), networkMbps);
        // execute stage
        if (rw.getStageNum() == 0) {
            logger.info("JobId: " + rw.getJobId() + ", executing Job Master");
            final AutoScaleMetricsConfig autoScaleMetricsConfig = new AutoScaleMetricsConfig();
            // Temporary workaround to enable auto-scaling by custom metric in Job Master. This will be revisited to get the entire autoscaling config
            // for a job as a System parameter in the JobMaster
            final String autoScaleMetricString = (String) parameters.get(JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM, "");
            if (!Strings.isNullOrEmpty(autoScaleMetricString)) {
                // expected format: <metricGroup>::<metricName>::<aggregationAlgo>
                final List<String> tokens = Splitter.on("::").omitEmptyStrings().trimResults().splitToList(autoScaleMetricString);
                if (tokens.size() == 3) {
                    final String metricGroup = tokens.get(0);
                    final String metricName = tokens.get(1);
                    final String algo = tokens.get(2);
                    try {
                        final AutoScaleMetricsConfig.AggregationAlgo aggregationAlgo = AutoScaleMetricsConfig.AggregationAlgo.valueOf(algo);
                        logger.info("registered UserDefined auto scale metric {}:{} algo {}", metricGroup, metricName, aggregationAlgo);
                        autoScaleMetricsConfig.addUserDefinedMetric(metricGroup, metricName, aggregationAlgo);
                    } catch (IllegalArgumentException e) {
                        // report the offending algorithm token (not the config object) and keep the original cause
                        final String errorMsg = String.format("ERROR: Invalid algorithm value %s for param %s (algo should be one of %s)", algo, JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM, Arrays.stream(AutoScaleMetricsConfig.AggregationAlgo.values()).map(a -> a.name()).collect(Collectors.toList()));
                        logger.error(errorMsg);
                        throw new RuntimeException(errorMsg, e);
                    }
                } else {
                    final String errorMsg = String.format("ERROR: Invalid value %s for param %s", autoScaleMetricString, JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM);
                    logger.error(errorMsg);
                    throw new RuntimeException(errorMsg);
                }
            } else {
                logger.info("param {} is null or empty", JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM);
            }
            final JobMasterService jobMasterService = new JobMasterService(rw.getJobId(), rw.getSchedulingInfo(), workerMetricsClient, autoScaleMetricsConfig, mantisMasterApi, rw.getContext(), rw.getOnCompleteCallback(), rw.getOnErrorCallback(), rw.getOnTerminateCallback());
            jobMasterService.start();
            signalStarted(rw, subscriptionStateHandlerRef);
            // block until worker terminates
            rw.waitUntilTerminate();
        } else if (rw.getStageNum() == 1 && rw.getTotalStagesNet() == 1) {
            logger.info("JobId: " + rw.getJobId() + ", single stage job, executing entire job");
            // single stage, execute entire job on this machine
            PortSelector portSelector = new PortSelector() {

                @Override
                public int acquirePort() {
                    return rw.getPorts().next();
                }
            };
            RxMetrics rxMetrics = new RxMetrics();
            StageExecutors.executeSingleStageJob(rw.getJob().getSource(), rw.getStage(), rw.getJob().getSink(), portSelector, rxMetrics, rw.getContext(), rw.getOnTerminateCallback(), rw.getWorkerIndex(), rw.getSourceStageTotalWorkersObservable(), onSinkSubscribe, onSinkUnsubscribe, rw.getOnCompleteCallback(), rw.getOnErrorCallback());
            signalStarted(rw, subscriptionStateHandlerRef);
            // block until worker terminates
            rw.waitUntilTerminate();
        } else {
            logger.info("JobId: " + rw.getJobId() + ", executing a multi-stage job, stage: " + rw.getStageNum());
            if (rw.getStageNum() == 1) {
                // execute source stage
                String remoteObservableName = rw.getJobId() + "_" + rw.getStageNum();
                // NOTE(review): this value is not read anywhere below; kept to avoid changing what is invoked
                StageSchedulingInfo currentStageSchedulingInfo = rw.getSchedulingInfo().forStage(1);
                WorkerPublisherRemoteObservable publisher = new WorkerPublisherRemoteObservable<>(rw.getPorts().next(), remoteObservableName, numWorkersAtStage(selfSchedulingInfo, rw.getJobId(), rw.getStageNum() + 1), rw.getJobName());
                StageExecutors.executeSource(rw.getWorkerIndex(), rw.getJob().getSource(), rw.getStage(), publisher, rw.getContext(), rw.getSourceStageTotalWorkersObservable());
                logger.info("JobId: " + rw.getJobId() + " stage: " + rw.getStageNum() + ", serving remote observable for source with name: " + remoteObservableName);
                RemoteRxServer server = publisher.getServer();
                RxMetrics rxMetrics = server.getMetrics();
                MetricsRegistry.getInstance().registerAndGet(rxMetrics.getCountersAndGauges());
                signalStarted(rw, subscriptionStateHandlerRef);
                logger.info("JobId: " + rw.getJobId() + " stage: " + rw.getStageNum() + ", blocking until source observable completes");
                server.blockUntilServerShutdown();
            } else {
                // execute intermediate stage or last stage plus sink
                executeNonSourceStage(selfSchedulingInfo, rw, subscriptionStateHandlerRef);
            }
        }
        logger.info("Calling lifecycle.shutdown()");
        lifecycle.shutdown();
    } catch (Throwable t) {
        // any failure during startup or execution is reported to the worker status and the stage is shut down
        rw.signalFailed(t);
        shutdownStage();
    }
}
Also used : Strings(io.mantisrx.shaded.com.google.common.base.Strings) Arrays(java.util.Arrays) MantisJobDurationType(io.mantisrx.runtime.MantisJobDurationType) MantisJobState(io.mantisrx.runtime.MantisJobState) LoggerFactory(org.slf4j.LoggerFactory) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) JobMasterStageConfig(io.mantisrx.server.worker.jobmaster.JobMasterStageConfig) Lifecycle(io.mantisrx.runtime.lifecycle.Lifecycle) WorkerConsumer(io.mantisrx.runtime.executor.WorkerConsumer) ServiceRegistry(io.mantisrx.server.core.ServiceRegistry) JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM(io.mantisrx.runtime.parameter.ParameterUtils.JOB_MASTER_AUTOSCALE_METRIC_SYSTEM_PARAM) WorkerPorts(io.mantisrx.common.WorkerPorts) ParameterUtils(io.mantisrx.runtime.parameter.ParameterUtils) Map(java.util.Map) Schedulers(rx.schedulers.Schedulers) VirtualMachineTaskStatus(io.mantisrx.server.worker.mesos.VirtualMachineTaskStatus) RxMetrics(io.reactivex.mantis.remote.observable.RxMetrics) Status(io.mantisrx.server.core.Status) StageExecutors(io.mantisrx.runtime.executor.StageExecutors) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) WorkerAssignments(io.mantisrx.server.core.WorkerAssignments) Observer(rx.Observer) Collectors(java.util.stream.Collectors) JobMasterService(io.mantisrx.server.worker.jobmaster.JobMasterService) WorkerConsumerRemoteObservable(io.mantisrx.runtime.executor.WorkerConsumerRemoteObservable) CountDownLatch(java.util.concurrent.CountDownLatch) WorkerId(io.mantisrx.server.core.domain.WorkerId) List(java.util.List) ToDeltaEndpointInjector(io.reactivex.mantis.remote.observable.ToDeltaEndpointInjector) Action0(rx.functions.Action0) BehaviorSubject(rx.subjects.BehaviorSubject) Splitter(io.mantisrx.shaded.com.google.common.base.Splitter) Optional(java.util.Optional) WorkerMap(io.mantisrx.runtime.WorkerMap) PortSelector(io.mantisrx.runtime.executor.PortSelector) 
WorkerPublisherRemoteObservable(io.mantisrx.runtime.executor.WorkerPublisherRemoteObservable) StageConfig(io.mantisrx.runtime.StageConfig) MantisMasterClientApi(io.mantisrx.server.master.client.MantisMasterClientApi) MetricsRegistry(io.mantisrx.common.metrics.MetricsRegistry) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Parameters(io.mantisrx.runtime.parameter.Parameters) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) Observable(rx.Observable) Func1(rx.functions.Func1) WorkerMetricsClient(io.mantisrx.server.worker.client.WorkerMetricsClient) LinkedList(java.util.LinkedList) RemoteRxServer(io.reactivex.mantis.remote.observable.RemoteRxServer) AutoScaleMetricsConfig(io.mantisrx.server.worker.jobmaster.AutoScaleMetricsConfig) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Endpoint(io.mantisrx.common.network.Endpoint) TYPE(io.mantisrx.server.core.Status.TYPE) Context(io.mantisrx.runtime.Context) StatusPayloads(io.mantisrx.server.core.StatusPayloads) TimeUnit(java.util.concurrent.TimeUnit) ServiceLocator(io.mantisrx.runtime.lifecycle.ServiceLocator) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) Registry(com.netflix.spectator.api.Registry) WorkerConfiguration(io.mantisrx.server.worker.config.WorkerConfiguration) SpectatorRegistryFactory(io.mantisrx.common.metrics.spectator.SpectatorRegistryFactory) WorkerInfo(io.mantisrx.runtime.WorkerInfo) WorkerHost(io.mantisrx.server.core.WorkerHost) WorkerPublisherRemoteObservable(io.mantisrx.runtime.executor.WorkerPublisherRemoteObservable) WorkerInfo(io.mantisrx.runtime.WorkerInfo) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) RxMetrics(io.reactivex.mantis.remote.observable.RxMetrics) AutoScaleMetricsConfig(io.mantisrx.server.worker.jobmaster.AutoScaleMetricsConfig) Context(io.mantisrx.runtime.Context) Parameters(io.mantisrx.runtime.parameter.Parameters) 
JobMasterService(io.mantisrx.server.worker.jobmaster.JobMasterService) PortSelector(io.mantisrx.runtime.executor.PortSelector) JobSchedulingInfo(io.mantisrx.server.core.JobSchedulingInfo) Lifecycle(io.mantisrx.runtime.lifecycle.Lifecycle) JobMasterStageConfig(io.mantisrx.server.worker.jobmaster.JobMasterStageConfig) AtomicReference(java.util.concurrent.atomic.AtomicReference) ServiceRegistry(io.mantisrx.server.core.ServiceRegistry) MetricsRegistry(io.mantisrx.common.metrics.MetricsRegistry) Registry(com.netflix.spectator.api.Registry) WorkerId(io.mantisrx.server.core.domain.WorkerId) RemoteRxServer(io.reactivex.mantis.remote.observable.RemoteRxServer) JobMasterStageConfig(io.mantisrx.server.worker.jobmaster.JobMasterStageConfig) StageConfig(io.mantisrx.runtime.StageConfig) ServiceLocator(io.mantisrx.runtime.lifecycle.ServiceLocator) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo)

Example 3 with ExecuteStageRequest

use of io.mantisrx.server.core.ExecuteStageRequest in project mantis by Netflix.

the class VirtualMachineWorkerServiceLocalImpl method createExecuteStageRequest.

/**
 * Assembles a hard-coded execute-stage request for running a worker locally.
 *
 * <p>Job identity, stage and metrics-port values come from {@code workerInfo}; the job jar URL,
 * ports, parameters and scheduling info are fixed values defined in this method. Only stage 1 is
 * registered in the scheduling info.
 *
 * @return the request wrapped together with a fresh {@code PublishSubject<Boolean>}
 * @throws MalformedURLException declared because the job jar location is constructed via {@link URL}
 */
private WrappedExecuteStageRequest createExecuteStageRequest() throws MalformedURLException {
    // TODO make ExecuteStageRequest params configurable
    final long startReportTimeoutSecs = 5;
    final URL jarLocation = new URL("file:/Users/nmahilani/Projects/Mantis/mantis-sdk/examples/sine-function/build/distributions/sine-function-1.0.zip");
    final List<Integer> stagePorts = Arrays.asList(31015, 31013, 31014);
    final List<Parameter> jobParameters = Collections.singletonList(new Parameter("useRandom", "true"));
    final int instancesPerStage = 1;

    final Map<Integer, StageSchedulingInfo> perStageInfo = new HashMap<>();

    // stage 0 info is built but deliberately not registered below
    final StageSchedulingInfo jobMasterStageInfo = StageSchedulingInfo.builder()
            .numberOfInstances(instancesPerStage)
            .machineDefinition(MachineDefinitions.micro())
            .build();

    final StageScalingPolicy.Strategy memoryStrategy = new StageScalingPolicy.Strategy(
            StageScalingPolicy.ScalingReason.Memory, 15.0, 25.0, new StageScalingPolicy.RollingCount(1, 2));
    final StageScalingPolicy memoryScaling = new StageScalingPolicy(
            1, 1, 5, 1, 1, 30,
            Collections.singletonMap(StageScalingPolicy.ScalingReason.Memory, memoryStrategy));
    final StageSchedulingInfo firstStageInfo = StageSchedulingInfo.builder()
            .numberOfInstances(instancesPerStage)
            .machineDefinition(new MachineDefinition(2, 300, 200, 1024, 2))
            .scalingPolicy(memoryScaling)
            .scalable(true)
            .build();
    perStageInfo.put(1, firstStageInfo);

    final SchedulingInfo schedulingInfo = new SchedulingInfo(perStageInfo);
    final WorkerPorts localWorkerPorts = new WorkerPorts(Arrays.asList(7151, 7152, 7153, 7154, 7155));
    final ExecuteStageRequest stageRequest = new ExecuteStageRequest(
            workerInfo.getJobName(),
            workerInfo.getJobId(),
            workerInfo.getWorkerIndex(),
            workerInfo.getWorkerNumber(),
            jarLocation,
            workerInfo.getStageNumber(),
            workerInfo.getNumStages(),
            stagePorts,
            startReportTimeoutSecs,
            workerInfo.getMetricsPort(),
            jobParameters,
            schedulingInfo,
            MantisJobDurationType.Transient,
            0L,
            0L,
            localWorkerPorts);
    return new WrappedExecuteStageRequest(PublishSubject.<Boolean>create(), stageRequest);
}
Also used : MachineDefinition(io.mantisrx.runtime.MachineDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) HashMap(java.util.HashMap) URL(java.net.URL) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) StageScalingPolicy(io.mantisrx.runtime.descriptor.StageScalingPolicy) WorkerPorts(io.mantisrx.common.WorkerPorts) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) Parameter(io.mantisrx.runtime.parameter.Parameter)

Example 4 with ExecuteStageRequest

use of io.mantisrx.server.core.ExecuteStageRequest in project mantis by Netflix.

the class ExecuteStageRequestService method start.

/**
 * Subscribes to the stream of execute-stage requests and launches a worker thread per request.
 *
 * <p>For every incoming request a status subject is created and published to
 * {@code tasksStatusObserver}; the request is then turned into {@code ExecutionDetails} (job
 * class path and job instance resolved) and handed to {@code executionOperations.executeStage}
 * on a dedicated daemon thread whose context class loader is the job class loader. Requests that
 * fail during preparation are reported on their status subject and dropped from the stream.
 */
@Override
public void start() {
    subscription = executeStageRequestObservable
            .map(wrappedRequest -> {
                // each request gets its own status subject, announced before the request is tracked
                final PublishSubject<Status> statusSubject = PublishSubject.create();
                tasksStatusObserver.onNext(statusSubject);
                return new TrackedExecuteStageRequest(wrappedRequest, statusSubject);
            })
            .flatMap(this::prepareExecutionDetails)
            .subscribe(new Observer<ExecutionDetails>() {

                @Override
                public void onNext(final ExecutionDetails executionDetails) {
                    final String jobId = executionDetails.getExecuteStageRequest().getRequest().getJobId();
                    logger.info("Executing stage for job ID: " + jobId);
                    // Add ports here
                    final Runnable stageRunner = () -> {
                        try {
                            executionOperations.executeStage(executionDetails);
                        } catch (Throwable failure) {
                            logger.error("Failed to execute job stage", failure);
                        }
                    };
                    final Thread workerThread = new Thread(stageRunner, "Mantis Worker Thread for " + jobId);
                    // rebuild class path, job jar + parent class loader
                    // job jar to reference third party libraries and resources
                    // parent to reference worker code
                    workerThread.setContextClassLoader(executionDetails.getClassLoader());
                    workerThread.setDaemon(true);
                    workerThread.start();
                }

                @Override
                public void onError(Throwable e) {
                    logger.error("Execute stage observable threw exception", e);
                }

                @Override
                public void onCompleted() {
                    // should never occur
                    logger.error("Execute stage observable completed");
                    executionOperations.shutdownStage();
                }
            });
}

/**
 * Resolves the job class path and the job instance for a tracked request.
 *
 * @param tracked the request together with its status subject
 * @return an observable emitting the execution details, or an empty observable when preparation
 *         failed (the failure is pushed to the request's status subject via {@code onError})
 */
// raw type due to unknown type for mantis job
@SuppressWarnings("rawtypes")
private Observable<ExecutionDetails> prepareExecutionDetails(TrackedExecuteStageRequest tracked) {
    final ExecuteStageRequest stageRequest = tracked.getExecuteRequest().getRequest();
    // pull out file name from URL
    // NOTE(review): the derived jar name is not used afterwards; the calls are kept as they were
    final String jarFile = stageRequest.getJobJarUrl().getFile();
    final String jarName = jarFile.substring(jarFile.lastIndexOf('/') + 1);

    // path used to store job on local disk
    final Path libsDir = Paths.get("/tmp", "mantis-jobs", stageRequest.getJobId(), Integer.toString(stageRequest.getWorkerNumber()), "libs");
    final URL libsLocation;
    try {
        libsLocation = Paths.get(libsDir.toString(), "*").toUri().toURL();
    } catch (MalformedURLException urlFailure) {
        logger.error("Failed to convert path location to URL", urlFailure);
        tracked.getStatus().onError(urlFailure);
        return Observable.empty();
    }

    logger.info("Creating job classpath with pathLocation " + libsLocation);
    final ClassLoader jobClassLoader = URLClassLoader.newInstance(new URL[] { libsLocation });
    try {
        // the job instance is resolved once and reused for later requests
        if (mantisJob == null) {
            if (!jobProviderClass.isPresent()) {
                logger.info("using serviceLoader to get job instance");
                final ServiceLoader<MantisJobProvider> providers = ServiceLoader.load(MantisJobProvider.class, jobClassLoader);
                // should only be a single provider, check is made in master
                final MantisJobProvider discovered = providers.iterator().next();
                mantisJob = discovered.getJobInstance();
            } else {
                logger.info("loading job main class " + jobProviderClass.get());
                final Class providerClass = Class.forName(jobProviderClass.get());
                final MantisJobProvider configured = (MantisJobProvider) providerClass.newInstance();
                mantisJob = configured.getJobInstance();
            }
        }
    } catch (Throwable loadFailure) {
        logger.error("Failed to load job class", loadFailure);
        tracked.getStatus().onError(loadFailure);
        return Observable.empty();
    }

    logger.info("Executing job");
    return Observable.just(new ExecutionDetails(tracked.getExecuteRequest(), tracked.getStatus(), mantisJob, jobClassLoader, stageRequest.getParameters()));
}
Also used : Status(io.mantisrx.server.core.Status) Path(java.nio.file.Path) MalformedURLException(java.net.MalformedURLException) Observable(rx.Observable) ExecuteStageRequest(io.mantisrx.server.core.ExecuteStageRequest) URL(java.net.URL) ServiceLoader(java.util.ServiceLoader) Observer(rx.Observer) URLClassLoader(java.net.URLClassLoader) MantisJobProvider(io.mantisrx.runtime.MantisJobProvider) Func1(rx.functions.Func1)

Aggregations

ExecuteStageRequest (io.mantisrx.server.core.ExecuteStageRequest)4 WorkerPorts (io.mantisrx.common.WorkerPorts)2 MachineDefinition (io.mantisrx.runtime.MachineDefinition)2 StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo)2 Status (io.mantisrx.server.core.Status)2 URL (java.net.URL)2 HashMap (java.util.HashMap)2 Observable (rx.Observable)2 Observer (rx.Observer)2 Func1 (rx.functions.Func1)2 ByteString (com.google.protobuf.ByteString)1 Registry (com.netflix.spectator.api.Registry)1 MetricsRegistry (io.mantisrx.common.metrics.MetricsRegistry)1 SpectatorRegistryFactory (io.mantisrx.common.metrics.spectator.SpectatorRegistryFactory)1 Endpoint (io.mantisrx.common.network.Endpoint)1 Context (io.mantisrx.runtime.Context)1 MantisJobDurationType (io.mantisrx.runtime.MantisJobDurationType)1 MantisJobProvider (io.mantisrx.runtime.MantisJobProvider)1 MantisJobState (io.mantisrx.runtime.MantisJobState)1 StageConfig (io.mantisrx.runtime.StageConfig)1