Search in sources :

Example 1 with CheckpointManager

Use of edu.iu.dsc.tws.checkpointing.master.CheckpointManager in project twister2 by DSC-SPIDAL.

The init method of the JobMaster class.

/**
 * Initializes the Job Master and starts serving requests.
 *
 * <p>Steps, in order: optional Dashboard registration, creation of the
 * request-response server, driver initialization, worker monitor and barrier
 * monitor setup (with ZooKeeper-based or rrServer-based handlers, depending on
 * the configuration), ZooKeeper master controller initialization, and optional
 * checkpoint manager setup. Finally the server is started and the progress
 * loop is entered.
 *
 * @throws Twister2Exception propagated unchanged from the initialization steps
 *     (for example from {@code initZKMasterController})
 */
private void init() throws Twister2Exception {
    looper = new Progress();
    // if Dashboard is used, register this job with it
    if (dashClient != null) {
        boolean registered = dashClient.registerJob(job, nodeInfo);
        if (!registered) {
            LOG.warning("Not using Dashboard since it can not register with it.");
            // drop the client so that the components created below receive null
            dashClient = null;
        }
    }
    ServerConnectHandler connectHandler = new ServerConnectHandler();
    // backlog is half the number of workers, capped at MAX_BACK_LOG
    int backLog = Math.min(job.getNumberOfWorkers() / 2, MAX_BACK_LOG);
    rrServer = new RRServer(config, jmAddress, masterPort, looper, JOB_MASTER_ID, connectHandler, backLog);
    // init Driver if it exists
    // this has to be done before WorkerMonitor initialization
    initDriver();
    JobFailureWatcher jobFailureWatcher = new JobFailureWatcher();
    workerMonitor = new WorkerMonitor(this, rrServer, dashClient, zkJobUpdater, job, driver, jobFailureWatcher);
    workerHandler = new JMWorkerHandler(workerMonitor, rrServer, ZKContext.isZooKeeperServerUsed(config));
    // without ZooKeeper, the worker handler is registered as the worker event sender
    if (!ZKContext.isZooKeeperServerUsed(config)) {
        workerMonitor.setWorkerEventSender(workerHandler);
    }
    // initialize BarrierMonitor
    barrierMonitor = new BarrierMonitor(workerMonitor, jobFailureWatcher);
    if (ZKContext.isZooKeeperServerUsed(config)) {
        zkBarrierHandler = new ZKBarrierHandler(barrierMonitor, config, job.getJobId(), job.getNumberOfWorkers());
        barrierMonitor.setBarrierResponder(zkBarrierHandler);
        zkBarrierHandler.initialize(initialState);
    } else {
        JMBarrierHandler jmBarrierHandler = new JMBarrierHandler(rrServer, barrierMonitor);
        barrierMonitor.setBarrierResponder(jmBarrierHandler);
    }
    jobFailureWatcher.addJobFaultListener(barrierMonitor);
    // if ZooKeeper server is used for this job, initialize that;
    // a Twister2Exception raised here propagates to the caller as is
    initZKMasterController(workerMonitor);
    // initialize checkpoint manager
    if (CheckpointingContext.isCheckpointingEnabled(config)) {
        StateStore stateStore = CheckpointUtils.getStateStore(config);
        stateStore.init(config, "checkpoint-manager");
        this.checkpointManager = new CheckpointManager(this.rrServer, stateStore, job.getJobId());
        jobFailureWatcher.addJobFaultListener(this.checkpointManager);
        this.checkpointManager.init();
        // report success only after init() has actually completed
        LOG.info("Checkpoint manager initialized");
    }
    // done initializing checkpoint manager
    rrServer.start();
    // NOTE(review): presumably this call blocks and drives the server's
    // network progress until the job master is shut down - confirm against Progress
    looper.loop();
}
Also used : Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception) Progress(edu.iu.dsc.tws.common.net.tcp.Progress) RRServer(edu.iu.dsc.tws.common.net.tcp.request.RRServer) CheckpointManager(edu.iu.dsc.tws.checkpointing.master.CheckpointManager) StateStore(edu.iu.dsc.tws.api.checkpointing.StateStore) JMBarrierHandler(edu.iu.dsc.tws.master.barrier.JMBarrierHandler) BarrierMonitor(edu.iu.dsc.tws.master.barrier.BarrierMonitor) ZKBarrierHandler(edu.iu.dsc.tws.master.barrier.ZKBarrierHandler)

Aggregations

StateStore (edu.iu.dsc.tws.api.checkpointing.StateStore)1 Twister2Exception (edu.iu.dsc.tws.api.exceptions.Twister2Exception)1 CheckpointManager (edu.iu.dsc.tws.checkpointing.master.CheckpointManager)1 Progress (edu.iu.dsc.tws.common.net.tcp.Progress)1 RRServer (edu.iu.dsc.tws.common.net.tcp.request.RRServer)1 BarrierMonitor (edu.iu.dsc.tws.master.barrier.BarrierMonitor)1 JMBarrierHandler (edu.iu.dsc.tws.master.barrier.JMBarrierHandler)1 ZKBarrierHandler (edu.iu.dsc.tws.master.barrier.ZKBarrierHandler)1