Search in sources :

Example 21 with Twister2Exception

use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.

the class ZKEventsManager method getNumberOfPastEvents.

/**
 * Counts the child znodes currently present under the events directory of a job.
 *
 * @param client   Curator client used to query the children
 * @param rootPath root path handed to {@code ZKUtils.eventsDir}
 * @param jobID    job ID handed to {@code ZKUtils.eventsDir}
 * @return the number of children found under the events directory
 * @throws Twister2Exception wrapping any exception raised while listing the children
 */
public static int getNumberOfPastEvents(CuratorFramework client, String rootPath, String jobID) throws Twister2Exception {
    final String eventsPath = ZKUtils.eventsDir(rootPath, jobID);
    int pastEventCount;
    try {
        pastEventCount = client.getChildren().forPath(eventsPath).size();
        LOG.info("Number of past events: " + pastEventCount);
    } catch (Exception ex) {
        // keep the original cause so callers can see why the listing failed
        throw new Twister2Exception("Could not get children of events directory: " + eventsPath, ex);
    }
    return pastEventCount;
}
Also used : Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception)

Example 22 with Twister2Exception

use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.

the class JobMasterExample method main.

/**
 * this main method is for locally testing only
 * A JobMaster instance is started locally on the default port:
 * edu.iu.dsc.tws.master.JobMasterContext.JOB_MASTER_PORT_DEFAULT = 11011
 * <p>
 * numberOfWorkers to join is expected as a parameter
 * <p>
 * When all workers joined and all have sent completed messages,
 * this server also completes and exits
 * <p>
 * An example usage of JobMaster can be seen in:
 * edu.iu.dsc.tws.rsched.schedulers.k8s.master.JobMasterStarter
 * <p>
 * NOTE(review): the single argument is first parsed as an integer (numberOfWorkers)
 * and later, when ZooKeeper is used, compared against "start"/"restart".
 * A numeric argument can never match those strings, and a non-numeric argument
 * makes Integer.parseInt throw NumberFormatException, so the two usages conflict.
 * Confirm the intended command line before relying on the ZooKeeper branch.
 *
 * @param args exactly one argument is accepted; otherwise a usage message is logged
 */
public static void main(String[] args) {
    // exactly one command line argument is required
    if (args.length != 1) {
        LOG.info("usage: java JobMasterExample numberOfWorkers");
        return;
    }
    // throws NumberFormatException when args[0] is not an integer
    int numberOfWorkers = Integer.parseInt(args[0]);
    String host = "0.0.0.0";
    // we assume that the twister2Home is the current directory
    // String configDir = "../twister2/config/src/yaml/";
    String configDir = "";
    // an empty relative path resolves to the absolute path of the current working directory
    String twister2Home = Paths.get(configDir).toAbsolutePath().toString();
    Config config = ConfigLoader.loadConfig(twister2Home, "conf", "kubernetes");
    config = JobMasterClientExample.updateConfig(config, config, host);
    LOG.info("Loaded: " + config.size() + " configuration parameters.");
    // Twister2Job twister2Job = Twister2Job.loadTwister2Job(config, null);
    // build a job description in code instead of loading it from the config
    Twister2Job twister2Job = Twister2Job.newBuilder().setJobName("hello-world-job").setWorkerClass(HelloWorld.class).addComputeResource(.2, 128, numberOfWorkers).build();
    twister2Job.setUserName(System.getProperty("user.name"));
    JobAPI.Job job = twister2Job.serialize();
    LOG.info("JobID: " + job.getJobId());
    JobMasterAPI.JobMasterState initialState = JobMasterAPI.JobMasterState.JM_STARTED;
    // publish the job through the static field before initializeZooKeeper is called;
    // the restart branch below reads this field back afterwards
    JobMasterStarter.job = job;
    if (ZKContext.isZooKeeperServerUsed(config)) {
        // NOTE(review): args[0] was already parsed as an integer above, so neither
        // comparison below can succeed for an input that passed parseInt -- confirm intent
        if ("start".equalsIgnoreCase(args[0])) {
            JobMasterStarter.initializeZooKeeper(config, job.getJobId(), host, initialState);
        } else if ("restart".equalsIgnoreCase(args[0])) {
            initialState = JobMasterAPI.JobMasterState.JM_RESTARTED;
            JobMasterStarter.initializeZooKeeper(config, job.getJobId(), host, initialState);
            // presumably initializeZooKeeper replaces JobMasterStarter.job on restart; verify
            job = JobMasterStarter.job;
        } else {
            LOG.info("usage: java JobMasterExample start/restart");
            return;
        }
    }
    // write jobID to file
    String dir = System.getProperty("user.home") + "/.twister2";
    if (!FileUtils.isDirectoryExists(dir)) {
        FileUtils.createDirectory(dir);
    }
    String filename = dir + "/last-job-id.txt";
    // NOTE(review): getBytes() without a charset uses the platform default encoding
    FileUtils.writeToFile(filename, (job.getJobId() + "").getBytes(), true);
    LOG.info("Written jobID to file: " + job.getJobId());
    String ip = null;
    try {
        ip = Inet4Address.getLocalHost().getHostAddress();
    } catch (UnknownHostException e) {
        // without a local address no NodeInfo can be built, so log and give up
        LOG.log(Level.SEVERE, e.getMessage(), e);
        return;
    }
    // only the IP is supplied; the other two NodeInfo arguments are passed as null
    JobMasterAPI.NodeInfo jobMasterNode = NodeInfoUtils.createNodeInfo(ip, null, null);
    KubernetesController controller = KubernetesController.init("default");
    K8sScaler k8sScaler = new K8sScaler(config, job, controller);
    IJobTerminator jobTerminator = new NullTerminator();
    JobMaster jobMaster = new JobMaster(config, host, jobTerminator, job, jobMasterNode, k8sScaler, initialState);
    try {
        // jobMaster.startJobMasterThreaded();
        jobMaster.startJobMasterBlocking();
    } catch (Twister2Exception e) {
        // log the failure, then rethrow unchecked so the JVM terminates with the cause attached
        LOG.log(Level.SEVERE, "Exception when starting Job master: ", e);
        throw new RuntimeException(e);
    }
    // NOTE(review): the message says "Threaded" although the blocking start variant is
    // called above; presumably this line is only reached after the job master returns -- confirm
    LOG.info("Threaded Job Master started:" + "\nnumberOfWorkers: " + job.getNumberOfWorkers() + "\njobID: " + job.getJobId());
}
Also used : JobMaster(edu.iu.dsc.tws.master.server.JobMaster) Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception) UnknownHostException(java.net.UnknownHostException) Config(edu.iu.dsc.tws.api.config.Config) KubernetesController(edu.iu.dsc.tws.rsched.schedulers.k8s.KubernetesController) JobAPI(edu.iu.dsc.tws.proto.system.job.JobAPI) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) K8sScaler(edu.iu.dsc.tws.rsched.schedulers.k8s.driver.K8sScaler) JobMasterAPI(edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI) IJobTerminator(edu.iu.dsc.tws.master.IJobTerminator) NullTerminator(edu.iu.dsc.tws.rsched.schedulers.NullTerminator)

Example 23 with Twister2Exception

use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.

the class ZKPersStateManager method removeScaledDownZNodes.

/**
 * Deletes the persistent-state znodes of the workers removed by a scale-down.
 * Worker IDs from minID (inclusive) up to maxID (exclusive) are visited in
 * ascending order; a znode is deleted only if it currently exists.
 *
 * @throws Twister2Exception wrapping the first failure met while checking or
 *                           deleting a worker znode; later workers are not visited
 */
public static void removeScaledDownZNodes(CuratorFramework client, String rootPath, String jobID, int minID, int maxID) throws Twister2Exception {
    final String persDirPath = ZKUtils.persDir(rootPath, jobID);
    int workerID = minID;
    while (workerID < maxID) {
        final String znodePath = ZKUtils.workerPath(persDirPath, workerID);
        try {
            // not sure whether the existence check is required before deleting
            boolean znodePresent = client.checkExists().forPath(znodePath) != null;
            if (znodePresent) {
                client.delete().forPath(znodePath);
                LOG.info("Worker PersStateDir deleted: " + znodePath);
            }
        } catch (Exception ex) {
            throw new Twister2Exception("Worker PersStateDir cannot be deleted: " + znodePath, ex);
        }
        workerID++;
    }
}
Also used : Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception)

Example 24 with Twister2Exception

use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.

the class ZKPersStateManager method updateWorkerStatus.

/**
 * Overwrites the data of a worker's persistent znode with a record built from
 * the given worker info, new status and restart count.
 *
 * @param client       Curator client used to write the znode data
 * @param rootPath     root path handed to {@code ZKUtils.persDir}
 * @param jobID        job ID handed to {@code ZKUtils.persDir}
 * @param workerInfo   worker whose znode is updated; its worker ID selects the znode
 * @param restartCount restart count stored together with the status
 * @param newStatus    status to store
 * @return {@code true} when the write succeeded; failures surface as exceptions
 * @throws Twister2Exception wrapping any exception raised while writing the znode
 */
public static boolean updateWorkerStatus(CuratorFramework client, String rootPath, String jobID, WorkerInfo workerInfo, int restartCount, WorkerState newStatus) throws Twister2Exception {
    final String znodePath = ZKUtils.workerPath(ZKUtils.persDir(rootPath, jobID), workerInfo.getWorkerID());
    final WorkerWithState updatedRecord = new WorkerWithState(workerInfo, newStatus, restartCount);
    try {
        client.setData().forPath(znodePath, updatedRecord.toByteArray());
        LOG.info("Worker status changed to: " + newStatus);
    } catch (Exception ex) {
        throw new Twister2Exception("Could not update worker status in znode: " + workerInfo.getWorkerID(), ex);
    }
    return true;
}
Also used : Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception)

Example 25 with Twister2Exception

use of edu.iu.dsc.tws.api.exceptions.Twister2Exception in project twister2 by DSC-SPIDAL.

the class ZKPersStateManager method getWorkers.

/**
 * Returns all registered workers of a job.
 * <p>
 * Lists the children of the job's persistent-state directory, reads the data
 * of each child znode and decodes it with {@code WorkerWithState.decode}.
 * The result follows the order in which the children were listed.
 *
 * @param client   Curator client used to list the children and read their data
 * @param rootPath root path handed to {@code ZKUtils.persDir}
 * @param jobID    job ID handed to {@code ZKUtils.persDir}
 * @return the decoded workers; empty when the directory has no children
 * @throws Twister2Exception wrapping any exception raised while listing,
 *                           reading or decoding the worker znodes
 */
public static LinkedList<WorkerWithState> getWorkers(CuratorFramework client, String rootPath, String jobID) throws Twister2Exception {
    String workersPersDir = ZKUtils.persDir(rootPath, jobID);
    try {
        List<String> children = client.getChildren().forPath(workersPersDir);
        // diamond operator instead of the raw LinkedList type avoids an unchecked conversion
        LinkedList<WorkerWithState> workers = new LinkedList<>();
        for (String childName : children) {
            String childPath = workersPersDir + "/" + childName;
            byte[] workerNodeBody = client.getData().forPath(childPath);
            WorkerWithState workerWithState = WorkerWithState.decode(workerNodeBody);
            workers.add(workerWithState);
        }
        return workers;
    } catch (Exception e) {
        throw new Twister2Exception("Could not get persistent worker znode data: " + workersPersDir, e);
    }
}
Also used : Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception) LinkedList(java.util.LinkedList) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) Twister2Exception(edu.iu.dsc.tws.api.exceptions.Twister2Exception)

Aggregations

Twister2Exception (edu.iu.dsc.tws.api.exceptions.Twister2Exception)36 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)24 JobMasterAPI (edu.iu.dsc.tws.proto.jobmaster.JobMasterAPI)14 JobMaster (edu.iu.dsc.tws.master.server.JobMaster)7 NullTerminator (edu.iu.dsc.tws.rsched.schedulers.NullTerminator)5 IScalerPerCluster (edu.iu.dsc.tws.api.driver.IScalerPerCluster)4 NullScaler (edu.iu.dsc.tws.api.driver.NullScaler)4 UnknownHostException (java.net.UnknownHostException)4 Config (edu.iu.dsc.tws.api.config.Config)3 K8sScaler (edu.iu.dsc.tws.rsched.schedulers.k8s.driver.K8sScaler)3 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)2 JobFaultyException (edu.iu.dsc.tws.api.exceptions.JobFaultyException)2 TimeoutException (edu.iu.dsc.tws.api.exceptions.TimeoutException)2 IController (edu.iu.dsc.tws.api.scheduler.IController)2 KubernetesController (edu.iu.dsc.tws.rsched.schedulers.k8s.KubernetesController)2 LinkedList (java.util.LinkedList)2 ChildData (org.apache.curator.framework.recipes.cache.ChildData)2 PathChildrenCache (org.apache.curator.framework.recipes.cache.PathChildrenCache)2 Twister2Job (edu.iu.dsc.tws.api.Twister2Job)1 StateStore (edu.iu.dsc.tws.api.checkpointing.StateStore)1