
Example 36 with Config

Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

The main method of the KMeansMain class.

public static void main(String[] args) throws ParseException {
    LOG.log(Level.INFO, "KMeans Clustering Job");
    // first load the configurations from command line and config files
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    Options options = new Options();
    options.addOption(DataObjectConstants.WORKERS, true, "Workers");
    options.addOption(DataObjectConstants.CSIZE, true, "Size of the centroids file");
    options.addOption(DataObjectConstants.DSIZE, true, "Size of the datapoints file");
    options.addOption(DataObjectConstants.NUMBER_OF_FILES, true, "Number of files");
    options.addOption(DataObjectConstants.SHARED_FILE_SYSTEM, false, "Shared file system");
    options.addOption(DataObjectConstants.DIMENSIONS, true, "Dimension of the data points");
    options.addOption(DataObjectConstants.PARALLELISM_VALUE, true, "Parallelism value");
    options.addOption(DataObjectConstants.ARGS_ITERATIONS, true, "Number of iterations");
    options.addOption(DataObjectConstants.JOB_TYPE, true, "Job type (graph or tset)");
    options.addOption(Utils.createOption(DataObjectConstants.DINPUT_DIRECTORY, true, "Data points Input directory", true));
    options.addOption(Utils.createOption(DataObjectConstants.CINPUT_DIRECTORY, true, "Centroids Input directory", true));
    options.addOption(Utils.createOption(DataObjectConstants.OUTPUT_DIRECTORY, true, "Output directory", true));
    options.addOption(Utils.createOption(DataObjectConstants.FILE_SYSTEM, true, "file system", true));
    options.addOption(Utils.createOption(DataObjectConstants.FILE_TYPE, true, "file type", true));
    CommandLineParser commandLineParser = new DefaultParser();
    CommandLine cmd = commandLineParser.parse(options, args);
    int workers = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.WORKERS));
    int dsize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DSIZE));
    int csize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.CSIZE));
    int numFiles = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.NUMBER_OF_FILES));
    int dimension = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DIMENSIONS));
    int parallelismValue = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.PARALLELISM_VALUE));
    int iterations = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.ARGS_ITERATIONS));
    String jobType = cmd.getOptionValue(DataObjectConstants.JOB_TYPE);
    String dataDirectory = cmd.getOptionValue(DataObjectConstants.DINPUT_DIRECTORY);
    String centroidDirectory = cmd.getOptionValue(DataObjectConstants.CINPUT_DIRECTORY);
    String outputDirectory = cmd.getOptionValue(DataObjectConstants.OUTPUT_DIRECTORY);
    String fileSystem = cmd.getOptionValue(DataObjectConstants.FILE_SYSTEM);
    String fileType = cmd.getOptionValue(DataObjectConstants.FILE_TYPE);
    // SHARED_FILE_SYSTEM is declared without an argument, so the presence of
    // the flag (not its value) signals a shared file system
    boolean shared = cmd.hasOption(DataObjectConstants.SHARED_FILE_SYSTEM);
    // build JobConfig
    JobConfig jobConfig = new JobConfig();
    jobConfig.put(DataObjectConstants.DINPUT_DIRECTORY, dataDirectory);
    jobConfig.put(DataObjectConstants.CINPUT_DIRECTORY, centroidDirectory);
    jobConfig.put(DataObjectConstants.OUTPUT_DIRECTORY, outputDirectory);
    jobConfig.put(DataObjectConstants.FILE_SYSTEM, fileSystem);
    jobConfig.put(DataObjectConstants.DSIZE, dsize);
    jobConfig.put(DataObjectConstants.CSIZE, csize);
    jobConfig.put(DataObjectConstants.WORKERS, workers);
    jobConfig.put(DataObjectConstants.NUMBER_OF_FILES, numFiles);
    jobConfig.put(DataObjectConstants.DIMENSIONS, dimension);
    jobConfig.put(DataObjectConstants.PARALLELISM_VALUE, parallelismValue);
    jobConfig.put(DataObjectConstants.SHARED_FILE_SYSTEM, shared);
    jobConfig.put(DataObjectConstants.ARGS_ITERATIONS, iterations);
    jobConfig.put(DataObjectConstants.JOB_TYPE, jobType);
    jobConfig.put(DataObjectConstants.FILE_TYPE, fileType);
    Twister2Job.Twister2JobBuilder jobBuilder = Twister2Job.newBuilder();
    jobBuilder.setJobName("KMeans-job");
    if (Context.TWISTER2_GRAPH_JOB.equals(jobType)) {
        jobBuilder.setWorkerClass(KMeansComputeJob.class.getName());
    } else if (Context.TWISTER2_TSET_JOB.equals(jobType)) {
        jobBuilder.setWorkerClass(KMeansTsetJob.class.getName());
    } else {
        throw new IllegalArgumentException("Unknown job type: " + jobType);
    }
    jobBuilder.addComputeResource(2, 512, 1.0, workers);
    jobBuilder.setConfig(jobConfig);
    // now submit the job
    Twister2Submitter.submitJob(jobBuilder.build(), config);
}
Also used : Options(org.apache.commons.cli.Options) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) CommandLine(org.apache.commons.cli.CommandLine) CommandLineParser(org.apache.commons.cli.CommandLineParser) DefaultParser(org.apache.commons.cli.DefaultParser)
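
Examples 36 through 38 repeat one submission pattern: load the cluster Config, collect job parameters into a JobConfig, describe the job with Twister2Job.newBuilder(), and hand both to Twister2Submitter. Below is a minimal self-contained sketch of that pattern; the worker class example.MyWorker and the resource figures are illustrative, and the package paths of ResourceAllocator and Twister2Submitter are assumed to follow twister2's rsched module.

import java.util.HashMap;

import edu.iu.dsc.tws.api.JobConfig;
import edu.iu.dsc.tws.api.Twister2Job;
import edu.iu.dsc.tws.api.config.Config;
// assumed package paths, following twister2's rsched module
import edu.iu.dsc.tws.rsched.core.ResourceAllocator;
import edu.iu.dsc.tws.rsched.job.Twister2Submitter;

public final class SubmitSketch {
    public static void main(String[] args) {
        // load the cluster configuration from config files (plus any overrides)
        Config config = ResourceAllocator.loadConfig(new HashMap<>());
        // job-level parameters, visible to every worker at runtime
        JobConfig jobConfig = new JobConfig();
        jobConfig.put("my.parameter", 42);
        // name the job, pick the worker class, and size the compute resources:
        // 2 CPUs, 512 MB RAM, 1.0 GB disk per worker, 4 workers
        Twister2Job job = Twister2Job.newBuilder()
            .setJobName("sketch-job")
            .setWorkerClass("example.MyWorker")
            .addComputeResource(2, 512, 1.0, 4)
            .setConfig(jobConfig)
            .build();
        Twister2Submitter.submitJob(job, config);
    }
}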

Example 37 with Config

Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

The main method of the KMeansCheckpointingWorker class.

public static void main(String[] args) throws ParseException {
    LOG.info("KMeans Clustering Job with fault tolerance");
    String jobName = "KMeans-faultolerance-job";
    // first load the configurations from command line and config files
    HashMap<String, Object> c = new HashMap<>();
    c.put(Context.JOB_ID, jobName);
    Config config = ResourceAllocator.loadConfig(c);
    Options options = new Options();
    options.addOption(DataObjectConstants.WORKERS, true, "Workers");
    options.addOption(DataObjectConstants.CSIZE, true, "Size of the centroids file");
    options.addOption(DataObjectConstants.DSIZE, true, "Size of the datapoints file");
    options.addOption(DataObjectConstants.NUMBER_OF_FILES, true, "Number of files");
    options.addOption(DataObjectConstants.SHARED_FILE_SYSTEM, false, "Shared file system");
    options.addOption(DataObjectConstants.DIMENSIONS, true, "Dimension of the data points");
    options.addOption(DataObjectConstants.PARALLELISM_VALUE, true, "Parallelism value");
    options.addOption(DataObjectConstants.ARGS_ITERATIONS, true, "Number of iterations");
    options.addOption(Utils.createOption(DataObjectConstants.DINPUT_DIRECTORY, true, "Data points Input directory", true));
    options.addOption(Utils.createOption(DataObjectConstants.CINPUT_DIRECTORY, true, "Centroids Input directory", true));
    options.addOption(Utils.createOption(DataObjectConstants.OUTPUT_DIRECTORY, true, "Output directory", true));
    options.addOption(Utils.createOption(DataObjectConstants.FILE_SYSTEM, true, "file system", true));
    CommandLineParser commandLineParser = new DefaultParser();
    CommandLine cmd = commandLineParser.parse(options, args);
    int workers = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.WORKERS));
    int dsize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DSIZE));
    int csize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.CSIZE));
    int numFiles = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.NUMBER_OF_FILES));
    int dimension = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DIMENSIONS));
    int parallelismValue = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.PARALLELISM_VALUE));
    int iterations = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.ARGS_ITERATIONS));
    String dataDirectory = cmd.getOptionValue(DataObjectConstants.DINPUT_DIRECTORY);
    String centroidDirectory = cmd.getOptionValue(DataObjectConstants.CINPUT_DIRECTORY);
    String outputDirectory = cmd.getOptionValue(DataObjectConstants.OUTPUT_DIRECTORY);
    String fileSystem = cmd.getOptionValue(DataObjectConstants.FILE_SYSTEM);
    // as in Example 36, the flag carries no argument, so test for its presence
    boolean shared = cmd.hasOption(DataObjectConstants.SHARED_FILE_SYSTEM);
    // build JobConfig
    JobConfig jobConfig = new JobConfig();
    jobConfig.put(DataObjectConstants.DINPUT_DIRECTORY, dataDirectory);
    jobConfig.put(DataObjectConstants.CINPUT_DIRECTORY, centroidDirectory);
    jobConfig.put(DataObjectConstants.OUTPUT_DIRECTORY, outputDirectory);
    jobConfig.put(DataObjectConstants.FILE_SYSTEM, fileSystem);
    jobConfig.put(DataObjectConstants.DSIZE, Integer.toString(dsize));
    jobConfig.put(DataObjectConstants.CSIZE, Integer.toString(csize));
    jobConfig.put(DataObjectConstants.WORKERS, Integer.toString(workers));
    jobConfig.put(DataObjectConstants.NUMBER_OF_FILES, Integer.toString(numFiles));
    jobConfig.put(DataObjectConstants.DIMENSIONS, Integer.toString(dimension));
    jobConfig.put(DataObjectConstants.PARALLELISM_VALUE, Integer.toString(parallelismValue));
    jobConfig.put(DataObjectConstants.SHARED_FILE_SYSTEM, shared);
    jobConfig.put(DataObjectConstants.ARGS_ITERATIONS, Integer.toString(iterations));
    Twister2Job.Twister2JobBuilder jobBuilder = Twister2Job.newBuilder();
    jobBuilder.setJobName(jobName);
    jobBuilder.setWorkerClass(KMeansCheckpointingWorker.class.getName());
    jobBuilder.addComputeResource(2, 512, 1.0, workers);
    jobBuilder.setConfig(jobConfig);
    // now submit the job
    Twister2Submitter.submitJob(jobBuilder.build(), config);
}
Also used : Options(org.apache.commons.cli.Options) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) CommandLine(org.apache.commons.cli.CommandLine) DataObject(edu.iu.dsc.tws.api.dataset.DataObject) CommandLineParser(org.apache.commons.cli.CommandLineParser) DefaultParser(org.apache.commons.cli.DefaultParser)
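
The one notable difference from Example 36 is that the job id is fixed before the configuration is loaded, so a restarted job resolves to the same checkpoint state. A short sketch of that idiom, with a made-up job id:

// fix the job id up front so a restarted job finds its checkpoints;
// "my-checkpointed-job" is a made-up id, not one from this project
HashMap<String, Object> overrides = new HashMap<>();
overrides.put(Context.JOB_ID, "my-checkpointed-job");
Config config = ResourceAllocator.loadConfig(overrides);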

Example 38 with Config

Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

The main method of the SortJob class.

public static void main(String[] args) throws ParseException {
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    JobConfig jobConfig = new JobConfig();
    Options options = new Options();
    // file based mode configuration
    options.addOption(createOption(ARG_INPUT_FILE, true, "Path to the file containing input tuples. " + "The path may contain %d, which is replaced by the task index. For example, " + "input-%d resolves to input-0 in the source task with index 0.", false));
    // non-file based mode configurations
    options.addOption(createOption(ARG_SIZE, true, "Data size in gigabytes. " + "Each source will generate this much data, including the size of both keys and values.", false));
    options.addOption(createOption(ARG_KEY_SIZE, true, "Size in bytes of the key of a single tuple", true));
    options.addOption(createOption(ARG_KEY_SEED, true, "Random seed used to generate keys", false));
    options.addOption(createOption(ARG_VALUE_SIZE, true, "Size in bytes of the value of a single tuple", true));
    // resources
    options.addOption(createOption(ARG_RESOURCE_CPU, true, "Amount of CPUs to allocate per instance", true));
    options.addOption(createOption(ARG_RESOURCE_MEMORY, true, "Amount of memory in megabytes to allocate per instance", true));
    options.addOption(createOption(ARG_RESOURCE_INSTANCES, true, "No. of instances", true));
    // tasks and sources counts
    options.addOption(createOption(ARG_TASKS_SOURCES, true, "Number of source tasks", true));
    options.addOption(createOption(ARG_TASKS_SINKS, true, "Number of sink tasks", true));
    // optional configurations (tune performance)
    options.addOption(createOption(ARG_TUNE_MAX_BYTES_IN_MEMORY, true, "Maximum bytes to keep in memory", false));
    options.addOption(createOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, true, "Maximum size in bytes of a single shuffle file", false));
    options.addOption(createOption(ARG_BENCHMARK_METADATA, true, "Auto generated argument by benchmark suite", false));
    // output folder
    options.addOption(createOption(ARG_OUTPUT_FOLDER, true, "Folder to save output files", false));
    // fixed schema
    options.addOption(createOption(ARG_FIXED_SCHEMA, false, "Use fixed schema feature", false));
    CommandLineParser commandLineParser = new DefaultParser();
    CommandLine cmd = commandLineParser.parse(options, args);
    if (cmd.hasOption(ARG_INPUT_FILE)) {
        jobConfig.put(ARG_INPUT_FILE, cmd.getOptionValue(ARG_INPUT_FILE));
    } else {
        jobConfig.put(ARG_SIZE, Double.valueOf(cmd.getOptionValue(ARG_SIZE)));
        jobConfig.put(ARG_VALUE_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_VALUE_SIZE)));
        jobConfig.put(ARG_KEY_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_KEY_SIZE)));
    }
    jobConfig.put(ARG_TASKS_SOURCES, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SOURCES)));
    jobConfig.put(ARG_TASKS_SINKS, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SINKS)));
    jobConfig.put(ARG_RESOURCE_INSTANCES, Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)));
    if (cmd.hasOption(ARG_TUNE_MAX_BYTES_IN_MEMORY)) {
        long maxBytesInMemory = Long.parseLong(cmd.getOptionValue(ARG_TUNE_MAX_BYTES_IN_MEMORY));
        jobConfig.put(SHUFFLE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
        // for benchmark service
        jobConfig.put(ARG_TUNE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
    }
    if (cmd.hasOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE)) {
        long maxShuffleFileSize = Long.parseLong(cmd.getOptionValue(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE));
        jobConfig.put(SHUFFLE_MAX_FILE_SIZE, maxShuffleFileSize);
        // for benchmark service
        jobConfig.put(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, maxShuffleFileSize);
    }
    if (cmd.hasOption(ARG_BENCHMARK_METADATA)) {
        jobConfig.put(ARG_BENCHMARK_METADATA, cmd.getOptionValue(ARG_BENCHMARK_METADATA));
        jobConfig.put(ARG_RUN_BENCHMARK, true);
    }
    if (cmd.hasOption(ARG_OUTPUT_FOLDER)) {
        jobConfig.put(ARG_OUTPUT_FOLDER, cmd.getOptionValue(ARG_OUTPUT_FOLDER));
    }
    if (cmd.hasOption(ARG_FIXED_SCHEMA)) {
        jobConfig.put(ARG_FIXED_SCHEMA, true);
    }
    Twister2Job twister2Job = Twister2Job.newBuilder()
        .setJobName(SortJob.class.getName())
        .setWorkerClass(SortJob.class.getName())
        .addComputeResource(Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_CPU)),
            Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_MEMORY)),
            Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)))
        .setConfig(jobConfig)
        .build();
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) CommandLineParser(org.apache.commons.cli.CommandLineParser) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) DefaultParser(org.apache.commons.cli.DefaultParser)
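
SortJob distinguishes required flags, which it parses unconditionally, from optional ones, which it reads behind cmd.hasOption guards. A sketch of that guard with a fallback default; the option name "buffer-size" is hypothetical, not one of SortJob's flags:

// read an optional numeric flag only when it is present, else keep a default
long bufferSize = 64L * 1024 * 1024;  // default: 64 MB
if (cmd.hasOption("buffer-size")) {
    bufferSize = Long.parseLong(cmd.getOptionValue("buffer-size"));
}
// Commons CLI also offers cmd.getOptionValue(opt, defaultValue) for the same
// effect when a String default is acceptable
jobConfig.put("buffer-size", bufferSize);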

Example 39 with Config

Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

The listJobs method of the ZKJobLister class.

/**
 * List jobs stored in ZooKeeper.
 */
public static void listJobs() {
    CuratorFramework client = ZKUtils.connectToServer(ZKContext.serverAddresses(config));
    String rootPath = ZKContext.rootNode(config);
    List<JobWithState> jobs;
    try {
        jobs = JobZNodeManager.getJobs(client, rootPath);
    } catch (Exception e) {
        LOG.log(Level.SEVERE, "Could not get jobs from zookeeper", e);
        return;
    }
    if (jobs.isEmpty()) {
        LOG.info("\nNumber of all jobs: 0");
        return;
    }
    int maxJobIdLength = jobs.stream()
        .mapToInt(j -> j.getJob().getJobId().length())
        .max()
        .orElseThrow(() -> new RuntimeException("No valid jobID in jobs"));
    List<JobWithState> finishedJobs = jobs.stream().filter(JobWithState::finished).collect(Collectors.toList());
    List<JobWithState> activeJobs = jobs.stream().filter(JobWithState::active).collect(Collectors.toList());
    int jobIDColumn = maxJobIdLength + 3;
    String format = "%-" + jobIDColumn + "s%-12s%s\n";
    int lineWidth = jobIDColumn + 12 + "Number of workers".length();
    String separator = StringUtils.repeat('=', lineWidth);
    StringBuilder buffer = new StringBuilder();
    Formatter f = new Formatter(buffer);
    f.format("\n\n%s", "Number of all jobs: " + jobs.size());
    f.format("\n%s", "");
    f.format("\n%s", "List of finished jobs: " + finishedJobs.size() + "\n");
    outputJobs(finishedJobs, f, format, separator);
    f.format("\n%s", "");
    f.format("\n%s", "List of active jobs: " + activeJobs.size() + "\n");
    outputJobs(activeJobs, f, format, separator);
    LOG.info(buffer.toString());
}
Also used : WorkerWithState(edu.iu.dsc.tws.common.zk.WorkerWithState) CommandLineParser(org.apache.commons.cli.CommandLineParser) SchedulerContext(edu.iu.dsc.tws.api.config.SchedulerContext) ConfigLoader(edu.iu.dsc.tws.common.config.ConfigLoader) Options(org.apache.commons.cli.Options) ZKUtils(edu.iu.dsc.tws.common.zk.ZKUtils) JobWithState(edu.iu.dsc.tws.common.zk.JobWithState) Config(edu.iu.dsc.tws.api.config.Config) ZKPersStateManager(edu.iu.dsc.tws.common.zk.ZKPersStateManager) Logger(java.util.logging.Logger) HelpFormatter(org.apache.commons.cli.HelpFormatter) Collectors(java.util.stream.Collectors) StringUtils(org.apache.commons.lang3.StringUtils) Formatter(java.util.Formatter) Level(java.util.logging.Level) DefaultParser(org.apache.commons.cli.DefaultParser) List(java.util.List) CuratorFramework(org.apache.curator.framework.CuratorFramework) Context(edu.iu.dsc.tws.api.config.Context) ParseException(org.apache.commons.cli.ParseException) ZKContext(edu.iu.dsc.tws.common.zk.ZKContext) CommandLine(org.apache.commons.cli.CommandLine) JobZNodeManager(edu.iu.dsc.tws.common.zk.JobZNodeManager) Option(org.apache.commons.cli.Option)
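
JobZNodeManager.getJobs ultimately reads the child znodes under the job root. As a rough standalone illustration of what that involves, a Curator sketch follows; the connection string, retry policy, and root path are assumptions, not what ZKUtils.connectToServer and ZKContext actually configure:

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.ExponentialBackoffRetry;

static void listJobZNodes() throws Exception {
    // connection string and retry policy here are illustrative assumptions
    CuratorFramework client = CuratorFrameworkFactory.newClient(
        "localhost:2181", new ExponentialBackoffRetry(1000, 3));
    client.start();
    try {
        // each child of the root node corresponds to one stored job
        for (String child : client.getChildren().forPath("/twister2")) {
            System.out.println(child);
        }
    } finally {
        client.close();
    }
}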

Example 40 with Config

Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

The loadConfigurations method of the ZKJobLister class.

public static Config loadConfigurations(CommandLine cmd) {
    String twister2Home = cmd.getOptionValue("twister2_home");
    String configDir = cmd.getOptionValue("config_path");
    String jobID = cmd.getOptionValue("job_id");
    Config conf = ConfigLoader.loadConfig(twister2Home, configDir);
    return Config.newBuilder().putAll(conf).put(Context.TWISTER2_HOME.getKey(), twister2Home).put(SchedulerContext.CONFIG_DIR, configDir).put(Context.JOB_ID, jobID).build();
}
Also used : Config(edu.iu.dsc.tws.api.config.Config)
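
For completeness, a sketch of producing the CommandLine that loadConfigurations consumes; the three option names match the keys read above, while the descriptions are illustrative:

public static Config configFromArgs(String[] args) throws ParseException {
    // define the three flags that loadConfigurations reads
    Options options = new Options();
    options.addOption("twister2_home", true, "Twister2 installation directory");
    options.addOption("config_path", true, "Directory holding the configuration files");
    options.addOption("job_id", true, "Id of the job to inspect");
    CommandLine cmd = new DefaultParser().parse(options, args);
    return loadConfigurations(cmd);
}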

Aggregations

Config (edu.iu.dsc.tws.api.config.Config): 169
JobConfig (edu.iu.dsc.tws.api.JobConfig): 101
Twister2Job (edu.iu.dsc.tws.api.Twister2Job): 52
CommandLine (org.apache.commons.cli.CommandLine): 27
CommandLineParser (org.apache.commons.cli.CommandLineParser): 27
DefaultParser (org.apache.commons.cli.DefaultParser): 27
Options (org.apache.commons.cli.Options): 27
HashMap (java.util.HashMap): 26
ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph): 18
Map (java.util.Map): 15
TaskSchedulePlan (edu.iu.dsc.tws.api.compute.schedule.elements.TaskSchedulePlan): 13
WorkerPlan (edu.iu.dsc.tws.api.compute.schedule.elements.WorkerPlan): 12
LinkedHashMap (java.util.LinkedHashMap): 12
Test (org.junit.Test): 12
Path (edu.iu.dsc.tws.api.data.Path): 10
TaskInstancePlan (edu.iu.dsc.tws.api.compute.schedule.elements.TaskInstancePlan): 9
WorkerSchedulePlan (edu.iu.dsc.tws.api.compute.schedule.elements.WorkerSchedulePlan): 9
JobAPI (edu.iu.dsc.tws.proto.system.job.JobAPI): 9
TaskSchedulerClassTest (edu.iu.dsc.tws.tsched.utils.TaskSchedulerClassTest): 9
ExecutionPlan (edu.iu.dsc.tws.api.compute.executor.ExecutionPlan): 8