Search in sources :

Example 6 with Twister2Job

use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.

The class TeraSort, method main.

public static void main(String[] args) throws ParseException {
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    JobConfig jobConfig = new JobConfig();
    Options options = new Options();
    // file based mode configuration
    options.addOption(createOption(ARG_INPUT_FILE, true, "Path to the file containing input tuples. " + "Path can be specified with %d, where it will be replaced by task index. For example," + "input-%d, will be considered as input-0 in source task having index 0.", false));
    // non-file based mode configurations
    options.addOption(createOption(ARG_SIZE, true, "Total Data Size in GigaBytes for all workers.", true));
    options.addOption(createOption(ARG_KEY_SIZE, true, "Size of the key in bytes of a single Tuple", true));
    options.addOption(createOption(ARG_KEY_SEED, true, "Size of the key in bytes of a single Tuple", false));
    options.addOption(createOption(ARG_VALUE_SIZE, true, "Size of the value in bytes of a single Tuple", true));
    // resources
    options.addOption(createOption(ARG_RESOURCE_CPU, true, "Amount of CPUs to allocate per instance", true));
    options.addOption(createOption(ARG_RESOURCE_MEMORY, true, "Amount of Memory in mega bytes to allocate per instance", true));
    options.addOption(createOption(ARG_RESOURCE_INSTANCES, true, "No. of instances", true));
    options.addOption(createOption(ARG_RESOURCE_VOLATILE_DISK, true, "Volatile Disk for each worker at K8s", false));
    options.addOption(createOption(ARG_WORKERS_PER_POD, true, "Workers per pod in Kubernetes", false));
    // tasks and sources counts
    options.addOption(createOption(ARG_TASKS_SOURCES, true, "No of source tasks", true));
    options.addOption(createOption(ARG_TASKS_SINKS, true, "No of sink tasks", true));
    // optional configurations (tune performance)
    options.addOption(createOption(ARG_TUNE_MAX_BYTES_IN_MEMORY, true, "Maximum bytes to keep in memory", false));
    options.addOption(createOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, true, "Maximum records to keep in memory", false));
    options.addOption(createOption(ARG_BENCHMARK_METADATA, true, "Auto generated argument by benchmark suite", false));
    // output folder
    options.addOption(createOption(ARG_OUTPUT_FOLDER, true, "Folder to save output files", false));
    // fixed schema
    options.addOption(createOption(ARG_FIXED_SCHEMA, false, "Use fixed schema feature", false));
    // verify option
    options.addOption(createOption(VERIFY, false, "Verify whether the results are sorted.", false));
    CommandLineParser commandLineParser = new DefaultParser();
    CommandLine cmd = commandLineParser.parse(options, args);
    if (cmd.hasOption(ARG_INPUT_FILE)) {
        jobConfig.put(ARG_INPUT_FILE, cmd.getOptionValue(ARG_INPUT_FILE));
    } else {
        jobConfig.put(ARG_SIZE, Double.valueOf(cmd.getOptionValue(ARG_SIZE)));
        jobConfig.put(ARG_VALUE_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_VALUE_SIZE)));
        jobConfig.put(ARG_KEY_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_KEY_SIZE)));
    }
    // in GB, default value is 4GB
    double volatileDisk = 0.0;
    if (cmd.hasOption(ARG_RESOURCE_VOLATILE_DISK)) {
        volatileDisk = Double.valueOf(cmd.getOptionValue(ARG_RESOURCE_VOLATILE_DISK));
    }
    // default value is 1
    int workersPerPod = 1;
    if (cmd.hasOption(ARG_WORKERS_PER_POD)) {
        workersPerPod = Integer.valueOf(cmd.getOptionValue(ARG_WORKERS_PER_POD));
    }
    jobConfig.put(ARG_TASKS_SOURCES, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SOURCES)));
    jobConfig.put(ARG_TASKS_SINKS, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SINKS)));
    jobConfig.put(ARG_RESOURCE_INSTANCES, Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)) * workersPerPod);
    if (cmd.hasOption(ARG_TUNE_MAX_BYTES_IN_MEMORY)) {
        long maxBytesInMemory = Long.valueOf(cmd.getOptionValue(ARG_TUNE_MAX_BYTES_IN_MEMORY));
        jobConfig.put(SHUFFLE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
        // for benchmark service
        jobConfig.put(ARG_TUNE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
    }
    if (cmd.hasOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE)) {
        long maxRecordsInMemory = Long.valueOf(cmd.getOptionValue(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE));
        jobConfig.put(SHUFFLE_MAX_FILE_SIZE, maxRecordsInMemory);
        jobConfig.put(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, maxRecordsInMemory);
    }
    if (cmd.hasOption(ARG_BENCHMARK_METADATA)) {
        jobConfig.put(ARG_BENCHMARK_METADATA, cmd.getOptionValue(ARG_BENCHMARK_METADATA));
        jobConfig.put(ARG_RUN_BENCHMARK, true);
    }
    if (cmd.hasOption(ARG_OUTPUT_FOLDER)) {
        jobConfig.put(ARG_OUTPUT_FOLDER, cmd.getOptionValue(ARG_OUTPUT_FOLDER));
    }
    if (cmd.hasOption(ARG_FIXED_SCHEMA)) {
        jobConfig.put(ARG_FIXED_SCHEMA, true);
    }
    if (cmd.hasOption(VERIFY)) {
        jobConfig.put(VERIFY, true);
    }
    Twister2Job twister2Job;
    twister2Job = Twister2Job.newBuilder().setJobName("terasort").setWorkerClass(TeraSort.class.getName()).addComputeResource(Double.valueOf(cmd.getOptionValue(ARG_RESOURCE_CPU)), Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_MEMORY)), volatileDisk, Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)), workersPerPod).setConfig(jobConfig).build();
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) KeyedGatherConfig(edu.iu.dsc.tws.task.impl.ops.KeyedGatherConfig) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) CommandLineParser(org.apache.commons.cli.CommandLineParser) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) DefaultParser(org.apache.commons.cli.DefaultParser)

Example 7 with Twister2Job

use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.

The class CheckpointingTaskExample, method main.

public static void main(String[] args) {
    int numberOfWorkers = 4;
    if (args.length == 1) {
        numberOfWorkers = Integer.valueOf(args[0]);
    }
    // first load the configurations from command line and config files
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    // lets put a configuration here
    JobConfig jobConfig = new JobConfig();
    jobConfig.put("parallelism", numberOfWorkers);
    Twister2Job twister2Job = Twister2Job.newBuilder().setJobName("hello-checkpointing-job").setWorkerClass(CheckpointingTaskExample.class).addComputeResource(1, 1024, numberOfWorkers).setConfig(jobConfig).build();
    // now submit the job
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job)

Example 8 with Twister2Job

use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.

The class JobMasterClientExample, method main.

/**
 * a test class to run JMWorkerAgent
 * First, a JobMaster instance should be started on a machine
 * This client should connect to that server
 * <p>
 * It reads config files from conf/kubernetes directory
 * It uses the first ComputeResource in that config file as the ComputeResource of this worker
 * Number of workers is the number of workers in the first ComputeResource
 * <p>
 * When all workers joined, they get the full worker list
 * Then, each worker sends a barrier message
 * Then, each worker sends a completed message and closes
 */
public static void main(String[] args) {
    if (args.length != 3) {
        LOG.severe("Provide jmAddress workerID and jobID as parameters.");
        return;
    }
    String jmAddress = args[0];
    int workerID = Integer.parseInt(args[1]);
    String jobID = args[2];
    // we assume that the twister2Home is the current directory
    // String configDir = "../twister2/config/src/yaml/";
    String configDir = "";
    String twister2Home = Paths.get(configDir).toAbsolutePath().toString();
    Config config1 = ConfigLoader.loadConfig(twister2Home, "conf/kubernetes");
    Config config2 = ConfigLoader.loadConfig(twister2Home, "conf/common");
    Config config = updateConfig(config1, config2, jmAddress);
    LOG.info("Loaded: " + config.size() + " configuration parameters.");
    Twister2Job twister2Job = Twister2Job.loadTwister2Job(config, null);
    twister2Job.setJobID(jobID);
    JobAPI.Job job = twister2Job.serialize();
    LOG.info("workerID: " + workerID);
    LOG.info("jobID: " + jobID);
    simulateClient(config, job, workerID);
}
Also used : Config(edu.iu.dsc.tws.api.config.Config) JobAPI(edu.iu.dsc.tws.proto.system.job.JobAPI) Twister2Job(edu.iu.dsc.tws.api.Twister2Job)

Example 9 with Twister2Job

use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.

The class HelloExample, method main.

public static void main(String[] args) throws ParseException {
    // first load the configurations from command line and config files
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    // build JobConfig
    HashMap<String, Object> configurations = new HashMap<>();
    configurations.put(SchedulerContext.THREADS_PER_WORKER, 1);
    Options options = new Options();
    options.addOption(CDFConstants.ARGS_PARALLELISM_VALUE, true, "2");
    options.addOption(CDFConstants.ARGS_WORKERS, true, "2");
    @SuppressWarnings("deprecation") CommandLineParser commandLineParser = new DefaultParser();
    CommandLine commandLine = commandLineParser.parse(options, args);
    int instances = Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_WORKERS));
    int parallelismValue = Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_PARALLELISM_VALUE));
    configurations.put(CDFConstants.ARGS_WORKERS, Integer.toString(instances));
    configurations.put(CDFConstants.ARGS_PARALLELISM_VALUE, Integer.toString(parallelismValue));
    // build JobConfig
    JobConfig jobConfig = new JobConfig();
    jobConfig.putAll(configurations);
    config = Config.newBuilder().putAll(config).put(SchedulerContext.DRIVER_CLASS, null).build();
    Twister2Job twister2Job;
    twister2Job = Twister2Job.newBuilder().setWorkerClass(CDFWWorker.class).setJobName(HelloExample.class.getName()).setDriverClass(HelloDriver.class.getName()).addComputeResource(1, 512, instances).setConfig(jobConfig).build();
    // now submit the job
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Options(org.apache.commons.cli.Options) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) DataFlowJobConfig(edu.iu.dsc.tws.task.cdfw.DataFlowJobConfig) JobConfig(edu.iu.dsc.tws.api.JobConfig) DataFlowJobConfig(edu.iu.dsc.tws.task.cdfw.DataFlowJobConfig) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) CommandLine(org.apache.commons.cli.CommandLine) CommandLineParser(org.apache.commons.cli.CommandLineParser) DefaultParser(org.apache.commons.cli.DefaultParser)

Example 10 with Twister2Job

use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.

The class ParallelDataFlowsExample, method main.

public static void main(String[] args) throws ParseException {
    // first load the configurations from command line and config files
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    // build JobConfig
    HashMap<String, Object> configurations = new HashMap<>();
    configurations.put(SchedulerContext.THREADS_PER_WORKER, 1);
    Options options = new Options();
    options.addOption(CDFConstants.ARGS_PARALLELISM_VALUE, true, "2");
    options.addOption(CDFConstants.ARGS_WORKERS, true, "2");
    @SuppressWarnings("deprecation") CommandLineParser commandLineParser = new DefaultParser();
    CommandLine commandLine = commandLineParser.parse(options, args);
    int instances = Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_WORKERS));
    int parallelismValue = Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_PARALLELISM_VALUE));
    configurations.put(CDFConstants.ARGS_WORKERS, Integer.toString(instances));
    configurations.put(CDFConstants.ARGS_PARALLELISM_VALUE, Integer.toString(parallelismValue));
    // build JobConfig
    JobConfig jobConfig = new JobConfig();
    jobConfig.putAll(configurations);
    config = Config.newBuilder().putAll(config).put(SchedulerContext.DRIVER_CLASS, null).build();
    Twister2Job twister2Job;
    twister2Job = Twister2Job.newBuilder().setJobName(ParallelDataFlowsExample.class.getName()).setWorkerClass(CDFWWorker.class).setDriverClass(ParallelDataflowsDriver.class.getName()).addComputeResource(1, 512, instances, true).setConfig(jobConfig).build();
    // now submit the job
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Options(org.apache.commons.cli.Options) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) DataFlowJobConfig(edu.iu.dsc.tws.task.cdfw.DataFlowJobConfig) JobConfig(edu.iu.dsc.tws.api.JobConfig) CDFWWorker(edu.iu.dsc.tws.task.impl.cdfw.CDFWWorker) DataFlowJobConfig(edu.iu.dsc.tws.task.cdfw.DataFlowJobConfig) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) CommandLine(org.apache.commons.cli.CommandLine) CommandLineParser(org.apache.commons.cli.CommandLineParser) DefaultParser(org.apache.commons.cli.DefaultParser)

Aggregations

Twister2Job (edu.iu.dsc.tws.api.Twister2Job)39 JobConfig (edu.iu.dsc.tws.api.JobConfig)27 Config (edu.iu.dsc.tws.api.config.Config)23 HashMap (java.util.HashMap)11 CommandLine (org.apache.commons.cli.CommandLine)8 CommandLineParser (org.apache.commons.cli.CommandLineParser)7 DefaultParser (org.apache.commons.cli.DefaultParser)7 Options (org.apache.commons.cli.Options)7 Twister2JobState (edu.iu.dsc.tws.api.scheduler.Twister2JobState)4 DataFlowJobConfig (edu.iu.dsc.tws.task.cdfw.DataFlowJobConfig)4 JobAPI (edu.iu.dsc.tws.proto.system.job.JobAPI)3 CDFWWorker (edu.iu.dsc.tws.task.impl.cdfw.CDFWWorker)3 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)2 KubernetesController (edu.iu.dsc.tws.rsched.schedulers.k8s.KubernetesController)2 LinkedHashMap (java.util.LinkedHashMap)2 Map (java.util.Map)2 DataObject (edu.iu.dsc.tws.api.dataset.DataObject)1 DriverJobState (edu.iu.dsc.tws.api.driver.DriverJobState)1 Twister2Exception (edu.iu.dsc.tws.api.exceptions.Twister2Exception)1 HelloWorld (edu.iu.dsc.tws.examples.basic.HelloWorld)1