use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.
the class TeraSort method main.
public static void main(String[] args) throws ParseException {
  Config config = ResourceAllocator.loadConfig(new HashMap<>());
  JobConfig jobConfig = new JobConfig();
  Options options = new Options();
  // file based mode configuration
  options.addOption(createOption(ARG_INPUT_FILE, true,
      "Path to the file containing input tuples. "
          + "Path can be specified with %d, where it will be replaced by the task index. "
          + "For example, input-%d will be read as input-0 by the source task with index 0.",
      false));
  // non-file based mode configurations
  options.addOption(createOption(ARG_SIZE, true,
      "Total data size in gigabytes for all workers", true));
  options.addOption(createOption(ARG_KEY_SIZE, true,
      "Size of the key in bytes of a single tuple", true));
  options.addOption(createOption(ARG_KEY_SEED, true,
      "Random seed used to generate keys", false));
  options.addOption(createOption(ARG_VALUE_SIZE, true,
      "Size of the value in bytes of a single tuple", true));
  // resources
  options.addOption(createOption(ARG_RESOURCE_CPU, true,
      "Amount of CPUs to allocate per instance", true));
  options.addOption(createOption(ARG_RESOURCE_MEMORY, true,
      "Amount of memory in megabytes to allocate per instance", true));
  options.addOption(createOption(ARG_RESOURCE_INSTANCES, true,
      "Number of instances", true));
  options.addOption(createOption(ARG_RESOURCE_VOLATILE_DISK, true,
      "Volatile disk for each worker on Kubernetes", false));
  options.addOption(createOption(ARG_WORKERS_PER_POD, true,
      "Workers per pod in Kubernetes", false));
  // source and sink task counts
  options.addOption(createOption(ARG_TASKS_SOURCES, true,
      "Number of source tasks", true));
  options.addOption(createOption(ARG_TASKS_SINKS, true,
      "Number of sink tasks", true));
  // optional configurations (tune performance)
  options.addOption(createOption(ARG_TUNE_MAX_BYTES_IN_MEMORY, true,
      "Maximum bytes to keep in memory", false));
  options.addOption(createOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, true,
      "Maximum size of a shuffle file", false));
  options.addOption(createOption(ARG_BENCHMARK_METADATA, true,
      "Auto generated argument by benchmark suite", false));
  // output folder
  options.addOption(createOption(ARG_OUTPUT_FOLDER, true,
      "Folder to save output files", false));
  // fixed schema
  options.addOption(createOption(ARG_FIXED_SCHEMA, false,
      "Use fixed schema feature", false));
  // verify option
  options.addOption(createOption(VERIFY, false,
      "Verify whether the results are sorted", false));
  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine cmd = commandLineParser.parse(options, args);
  if (cmd.hasOption(ARG_INPUT_FILE)) {
    jobConfig.put(ARG_INPUT_FILE, cmd.getOptionValue(ARG_INPUT_FILE));
  } else {
    jobConfig.put(ARG_SIZE, Double.valueOf(cmd.getOptionValue(ARG_SIZE)));
    jobConfig.put(ARG_VALUE_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_VALUE_SIZE)));
    jobConfig.put(ARG_KEY_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_KEY_SIZE)));
  }
  // volatile disk size per worker in GB (defaults to 0.0)
  double volatileDisk = 0.0;
  if (cmd.hasOption(ARG_RESOURCE_VOLATILE_DISK)) {
    volatileDisk = Double.parseDouble(cmd.getOptionValue(ARG_RESOURCE_VOLATILE_DISK));
  }
  // workers per pod (defaults to 1)
  int workersPerPod = 1;
  if (cmd.hasOption(ARG_WORKERS_PER_POD)) {
    workersPerPod = Integer.parseInt(cmd.getOptionValue(ARG_WORKERS_PER_POD));
  }
  jobConfig.put(ARG_TASKS_SOURCES, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SOURCES)));
  jobConfig.put(ARG_TASKS_SINKS, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SINKS)));
  // total number of workers = instances * workersPerPod
  jobConfig.put(ARG_RESOURCE_INSTANCES,
      Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)) * workersPerPod);
  if (cmd.hasOption(ARG_TUNE_MAX_BYTES_IN_MEMORY)) {
    long maxBytesInMemory = Long.parseLong(cmd.getOptionValue(ARG_TUNE_MAX_BYTES_IN_MEMORY));
    jobConfig.put(SHUFFLE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
    // for benchmark service
    jobConfig.put(ARG_TUNE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
  }
  if (cmd.hasOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE)) {
    long maxShuffleFileSize = Long.parseLong(cmd.getOptionValue(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE));
    jobConfig.put(SHUFFLE_MAX_FILE_SIZE, maxShuffleFileSize);
    // for benchmark service
    jobConfig.put(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, maxShuffleFileSize);
  }
  if (cmd.hasOption(ARG_BENCHMARK_METADATA)) {
    jobConfig.put(ARG_BENCHMARK_METADATA, cmd.getOptionValue(ARG_BENCHMARK_METADATA));
    jobConfig.put(ARG_RUN_BENCHMARK, true);
  }
  if (cmd.hasOption(ARG_OUTPUT_FOLDER)) {
    jobConfig.put(ARG_OUTPUT_FOLDER, cmd.getOptionValue(ARG_OUTPUT_FOLDER));
  }
  if (cmd.hasOption(ARG_FIXED_SCHEMA)) {
    jobConfig.put(ARG_FIXED_SCHEMA, true);
  }
  if (cmd.hasOption(VERIFY)) {
    jobConfig.put(VERIFY, true);
  }
  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setJobName("terasort")
      .setWorkerClass(TeraSort.class.getName())
      .addComputeResource(
          Double.parseDouble(cmd.getOptionValue(ARG_RESOURCE_CPU)),
          Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_MEMORY)),
          volatileDisk,
          Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)),
          workersPerPod)
      .setConfig(jobConfig)
      .build();
  Twister2Submitter.submitJob(twister2Job, config);
}
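The createOption helper used throughout the snippet above is not part of it. A minimal sketch of what it plausibly looks like, built on Apache Commons CLI's Option class; the name and signature come from the calls above, while the body is an assumption, not the verbatim twister2 code:

import org.apache.commons.cli.Option;

// Plausible reconstruction of the private helper: wrap Commons CLI's
// Option so the required flag can be set in a single call.
private static Option createOption(String opt, boolean hasArg,
                                   String description, boolean required) {
  Option option = new Option(opt, hasArg, description);
  option.setRequired(required);
  return option;
}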
use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.
the class CheckpointingTaskExample method main.
public static void main(String[] args) {
  int numberOfWorkers = 4;
  if (args.length == 1) {
    numberOfWorkers = Integer.parseInt(args[0]);
  }
  // first load the configurations from the command line and config files
  Config config = ResourceAllocator.loadConfig(new HashMap<>());
  // let's put a configuration here
  JobConfig jobConfig = new JobConfig();
  jobConfig.put("parallelism", numberOfWorkers);
  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setJobName("hello-checkpointing-job")
      .setWorkerClass(CheckpointingTaskExample.class)
      .addComputeResource(1, 1024, numberOfWorkers)
      .setConfig(jobConfig)
      .build();
  // now submit the job
  Twister2Submitter.submitJob(twister2Job, config);
}
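On the worker side, the "parallelism" entry set above travels with the job and is visible through the Config handed to the worker. A minimal sketch of reading it back, assuming Config exposes a getIntegerValue(key, defaultValue) accessor; that accessor and the import path are assumptions to verify against the twister2 version you build against:

import edu.iu.dsc.tws.api.config.Config;

final class ParallelismReader {

  // Hypothetical worker-side read of the "parallelism" value; the
  // getIntegerValue(key, defaultValue) accessor is assumed, and the
  // default of 4 mirrors the submit-side default above.
  static int parallelismOf(Config config) {
    return config.getIntegerValue("parallelism", 4);
  }
}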
use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.
the class JobMasterClientExample method main.
/**
 * A test class to run JMWorkerAgent.
 * First, a JobMaster instance should be started on a machine;
 * this client then connects to that server.
 * <p>
 * It reads config files from the conf/kubernetes directory.
 * It uses the first ComputeResource in that config file as the ComputeResource of this worker.
 * The number of workers is the number of workers in the first ComputeResource.
 * <p>
 * When all workers have joined, they get the full worker list.
 * Then each worker sends a barrier message.
 * Then each worker sends a completed message and closes.
 */
public static void main(String[] args) {
  if (args.length != 3) {
    LOG.severe("Provide jmAddress, workerID and jobID as parameters.");
    return;
  }
  String jmAddress = args[0];
  int workerID = Integer.parseInt(args[1]);
  String jobID = args[2];
  // we assume that twister2Home is the current directory
  // String configDir = "../twister2/config/src/yaml/";
  String configDir = "";
  String twister2Home = Paths.get(configDir).toAbsolutePath().toString();
  Config config1 = ConfigLoader.loadConfig(twister2Home, "conf/kubernetes");
  Config config2 = ConfigLoader.loadConfig(twister2Home, "conf/common");
  Config config = updateConfig(config1, config2, jmAddress);
  LOG.info("Loaded: " + config.size() + " configuration parameters.");
  Twister2Job twister2Job = Twister2Job.loadTwister2Job(config, null);
  twister2Job.setJobID(jobID);
  JobAPI.Job job = twister2Job.serialize();
  LOG.info("workerID: " + workerID);
  LOG.info("jobID: " + jobID);
  simulateClient(config, job, workerID);
}
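The updateConfig helper called above merges the two loaded configurations and injects the Job Master address, but its body is not shown. A plausible sketch using the Config.newBuilder()/putAll()/put() calls that appear elsewhere in these examples; the JobMasterContext.JOB_MASTER_IP key is an assumption:

// Plausible reconstruction: merge the kubernetes and common configs,
// then point this worker at the given Job Master address.
// JobMasterContext.JOB_MASTER_IP is an assumed config key.
public static Config updateConfig(Config config1, Config config2, String jmAddress) {
  return Config.newBuilder()
      .putAll(config1)
      .putAll(config2)
      .put(JobMasterContext.JOB_MASTER_IP, jmAddress)
      .build();
}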
use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.
the class HelloExample method main.
public static void main(String[] args) throws ParseException {
  // first load the configurations from the command line and config files
  Config config = ResourceAllocator.loadConfig(new HashMap<>());
  // collect configurations to pass into the JobConfig
  HashMap<String, Object> configurations = new HashMap<>();
  configurations.put(SchedulerContext.THREADS_PER_WORKER, 1);
  // note: addOption's third argument is the description string
  Options options = new Options();
  options.addOption(CDFConstants.ARGS_PARALLELISM_VALUE, true, "2");
  options.addOption(CDFConstants.ARGS_WORKERS, true, "2");
  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine commandLine = commandLineParser.parse(options, args);
  int instances = Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_WORKERS));
  int parallelismValue =
      Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_PARALLELISM_VALUE));
  configurations.put(CDFConstants.ARGS_WORKERS, Integer.toString(instances));
  configurations.put(CDFConstants.ARGS_PARALLELISM_VALUE, Integer.toString(parallelismValue));
  // build JobConfig
  JobConfig jobConfig = new JobConfig();
  jobConfig.putAll(configurations);
  config = Config.newBuilder()
      .putAll(config)
      .put(SchedulerContext.DRIVER_CLASS, null)
      .build();
  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setWorkerClass(CDFWWorker.class)
      .setJobName(HelloExample.class.getName())
      .setDriverClass(HelloDriver.class.getName())
      .addComputeResource(1, 512, instances)
      .setConfig(jobConfig)
      .build();
  // now submit the job
  Twister2Submitter.submitJob(twister2Job, config);
}
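Neither option above is declared required, and Commons CLI's getOptionValue returns null when an option is absent, so Integer.parseInt would throw a NumberFormatException on a bare invocation. A drop-in replacement for the two parse lines, using the getOptionValue(opt, defaultValue) overload; the "2" defaults are an assumption taken from the option declarations. The same applies to the next example:

// Fall back to "2" when an option is not supplied on the command line,
// instead of passing null into Integer.parseInt.
int instances = Integer.parseInt(
    commandLine.getOptionValue(CDFConstants.ARGS_WORKERS, "2"));
int parallelismValue = Integer.parseInt(
    commandLine.getOptionValue(CDFConstants.ARGS_PARALLELISM_VALUE, "2"));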
use of edu.iu.dsc.tws.api.Twister2Job in project twister2 by DSC-SPIDAL.
the class ParallelDataFlowsExample method main.
public static void main(String[] args) throws ParseException {
  // first load the configurations from the command line and config files
  Config config = ResourceAllocator.loadConfig(new HashMap<>());
  // collect configurations to pass into the JobConfig
  HashMap<String, Object> configurations = new HashMap<>();
  configurations.put(SchedulerContext.THREADS_PER_WORKER, 1);
  Options options = new Options();
  options.addOption(CDFConstants.ARGS_PARALLELISM_VALUE, true, "2");
  options.addOption(CDFConstants.ARGS_WORKERS, true, "2");
  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine commandLine = commandLineParser.parse(options, args);
  int instances = Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_WORKERS));
  int parallelismValue =
      Integer.parseInt(commandLine.getOptionValue(CDFConstants.ARGS_PARALLELISM_VALUE));
  configurations.put(CDFConstants.ARGS_WORKERS, Integer.toString(instances));
  configurations.put(CDFConstants.ARGS_PARALLELISM_VALUE, Integer.toString(parallelismValue));
  // build JobConfig
  JobConfig jobConfig = new JobConfig();
  jobConfig.putAll(configurations);
  config = Config.newBuilder()
      .putAll(config)
      .put(SchedulerContext.DRIVER_CLASS, null)
      .build();
  // the extra boolean in this addComputeResource overload presumably marks the resource as scalable
  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setJobName(ParallelDataFlowsExample.class.getName())
      .setWorkerClass(CDFWWorker.class)
      .setDriverClass(ParallelDataflowsDriver.class.getName())
      .addComputeResource(1, 512, instances, true)
      .setConfig(jobConfig)
      .build();
  // now submit the job
  Twister2Submitter.submitJob(twister2Job, config);
}