Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class KMeansMain, method main.
/**
 * Entry point for the KMeans clustering job.
 *
 * <p>Parses command-line options describing the data points, centroids and
 * cluster resources, packs them into a {@link JobConfig}, and submits a
 * Twister2 job running either the task-graph worker or the TSet worker
 * depending on the requested job type.
 *
 * @param args command-line arguments; see the options registered below
 * @throws ParseException if a required option is missing or malformed
 */
public static void main(String[] args) throws ParseException {
  LOG.log(Level.INFO, "KMeans Clustering Job");
  // first load the configurations from command line and config files
  Config config = ResourceAllocator.loadConfig(new HashMap<>());

  Options options = new Options();
  options.addOption(DataObjectConstants.WORKERS, true, "Workers");
  // FIX: the CSIZE/DSIZE descriptions were swapped (and "dapoints" was a
  // typo). CSIZE is parsed into csize (centroids) and DSIZE into dsize
  // (data points) below.
  options.addOption(DataObjectConstants.CSIZE, true, "Size of the centroids file");
  options.addOption(DataObjectConstants.DSIZE, true, "Size of the datapoints file");
  options.addOption(DataObjectConstants.NUMBER_OF_FILES, true, "Number of files");
  // SHARED_FILE_SYSTEM is a flag (no argument)
  options.addOption(DataObjectConstants.SHARED_FILE_SYSTEM, false, "Shared file system");
  options.addOption(DataObjectConstants.DIMENSIONS, true, "dim");
  options.addOption(DataObjectConstants.PARALLELISM_VALUE, true, "parallelism");
  options.addOption(DataObjectConstants.ARGS_ITERATIONS, true, "iter");
  options.addOption(DataObjectConstants.JOB_TYPE, true, "type");
  // required options
  options.addOption(Utils.createOption(DataObjectConstants.DINPUT_DIRECTORY,
      true, "Data points Input directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.CINPUT_DIRECTORY,
      true, "Centroids Input directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.OUTPUT_DIRECTORY,
      true, "Output directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.FILE_SYSTEM,
      true, "file system", true));
  options.addOption(Utils.createOption(DataObjectConstants.FILE_TYPE,
      true, "file type", true));

  // DefaultParser is not deprecated; the previous @SuppressWarnings was stale
  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine cmd = commandLineParser.parse(options, args);

  int workers = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.WORKERS));
  int dsize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DSIZE));
  int csize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.CSIZE));
  int numFiles = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.NUMBER_OF_FILES));
  int dimension = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DIMENSIONS));
  int parallelismValue = Integer.parseInt(
      cmd.getOptionValue(DataObjectConstants.PARALLELISM_VALUE));
  int iterations = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.ARGS_ITERATIONS));
  String jobType = cmd.getOptionValue(DataObjectConstants.JOB_TYPE);
  String dataDirectory = cmd.getOptionValue(DataObjectConstants.DINPUT_DIRECTORY);
  String centroidDirectory = cmd.getOptionValue(DataObjectConstants.CINPUT_DIRECTORY);
  String outputDirectory = cmd.getOptionValue(DataObjectConstants.OUTPUT_DIRECTORY);
  String fileSystem = cmd.getOptionValue(DataObjectConstants.FILE_SYSTEM);
  String fileType = cmd.getOptionValue(DataObjectConstants.FILE_TYPE);
  boolean shared = Boolean.parseBoolean(
      cmd.getOptionValue(DataObjectConstants.SHARED_FILE_SYSTEM));

  // build JobConfig that is shipped to the workers
  JobConfig jobConfig = new JobConfig();
  jobConfig.put(DataObjectConstants.DINPUT_DIRECTORY, dataDirectory);
  jobConfig.put(DataObjectConstants.CINPUT_DIRECTORY, centroidDirectory);
  jobConfig.put(DataObjectConstants.OUTPUT_DIRECTORY, outputDirectory);
  jobConfig.put(DataObjectConstants.FILE_SYSTEM, fileSystem);
  jobConfig.put(DataObjectConstants.DSIZE, dsize);
  jobConfig.put(DataObjectConstants.CSIZE, csize);
  jobConfig.put(DataObjectConstants.WORKERS, workers);
  jobConfig.put(DataObjectConstants.NUMBER_OF_FILES, numFiles);
  jobConfig.put(DataObjectConstants.DIMENSIONS, dimension);
  jobConfig.put(DataObjectConstants.PARALLELISM_VALUE, parallelismValue);
  jobConfig.put(DataObjectConstants.SHARED_FILE_SYSTEM, shared);
  jobConfig.put(DataObjectConstants.ARGS_ITERATIONS, iterations);
  jobConfig.put(DataObjectConstants.JOB_TYPE, jobType);
  jobConfig.put(DataObjectConstants.FILE_TYPE, fileType);

  Twister2Job.Twister2JobBuilder jobBuilder = Twister2Job.newBuilder();
  jobBuilder.setJobName("KMeans-job");
  // pick the worker implementation based on the requested job type;
  // an unrecognized type leaves the worker class unset (as before)
  if (Context.TWISTER2_GRAPH_JOB.equals(jobType)) {
    jobBuilder.setWorkerClass(KMeansComputeJob.class.getName());
  } else if (Context.TWISTER2_TSET_JOB.equals(jobType)) {
    jobBuilder.setWorkerClass(KMeansTsetJob.class.getName());
  }
  // 2 CPUs and 512 MB per worker instance
  jobBuilder.addComputeResource(2, 512, 1.0, workers);
  jobBuilder.setConfig(jobConfig);

  // now submit the job
  Twister2Submitter.submitJob(jobBuilder.build(), config);
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class KMeansCheckpointingWorker, method main.
/**
 * Entry point for the fault-tolerant (checkpointing) KMeans clustering job.
 *
 * <p>Parses command-line options, builds a {@link JobConfig}, and submits a
 * Twister2 job whose worker is {@code KMeansCheckpointingWorker}. The job id
 * is fixed to the job name so that restarted jobs can locate their
 * checkpoints.
 *
 * @param args command-line arguments; see the options registered below
 * @throws ParseException if a required option is missing or malformed
 */
public static void main(String[] args) throws ParseException {
  LOG.info("KMeans Clustering Job with fault tolerance");
  String jobName = "KMeans-faultolerance-job";

  // first load the configurations from command line and config files,
  // pinning the job id so checkpoint state can be found on restart
  HashMap<String, Object> c = new HashMap<>();
  c.put(Context.JOB_ID, jobName);
  Config config = ResourceAllocator.loadConfig(c);

  Options options = new Options();
  options.addOption(DataObjectConstants.WORKERS, true, "Workers");
  // FIX: the CSIZE/DSIZE descriptions were swapped (and "dapoints" was a
  // typo). CSIZE is parsed into csize (centroids) and DSIZE into dsize
  // (data points) below.
  options.addOption(DataObjectConstants.CSIZE, true, "Size of the centroids file");
  options.addOption(DataObjectConstants.DSIZE, true, "Size of the datapoints file");
  options.addOption(DataObjectConstants.NUMBER_OF_FILES, true, "Number of files");
  // SHARED_FILE_SYSTEM is a flag (no argument)
  options.addOption(DataObjectConstants.SHARED_FILE_SYSTEM, false, "Shared file system");
  options.addOption(DataObjectConstants.DIMENSIONS, true, "dim");
  options.addOption(DataObjectConstants.PARALLELISM_VALUE, true, "parallelism");
  options.addOption(DataObjectConstants.ARGS_ITERATIONS, true, "iter");
  // required options
  options.addOption(Utils.createOption(DataObjectConstants.DINPUT_DIRECTORY,
      true, "Data points Input directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.CINPUT_DIRECTORY,
      true, "Centroids Input directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.OUTPUT_DIRECTORY,
      true, "Output directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.FILE_SYSTEM,
      true, "file system", true));

  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine cmd = commandLineParser.parse(options, args);

  int workers = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.WORKERS));
  int dsize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DSIZE));
  int csize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.CSIZE));
  int numFiles = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.NUMBER_OF_FILES));
  int dimension = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DIMENSIONS));
  int parallelismValue = Integer.parseInt(
      cmd.getOptionValue(DataObjectConstants.PARALLELISM_VALUE));
  int iterations = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.ARGS_ITERATIONS));
  String dataDirectory = cmd.getOptionValue(DataObjectConstants.DINPUT_DIRECTORY);
  String centroidDirectory = cmd.getOptionValue(DataObjectConstants.CINPUT_DIRECTORY);
  String outputDirectory = cmd.getOptionValue(DataObjectConstants.OUTPUT_DIRECTORY);
  String fileSystem = cmd.getOptionValue(DataObjectConstants.FILE_SYSTEM);
  boolean shared = Boolean.parseBoolean(
      cmd.getOptionValue(DataObjectConstants.SHARED_FILE_SYSTEM));

  // build JobConfig; numeric values are stored as strings here
  // (NOTE(review): unlike KMeansMain, which stores ints — presumably the
  // checkpointing worker reads these back as strings; confirm before
  // unifying the two)
  JobConfig jobConfig = new JobConfig();
  jobConfig.put(DataObjectConstants.DINPUT_DIRECTORY, dataDirectory);
  jobConfig.put(DataObjectConstants.CINPUT_DIRECTORY, centroidDirectory);
  jobConfig.put(DataObjectConstants.OUTPUT_DIRECTORY, outputDirectory);
  jobConfig.put(DataObjectConstants.FILE_SYSTEM, fileSystem);
  jobConfig.put(DataObjectConstants.DSIZE, Integer.toString(dsize));
  jobConfig.put(DataObjectConstants.CSIZE, Integer.toString(csize));
  jobConfig.put(DataObjectConstants.WORKERS, Integer.toString(workers));
  jobConfig.put(DataObjectConstants.NUMBER_OF_FILES, Integer.toString(numFiles));
  jobConfig.put(DataObjectConstants.DIMENSIONS, Integer.toString(dimension));
  jobConfig.put(DataObjectConstants.PARALLELISM_VALUE, Integer.toString(parallelismValue));
  jobConfig.put(DataObjectConstants.SHARED_FILE_SYSTEM, shared);
  jobConfig.put(DataObjectConstants.ARGS_ITERATIONS, Integer.toString(iterations));

  Twister2Job.Twister2JobBuilder jobBuilder = Twister2Job.newBuilder();
  jobBuilder.setJobName(jobName);
  jobBuilder.setWorkerClass(KMeansCheckpointingWorker.class.getName());
  // 2 CPUs and 512 MB per worker instance
  jobBuilder.addComputeResource(2, 512, 1.0, workers);
  jobBuilder.setConfig(jobConfig);

  // now submit the job
  Twister2Submitter.submitJob(jobBuilder.build(), config);
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class SortJob, method main.
/**
 * Entry point for the Sort benchmark job.
 *
 * <p>Supports two modes: a file-based mode, where input tuples are read from
 * a file, and a generated mode, where sources synthesize tuples of the given
 * key/value sizes up to a total data size. Optional tuning, benchmarking and
 * output options are forwarded to the workers via the {@link JobConfig}.
 *
 * @param args command-line arguments; see the options registered below
 * @throws ParseException if a required option is missing or malformed
 */
public static void main(String[] args) throws ParseException {
  Config config = ResourceAllocator.loadConfig(new HashMap<>());
  JobConfig jobConfig = new JobConfig();

  Options options = new Options();
  // file based mode configuration
  options.addOption(createOption(ARG_INPUT_FILE, true,
      "Path to the file containing input tuples. "
          + "Path can be specified with %d, where it will be replaced by task index. For example,"
          + "input-%d, will be considered as input-0 in source task having index 0.",
      false));
  // non-file based mode configurations
  options.addOption(createOption(ARG_SIZE, true,
      "Data Size in GigaBytes. "
          + "A source will generate this much of data. Including size of both key and value.",
      false));
  options.addOption(createOption(ARG_KEY_SIZE, true,
      "Size of the key in bytes of a single Tuple", true));
  // FIX: description was a copy-paste of ARG_KEY_SIZE's; this option is a
  // random seed, not a size
  options.addOption(createOption(ARG_KEY_SEED, true,
      "Random seed used when generating the keys", false));
  options.addOption(createOption(ARG_VALUE_SIZE, true,
      "Size of the value in bytes of a single Tuple", true));
  // resources
  options.addOption(createOption(ARG_RESOURCE_CPU, true,
      "Amount of CPUs to allocate per instance", true));
  options.addOption(createOption(ARG_RESOURCE_MEMORY, true,
      "Amount of Memory in mega bytes to allocate per instance", true));
  options.addOption(createOption(ARG_RESOURCE_INSTANCES, true,
      "No. of instances", true));
  // tasks and sources counts
  options.addOption(createOption(ARG_TASKS_SOURCES, true, "No of source tasks", true));
  options.addOption(createOption(ARG_TASKS_SINKS, true, "No of sink tasks", true));
  // optional configurations (tune performance)
  options.addOption(createOption(ARG_TUNE_MAX_BYTES_IN_MEMORY, true,
      "Maximum bytes to keep in memory", false));
  // FIX: description said "Maximum records to keep in memory", but this
  // option feeds SHUFFLE_MAX_FILE_SIZE below
  options.addOption(createOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, true,
      "Maximum size of a single shuffle file", false));
  options.addOption(createOption(ARG_BENCHMARK_METADATA, true,
      "Auto generated argument by benchmark suite", false));
  // output folder
  options.addOption(createOption(ARG_OUTPUT_FOLDER, true,
      "Folder to save output files", false));
  // fixed schema (flag, no argument)
  options.addOption(createOption(ARG_FIXED_SCHEMA, false,
      "Use fixed schema feature", false));

  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine cmd = commandLineParser.parse(options, args);

  // file-based mode takes precedence; otherwise the generated-data
  // parameters are required
  if (cmd.hasOption(ARG_INPUT_FILE)) {
    jobConfig.put(ARG_INPUT_FILE, cmd.getOptionValue(ARG_INPUT_FILE));
  } else {
    jobConfig.put(ARG_SIZE, Double.valueOf(cmd.getOptionValue(ARG_SIZE)));
    jobConfig.put(ARG_VALUE_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_VALUE_SIZE)));
    jobConfig.put(ARG_KEY_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_KEY_SIZE)));
  }

  jobConfig.put(ARG_TASKS_SOURCES, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SOURCES)));
  jobConfig.put(ARG_TASKS_SINKS, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SINKS)));
  jobConfig.put(ARG_RESOURCE_INSTANCES,
      Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)));

  if (cmd.hasOption(ARG_TUNE_MAX_BYTES_IN_MEMORY)) {
    // parseLong avoids the needless boxing of Long.valueOf
    long maxBytesInMemory = Long.parseLong(cmd.getOptionValue(ARG_TUNE_MAX_BYTES_IN_MEMORY));
    jobConfig.put(SHUFFLE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
    // for benchmark service
    jobConfig.put(ARG_TUNE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
  }
  if (cmd.hasOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE)) {
    long maxShuffleFileSize =
        Long.parseLong(cmd.getOptionValue(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE));
    jobConfig.put(SHUFFLE_MAX_FILE_SIZE, maxShuffleFileSize);
    jobConfig.put(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, maxShuffleFileSize);
  }
  if (cmd.hasOption(ARG_BENCHMARK_METADATA)) {
    jobConfig.put(ARG_BENCHMARK_METADATA, cmd.getOptionValue(ARG_BENCHMARK_METADATA));
    jobConfig.put(ARG_RUN_BENCHMARK, true);
  }
  if (cmd.hasOption(ARG_OUTPUT_FOLDER)) {
    jobConfig.put(ARG_OUTPUT_FOLDER, cmd.getOptionValue(ARG_OUTPUT_FOLDER));
  }
  if (cmd.hasOption(ARG_FIXED_SCHEMA)) {
    jobConfig.put(ARG_FIXED_SCHEMA, true);
  }

  Twister2Job twister2Job = Twister2Job.newBuilder()
      .setJobName(SortJob.class.getName())
      .setWorkerClass(SortJob.class.getName())
      .addComputeResource(
          Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_CPU)),
          Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_MEMORY)),
          Integer.parseInt(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)))
      .setConfig(jobConfig)
      .build();
  Twister2Submitter.submitJob(twister2Job, config);
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class ZKJobLister, method listJobs.
/**
 * Lists jobs stored in ZooKeeper, grouped into finished and active jobs,
 * and logs the result as a formatted table.
 *
 * <p>If the job list cannot be fetched, the error is logged and the method
 * returns without throwing.
 */
public static void listJobs() {
  CuratorFramework client = ZKUtils.connectToServer(ZKContext.serverAddresses(config));
  String rootPath = ZKContext.rootNode(config);

  List<JobWithState> jobs;
  try {
    jobs = JobZNodeManager.getJobs(client, rootPath);
  } catch (Exception e) {
    LOG.log(Level.SEVERE, "Could not get jobs from zookeeper", e);
    return;
  }

  if (jobs.isEmpty()) {
    LOG.info("\nNumber of all jobs: " + jobs.size());
    return;
  }

  // widest job id determines the id column width
  int maxJobIdLength = jobs.stream()
      .mapToInt(j -> j.getJob().getJobId().length())
      .max()
      .orElseThrow(() -> new RuntimeException("No valid jobID in jobs"));

  List<JobWithState> finishedJobs =
      jobs.stream().filter(JobWithState::finished).collect(Collectors.toList());
  List<JobWithState> activeJobs =
      jobs.stream().filter(JobWithState::active).collect(Collectors.toList());

  int jobIDColumn = maxJobIdLength + 3;
  String format = "%-" + jobIDColumn + "s%-12s%s\n";
  int lineWidth = jobIDColumn + 12 + "Number of workers".length();
  String separator = StringUtils.repeat('=', lineWidth);

  StringBuilder buffer = new StringBuilder();
  // Formatter is AutoCloseable; close it once the table has been built
  try (Formatter f = new Formatter(buffer)) {
    f.format("\n\n%s", "Number of all jobs: " + jobs.size());
    f.format("\n%s", "");
    f.format("\n%s", "List of finished jobs: " + finishedJobs.size() + "\n");
    outputJobs(finishedJobs, f, format, separator);
    f.format("\n%s", "");
    f.format("\n%s", "List of active jobs: " + activeJobs.size() + "\n");
    outputJobs(activeJobs, f, format, separator);
  }
  LOG.info(buffer.toString());
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class ZKJobLister, method loadConfigurations.
/**
 * Builds the effective configuration from command-line values.
 *
 * <p>Loads the base configuration from the given twister2 home and config
 * directory, then overlays the home directory, config directory and job id
 * taken from the command line.
 *
 * @param cmd parsed command line holding twister2_home, config_path and job_id
 * @return the merged configuration
 */
public static Config loadConfigurations(CommandLine cmd) {
  String homeDir = cmd.getOptionValue("twister2_home");
  String configPath = cmd.getOptionValue("config_path");
  String jobId = cmd.getOptionValue("job_id");

  Config baseConfig = ConfigLoader.loadConfig(homeDir, configPath);
  return Config.newBuilder()
      .putAll(baseConfig)
      .put(Context.TWISTER2_HOME.getKey(), homeDir)
      .put(SchedulerContext.CONFIG_DIR, configPath)
      .put(Context.JOB_ID, jobId)
      .build();
}
Aggregations