Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class BasicAuroraJob, method main.
/**
 * Loads the configuration, submits a Twister2 job built from it and then
 * terminates that job again.
 *
 * @param args command line arguments; not read directly by this method
 */
public static void main(String[] args) {
  // configuration comes from the command line and the config files
  final Config loadedConfig = ResourceAllocator.loadConfig(new HashMap<>());
  System.out.println("read config values: " + loadedConfig.size());
  System.out.println(loadedConfig);

  // job-level settings are collected in a plain map first and then copied into the JobConfig
  final HashMap<String, Object> jobSettings = new HashMap<>();
  jobSettings.put(SchedulerContext.THREADS_PER_WORKER, 8);

  final JobConfig jobConfig = new JobConfig();
  jobConfig.putAll(jobSettings);

  // build the job and hand it to the submitter
  final Twister2Job job = Twister2Job.loadTwister2Job(loadedConfig, jobConfig);
  Twister2Submitter.submitJob(job, loadedConfig);

  // the job is terminated right after submission
  terminateJob(loadedConfig);

  // disabled experiments kept from the original:
  // jobWriteTest(twister2Job);
  // jobReadTest();
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class ExecutionPlanBuilder, method build.
/**
 * Builds the {@link ExecutionPlan} of this worker from the task graph and the task schedule.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>look up the schedule entry of this worker; without one, nothing is built;</li>
 *   <li>when checkpointing is enabled, register the task family with the checkpointing client
 *       and remember the version it returns;</li>
 *   <li>for every task instance scheduled on this worker, create the communications of its
 *       out edges and in edges and create the node instance;</li>
 *   <li>for every entry of {@code parOpTable}, build the parallel operation and register it
 *       with the source and target instances that live on this worker.</li>
 * </ol>
 *
 * @param cfg configuration handed to the operation factory and to the created instances
 * @param taskGraph graph supplying vertices, edges, graph name and operation mode
 * @param taskSchedule schedule supplying the worker to task-instance assignment
 * @return the execution plan, or {@code null} when the schedule has no entry for this worker
 * @throws RuntimeException if the checkpoint registration fails, a scheduled task does not
 *     exist in the graph, a communication has no edges, or a local source/target task of an
 *     operation has no matching instance
 */
@Override
public ExecutionPlan build(Config cfg, ComputeGraph taskGraph, TaskSchedulePlan taskSchedule) {
  // we need to build the task plan
  LogicalPlan logicalPlan =
      TaskPlanBuilder.build(workerId, workerInfoList, taskSchedule, taskIdGenerator);
  ParallelOperationFactory opFactory = new ParallelOperationFactory(cfg, network, logicalPlan);

  Map<Integer, WorkerSchedulePlan> containersMap = taskSchedule.getContainersMap();
  WorkerSchedulePlan conPlan = containersMap.get(workerId);
  if (conPlan == null) {
    LOG.log(Level.INFO, "Cannot find worker in the task plan: " + workerId);
    return null;
  }

  ExecutionPlan execution = new ExecutionPlan();
  Set<TaskInstancePlan> instancePlan = conPlan.getTaskInstances();

  long tasksVersion = 0L;
  if (CheckpointingContext.isCheckpointingEnabled(cfg)) {
    // only worker 0 reports the checkpointable tasks of all workers; every other worker
    // registers with an empty set
    Set<Integer> globalTasks = Collections.emptySet();
    if (workerId == 0) {
      globalTasks = containersMap.values().stream()
          .flatMap(containerPlan -> containerPlan.getTaskInstances().stream())
          .filter(ip -> taskGraph.vertex(ip.getTaskName()).getTask() instanceof CheckpointableTask
              && !(taskGraph.vertex(ip.getTaskName()).getTask()
              instanceof CheckpointingSGatherSink))
          .map(TaskInstancePlan::getTaskId)
          .collect(Collectors.toSet());
    }
    try {
      Checkpoint.FamilyInitializeResponse familyInitializeResponse =
          this.checkpointingClient.initFamily(
              workerId, containersMap.size(), taskGraph.getGraphName(), globalTasks);
      tasksVersion = familyInitializeResponse.getVersion();
    } catch (BlockingSendException e) {
      throw new RuntimeException("Failed to register tasks with Checkpoint Manager", e);
    }
    LOG.info("Tasks will start with version " + tasksVersion);
  }

  // for each task we are going to create the communications
  for (TaskInstancePlan ip : instancePlan) {
    Vertex v = taskGraph.vertex(ip.getTaskName());
    Map<String, Set<String>> inEdges = new HashMap<>();
    Map<String, String> outEdges = new HashMap<>();
    if (v == null) {
      throw new RuntimeException("Non-existing task scheduled: " + ip.getTaskName());
    }

    INode node = v.getTask();
    if (node instanceof ICompute || node instanceof ISource) {
      // lets get the communication
      Set<Edge> edges = taskGraph.outEdges(v);
      // now lets create the communication object
      for (Edge e : edges) {
        Vertex child = taskGraph.childOfTask(v, e.getName());
        // lets figure out the parents task id
        Set<Integer> srcTasks = taskIdGenerator.getTaskIds(v, ip.getTaskId());
        Set<Integer> tarTasks = taskIdGenerator.getTaskIds(
            child, getTaskIdOfTask(child.getName(), taskSchedule));
        Map<Integer, Integer> srcGlobalToIndex =
            taskIdGenerator.getGlobalTaskToIndex(v, ip.getTaskId());
        Map<Integer, Integer> tarGlobaToIndex = taskIdGenerator.getGlobalTaskToIndex(
            child, getTaskIdOfTask(child.getName(), taskSchedule));
        createCommunication(child, e, v, srcTasks, tarTasks, srcGlobalToIndex, tarGlobaToIndex);
        outEdges.put(e.getName(), child.getName());
      }
    }

    if (node instanceof ICompute) {
      // lets get the parent tasks
      Set<Edge> parentEdges = taskGraph.inEdges(v);
      for (Edge e : parentEdges) {
        Vertex parent = taskGraph.getParentOfTask(v, e.getName());
        // lets figure out the parents task id
        Set<Integer> srcTasks = taskIdGenerator.getTaskIds(
            parent, getTaskIdOfTask(parent.getName(), taskSchedule));
        Set<Integer> tarTasks = taskIdGenerator.getTaskIds(v, ip.getTaskId());
        Map<Integer, Integer> srcGlobalToIndex = taskIdGenerator.getGlobalTaskToIndex(
            parent, getTaskIdOfTask(parent.getName(), taskSchedule));
        Map<Integer, Integer> tarGlobalToIndex =
            taskIdGenerator.getGlobalTaskToIndex(v, ip.getTaskId());
        createCommunication(v, e, parent, srcTasks, tarTasks, srcGlobalToIndex, tarGlobalToIndex);

        // if we are a grouped edge, we have to use the group name
        String inEdge;
        if (e.getTargetEdge() == null) {
          inEdge = e.getName();
        } else {
          inEdge = e.getTargetEdge();
        }
        // NOTE(review): the set receives the edge/group name itself rather than the parent
        // task name -- confirm this is what createInstances expects
        inEdges.computeIfAbsent(inEdge, key -> new HashSet<>()).add(inEdge);
      }
    }

    // lets create the instance
    INodeInstance iNodeInstance = createInstances(cfg, taskGraph.getGraphName(), ip, v,
        taskGraph.getOperationMode(), inEdges, outEdges, taskSchedule, tasksVersion);
    // add to execution
    execution.addNodes(v.getName(),
        taskIdGenerator.generateGlobalTaskId(ip.getTaskId(), ip.getTaskIndex()), iNodeInstance);
  }

  // now lets create the queues and start the execution
  for (Table.Cell<String, String, Communication> cell : parOpTable.cellSet()) {
    Communication c = cell.getValue();
    // lets create the communication
    OperationMode operationMode = taskGraph.getOperationMode();
    IParallelOperation op;
    assert c != null;
    c.build();

    if (c.getEdge().size() == 1) {
      op = opFactory.build(c.getEdge(0), c.getSourceTasks(), c.getTargetTasks(), operationMode,
          c.srcGlobalToIndex, c.tarGlobalToIndex);
    } else if (c.getEdge().size() > 1) {
      // just join op for now. Could change in the future
      // here the sources should be separated out for left and right edge
      Set<Integer> sourceTasks = c.getSourceTasks();
      Set<Integer> leftSources = new HashSet<>();
      Set<Integer> rightSources = new HashSet<>();
      if (!sourceTasks.isEmpty()) {
        // the emptiness check above makes the unchecked .get() on the Optional safe;
        // sources sharing the TASK_OFFSET bin of the smallest id go left, all others right
        int minBin = (sourceTasks.stream().min(Integer::compareTo).get()
            / TaskIdGenerator.TASK_OFFSET) * TaskIdGenerator.TASK_OFFSET;
        for (Integer source : sourceTasks) {
          if ((source / TaskIdGenerator.TASK_OFFSET) * TaskIdGenerator.TASK_OFFSET == minBin) {
            leftSources.add(source);
          } else {
            rightSources.add(source);
          }
        }
      }
      // now determine, which task is connected to which edge
      Edge leftEdge = c.getEdge(0);
      Edge rightEdge = c.getEdge(1);
      op = opFactory.build(leftEdge, rightEdge, leftSources, rightSources, c.getTargetTasks(),
          operationMode, c.srcGlobalToIndex, c.tarGlobalToIndex);
    } else {
      throw new RuntimeException("Cannot have communication with 0 edges");
    }

    // now lets check the sources and targets that are in this executor
    Set<Integer> sourcesOfThisWorker = intersectionOfTasks(conPlan, c.getSourceTasks());
    Set<Integer> targetsOfThisWorker = intersectionOfTasks(conPlan, c.getTargetTasks());

    // we use the target edge as the group name
    String targetEdge;
    if (c.getEdge().size() > 1) {
      targetEdge = c.getEdge(0).getTargetEdge();
    } else {
      targetEdge = c.getEdge(0).getName();
    }

    // so along with the operation mode, the windowing mode must be tested
    if (operationMode == OperationMode.STREAMING) {
      for (Integer i : sourcesOfThisWorker) {
        boolean found = false;
        // we can have multiple source tasks for an operation
        for (int sIndex = 0; sIndex < c.getSourceTask().size(); sIndex++) {
          String sourceTask = c.getSourceTask().get(sIndex);
          if (streamingTaskInstances.contains(sourceTask, i)) {
            TaskStreamingInstance taskStreamingInstance =
                streamingTaskInstances.get(sourceTask, i);
            taskStreamingInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
            op.registerSync(i, taskStreamingInstance);
            found = true;
          } else if (streamingSourceInstances.contains(sourceTask, i)) {
            SourceStreamingInstance sourceStreamingInstance =
                streamingSourceInstances.get(sourceTask, i);
            sourceStreamingInstance.registerOutParallelOperation(
                c.getEdge(sIndex).getName(), op);
            found = true;
          }
        }
        // the check has to run after ALL source task names were tried (same as in the batch
        // branch); checking inside the loop rejected a source that only matches a later name
        if (!found) {
          throw new RuntimeException("Not found: " + c.getSourceTask());
        }
      }

      // we only have one target task always
      for (Integer i : targetsOfThisWorker) {
        if (streamingTaskInstances.contains(c.getTargetTask(), i)) {
          TaskStreamingInstance taskStreamingInstance =
              streamingTaskInstances.get(c.getTargetTask(), i);
          op.register(i, taskStreamingInstance.getInQueue());
          taskStreamingInstance.registerInParallelOperation(targetEdge, op);
          op.registerSync(i, taskStreamingInstance);
        } else {
          throw new RuntimeException("Not found: " + c.getTargetTask());
        }
      }
      execution.addOps(op);
    }

    if (operationMode == OperationMode.BATCH) {
      for (Integer i : sourcesOfThisWorker) {
        boolean found = false;
        // we can have multiple source tasks for an operation
        for (int sIndex = 0; sIndex < c.getSourceTask().size(); sIndex++) {
          String sourceTask = c.getSourceTask().get(sIndex);
          if (batchTaskInstances.contains(sourceTask, i)) {
            TaskBatchInstance taskBatchInstance = batchTaskInstances.get(sourceTask, i);
            taskBatchInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
            found = true;
          } else if (batchSourceInstances.contains(sourceTask, i)) {
            SourceBatchInstance sourceBatchInstance = batchSourceInstances.get(sourceTask, i);
            sourceBatchInstance.registerOutParallelOperation(c.getEdge(sIndex).getName(), op);
            found = true;
          }
        }
        if (!found) {
          throw new RuntimeException("Not found: " + c.getSourceTask());
        }
      }

      for (Integer i : targetsOfThisWorker) {
        if (batchTaskInstances.contains(c.getTargetTask(), i)) {
          TaskBatchInstance taskBatchInstance = batchTaskInstances.get(c.getTargetTask(), i);
          op.register(i, taskBatchInstance.getInQueue());
          taskBatchInstance.registerInParallelOperation(targetEdge, op);
          op.registerSync(i, taskBatchInstance);
        } else {
          throw new RuntimeException("Not found: " + c.getTargetTask());
        }
      }
      execution.addOps(op);
    }
  }
  return execution;
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class SsspMain, method main.
/**
 * Parses the command line, copies the parsed values into a {@link JobConfig} and submits
 * the job named "Sssp-job" with {@code TestForSSSP} as its worker class.
 *
 * <p>The workers, data size, number of files and parallelism options are parsed with
 * {@link Integer#parseInt(String)}, so a missing or non-numeric value ends in a
 * {@link NumberFormatException}.
 *
 * @param args command line arguments matching the options registered below
 * @throws ParseException declared by the original signature
 * @throws org.apache.commons.cli.ParseException if the command line cannot be parsed
 */
public static void main(String[] args) throws ParseException, org.apache.commons.cli.ParseException {
  LOG.log(Level.INFO, "Single source shorest path Clustering Job");

  // first load the configurations from command line and config files
  Config config = ResourceAllocator.loadConfig(new HashMap<>());

  Options options = new Options();
  options.addOption(DataObjectConstants.WORKERS, true, "Workers");
  options.addOption(DataObjectConstants.DSIZE, true, "Size of the graph file");
  options.addOption(DataObjectConstants.NUMBER_OF_FILES, true, "Number of files");
  options.addOption(DataObjectConstants.SHARED_FILE_SYSTEM, false, "Shared file system");
  options.addOption(DataObjectConstants.PARALLELISM_VALUE, true, "parallelism");
  options.addOption("sourcev", true, "soruce vertex");
  options.addOption(DataObjectConstants.DINPUT_DIRECTORY, true, "Data points Input directory");
  options.addOption(DataObjectConstants.FILE_SYSTEM, true, "file system");

  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine cmd = commandLineParser.parse(options, args);

  int workers = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.WORKERS));
  int dsize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DSIZE));
  int numFiles = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.NUMBER_OF_FILES));
  int parallelismValue =
      Integer.parseInt(cmd.getOptionValue(DataObjectConstants.PARALLELISM_VALUE));
  String sourceVertex = cmd.getOptionValue("sourcev");
  String dataDirectory = cmd.getOptionValue(DataObjectConstants.DINPUT_DIRECTORY);
  String fileSystem = cmd.getOptionValue(DataObjectConstants.FILE_SYSTEM);
  // The shared-file-system option is registered without an argument, so it has no value to
  // read: getOptionValue() yields null for it and parseBoolean(null) was always false.
  // The presence of the flag is the value.
  boolean shared = cmd.hasOption(DataObjectConstants.SHARED_FILE_SYSTEM);

  // build JobConfig
  JobConfig jobConfig = new JobConfig();
  jobConfig.put(DataObjectConstants.DINPUT_DIRECTORY, dataDirectory);
  jobConfig.put(DataObjectConstants.FILE_SYSTEM, fileSystem);
  jobConfig.put(DataObjectConstants.DSIZE, Integer.toString(dsize));
  jobConfig.put(DataObjectConstants.WORKERS, Integer.toString(workers));
  jobConfig.put(DataObjectConstants.NUMBER_OF_FILES, Integer.toString(numFiles));
  jobConfig.put(DataObjectConstants.PARALLELISM_VALUE, Integer.toString(parallelismValue));
  jobConfig.put(DataObjectConstants.SHARED_FILE_SYSTEM, shared);
  jobConfig.put("sourcev", sourceVertex);

  Twister2Job.Twister2JobBuilder jobBuilder = Twister2Job.newBuilder();
  jobBuilder.setJobName("Sssp-job");
  jobBuilder.setWorkerClass(TestForSSSP.class.getName());
  jobBuilder.addComputeResource(2, 512, 1.0, workers);
  jobBuilder.setConfig(jobConfig);

  // now submit the job
  Twister2Submitter.submitJob(jobBuilder.build(), config);
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class PageRankMain, method main.
/**
 * Parses the command line, copies the parsed values into a {@link JobConfig} and submits
 * the job named "pagerank-job" with {@code PageRankWorker} as its worker class.
 *
 * <p>The workers, data size, number of files, parallelism and iterations options are parsed
 * with {@link Integer#parseInt(String)}, so a missing or non-numeric value ends in a
 * {@link NumberFormatException}.
 *
 * @param args command line arguments matching the options registered below
 * @throws ParseException if the command line cannot be parsed
 */
public static void main(String[] args) throws ParseException {
  LOG.log(Level.INFO, "pagerank Clustering Job");

  // first load the configurations from command line and config files
  Config config = ResourceAllocator.loadConfig(new HashMap<>());

  Options options = new Options();
  options.addOption(DataObjectConstants.WORKERS, true, "Workers");
  options.addOption(DataObjectConstants.DSIZE, true, "Size of the edge list file");
  options.addOption(DataObjectConstants.NUMBER_OF_FILES, true, "Number of files");
  options.addOption(DataObjectConstants.SHARED_FILE_SYSTEM, false, "Shared file system");
  options.addOption(DataObjectConstants.PARALLELISM_VALUE, true, "parallelism");
  options.addOption(DataObjectConstants.ARGS_ITERATIONS, true, "iter");
  options.addOption(DataObjectConstants.DINPUT_DIRECTORY, true, "Data points Input directory");
  options.addOption(DataObjectConstants.FILE_SYSTEM, true, "file system");

  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine cmd = commandLineParser.parse(options, args);

  int workers = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.WORKERS));
  int dsize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DSIZE));
  int numFiles = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.NUMBER_OF_FILES));
  int parallelismValue =
      Integer.parseInt(cmd.getOptionValue(DataObjectConstants.PARALLELISM_VALUE));
  int iterations = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.ARGS_ITERATIONS));
  String dataDirectory = cmd.getOptionValue(DataObjectConstants.DINPUT_DIRECTORY);
  String fileSystem = cmd.getOptionValue(DataObjectConstants.FILE_SYSTEM);
  // The shared-file-system option is registered without an argument, so it has no value to
  // read: getOptionValue() yields null for it and parseBoolean(null) was always false.
  // The presence of the flag is the value.
  boolean shared = cmd.hasOption(DataObjectConstants.SHARED_FILE_SYSTEM);

  // build JobConfig
  JobConfig jobConfig = new JobConfig();
  jobConfig.put(DataObjectConstants.DINPUT_DIRECTORY, dataDirectory);
  jobConfig.put(DataObjectConstants.FILE_SYSTEM, fileSystem);
  jobConfig.put(DataObjectConstants.DSIZE, Integer.toString(dsize));
  jobConfig.put(DataObjectConstants.WORKERS, Integer.toString(workers));
  jobConfig.put(DataObjectConstants.NUMBER_OF_FILES, Integer.toString(numFiles));
  jobConfig.put(DataObjectConstants.PARALLELISM_VALUE, Integer.toString(parallelismValue));
  jobConfig.put(DataObjectConstants.SHARED_FILE_SYSTEM, shared);
  jobConfig.put(DataObjectConstants.ARGS_ITERATIONS, Integer.toString(iterations));

  Twister2Job.Twister2JobBuilder jobBuilder = Twister2Job.newBuilder();
  jobBuilder.setJobName("pagerank-job");
  jobBuilder.setWorkerClass(PageRankWorker.class.getName());
  jobBuilder.addComputeResource(2, 512, 1.0, workers);
  jobBuilder.setConfig(jobConfig);

  // now submit the job
  Twister2Submitter.submitJob(jobBuilder.build(), config);
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class TaskScheduler, method generateTaskSchedulePlans.
/**
 * Reflectively instantiates the task scheduler class with the given name, calls its
 * {@code initialize(Config)} method with this object's {@code config} and then its
 * {@code schedule(WorkerPlan, ComputeGraph[])} method with {@code workerPlan} and
 * {@code computeGraphs}.
 *
 * @param className fully qualified name of the scheduler class; it is loaded through the
 *     class loader of this class and needs a no-argument constructor
 * @return the value returned by the scheduler's {@code schedule} method
 * @throws Twister2RuntimeException wrapping any failure to load, instantiate or invoke the
 *     scheduler, including exceptions thrown by its constructor or methods
 */
private Map<String, TaskSchedulePlan> generateTaskSchedulePlans(String className) {
  try {
    Class<?> taskSchedulerClass = getClass().getClassLoader().loadClass(className);

    // Class.newInstance() is deprecated because it rethrows checked constructor exceptions
    // undeclared; going through the Constructor wraps them in InvocationTargetException,
    // which is handled below.
    Object scheduler = taskSchedulerClass.getDeclaredConstructor().newInstance();

    Method initialize = taskSchedulerClass.getMethod("initialize", new Class<?>[] {Config.class});
    initialize.invoke(scheduler, config);

    Method schedule = taskSchedulerClass.getMethod(
        "schedule", new Class<?>[] {WorkerPlan.class, ComputeGraph[].class});

    // the reflective call returns Object; the element types cannot be checked at runtime
    @SuppressWarnings("unchecked")
    Map<String, TaskSchedulePlan> taskSchedulePlanMap = (Map<String, TaskSchedulePlan>)
        schedule.invoke(scheduler, new Object[] {workerPlan, computeGraphs});
    return taskSchedulePlanMap;
  } catch (InvocationTargetException | IllegalAccessException | NoSuchMethodException
      | InstantiationException | ClassNotFoundException | TaskSchedulerException e) {
    throw new Twister2RuntimeException(e);
  }
}
Aggregations