use of edu.iu.dsc.tws.task.ComputeEnvironment in project twister2 by DSC-SPIDAL.
the class WordCount method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  ComputeEnvironment cEnv = ComputeEnvironment.init(workerEnv);
  // create the source and the aggregator
  WordSource source = new WordSource();
  WordAggregator counter = new WordAggregator();
  // create the graph builder and define the graph
  ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(workerEnv.getConfig());
  builder.addSource("word-source", source, 4);
  builder.addCompute("word-aggregator", counter, 4)
      .partition("word-source")
      .viaEdge(EDGE)
      .withDataType(MessageTypes.OBJECT);
  builder.setMode(OperationMode.STREAMING);
  // build the graph
  ComputeGraph graph = builder.build();
  // execute the graph
  cEnv.getTaskExecutor().execute(graph);
}
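The WordSource and WordAggregator tasks are defined elsewhere in the example. For orientation, here is a minimal sketch of what such tasks could look like, assuming twister2's BaseSource/BaseCompute task API; the edge name, word list, counting logic, and import paths are illustrative assumptions, not code from the original example.

// Minimal sketch only: the word list and counting logic are assumed,
// and import paths may differ between twister2 versions.
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import edu.iu.dsc.tws.api.compute.IMessage;
import edu.iu.dsc.tws.api.compute.nodes.BaseCompute;
import edu.iu.dsc.tws.api.compute.nodes.BaseSource;

class WordSource extends BaseSource {
  static final String EDGE = "partition-edge"; // assumed edge name, must match viaEdge(EDGE)
  private final String[] words = {"apple", "banana", "cherry"};
  private final Random random = new Random();

  @Override
  public void execute() {
    // emit one random word per call on the configured edge
    context.write(EDGE, words[random.nextInt(words.length)]);
  }
}

class WordAggregator extends BaseCompute {
  private final Map<String, Integer> counts = new HashMap<>();

  @Override
  public boolean execute(IMessage message) {
    // keep a running count per word
    String word = (String) message.getContent();
    counts.merge(word, 1, Integer::sum);
    return true;
  }
}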
use of edu.iu.dsc.tws.task.ComputeEnvironment in project twister2 by DSC-SPIDAL.
the class BenchTaskWorker method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  ComputeEnvironment cEnv = ComputeEnvironment.init(workerEnv);
  int workerId = workerEnv.getWorkerId();
  Config config = workerEnv.getConfig();
  if (resultsRecorder == null) {
    resultsRecorder = new BenchmarkResultsRecorder(config, workerId == 0);
  }
  Timing.setDefaultTimingUnit(TimingUnit.NANO_SECONDS);
  jobParameters = JobParameters.build(config);
  computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  if (jobParameters.isStream()) {
    computeGraphBuilder.setMode(OperationMode.STREAMING);
  } else {
    computeGraphBuilder.setMode(OperationMode.BATCH);
  }
  inputDataArray = DataGenerator.generateIntData(jobParameters.getSize());
  buildTaskGraph();
  computeGraph = computeGraphBuilder.build();
  executionPlan = cEnv.getTaskExecutor().plan(computeGraph);
  IExecution execution = cEnv.getTaskExecutor()
      .createExecution(computeGraph, executionPlan)
      .iExecute();
  if (jobParameters.isStream()) {
    while (execution.progress()
        && (sendersInProgress.get() != 0 || receiversInProgress.get() != 0)) {
      // do nothing
    }
    // now just spin for several iterations to progress the remaining communication
    // TODO: fix streaming to return false when communication is done
    long timeNow = System.currentTimeMillis();
    LOG.info("Streaming example task will wait 10 seconds to finish communication...");
    while (System.currentTimeMillis() - timeNow < 10000) {
      execution.progress();
    }
  } else {
    while (execution.progress()) {
      // do nothing
    }
  }
  LOG.info("Stopping execution....");
  execution.stop();
  execution.close();
}
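buildTaskGraph() is abstract here and implemented by the concrete benchmark subclasses. A hypothetical sketch of one such implementation follows, reusing the shared computeGraphBuilder and jobParameters fields from above; BenchSource, BenchSink, the task/edge names, and the return type of buildTaskGraph() are assumptions and may differ from the actual benchmark classes.

// Hypothetical subclass sketch; BenchSource/BenchSink and the names are
// illustrative. Assumes jobParameters.getTaskStages() lists per-stage
// parallelism: first stage = source, second stage = sink.
@Override
public ComputeGraphBuilder buildTaskGraph() {
  List<Integer> taskStages = jobParameters.getTaskStages();
  computeGraphBuilder.addSource("source", new BenchSource("edge"), taskStages.get(0));
  computeGraphBuilder.addCompute("sink", new BenchSink(), taskStages.get(1))
      .direct("source")
      .viaEdge("edge")
      .withDataType(MessageTypes.INTEGER_ARRAY);
  return computeGraphBuilder;
}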
use of edu.iu.dsc.tws.task.ComputeEnvironment in project twister2 by DSC-SPIDAL.
the class ConstraintTaskExample method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  int workerId = workerEnv.getWorkerId();
  Config config = workerEnv.getConfig();
  long startTime = System.currentTimeMillis();
  LOG.log(Level.INFO, "Task worker starting: " + workerId);
  ComputeEnvironment cEnv = ComputeEnvironment.init(workerEnv);
  TaskExecutor taskExecutor = cEnv.getTaskExecutor();
  String dinput = String.valueOf(config.get(DataObjectConstants.DINPUT_DIRECTORY));
  int dimension = Integer.parseInt(String.valueOf(config.get(DataObjectConstants.DIMENSIONS)));
  int parallelismValue = Integer.parseInt(String.valueOf(config.get(DataObjectConstants.PARALLELISM_VALUE)));
  int dsize = Integer.parseInt(String.valueOf(config.get(DataObjectConstants.DSIZE)));
  DataGenerator dataGenerator = new DataGenerator(config, workerId);
  dataGenerator.generate(new Path(dinput), dsize, dimension);
  ComputeGraph firstGraph = buildFirstGraph(parallelismValue, config, dinput, dsize, dimension, "firstgraphpoints", "1");
  ComputeGraph secondGraph = buildSecondGraph(parallelismValue, config, dimension, "firstgraphpoints", "1");
  // Get the execution plan for the first task graph and run it
  ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(firstGraph);
  taskExecutor.execute(firstGraph, firstGraphExecutionPlan);
  DataObject<Object> firstGraphObject = taskExecutor.getOutput("firstsink");
  // Get the execution plan for the second task graph, feed it the first graph's output, and run it
  ExecutionPlan secondGraphExecutionPlan = taskExecutor.plan(secondGraph);
  taskExecutor.addInput("firstgraphpoints", firstGraphObject);
  taskExecutor.execute(secondGraph, secondGraphExecutionPlan);
  long endTime = System.currentTimeMillis();
  LOG.info("Total Execution Time: " + (endTime - startTime));
}
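The trailing "1" passed to buildFirstGraph and buildSecondGraph is presumably the constraint value the example's name refers to. A hypothetical sketch of how a graph-level constraint might be attached inside such a build method, assuming ComputeGraphBuilder#addGraphConstraints and the Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER key; PointSource, PointSink, and the task/edge names are illustrative.

// Hypothetical sketch; addGraphConstraints and the constraint key are
// assumed and may differ by twister2 version.
ComputeGraphBuilder firstGraphBuilder = ComputeGraphBuilder.newBuilder(config);
firstGraphBuilder.addSource("firstsource", new PointSource(), parallelismValue); // PointSource is hypothetical
firstGraphBuilder.addCompute("firstsink", new PointSink(), parallelismValue)     // PointSink is hypothetical
    .direct("firstsource")
    .viaEdge("direct")
    .withDataType(MessageTypes.OBJECT);
firstGraphBuilder.setMode(OperationMode.BATCH);
// cap task instances per worker at the constraint value ("1" in the calls above)
firstGraphBuilder.addGraphConstraints(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, "1");
ComputeGraph firstGraph = firstGraphBuilder.build();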
use of edu.iu.dsc.tws.task.ComputeEnvironment in project twister2 by DSC-SPIDAL.
the class KafkaExample method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  ComputeEnvironment cEnv = ComputeEnvironment.init(workerEnv);
  ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(workerEnv.getConfig());
  graphBuilder.setMode(OperationMode.STREAMING);
  graphBuilder.addSource("ksource", new KSource(), 2);
  graphBuilder.addCompute("sink", new KSink(), 2).direct("ksource").viaEdge("edge");
  cEnv.buildAndExecute(graphBuilder);
}
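Unlike the other examples on this page, KafkaExample hands the builder to buildAndExecute rather than building and executing in separate steps. Judging from those other examples, the shorthand corresponds roughly to the following explicit sequence; this is a sketch inferred from this page, not twister2's actual implementation.

// Roughly what buildAndExecute does, expressed with the explicit
// build/plan/execute calls used by the other examples on this page.
ComputeGraph graph = graphBuilder.build();
ExecutionPlan plan = cEnv.getTaskExecutor().plan(graph);
cEnv.getTaskExecutor().execute(graph, plan);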
use of edu.iu.dsc.tws.task.ComputeEnvironment in project twister2 by DSC-SPIDAL.
the class KMeansCheckpointingWorker method execute.
/**
 * First, the execute method invokes the generateDataPoints method to generate the data points
 * file and the centroid file on the filesystem chosen by the user. Next, it invokes
 * DataObjectSource and DataObjectSink to partition and read the partitioned data points
 * through the data points task graph. Then, it calls DataFileReader to read the centroid
 * values from the filesystem through the centroid task graph. Next, the data points are
 * stored in the DataSet (0th object) and the centroids in the DataSet (1st object). Finally,
 * it constructs the k-means task graph, which performs the clustering by computing the
 * distance between the centroids and the data points.
 */
@SuppressWarnings("unchecked")
@Override
public void execute(WorkerEnvironment workerEnv) {
  int workerId = workerEnv.getWorkerId();
  Config config = workerEnv.getConfig();
  IWorkerController workerController = workerEnv.getWorkerController();
  ComputeEnvironment taskEnv = ComputeEnvironment.init(workerEnv);
  CheckpointingWorkerEnv checkpointingEnv = CheckpointingWorkerEnv.newBuilder(config, workerId, workerController)
      .registerVariable(I_KEY, IntegerPacker.getInstance())
      .registerVariable(CENT_OBJ, ObjectPacker.getInstance())
      .build();
  Snapshot snapshot = checkpointingEnv.getSnapshot();
  TaskExecutor taskExecutor = taskEnv.getTaskExecutor();
  LOG.info("Task worker starting: " + workerId + " Current snapshot ver: " + snapshot.getVersion());
  int parallelismValue = config.getIntegerValue(DataObjectConstants.PARALLELISM_VALUE);
  int dimension = config.getIntegerValue(DataObjectConstants.DIMENSIONS);
  int numFiles = config.getIntegerValue(DataObjectConstants.NUMBER_OF_FILES);
  int dsize = config.getIntegerValue(DataObjectConstants.DSIZE);
  int csize = config.getIntegerValue(DataObjectConstants.CSIZE);
  int iterations = config.getIntegerValue(DataObjectConstants.ARGS_ITERATIONS);
  String dataDirectory = config.getStringValue(DataObjectConstants.DINPUT_DIRECTORY) + workerId;
  String centroidDirectory = config.getStringValue(DataObjectConstants.CINPUT_DIRECTORY) + workerId;
  String type = config.getStringValue(DataObjectConstants.FILE_TYPE);
  KMeansUtils.generateDataPoints(config, dimension, numFiles, dsize, csize, dataDirectory, centroidDirectory, type);
  long startTime = System.currentTimeMillis();
  /* First graph: partition and read the partitioned data points */
  ComputeGraph datapointsTaskGraph = KMeansComputeJob.buildDataPointsTG(dataDirectory, dsize, parallelismValue, dimension, config, type);
  // Get the execution plan for the first task graph
  ExecutionPlan datapointsExecutionPlan = taskExecutor.plan(datapointsTaskGraph);
  // Actual execution of the first task graph
  taskExecutor.execute(datapointsTaskGraph, datapointsExecutionPlan);
  // Retrieve the output of the first task graph
  DataObject<Object> dataPointsObject = taskExecutor.getOutput(datapointsTaskGraph, datapointsExecutionPlan, "datapointsink");
  DataObject<Object> centroidsDataObject;
  if (!snapshot.checkpointAvailable(CENT_OBJ)) {
    /* Second graph: read the centroids */
    ComputeGraph centroidsTaskGraph = KMeansComputeJob.buildCentroidsTG(centroidDirectory, csize, parallelismValue, dimension, config, type);
    // Get the execution plan for the second task graph
    ExecutionPlan centroidsExecutionPlan = taskExecutor.plan(centroidsTaskGraph);
    // Actual execution of the second task graph
    taskExecutor.execute(centroidsTaskGraph, centroidsExecutionPlan);
    // Retrieve the output of the second task graph
    centroidsDataObject = taskExecutor.getOutput(centroidsTaskGraph, centroidsExecutionPlan, "centroidsink");
  } else {
    centroidsDataObject = (DataObject<Object>) snapshot.get(CENT_OBJ);
  }
  long endTimeData = System.currentTimeMillis();
  /* Third graph: the actual k-means calculation */
  ComputeGraph kmeansTaskGraph = KMeansComputeJob.buildKMeansTG(parallelismValue, config);
  // Perform iterations 0 to 'n'
  IExecutor ex = taskExecutor.createExecution(kmeansTaskGraph);
  for (int i = 0; i < iterations; i++) {
    // actual execution of the third task graph
    ex.execute(i == iterations - 1);
  }
  DataPartition<?> centroidPartition = centroidsDataObject.getPartition(workerId);
  double[][] centroid = (double[][]) centroidPartition.getConsumer().next();
  long endTime = System.currentTimeMillis();
  if (workerId == 0) {
    LOG.info("Data Load time : " + (endTimeData - startTime) + "\n"
        + "Total Time : " + (endTime - startTime)
        + " Compute Time : " + (endTime - endTimeData));
  }
  LOG.info("Final Centroids After\t" + iterations + "\titerations\t" + Arrays.deepToString(centroid));
  taskEnv.close();
}
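Note that the branch above only reads CENT_OBJ from the snapshot; for the checkpoint to be available after a restart, the centroids must also be written back and committed at some point, for instance after each iteration. A hypothetical sketch of that counterpart, assuming Snapshot#setValue and CheckpointingWorkerEnv#commitSnapshot, which may differ by twister2 version:

// Hypothetical: persist the latest centroids so a restarted worker can
// take the snapshot.get(CENT_OBJ) branch instead of rebuilding them.
// setValue/commitSnapshot are assumed method names.
snapshot.setValue(CENT_OBJ, centroidsDataObject);
checkpointingEnv.commitSnapshot();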