Example use of edu.iu.dsc.tws.api.compute.executor.IExecutor in the twister2 project (DSC-SPIDAL): the execute method of the KMeansComputeJob class.
/**
 * Runs the K-Means job on this worker.
 *
 * <p>First generates the data-point and centroid input files on the file system
 * selected by the user (via {@code KMeansUtils.generateDataPoints}). Then three
 * task graphs are executed in sequence:
 * <ol>
 *   <li>the data-points graph, which partitions and reads the data points,</li>
 *   <li>the centroids graph, which reads the centroid values,</li>
 *   <li>the K-Means graph, which computes the distance between the centroids
 *       and the data points; it is re-executed once per configured iteration.</li>
 * </ol>
 * Total, data-load, and compute wall-clock times are logged at the end.
 *
 * @param workerEnv worker environment supplying the worker id and the job config
 */
@SuppressWarnings("unchecked")
@Override
public void execute(WorkerEnvironment workerEnv) {
  int workerId = workerEnv.getWorkerId();
  Config cfg = workerEnv.getConfig();
  LOG.log(Level.FINE, "Task worker starting: " + workerId);

  ComputeEnvironment computeEnv = ComputeEnvironment.init(workerEnv);
  TaskExecutor executor = computeEnv.getTaskExecutor();

  // Job parameters from the submitted configuration.
  int parallelism = cfg.getIntegerValue(DataObjectConstants.PARALLELISM_VALUE);
  int dim = cfg.getIntegerValue(DataObjectConstants.DIMENSIONS);
  int fileCount = cfg.getIntegerValue(DataObjectConstants.NUMBER_OF_FILES);
  int dataSize = cfg.getIntegerValue(DataObjectConstants.DSIZE);
  int centroidSize = cfg.getIntegerValue(DataObjectConstants.CSIZE);
  int iterations = cfg.getIntegerValue(DataObjectConstants.ARGS_ITERATIONS);
  String fileType = cfg.getStringValue(DataObjectConstants.FILE_TYPE);
  // Per-worker input directories: the worker id is appended so workers do not collide.
  String dataDir = cfg.getStringValue(DataObjectConstants.DINPUT_DIRECTORY) + workerId;
  String centroidDir = cfg.getStringValue(DataObjectConstants.CINPUT_DIRECTORY) + workerId;

  // Produce the input files this worker will read back through the task graphs.
  KMeansUtils.generateDataPoints(cfg, dim, fileCount, dataSize, centroidSize,
      dataDir, centroidDir, fileType);

  long jobStart = System.currentTimeMillis();

  // Graph 1: partition and read the data points.
  ComputeGraph dataPointsGraph =
      buildDataPointsTG(dataDir, dataSize, parallelism, dim, cfg, fileType);
  // Graph 2: read the centroids.
  ComputeGraph centroidsGraph =
      buildCentroidsTG(centroidDir, centroidSize, parallelism, dim, cfg, fileType);
  // Graph 3: the actual clustering computation.
  ComputeGraph kmeansGraph = buildKMeansTG(parallelism, cfg);

  // Plan and run the first two graphs once each.
  ExecutionPlan dataPointsPlan = executor.plan(dataPointsGraph);
  executor.execute(dataPointsGraph, dataPointsPlan);
  ExecutionPlan centroidsPlan = executor.plan(centroidsGraph);
  executor.execute(centroidsGraph, centroidsPlan);

  long dataLoadedAt = System.currentTimeMillis();

  // Re-execute the K-Means graph for each iteration. The boolean flags the last
  // round — presumably so the executor can finalize; confirm against IExecutor docs.
  IExecutor kmeansExecutor = executor.createExecution(kmeansGraph);
  for (int iter = 0; iter < iterations; iter++) {
    kmeansExecutor.execute(iter == iterations - 1);
  }
  computeEnv.close();

  long jobEnd = System.currentTimeMillis();
  LOG.info("Total K-Means Execution Time: " + (jobEnd - jobStart)
      + "\tData Load time : " + (dataLoadedAt - jobStart)
      + "\tCompute Time : " + (jobEnd - dataLoadedAt));
}
Aggregations