use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class InputDataStreamer method getDataPointsByDataObject.
/**
 * Copies the elements exposed by one partition of a data object into a new list.
 *
 * <p>The first value handed out by the partition's consumer is treated as an
 * {@link Iterator}; everything that iterator yields is appended, in order, to the result.
 *
 * @param taskIndex index of the partition to read from {@code datapointsDataObject}
 * @param datapointsDataObject data object whose partition is read
 * @return a newly created {@link List} holding the iterated elements
 */
public Object getDataPointsByDataObject(int taskIndex, DataObject<?> datapointsDataObject) {
  Object firstValue = datapointsDataObject.getPartition(taskIndex).getConsumer().next();
  // The element type is never inspected, so a wildcard iterator is sufficient here.
  Iterator<?> source = (Iterator<?>) firstValue;
  List<Object> collected = new ArrayList<>();
  source.forEachRemaining(collected::add);
  return collected;
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class TaskWorkerDataLoader method showAllUnits.
/**
 * Logs what is stored in every partition of the given data object.
 *
 * <p>For each element produced by a partition's consumer the element's class name is logged.
 * When the element is a {@link DataObjectImpl}, its partition count is logged and the first
 * value of each nested partition is inspected; if that value is an {@link Iterator}, every
 * {@link String} it yields is logged together with the {@code workerId} and {@code workers}
 * values of the enclosing instance.
 *
 * @param dataPointsObject data object whose partitions are walked and logged
 */
public void showAllUnits(DataObject<Object> dataPointsObject) {
  // Fetch the partition array once instead of on every loop test and access.
  DataPartition<Object>[] outerPartitions = dataPointsObject.getPartitions();
  for (int i = 0; i < outerPartitions.length; i++) {
    DataPartitionConsumer<Object> dataPartitionConsumer = outerPartitions[i].getConsumer();
    while (dataPartitionConsumer.hasNext()) {
      // Take exactly one element per iteration: calling next() once for the log line and
      // again for processing would log a different element than the one handled, skip
      // every other element, and could read past the end of the consumer.
      Object object = dataPartitionConsumer.next();
      LOG.info(String.format("Id1[%d], Type: %s", i, object.getClass().getName()));
      if (!(object instanceof DataObjectImpl<?>)) {
        continue;
      }
      DataObjectImpl<?> dataObjectImpl = (DataObjectImpl<?>) object;
      LOG.info(String.format("Id1[%d], Partition Count : %d", i, dataObjectImpl.getPartitionCount()));
      DataPartition<?>[] innerPartitions = dataObjectImpl.getPartitions();
      int numpar = innerPartitions.length;
      LOG.info("Number of Partitions : " + numpar);
      for (int j = 0; j < numpar; j++) {
        // Only the first value of each nested partition is examined.
        Object object1 = innerPartitions[j].getConsumer().next();
        LOG.info(String.format("Ids[%d,%d] , Received Object : %s ", i, j, object1.getClass().getName()));
        if (object1 instanceof Iterator<?>) {
          Iterator<?> itr = (Iterator<?>) object1;
          while (itr.hasNext()) {
            Object object2 = itr.next();
            // Non-string entries are silently skipped.
            if (object2 instanceof String) {
              LOG.info(String.format("Ids[%d,%d] , Worker Id %d / %d, Data : %s", i, j, workerId, workers, String.valueOf(object2)));
            }
          }
        }
      }
    }
  }
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class TaskExecutor method collectData.
/**
 * Gathers the partitions exposed by every {@link Collector} task of the supplied
 * {@link ExecutionPlan} and adds them to {@code dataMap}, grouped by collectible name.
 *
 * <p>Each partition gets its ID set to the index of the node that produced it immediately
 * before it is added to the {@link DataObject} for its name. A task that returns no partition
 * for a name is reported with a warning and otherwise ignored.
 *
 * @param cfg configuration passed to every {@link DataObjectImpl} created here
 * @param executionPlan plan whose nodes are scanned for {@link Collector} tasks
 * @param dataMap destination map; receives one entry per collected name
 */
public static void collectData(Config cfg, ExecutionPlan executionPlan, Map<String, DataObject> dataMap) {
  Map<Integer, INodeInstance> nodes = executionPlan.getNodes();
  Map<String, DataObject> collected = new HashMap<>();
  if (nodes != null) {
    for (INodeInstance node : nodes.values()) {
      INode task = node.getNode();
      if (!(task instanceof Collector)) {
        continue;
      }
      Collector collector = (Collector) task;
      Set<String> names = collector.getCollectibleNames();
      for (String name : names) {
        DataPartition partition = collector.get(name);
        // A task with a single output may only implement the no-arg get() method.
        if (partition == null && names.size() == 1) {
          partition = collector.get();
        }
        if (partition == null) {
          LOG.warning(String.format("Task index %d of task %d returned null for data %s", node.getIndex(), node.getId(), name));
          continue;
        }
        partition.setId(node.getIndex());
        DataObject target = collected.computeIfAbsent(name, n -> new DataObjectImpl<>(cfg));
        target.addPartition(partition);
      }
    }
  }
  dataMap.putAll(collected);
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class IterativeJob method execute.
/**
 * Builds a batch graph with a source task feeding a sink task over a partition edge and
 * executes the same plan ten times.
 *
 * <p>Before each run a fresh, empty {@link DataObjectImpl} is registered as the
 * {@code "input"} of the source; after each run the sink's output partitions are fetched.
 * The execution is closed once all iterations have finished.
 *
 * @param workerEnv environment supplying the worker ID and configuration
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  ComputeEnvironment computeEnv = ComputeEnvironment.init(workerEnv);
  TaskExecutor executor = computeEnv.getTaskExecutor();
  int id = workerEnv.getWorkerId();
  Config config = workerEnv.getConfig();
  LOG.log(Level.INFO, "Task worker starting: " + id);

  final int parallelism = 4;
  final int iterations = 10;

  IterativeSourceTask sourceTask = new IterativeSourceTask();
  PartitionTask sinkTask = new PartitionTask();

  // Wire source -> sink through a partition edge that carries plain objects.
  ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(config);
  builder.addSource("source", sourceTask, parallelism);
  builder.addCompute("sink", sinkTask, parallelism)
      .partition("source")
      .viaEdge("partition")
      .withDataType(MessageTypes.OBJECT);
  builder.setMode(OperationMode.BATCH);

  ComputeGraph computeGraph = builder.build();
  ExecutionPlan executionPlan = executor.plan(computeGraph);
  IExecutor execution = executor.createExecution(computeGraph, executionPlan);

  int iteration = 0;
  while (iteration < iterations) {
    LOG.info("Starting iteration: " + iteration);
    executor.addInput(computeGraph, executionPlan, "source", "input", new DataObjectImpl<>(config));
    // this is a blocking call
    execution.execute();
    DataObject<Object> sinkOutput = executor.getOutput(computeGraph, executionPlan, "sink");
    // The partitions are fetched but not used any further in this method.
    DataPartition<Object>[] sinkPartitions = sinkOutput.getPartitions();
    iteration++;
  }
  execution.closeExecution();
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class ConstraintTaskExample method execute.
/**
 * Generates input data, then runs two task graphs back to back, feeding the output of the
 * first into the second.
 *
 * <p>The input directory, dimension, parallelism and data size are read from the worker
 * configuration. The output registered under {@code "firstsink"} after the first graph runs
 * is added as the input {@code "firstgraphpoints"} before the second graph is executed. The
 * elapsed wall-clock time in milliseconds is logged at the end.
 *
 * @param workerEnv environment supplying the worker ID and configuration
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  int id = workerEnv.getWorkerId();
  Config config = workerEnv.getConfig();
  long startedAt = System.currentTimeMillis();
  LOG.log(Level.INFO, "Task worker starting: " + id);

  ComputeEnvironment computeEnv = ComputeEnvironment.init(workerEnv);
  TaskExecutor executor = computeEnv.getTaskExecutor();

  // Read the job settings from the configuration.
  String inputDirectory = String.valueOf(config.get(DataObjectConstants.DINPUT_DIRECTORY));
  String dimensionText = String.valueOf(config.get(DataObjectConstants.DIMENSIONS));
  int dimension = Integer.parseInt(dimensionText);
  String parallelismText = String.valueOf(config.get(DataObjectConstants.PARALLELISM_VALUE));
  int parallelism = Integer.parseInt(parallelismText);
  String dataSizeText = String.valueOf(config.get(DataObjectConstants.DSIZE));
  int dataSize = Integer.parseInt(dataSizeText);

  DataGenerator generator = new DataGenerator(config, id);
  generator.generate(new Path(inputDirectory), dataSize, dimension);

  ComputeGraph firstGraph = buildFirstGraph(parallelism, config, inputDirectory, dataSize, dimension, "firstgraphpoints", "1");
  ComputeGraph secondGraph = buildSecondGraph(parallelism, config, dimension, "firstgraphpoints", "1");

  // Plan and run the first task graph, then pick up what its sink produced.
  ExecutionPlan firstPlan = executor.plan(firstGraph);
  executor.execute(firstGraph, firstPlan);
  DataObject<Object> firstOutput = executor.getOutput("firstsink");

  // Plan the second task graph, hand it the first graph's output, and run it.
  ExecutionPlan secondPlan = executor.plan(secondGraph);
  executor.addInput("firstgraphpoints", firstOutput);
  executor.execute(secondGraph, secondPlan);

  long finishedAt = System.currentTimeMillis();
  LOG.info("Total Execution Time: " + (finishedAt - startedAt));
}
Aggregations