use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class PredictionSourceTask method getWeightVectorByWeightVectorObject.
/**
 * Collects every element of the partition selected by {@code taskIndex} into a list.
 *
 * <p>The first value produced by the partition's consumer is cast to an {@link Iterator};
 * that iterator is then drained, in iteration order, into a newly created list.
 *
 * <p>NOTE(review): the parameter is named after data points although the method name
 * refers to the weight vector -- presumably a copy-paste leftover; confirm before renaming.
 *
 * @param taskIndex index of the partition to read
 * @param datapointsDataObject data object whose partition is read
 * @return a new {@code List<Object>} holding the iterator's elements in iteration order
 * @throws ClassCastException if the consumer's first value is not an {@link Iterator}
 */
public Object getWeightVectorByWeightVectorObject(int taskIndex, DataObject<?> datapointsDataObject) {
  // Iterator<?> instead of a raw-typed Iterator<ArrayList>: the elements are only ever
  // handled as Object, and a cast to an unbounded wildcard is not an unchecked cast.
  Iterator<?> elements = (Iterator<?>) datapointsDataObject.getPartition(taskIndex).getConsumer().next();
  List<Object> items = new ArrayList<>();
  while (elements.hasNext()) {
    items.add(elements.next());
  }
  return items;
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class PredictionSourceTask method getDataPointsByDataObject.
/**
 * Collects every element of the partition selected by {@code taskIndex} into a list.
 *
 * <p>The first value produced by the partition's consumer is cast to an {@link Iterator};
 * that iterator is then drained, in iteration order, into a newly created list.
 *
 * @param taskIndex index of the partition to read
 * @param datapointsDataObject data object whose partition is read
 * @return a new {@code List<Object>} holding the iterator's elements in iteration order
 * @throws ClassCastException if the consumer's first value is not an {@link Iterator}
 */
public Object getDataPointsByDataObject(int taskIndex, DataObject<?> datapointsDataObject) {
  // Iterator<?> instead of a raw-typed Iterator<ArrayList>: the elements are only ever
  // handled as Object, and a cast to an unbounded wildcard is not an unchecked cast.
  Iterator<?> elements = (Iterator<?>) datapointsDataObject.getPartition(taskIndex).getConsumer().next();
  List<Object> items = new ArrayList<>();
  while (elements.hasNext()) {
    items.add(elements.next());
  }
  return items;
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeTrainingDataLoadingTaskGraph.
/**
 * Builds and executes the batch task graph named "training-data-loading-graph".
 *
 * <p>A {@code DataObjectSource}, constructed with the training data directory taken from
 * {@code svmJobParameters}, is connected to a {@code DataObjectSink} through a direct edge
 * carrying {@code MessageTypes.OBJECT}. Both tasks are registered with
 * {@code dataStreamerParallelism} as their parallelism.
 *
 * @return the DataObject the task executor reports as output of the sink task
 * @throws NullPointerException if the task executor returns no output for the sink task
 */
public DataObject<Object> executeTrainingDataLoadingTaskGraph() {
  DataObjectSource loader = new DataObjectSource(
      Context.TWISTER2_DIRECT_EDGE, this.svmJobParameters.getTrainingDataDir());
  DataObjectSink collector = new DataObjectSink();

  // Wire source -> sink on the shared builder.
  trainingBuilder.addSource(
      Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE, loader, dataStreamerParallelism);
  ComputeConnection sinkConnection = trainingBuilder.addCompute(
      Constants.SimpleGraphConfig.DATA_OBJECT_SINK, collector, dataStreamerParallelism);
  sinkConnection
      .direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE)
      .viaEdge(Context.TWISTER2_DIRECT_EDGE)
      .withDataType(MessageTypes.OBJECT);
  trainingBuilder.setMode(OperationMode.BATCH);

  // Build, plan and run the graph.
  ComputeGraph loadingGraph = trainingBuilder.build();
  loadingGraph.setGraphName("training-data-loading-graph");
  ExecutionPlan loadingPlan = taskExecutor.plan(loadingGraph);
  taskExecutor.execute(loadingGraph, loadingPlan);

  DataObject<Object> loaded = taskExecutor.getOutput(
      loadingGraph, loadingPlan, Constants.SimpleGraphConfig.DATA_OBJECT_SINK);
  if (loaded == null) {
    throw new NullPointerException("Something Went Wrong in Loading Training Data");
  }
  LOG.info("Training Data Total Partitions : " + loaded.getPartitions().length);
  return loaded;
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method executeWeightVectorLoadingTaskGraph.
/**
 * Builds and executes the batch task graph named "weight-vector-loading-graph".
 *
 * <p>A {@code DataObjectSource}, constructed with the weight vector data directory taken
 * from {@code svmJobParameters}, is connected to a {@code DataObjectSink} through a direct
 * edge carrying {@code MessageTypes.OBJECT}. Both tasks are registered with
 * {@code dataStreamerParallelism} as their parallelism.
 *
 * @return the DataObject the task executor reports as output of the sink task
 * @throws NullPointerException if the task executor returns no output for the sink task
 */
public DataObject<Object> executeWeightVectorLoadingTaskGraph() {
  DataObjectSource sourceTask = new DataObjectSource(Context.TWISTER2_DIRECT_EDGE, this.svmJobParameters.getWeightVectorDataDir());
  DataObjectSink sinkTask = new DataObjectSink();
  trainingBuilder.addSource(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE, sourceTask, dataStreamerParallelism);
  ComputeConnection firstGraphComputeConnection = trainingBuilder.addCompute(Constants.SimpleGraphConfig.DATA_OBJECT_SINK, sinkTask, dataStreamerParallelism);
  firstGraphComputeConnection.direct(Constants.SimpleGraphConfig.DATA_OBJECT_SOURCE).viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
  trainingBuilder.setMode(OperationMode.BATCH);
  ComputeGraph weightVectorTaskGraph = trainingBuilder.build();
  weightVectorTaskGraph.setGraphName("weight-vector-loading-graph");
  ExecutionPlan firstGraphExecutionPlan = taskExecutor.plan(weightVectorTaskGraph);
  taskExecutor.execute(weightVectorTaskGraph, firstGraphExecutionPlan);
  DataObject<Object> data = taskExecutor.getOutput(weightVectorTaskGraph, firstGraphExecutionPlan, Constants.SimpleGraphConfig.DATA_OBJECT_SINK);
  if (data == null) {
    throw new NullPointerException("Something Went Wrong in Loading Weight Vector");
  }
  // The message names the weight vector so this log line cannot be mistaken for the
  // one emitted while loading the training data.
  LOG.info("Weight Vector Total Partitions : " + data.getPartitions().length);
  return data;
}
use of edu.iu.dsc.tws.api.dataset.DataObject in project twister2 by DSC-SPIDAL.
the class SvmSgdAdvancedRunner method retriveFinalTestingAccuracy.
/**
 * Derives the averaged testing accuracy from the first partition of the given result.
 *
 * <p>The first value produced by the consumer of partition 0 is read. When it is a
 * {@code Double}, it is divided by {@code dataStreamerParallelism}, logged and returned.
 * Any other value is reported at SEVERE level and {@code 0} is returned.
 *
 * @param finalRes DataObject whose first partition carries the accuracy value
 * @return the value divided by {@code dataStreamerParallelism}, or {@code 0} when the
 *     value read is not a {@code Double}
 */
public double retriveFinalTestingAccuracy(DataObject<Object> finalRes) {
  final Object firstValue = finalRes.getPartitions()[0].getConsumer().next();

  if (!(firstValue instanceof Double)) {
    LOG.severe("Something Went Wrong In Calculating Testing Accuracy");
    return 0;
  }

  final double averaged = ((Double) firstValue).doubleValue() / dataStreamerParallelism;
  LOG.info(String.format("Testing Accuracy : %f ", averaged));
  return averaged;
}
Aggregations