Search in sources :

Example 21 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class TeraSort method execute.

/**
 * Runs the sort job on this worker.
 *
 * <p>Two batch graphs may be executed:
 * <ol>
 *   <li>Only when an input file is configured ({@code ARG_INPUT_FILE}): a sampling graph whose
 *       all-reduce merges per-task {@code [min | max]} byte arrays; its output is handed to a
 *       {@code TaskPartitionerForSampledData}.</li>
 *   <li>Always: the sort graph, a keyed gather from {@code TASK_SOURCE} to {@code TASK_RECV}
 *       that uses the chosen partitioner and sorts each batch by key.</li>
 * </ol>
 *
 * @param workerEnv environment of the worker executing this job
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    final int workerId = workerEnv.getWorkerId();
    final ComputeEnvironment computeEnv = ComputeEnvironment.init(workerEnv);
    final Config cfg = workerEnv.getConfig();
    // The recorder receives "workerId == 0" as its second argument.
    resultsRecorder = new BenchmarkResultsRecorder(cfg, workerId == 0);
    Timing.setDefaultTimingUnit(TimingUnit.MILLI_SECONDS);

    final String inputFile = cfg.getStringValue(ARG_INPUT_FILE, null);
    final int keySize = cfg.getIntegerValue(ARG_KEY_SIZE, 10);
    final int valueSize = cfg.getIntegerValue(ARG_VALUE_SIZE, 90);
    final boolean randomInput = inputFile == null;

    // ---- Partitioner selection (sampling graph runs only for file based input) ----
    TaskPartitioner partitioner;
    if (randomInput) {
        partitioner = new TaskPartitionerForRandom();
    } else {
        ComputeGraphBuilder samplingBuilder = ComputeGraphBuilder.newBuilder(cfg);
        samplingBuilder.setMode(OperationMode.BATCH);

        Sampler sampler = new Sampler();
        samplingBuilder.addSource(TASK_SAMPLER, sampler, cfg.getIntegerValue(ARG_TASKS_SOURCES, 4));

        SamplerReduce reducer = new SamplerReduce();
        samplingBuilder
            .addCompute(TASK_SAMPLER_REDUCE, reducer, cfg.getIntegerValue(ARG_RESOURCE_INSTANCES, 4))
            .allreduce(TASK_SAMPLER)
            .viaEdge(EDGE)
            .withReductionFunction(byte[].class, (left, right) -> {
                // Each operand is laid out as: first keySize bytes = min, remaining bytes = max.
                byte[] leftMin = Arrays.copyOfRange(left, 0, keySize);
                byte[] leftMax = Arrays.copyOfRange(left, keySize, left.length);
                byte[] rightMin = Arrays.copyOfRange(right, 0, keySize);
                byte[] rightMax = Arrays.copyOfRange(right, keySize, right.length);
                byte[] merged = new byte[keySize * 2];

                // Keep the smaller of the two minimums and the larger of the two maximums.
                byte[] smallest =
                    ByteArrayComparator.getInstance().compare(leftMin, rightMin) > 0 ? rightMin : leftMin;
                byte[] largest =
                    ByteArrayComparator.getInstance().compare(leftMax, rightMax) < 0 ? rightMax : leftMax;

                System.arraycopy(smallest, 0, merged, 0, keySize);
                System.arraycopy(largest, 0, merged, keySize, keySize);
                return merged;
            });

        ComputeGraph samplingGraph = samplingBuilder.build();
        ExecutionPlan samplingPlan = computeEnv.getTaskExecutor().plan(samplingGraph);
        computeEnv.getTaskExecutor().execute(samplingGraph, samplingPlan);

        // NOTE(review): the output is looked up by the literal "sample-reduce" rather than
        // TASK_SAMPLER_REDUCE -- confirm the two names are meant to match.
        DataObject<byte[]> sampled = computeEnv.getTaskExecutor().getOutput("sample-reduce");
        LOG.info("Sample output received");
        partitioner = new TaskPartitionerForSampledData(
            sampled.getPartitions()[0].getConsumer().next(), keySize);
    }

    // ---- Sort graph ----
    ComputeGraphBuilder sortBuilder = ComputeGraphBuilder.newBuilder(cfg);
    sortBuilder.setMode(OperationMode.BATCH);

    BaseSource source = randomInput ? new RandomDataSource() : new FileDataSource();
    sortBuilder.addSource(TASK_SOURCE, source, cfg.getIntegerValue(ARG_TASKS_SOURCES, 4));

    Receiver sink = new Receiver();
    KeyedGatherConfig gatherConfig = sortBuilder
        .addCompute(TASK_RECV, sink, cfg.getIntegerValue(ARG_TASKS_SINKS, 4))
        .keyedGather(TASK_SOURCE)
        .viaEdge(EDGE)
        .withDataType(MessageTypes.BYTE_ARRAY)
        .withKeyType(MessageTypes.BYTE_ARRAY)
        .withTaskPartitioner(partitioner)
        .useDisk(true)
        .sortBatchByKey(ByteArrayComparator.getInstance())
        .groupBatchByKey(false);

    // Optional fixed-size message schema: total size = key bytes + value bytes.
    if (cfg.getBooleanValue(ARG_FIXED_SCHEMA, false)) {
        int messageSize = keySize + valueSize;
        LOG.info("Using fixed schema feature with message size : " + messageSize + " and key size : " + keySize);
        gatherConfig.withMessageSchema(MessageSchema.ofSize(messageSize, keySize));
    }

    ComputeGraph sortGraph = sortBuilder.build();
    ExecutionPlan sortPlan = computeEnv.getTaskExecutor().plan(sortGraph);
    computeEnv.getTaskExecutor().execute(sortGraph, sortPlan);
    computeEnv.close();
    LOG.info("Finished Sorting...");
}
Also used : BaseSource(edu.iu.dsc.tws.api.compute.nodes.BaseSource) KeyedGatherConfig(edu.iu.dsc.tws.task.impl.ops.KeyedGatherConfig) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) BenchmarkResultsRecorder(edu.iu.dsc.tws.examples.utils.bench.BenchmarkResultsRecorder) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) KeyedGatherConfig(edu.iu.dsc.tws.task.impl.ops.KeyedGatherConfig) ComputeEnvironment(edu.iu.dsc.tws.task.ComputeEnvironment) ExecutionPlan(edu.iu.dsc.tws.api.compute.executor.ExecutionPlan) TaskPartitioner(edu.iu.dsc.tws.api.compute.TaskPartitioner)

Example 22 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class CheckpointingTaskExample method execute.

/**
 * Assembles and runs a streaming graph with three tasks wired by direct edges:
 * {@code source -> compute -> sink}. Both edges carry {@code MessageTypes.INTEGER} data.
 * Every task uses the parallelism read from the {@code "parallelism"} config key
 * (1 when the key is absent).
 *
 * @param workerEnv environment of the worker executing this job
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    final ComputeEnvironment env = ComputeEnvironment.init(workerEnv);
    final ComputeGraphBuilder graph = env.newTaskGraph(OperationMode.STREAMING);
    final int parallelism = workerEnv.getConfig().getIntegerValue("parallelism", 1);

    // Task names, reused below when declaring the edges.
    final String sourceName = "source";
    final String computeName = "compute";
    final String sinkName = "sink";

    graph.addSource(sourceName, new SourceTask(), parallelism);

    // source -> compute
    graph.addCompute(computeName, new ComputeTask(), parallelism)
        .direct(sourceName)
        .viaEdge("so-c")
        .withDataType(MessageTypes.INTEGER);

    // compute -> sink
    graph.addCompute(sinkName, new SinkTask(), parallelism)
        .direct(computeName)
        .viaEdge("c-si")
        .withDataType(MessageTypes.INTEGER);

    env.buildAndExecute(graph);
    env.close();
}
Also used : ComputeEnvironment(edu.iu.dsc.tws.task.ComputeEnvironment) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder)

Example 23 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class BatchTaskSchedulerExample method buildSecondGraph.

/**
 * Builds the second batch task graph ("secondTG"): a source, a compute and a sink task
 * chained with direct edges that carry {@code MessageTypes.OBJECT} data.
 *
 * @param parallelism parallelism applied to all three tasks
 * @param conf        configuration handed to the graph builder
 * @return the built compute graph
 */
private static ComputeGraph buildSecondGraph(int parallelism, Config conf) {
    final String sourceName = "secondsource";
    final String computeName = "secondcompute";
    final String sinkName = "secondsink";

    // Task instances for the second task graph.
    SecondSourceTask source = new SecondSourceTask();
    SecondComputeTask compute = new SecondComputeTask();
    SecondSinkTask sink = new SecondSinkTask("secondgraphpoints");

    // Register the tasks with the builder.
    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(conf);
    builder.addSource(sourceName, source, parallelism);
    ComputeConnection toCompute = builder.addCompute(computeName, compute, parallelism);
    ComputeConnection toSink = builder.addCompute(sinkName, sink, parallelism);

    // Communication edges of the second task graph: source -> compute -> sink.
    toCompute.direct(sourceName)
        .viaEdge(Context.TWISTER2_DIRECT_EDGE)
        .withDataType(MessageTypes.OBJECT);
    toSink.direct(computeName)
        .viaEdge(Context.TWISTER2_DIRECT_EDGE)
        .withDataType(MessageTypes.OBJECT);

    builder.setMode(OperationMode.BATCH);
    builder.setTaskGraphName("secondTG");
    return builder.build();
}
Also used : ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection)

Example 24 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class BatchTaskSchedulerExample method buildThirdGraph.

/**
 * Builds the third batch task graph ("thirdTG"): a source feeding a sink through an
 * all-reduce edge named {@code "all-reduce"}, reduced with an {@code Aggregator}.
 *
 * @param parallelism parallelism applied to both tasks
 * @param conf        configuration handed to the graph builder
 * @return the built compute graph
 */
private static ComputeGraph buildThirdGraph(int parallelism, Config conf) {
    final String sourceName = "thirdsource";

    // Task instances for the third task graph.
    ThirdSourceTask source = new ThirdSourceTask();
    ThirdSinkTask sink = new ThirdSinkTask("thirdgraphpoints");

    ComputeGraphBuilder builder = ComputeGraphBuilder.newBuilder(conf);
    builder.addSource(sourceName, source, parallelism);
    ComputeConnection toSink = builder.addCompute("thirdsink", sink, parallelism);

    // Single communication edge: all-reduce from the source into the sink.
    toSink.allreduce(sourceName)
        .viaEdge("all-reduce")
        .withReductionFunction(new Aggregator())
        .withDataType(MessageTypes.OBJECT);

    builder.setMode(OperationMode.BATCH);
    builder.setTaskGraphName("thirdTG");
    return builder.build();
}
Also used : ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection)

Example 25 with ComputeGraphBuilder

use of edu.iu.dsc.tws.task.impl.ComputeGraphBuilder in project twister2 by DSC-SPIDAL.

the class ConstraintTaskExample method buildSecondGraph.

private ComputeGraph buildSecondGraph(int parallelism, Config conf, int dimension, String inputKey, String constraint) {
    SecondSourceTask sourceTask = new SecondSourceTask(inputKey);
    SecondSinkTask sinkTask = new SecondSinkTask(dimension);
    ComputeGraphBuilder secondGraphBuilder = ComputeGraphBuilder.newBuilder(conf);
    secondGraphBuilder.addSource("secondsource", sourceTask, parallelism);
    ComputeConnection computeConnection = secondGraphBuilder.addCompute("secondsink", sinkTask, parallelism);
    computeConnection.direct("secondsource").viaEdge(Context.TWISTER2_DIRECT_EDGE).withDataType(MessageTypes.OBJECT);
    secondGraphBuilder.setMode(OperationMode.BATCH);
    secondGraphBuilder.setTaskGraphName("secondTG");
    secondGraphBuilder.addGraphConstraints(Context.TWISTER2_MAX_TASK_INSTANCES_PER_WORKER, constraint);
    return secondGraphBuilder.build();
}
Also used : ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection)

Aggregations

ComputeGraphBuilder (edu.iu.dsc.tws.task.impl.ComputeGraphBuilder)66 ComputeConnection (edu.iu.dsc.tws.task.impl.ComputeConnection)55 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)39 TaskSchedulerClassTest (edu.iu.dsc.tws.tsched.utils.TaskSchedulerClassTest)16 ExecutionPlan (edu.iu.dsc.tws.api.compute.executor.ExecutionPlan)10 DataFlowGraph (edu.iu.dsc.tws.task.cdfw.DataFlowGraph)8 ComputeEnvironment (edu.iu.dsc.tws.task.ComputeEnvironment)7 GraphDataSource (edu.iu.dsc.tws.graphapi.partition.GraphDataSource)6 Config (edu.iu.dsc.tws.api.config.Config)4 ConnectedSink (edu.iu.dsc.tws.task.cdfw.task.ConnectedSink)4 DataFileReplicatedReadSource (edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource)4 Path (edu.iu.dsc.tws.api.data.Path)3 DataObjectSource (edu.iu.dsc.tws.task.dataobjects.DataObjectSource)3 ReduceFn (edu.iu.dsc.tws.task.impl.function.ReduceFn)3 JobConfig (edu.iu.dsc.tws.api.JobConfig)2 DataObject (edu.iu.dsc.tws.api.dataset.DataObject)2 TextInputSplit (edu.iu.dsc.tws.data.api.splits.TextInputSplit)2 IterativeSVMDataObjectCompute (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMDataObjectCompute)2 IterativeSVMDataObjectDirectSink (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMDataObjectDirectSink)2 IterativeSVMWeightVectorObjectCompute (edu.iu.dsc.tws.examples.ml.svm.data.IterativeSVMWeightVectorObjectCompute)2