Example 21 with Config

use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

the class KafkaExample method main.

public static void main(String[] args) throws ParseException {
    Options options = new Options();
    options.addOption(CLI_SERVER, true, "Kafka bootstrap server in the format host:port");
    options.addOption(CLI_TOPICS, true, "Set of topics in the format topic1,topic2");
    CommandLineParser cliParser = new DefaultParser();
    CommandLine cli = cliParser.parse(options, args);
    // defaults: local Kafka broker and a single test topic
    HashMap<String, Object> configs = new HashMap<>();
    configs.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    configs.put(CLI_TOPICS, Collections.singleton("test2"));
    if (cli.hasOption(CLI_SERVER)) {
        // override the default bootstrap server from the command line
        configs.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cli.getOptionValue(CLI_SERVER));
    }
    if (cli.hasOption(CLI_TOPICS)) {
        // split the comma-separated topic list into a trimmed set
        String topics = cli.getOptionValue(CLI_TOPICS);
        Set<String> topicsSet = Arrays.stream(topics.split(",")).map(String::trim).collect(Collectors.toSet());
        configs.put(CLI_TOPICS, topicsSet);
    }
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    JobConfig jobConfig = new JobConfig();
    jobConfig.putAll(configs);
    Twister2Job twister2Job = Twister2Job.newBuilder()
            .setJobName(KafkaExample.class.getName())
            .setWorkerClass(KafkaExample.class)
            .addComputeResource(1, 1024, 1)
            .setConfig(jobConfig)
            .build();
    // now submit the job
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Options(org.apache.commons.cli.Options) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) CommandLine(org.apache.commons.cli.CommandLine) CommandLineParser(org.apache.commons.cli.CommandLineParser) DefaultParser(org.apache.commons.cli.DefaultParser)
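
The worker half of KafkaExample is not shown on this page. The sketch below is a hypothetical illustration of how the submitted values could be read back inside the worker; only getConfig and getStringValue are confirmed elsewhere on this page (see the TeraSort example below), and the Properties wiring is an assumption, not the project's actual code.

@Override
public void execute(WorkerEnvironment workerEnv) {
    // hypothetical sketch, not taken from the twister2 sources
    Config config = workerEnv.getConfig();
    // read the bootstrap server submitted from main(), falling back to the same default
    String bootstrapServers = config.getStringValue(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    Properties props = new Properties();
    props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // ... construct the Kafka consumer/source with these properties
}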

Example 22 with Config

use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

the class TeraSort method main.

public static void main(String[] args) throws ParseException {
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    JobConfig jobConfig = new JobConfig();
    Options options = new Options();
    // file based mode configuration
    options.addOption(createOption(ARG_INPUT_FILE, true, "Path to the file containing input tuples. " + "Path can be specified with %d, where it will be replaced by the task index. For example, " + "input-%d will be considered as input-0 in the source task with index 0.", false));
    // non-file based mode configurations
    options.addOption(createOption(ARG_SIZE, true, "Total data size in gigabytes, across all workers", true));
    options.addOption(createOption(ARG_KEY_SIZE, true, "Size in bytes of the key of a single tuple", true));
    options.addOption(createOption(ARG_KEY_SEED, true, "Seed for the random key generator", false));
    options.addOption(createOption(ARG_VALUE_SIZE, true, "Size in bytes of the value of a single tuple", true));
    // resources
    options.addOption(createOption(ARG_RESOURCE_CPU, true, "Number of CPUs to allocate per instance", true));
    options.addOption(createOption(ARG_RESOURCE_MEMORY, true, "Amount of memory in megabytes to allocate per instance", true));
    options.addOption(createOption(ARG_RESOURCE_INSTANCES, true, "Number of instances", true));
    options.addOption(createOption(ARG_RESOURCE_VOLATILE_DISK, true, "Volatile disk size in gigabytes for each worker on Kubernetes", false));
    options.addOption(createOption(ARG_WORKERS_PER_POD, true, "Workers per pod in Kubernetes", false));
    // tasks and sources counts
    options.addOption(createOption(ARG_TASKS_SOURCES, true, "Number of source tasks", true));
    options.addOption(createOption(ARG_TASKS_SINKS, true, "Number of sink tasks", true));
    // optional configurations (tune performance)
    options.addOption(createOption(ARG_TUNE_MAX_BYTES_IN_MEMORY, true, "Maximum bytes to keep in memory", false));
    options.addOption(createOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, true, "Maximum size of a shuffle file in bytes", false));
    options.addOption(createOption(ARG_BENCHMARK_METADATA, true, "Auto generated argument by benchmark suite", false));
    // output folder
    options.addOption(createOption(ARG_OUTPUT_FOLDER, true, "Folder to save output files", false));
    // fixed schema
    options.addOption(createOption(ARG_FIXED_SCHEMA, false, "Use fixed schema feature", false));
    // verify option
    options.addOption(createOption(VERIFY, false, "Verify whether the results are sorted.", false));
    CommandLineParser commandLineParser = new DefaultParser();
    CommandLine cmd = commandLineParser.parse(options, args);
    if (cmd.hasOption(ARG_INPUT_FILE)) {
        jobConfig.put(ARG_INPUT_FILE, cmd.getOptionValue(ARG_INPUT_FILE));
    } else {
        jobConfig.put(ARG_SIZE, Double.valueOf(cmd.getOptionValue(ARG_SIZE)));
        jobConfig.put(ARG_VALUE_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_VALUE_SIZE)));
        jobConfig.put(ARG_KEY_SIZE, Integer.valueOf(cmd.getOptionValue(ARG_KEY_SIZE)));
    }
    // volatile disk size in GB; defaults to 0 (no volatile disk requested)
    double volatileDisk = 0.0;
    if (cmd.hasOption(ARG_RESOURCE_VOLATILE_DISK)) {
        volatileDisk = Double.valueOf(cmd.getOptionValue(ARG_RESOURCE_VOLATILE_DISK));
    }
    // default value is 1
    int workersPerPod = 1;
    if (cmd.hasOption(ARG_WORKERS_PER_POD)) {
        workersPerPod = Integer.valueOf(cmd.getOptionValue(ARG_WORKERS_PER_POD));
    }
    jobConfig.put(ARG_TASKS_SOURCES, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SOURCES)));
    jobConfig.put(ARG_TASKS_SINKS, Integer.valueOf(cmd.getOptionValue(ARG_TASKS_SINKS)));
    jobConfig.put(ARG_RESOURCE_INSTANCES, Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)) * workersPerPod);
    if (cmd.hasOption(ARG_TUNE_MAX_BYTES_IN_MEMORY)) {
        long maxBytesInMemory = Long.valueOf(cmd.getOptionValue(ARG_TUNE_MAX_BYTES_IN_MEMORY));
        jobConfig.put(SHUFFLE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
        // for benchmark service
        jobConfig.put(ARG_TUNE_MAX_BYTES_IN_MEMORY, maxBytesInMemory);
    }
    if (cmd.hasOption(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE)) {
        long maxShuffleFileSize = Long.valueOf(cmd.getOptionValue(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE));
        jobConfig.put(SHUFFLE_MAX_FILE_SIZE, maxShuffleFileSize);
        // for benchmark service
        jobConfig.put(ARG_TUNE_MAX_SHUFFLE_FILE_SIZE, maxShuffleFileSize);
    }
    if (cmd.hasOption(ARG_BENCHMARK_METADATA)) {
        jobConfig.put(ARG_BENCHMARK_METADATA, cmd.getOptionValue(ARG_BENCHMARK_METADATA));
        jobConfig.put(ARG_RUN_BENCHMARK, true);
    }
    if (cmd.hasOption(ARG_OUTPUT_FOLDER)) {
        jobConfig.put(ARG_OUTPUT_FOLDER, cmd.getOptionValue(ARG_OUTPUT_FOLDER));
    }
    if (cmd.hasOption(ARG_FIXED_SCHEMA)) {
        jobConfig.put(ARG_FIXED_SCHEMA, true);
    }
    if (cmd.hasOption(VERIFY)) {
        jobConfig.put(VERIFY, true);
    }
    Twister2Job twister2Job = Twister2Job.newBuilder()
            .setJobName("terasort")
            .setWorkerClass(TeraSort.class.getName())
            .addComputeResource(Double.valueOf(cmd.getOptionValue(ARG_RESOURCE_CPU)),
                    Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_MEMORY)),
                    volatileDisk,
                    Integer.valueOf(cmd.getOptionValue(ARG_RESOURCE_INSTANCES)),
                    workersPerPod)
            .setConfig(jobConfig)
            .build();
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) KeyedGatherConfig(edu.iu.dsc.tws.task.impl.ops.KeyedGatherConfig) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) CommandLineParser(org.apache.commons.cli.CommandLineParser) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) DefaultParser(org.apache.commons.cli.DefaultParser)
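
The createOption helper used throughout this main method is not included in the snippet. A plausible implementation on top of Commons CLI's Option.Builder, assuming the two boolean flags mean hasArg and required in that order, would be:

// hypothetical helper, inferred from the call sites above
private static Option createOption(String opt, boolean hasArg, String description, boolean required) {
    return Option.builder(opt)
            .hasArg(hasArg)
            .desc(description)
            .required(required)
            .build();
}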

Example 23 with Config

use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

the class TeraSort method execute.

@Override
public void execute(WorkerEnvironment workerEnv) {
    int workerID = workerEnv.getWorkerId();
    ComputeEnvironment cEnv = ComputeEnvironment.init(workerEnv);
    Config config = workerEnv.getConfig();
    resultsRecorder = new BenchmarkResultsRecorder(config, workerID == 0);
    Timing.setDefaultTimingUnit(TimingUnit.MILLI_SECONDS);
    final String filePath = config.getStringValue(ARG_INPUT_FILE, null);
    final int keySize = config.getIntegerValue(ARG_KEY_SIZE, 10);
    final int valueSize = config.getIntegerValue(ARG_VALUE_SIZE, 90);
    // Sampling graph: used only in file-based mode
    TaskPartitioner taskPartitioner;
    if (filePath != null) {
        ComputeGraphBuilder samplingGraph = ComputeGraphBuilder.newBuilder(config);
        samplingGraph.setMode(OperationMode.BATCH);
        Sampler samplerTask = new Sampler();
        samplingGraph.addSource(TASK_SAMPLER, samplerTask, config.getIntegerValue(ARG_TASKS_SOURCES, 4));
        SamplerReduce samplerReduce = new SamplerReduce();
        samplingGraph.addCompute(TASK_SAMPLER_REDUCE, samplerReduce, config.getIntegerValue(ARG_RESOURCE_INSTANCES, 4))
                .allreduce(TASK_SAMPLER)
                .viaEdge(EDGE)
                .withReductionFunction(byte[].class, (minMax1, minMax2) -> {
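            // each operand is a packed [min|max] pair: the first keySize bytes hold the
            // smallest key seen so far, the remaining bytes hold the largest key seen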
            byte[] min1 = Arrays.copyOfRange(minMax1, 0, keySize);
            byte[] max1 = Arrays.copyOfRange(minMax1, keySize, minMax1.length);
            byte[] min2 = Arrays.copyOfRange(minMax2, 0, keySize);
            byte[] max2 = Arrays.copyOfRange(minMax2, keySize, minMax2.length);
            byte[] newMinMax = new byte[keySize * 2];
            byte[] min = min1;
            byte[] max = max1;
            if (ByteArrayComparator.getInstance().compare(min1, min2) > 0) {
                min = min2;
            }
            if (ByteArrayComparator.getInstance().compare(max1, max2) < 0) {
                max = max2;
            }
            System.arraycopy(min, 0, newMinMax, 0, keySize);
            System.arraycopy(max, 0, newMinMax, keySize, keySize);
            return newMinMax;
        });
        ComputeGraph sampleGraphBuild = samplingGraph.build();
        ExecutionPlan sampleTaskPlan = cEnv.getTaskExecutor().plan(sampleGraphBuild);
        cEnv.getTaskExecutor().execute(sampleGraphBuild, sampleTaskPlan);
        DataObject<byte[]> output = cEnv.getTaskExecutor().getOutput(TASK_SAMPLER_REDUCE);
        LOG.info("Sample output received");
        taskPartitioner = new TaskPartitionerForSampledData(output.getPartitions()[0].getConsumer().next(), keySize);
    } else {
        taskPartitioner = new TaskPartitionerForRandom();
    }
    // Sort Graph
    ComputeGraphBuilder teraSortTaskGraph = ComputeGraphBuilder.newBuilder(config);
    teraSortTaskGraph.setMode(OperationMode.BATCH);
    BaseSource dataSource;
    if (filePath == null) {
        dataSource = new RandomDataSource();
    } else {
        dataSource = new FileDataSource();
    }
    teraSortTaskGraph.addSource(TASK_SOURCE, dataSource, config.getIntegerValue(ARG_TASKS_SOURCES, 4));
    Receiver receiver = new Receiver();
    KeyedGatherConfig keyedGatherConfig = teraSortTaskGraph
            .addCompute(TASK_RECV, receiver, config.getIntegerValue(ARG_TASKS_SINKS, 4))
            .keyedGather(TASK_SOURCE)
            .viaEdge(EDGE)
            .withDataType(MessageTypes.BYTE_ARRAY)
            .withKeyType(MessageTypes.BYTE_ARRAY)
            .withTaskPartitioner(taskPartitioner)
            .useDisk(true)
            .sortBatchByKey(ByteArrayComparator.getInstance())
            .groupBatchByKey(false);
    if (config.getBooleanValue(ARG_FIXED_SCHEMA, false)) {
        LOG.info("Using fixed schema feature with message size : " + (keySize + valueSize) + " and key size : " + keySize);
        keyedGatherConfig.withMessageSchema(MessageSchema.ofSize(keySize + valueSize, keySize));
    }
    ComputeGraph computeGraph = teraSortTaskGraph.build();
    ExecutionPlan executionPlan = cEnv.getTaskExecutor().plan(computeGraph);
    cEnv.getTaskExecutor().execute(computeGraph, executionPlan);
    cEnv.close();
    LOG.info("Finished Sorting...");
}
Also used : BaseSource(edu.iu.dsc.tws.api.compute.nodes.BaseSource) KeyedGatherConfig(edu.iu.dsc.tws.task.impl.ops.KeyedGatherConfig) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) BenchmarkResultsRecorder(edu.iu.dsc.tws.examples.utils.bench.BenchmarkResultsRecorder) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) ComputeEnvironment(edu.iu.dsc.tws.task.ComputeEnvironment) ExecutionPlan(edu.iu.dsc.tws.api.compute.executor.ExecutionPlan) TaskPartitioner(edu.iu.dsc.tws.api.compute.TaskPartitioner)
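
The min/max reduction above is easy to test in isolation. The following self-contained sketch reproduces the same merge logic with JDK-only types; Arrays.compare (JDK 9+, signed comparison) stands in for twister2's ByteArrayComparator, which is an assumption, since that comparator is not shown here and may compare bytes as unsigned.

import java.util.Arrays;

// standalone illustration of the [min|max] merge used in the sampling graph
public final class MinMaxMergeSketch {

    static byte[] merge(byte[] minMax1, byte[] minMax2, int keySize) {
        byte[] min1 = Arrays.copyOfRange(minMax1, 0, keySize);
        byte[] max1 = Arrays.copyOfRange(minMax1, keySize, minMax1.length);
        byte[] min2 = Arrays.copyOfRange(minMax2, 0, keySize);
        byte[] max2 = Arrays.copyOfRange(minMax2, keySize, minMax2.length);
        byte[] min = Arrays.compare(min1, min2) > 0 ? min2 : min1; // keep the smaller minimum
        byte[] max = Arrays.compare(max1, max2) < 0 ? max2 : max1; // keep the larger maximum
        byte[] newMinMax = new byte[keySize * 2];
        System.arraycopy(min, 0, newMinMax, 0, keySize);
        System.arraycopy(max, 0, newMinMax, keySize, keySize);
        return newMinMax;
    }

    public static void main(String[] args) {
        byte[] a = {1, 9}; // keySize = 1: min = 1, max = 9
        byte[] b = {0, 5}; // min = 0, max = 5
        System.out.println(Arrays.toString(merge(a, b, 1))); // prints [0, 9]
    }
}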

Example 24 with Config

use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

the class WordCountWorker method execute.

@Override
public void execute(WorkerEnvironment wEnv) {
    this.workerEnv = wEnv;
    this.workerId = workerEnv.getWorkerId();
    taskStages.add(NO_OF_TASKS);
    taskStages.add(NO_OF_TASKS);
    // let's create the task plan
    this.logicalPlan = Utils.createStageLogicalPlan(workerEnv, taskStages);
    setupTasks();
    // create the communication
    wordAggregator = new WordAggregator();
    keyGather = new BKeyedReduce(workerEnv.getCommunicator(), logicalPlan, sources, destinations, new ReduceFunction() {

        @Override
        public void init(Config cfg, DataFlowOperation op, Map<Integer, List<Integer>> expectedIds) {
            // stateless sum: nothing to initialize
        }

        @Override
        public Object reduce(Object t1, Object t2) {
            // add the two partial counts for a word
            return (Integer) t1 + (Integer) t2;
        }
    }, wordAggregator, MessageTypes.OBJECT, MessageTypes.INTEGER, new HashingSelector());
    // assign the task ids to the workers, and run them using threads
    scheduleTasks();
    // progress the communication
    progress();
    // close communication
    workerEnv.close();
}
Also used : BKeyedReduce(edu.iu.dsc.tws.comms.batch.BKeyedReduce) Config(edu.iu.dsc.tws.api.config.Config) DataFlowOperation(edu.iu.dsc.tws.api.comms.DataFlowOperation) ReduceFunction(edu.iu.dsc.tws.api.comms.ReduceFunction) HashingSelector(edu.iu.dsc.tws.comms.selectors.HashingSelector) Map(java.util.Map)
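
The submitting main method for WordCountWorker does not appear on this page. Modeled on the other main methods shown here, a hypothetical driver could look like the following; the job name and the use of NO_OF_TASKS as the worker count are illustrative assumptions.

// hypothetical submitter, following the pattern of the other examples on this page
public static void main(String[] args) {
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    JobConfig jobConfig = new JobConfig();
    Twister2Job twister2Job = Twister2Job.newBuilder()
            .setJobName("wordcount")
            .setWorkerClass(WordCountWorker.class)
            .addComputeResource(1, 1024, NO_OF_TASKS)
            .setConfig(jobConfig)
            .build();
    Twister2Submitter.submitJob(twister2Job, config);
}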

Example 25 with Config

use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.

the class CheckpointingTaskExample method main.

public static void main(String[] args) {
    int numberOfWorkers = 4;
    if (args.length == 1) {
        numberOfWorkers = Integer.valueOf(args[0]);
    }
    // first load the configurations from command line and config files
    Config config = ResourceAllocator.loadConfig(new HashMap<>());
    // let's put a configuration here
    JobConfig jobConfig = new JobConfig();
    jobConfig.put("parallelism", numberOfWorkers);
    Twister2Job twister2Job = Twister2Job.newBuilder()
            .setJobName("hello-checkpointing-job")
            .setWorkerClass(CheckpointingTaskExample.class)
            .addComputeResource(1, 1024, numberOfWorkers)
            .setConfig(jobConfig)
            .build();
    // now submit the job
    Twister2Submitter.submitJob(twister2Job, config);
}
Also used : Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Job(edu.iu.dsc.tws.api.Twister2Job)
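
On the worker side, the submitted "parallelism" value can be read back from the merged Config. Below is a one-line sketch using getIntegerValue, which appears in the TeraSort example above; placing it in the worker's execute method is an assumption.

// inside the worker's execute(WorkerEnvironment workerEnv), read the submitted value
int parallelism = workerEnv.getConfig().getIntegerValue("parallelism", 4);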

Aggregations

Config (edu.iu.dsc.tws.api.config.Config): 169
JobConfig (edu.iu.dsc.tws.api.JobConfig): 101
Twister2Job (edu.iu.dsc.tws.api.Twister2Job): 52
CommandLine (org.apache.commons.cli.CommandLine): 27
CommandLineParser (org.apache.commons.cli.CommandLineParser): 27
DefaultParser (org.apache.commons.cli.DefaultParser): 27
Options (org.apache.commons.cli.Options): 27
HashMap (java.util.HashMap): 26
ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph): 18
Map (java.util.Map): 15
TaskSchedulePlan (edu.iu.dsc.tws.api.compute.schedule.elements.TaskSchedulePlan): 13
WorkerPlan (edu.iu.dsc.tws.api.compute.schedule.elements.WorkerPlan): 12
LinkedHashMap (java.util.LinkedHashMap): 12
Test (org.junit.Test): 12
Path (edu.iu.dsc.tws.api.data.Path): 10
TaskInstancePlan (edu.iu.dsc.tws.api.compute.schedule.elements.TaskInstancePlan): 9
WorkerSchedulePlan (edu.iu.dsc.tws.api.compute.schedule.elements.WorkerSchedulePlan): 9
JobAPI (edu.iu.dsc.tws.proto.system.job.JobAPI): 9
TaskSchedulerClassTest (edu.iu.dsc.tws.tsched.utils.TaskSchedulerClassTest): 9
ExecutionPlan (edu.iu.dsc.tws.api.compute.executor.ExecutionPlan): 8