Search in sources :

Example 1 with MapFunc

use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.

the class ArrowTSetSourceExample method execute.

@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    Config config = env.getConfig();
    String csvInputDirectory = config.getStringValue(DataObjectConstants.DINPUT_DIRECTORY);
    String arrowInputDirectory = config.getStringValue(DataObjectConstants.ARROW_DIRECTORY);
    String arrowFileName = config.getStringValue(DataObjectConstants.FILE_NAME);
    int workers = config.getIntegerValue(DataObjectConstants.WORKERS);
    int parallel = config.getIntegerValue(DataObjectConstants.PARALLELISM_VALUE);
    int dsize = config.getIntegerValue(DataObjectConstants.DSIZE);
    LOG.info("arrow input file:" + arrowFileName + "\t" + arrowInputDirectory + "\t" + csvInputDirectory + "\t" + workers + "\t" + parallel);
    Schema schema = makeSchema();
    SourceTSet<String[]> csvSource = env.createCSVSource(csvInputDirectory, dsize, parallel, "split");
    SinkTSet<Iterator<Integer>> sinkTSet = csvSource.direct().map((MapFunc<String[], Integer>) input -> Integer.parseInt(input[0])).direct().sink(new ArrowBasedSinkFunction<>(arrowInputDirectory, arrowFileName, schema.toJson()));
    env.run(sinkTSet);
    // Source Function Call
    env.createArrowSource(arrowInputDirectory, arrowFileName, parallel, schema.toJson()).direct().compute((ComputeFunc<Iterator<Object>, List<Integer>>) input -> {
        List<Integer> integers = new ArrayList<>();
        input.forEachRemaining(i -> integers.add((Integer) i));
        return integers;
    }).direct().forEach(s -> LOG.info("Integer Array Size:" + s.size() + "\tvalues:" + s));
}
Also used : Twister2Job(edu.iu.dsc.tws.api.Twister2Job) ArrowBasedSinkFunction(edu.iu.dsc.tws.tset.fn.impl.ArrowBasedSinkFunction) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) Options(org.apache.commons.cli.Options) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) MapFunc(edu.iu.dsc.tws.api.tset.fn.MapFunc) JobConfig(edu.iu.dsc.tws.api.JobConfig) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) DefaultParser(org.apache.commons.cli.DefaultParser) ImmutableList(com.google.common.collect.ImmutableList) CommandLine(org.apache.commons.cli.CommandLine) Iterator(java.util.Iterator) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) CommandLineParser(org.apache.commons.cli.CommandLineParser) FieldType(org.apache.arrow.vector.types.pojo.FieldType) SinkTSet(edu.iu.dsc.tws.tset.sets.batch.SinkTSet) Field(org.apache.arrow.vector.types.pojo.Field) Logger(java.util.logging.Logger) Utils(edu.iu.dsc.tws.examples.Utils) DataObjectConstants(edu.iu.dsc.tws.data.utils.DataObjectConstants) Serializable(java.io.Serializable) Twister2Submitter(edu.iu.dsc.tws.rsched.job.Twister2Submitter) List(java.util.List) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) ComputeFunc(edu.iu.dsc.tws.api.tset.fn.ComputeFunc) Twister2Worker(edu.iu.dsc.tws.api.resource.Twister2Worker) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) Schema(org.apache.arrow.vector.types.pojo.Schema) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List)

Example 2 with MapFunc

use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.

the class KGatherUngroupedExample method execute.

@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    SourceTSet<Integer> src = dummySource(env, COUNT, PARALLELISM);
    KeyedGatherUngroupedTLink<Integer, Integer> klink = src.mapToTuple(i -> new Tuple<>(i % 4, i)).keyedGatherUngrouped();
    LOG.info("test foreach");
    klink.forEach((ApplyFunc<Tuple<Integer, Integer>>) data -> LOG.info(data.getKey() + " -> " + data.getValue()));
    LOG.info("test map");
    klink.map((MapFunc<Tuple<Integer, Integer>, String>) input -> input.getKey() + " -> " + input.getValue()).direct().forEach(s -> LOG.info("map: " + s));
    LOG.info("test compute");
    klink.compute((ComputeFunc<Iterator<Tuple<Integer, Integer>>, String>) input -> {
        StringBuilder sb = new StringBuilder();
        while (input.hasNext()) {
            Tuple<Integer, Integer> next = input.next();
            sb.append("[").append(next.getKey()).append("->").append(next.getValue()).append("]");
        }
        return sb.toString();
    }).direct().forEach(s -> LOG.info("compute: " + s));
    LOG.info("test computec");
    klink.compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String>) (input, output) -> {
        while (input.hasNext()) {
            Tuple<Integer, Integer> next = input.next();
            output.collect(next.getKey() + " -> " + next.getValue() * 2);
        }
    }).direct().forEach(s -> LOG.info("computec: " + s));
}
Also used : Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Iterator(java.util.Iterator) ComputeCollectorFunc(edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) MapFunc(edu.iu.dsc.tws.api.tset.fn.MapFunc) Logger(java.util.logging.Logger) JobConfig(edu.iu.dsc.tws.api.JobConfig) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) ComputeFunc(edu.iu.dsc.tws.api.tset.fn.ComputeFunc) ApplyFunc(edu.iu.dsc.tws.api.tset.fn.ApplyFunc) KeyedGatherUngroupedTLink(edu.iu.dsc.tws.tset.links.batch.KeyedGatherUngroupedTLink) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 3 with MapFunc

use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.

the class HadoopTSet method execute.

@Override
public void execute(Config config, JobAPI.Job job, IWorkerController workerController, IPersistentVolume persistentVolume, IVolatileVolume volatileVolume) {
    int workerId = workerController.getWorkerInfo().getWorkerID();
    WorkerEnvironment workerEnv = WorkerEnvironment.init(config, job, workerController, persistentVolume, volatileVolume);
    BatchEnvironment tSetEnv = TSetEnvironment.initBatch(workerEnv);
    Configuration configuration = new Configuration();
    configuration.addResource(new Path(HdfsDataContext.getHdfsConfigDirectory(config)));
    configuration.set(TextInputFormat.INPUT_DIR, "/input4");
    SourceTSet<String> source = tSetEnv.createHadoopSource(configuration, TextInputFormat.class, 4, new MapFunc<Tuple<LongWritable, Text>, String>() {

        @Override
        public String map(Tuple<LongWritable, Text> input) {
            return input.getKey().toString() + " : " + input.getValue().toString();
        }
    });
    SinkTSet<Iterator<String>> sink = source.direct().sink((SinkFunc<Iterator<String>>) value -> {
        while (value.hasNext()) {
            String next = value.next();
            LOG.info("Received value: " + next);
        }
        return true;
    });
    tSetEnv.run(sink);
}
Also used : Path(org.apache.hadoop.fs.Path) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) HdfsDataContext(edu.iu.dsc.tws.data.utils.HdfsDataContext) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Text(org.apache.hadoop.io.Text) IPersistentVolume(edu.iu.dsc.tws.api.resource.IPersistentVolume) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) MapFunc(edu.iu.dsc.tws.api.tset.fn.MapFunc) LongWritable(org.apache.hadoop.io.LongWritable) JobConfig(edu.iu.dsc.tws.api.JobConfig) TextInputFormat(org.apache.hadoop.mapreduce.lib.input.TextInputFormat) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Iterator(java.util.Iterator) IVolatileVolume(edu.iu.dsc.tws.api.resource.IVolatileVolume) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) SinkTSet(edu.iu.dsc.tws.tset.sets.batch.SinkTSet) JobAPI(edu.iu.dsc.tws.proto.system.job.JobAPI) Logger(java.util.logging.Logger) SinkFunc(edu.iu.dsc.tws.api.tset.fn.SinkFunc) Serializable(java.io.Serializable) Twister2Submitter(edu.iu.dsc.tws.rsched.job.Twister2Submitter) IWorker(edu.iu.dsc.tws.api.resource.IWorker) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) IWorkerController(edu.iu.dsc.tws.api.resource.IWorkerController) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) Configuration(org.apache.hadoop.conf.Configuration) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Text(org.apache.hadoop.io.Text) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) Iterator(java.util.Iterator) LongWritable(org.apache.hadoop.io.LongWritable) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 4 with MapFunc

use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.

the class HelloTSet method execute.

@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    LOG.info("Strating Hello TSet Example");
    int para = env.getConfig().getIntegerValue("para", 4);
    SourceTSet<int[]> source = env.createSource(new SourceFunc<int[]>() {

        private int count = 0;

        @Override
        public boolean hasNext() {
            return count < para;
        }

        @Override
        public int[] next() {
            count++;
            return new int[] { 1, 1, 1 };
        }
    }, para).setName("source");
    PartitionTLink<int[]> partitioned = source.partition(new LoadBalancePartitioner<>());
    ComputeTSet<int[]> mapedPartition = partitioned.map((MapFunc<int[], int[]>) input -> Arrays.stream(input).map(a -> a * 2).toArray());
    ReduceTLink<int[]> reduce = mapedPartition.reduce((t1, t2) -> {
        int[] ret = new int[t1.length];
        for (int i = 0; i < t1.length; i++) {
            ret[i] = t1[i] + t2[i];
        }
        return ret;
    });
    SinkTSet<int[]> sink = reduce.sink(value -> {
        LOG.info("Results " + Arrays.toString(value));
        return false;
    });
    env.run(sink);
    LOG.info("Ending  Hello TSet Example");
}
Also used : Twister2Job(edu.iu.dsc.tws.api.Twister2Job) Arrays(java.util.Arrays) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) Options(org.apache.commons.cli.Options) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) MapFunc(edu.iu.dsc.tws.api.tset.fn.MapFunc) JobConfig(edu.iu.dsc.tws.api.JobConfig) DefaultParser(org.apache.commons.cli.DefaultParser) ReduceTLink(edu.iu.dsc.tws.tset.links.batch.ReduceTLink) CommandLine(org.apache.commons.cli.CommandLine) ComputeTSet(edu.iu.dsc.tws.tset.sets.batch.ComputeTSet) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) CommandLineParser(org.apache.commons.cli.CommandLineParser) SinkTSet(edu.iu.dsc.tws.tset.sets.batch.SinkTSet) SourceFunc(edu.iu.dsc.tws.api.tset.fn.SourceFunc) LoadBalancePartitioner(edu.iu.dsc.tws.tset.fn.LoadBalancePartitioner) Logger(java.util.logging.Logger) Serializable(java.io.Serializable) Twister2Submitter(edu.iu.dsc.tws.rsched.job.Twister2Submitter) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) ParseException(org.apache.commons.cli.ParseException) PartitionTLink(edu.iu.dsc.tws.tset.links.batch.PartitionTLink) Twister2Worker(edu.iu.dsc.tws.api.resource.Twister2Worker) SourceFunc(edu.iu.dsc.tws.api.tset.fn.SourceFunc) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment)

Example 5 with MapFunc

use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.

the class KGatherExample method execute.

@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    int start = env.getWorkerID() * 100;
    SourceTSet<Integer> src = dummySource(env, start, COUNT, PARALLELISM);
    KeyedGatherTLink<Integer, Integer> klink = src.mapToTuple(i -> new Tuple<>(i % 10, i)).keyedGather();
    LOG.info("test foreach");
    klink.forEach((ApplyFunc<Tuple<Integer, Iterator<Integer>>>) data -> LOG.info(data.getKey() + " -> " + iterToString(data.getValue())));
    LOG.info("test map");
    klink.map((MapFunc<Tuple<Integer, Iterator<Integer>>, String>) input -> {
        int s = 0;
        while (input.getValue().hasNext()) {
            s += input.getValue().next();
        }
        return input.getKey() + " -> " + s;
    }).direct().forEach(s -> LOG.info("map: " + s));
    LOG.info("test compute");
    klink.compute((ComputeFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String>) input -> {
        StringBuilder s = new StringBuilder();
        while (input.hasNext()) {
            Tuple<Integer, Iterator<Integer>> next = input.next();
            s.append(" [").append(next.getKey()).append(" -> ").append(iterToString(next.getValue())).append("] ");
        }
        return s.toString();
    }).direct().forEach(s -> LOG.info("compute: concat " + s));
    LOG.info("test computec");
    klink.compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String>) (input, output) -> {
        while (input.hasNext()) {
            Tuple<Integer, Iterator<Integer>> next = input.next();
            output.collect(next.getKey() + " -> " + iterToString(next.getValue()));
        }
    }).direct().forEach(s -> LOG.info("computec: " + s));
    // Test byte[] key value pairs for KeyedGather
    SourceTSet<String> srcString = dummyStringSource(env, 25, PARALLELISM);
    KeyedGatherTLink<byte[], Integer> keyedGatherLink = srcString.mapToTuple(s -> new Tuple<>(s.getBytes(), 1)).keyedGather();
    LOG.info("test foreach");
    keyedGatherLink.forEach((ApplyFunc<Tuple<byte[], Iterator<Integer>>>) data -> LOG.info(new String(data.getKey()) + " -> " + iterToString(data.getValue())));
}
Also used : Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Iterator(java.util.Iterator) ComputeCollectorFunc(edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) MapFunc(edu.iu.dsc.tws.api.tset.fn.MapFunc) KeyedGatherTLink(edu.iu.dsc.tws.tset.links.batch.KeyedGatherTLink) Logger(java.util.logging.Logger) JobConfig(edu.iu.dsc.tws.api.JobConfig) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) ComputeFunc(edu.iu.dsc.tws.api.tset.fn.ComputeFunc) ApplyFunc(edu.iu.dsc.tws.api.tset.fn.ApplyFunc) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Aggregations

JobConfig (edu.iu.dsc.tws.api.JobConfig)5 Config (edu.iu.dsc.tws.api.config.Config)5 WorkerEnvironment (edu.iu.dsc.tws.api.resource.WorkerEnvironment)5 MapFunc (edu.iu.dsc.tws.api.tset.fn.MapFunc)5 ResourceAllocator (edu.iu.dsc.tws.rsched.core.ResourceAllocator)5 BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment)5 TSetEnvironment (edu.iu.dsc.tws.tset.env.TSetEnvironment)5 SourceTSet (edu.iu.dsc.tws.tset.sets.batch.SourceTSet)5 HashMap (java.util.HashMap)5 Logger (java.util.logging.Logger)5 Iterator (java.util.Iterator)4 Twister2Job (edu.iu.dsc.tws.api.Twister2Job)3 Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)3 ComputeFunc (edu.iu.dsc.tws.api.tset.fn.ComputeFunc)3 Twister2Submitter (edu.iu.dsc.tws.rsched.job.Twister2Submitter)3 SinkTSet (edu.iu.dsc.tws.tset.sets.batch.SinkTSet)3 Serializable (java.io.Serializable)3 Twister2Worker (edu.iu.dsc.tws.api.resource.Twister2Worker)2 ApplyFunc (edu.iu.dsc.tws.api.tset.fn.ApplyFunc)2 ComputeCollectorFunc (edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc)2