Search in sources :

Example 16 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

Method execute of the class TSetCachingExample.

/**
 * Demonstrates caching a TSet and feeding the cached result into another computation.
 *
 * <p>One source emits the integers 0..9; its values are multiplied by 5 and then increased
 * by 2, and the result is cached. A second, identical source is then combined element by
 * element with the cached values, and every sum is logged.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
    LOG.info(String.format("Hello from worker %d", batchEnv.getWorkerID()));

    // "x" source: yields 0..9 (second argument is presumably the parallelism - confirm).
    SourceTSet<Integer> xSource = batchEnv.createSource(new SourceFunc<Integer>() {

        private int emitted = 0;

        @Override
        public boolean hasNext() {
            return emitted < 10;
        }

        @Override
        public Integer next() {
            return emitted++;
        }
    }, 4);

    // Two chained computes: first x * 5, then + 2. The second stage sees Object elements,
    // hence the explicit cast back to int.
    ComputeTSet<Object> scaledAndShifted = xSource.direct().compute((values, sink) ->
        values.forEachRemaining(v -> sink.collect(v * 5))
    ).direct().compute((values, sink) ->
        values.forEachRemaining(v -> sink.collect((int) v + 2))
    );

    // when cache is called, twister2 will run everything up to this point and cache the result
    // into the memory. Cached TSets can be added as inputs for other TSets and operations.
    CachedTSet<Object> cachedX = scaledAndShifted.cache();

    // "z" source: yields 0..9, built the same way as the "x" source.
    SourceTSet<Integer> zSource = batchEnv.createSource(new SourceFunc<Integer>() {

        private int emitted = 0;

        @Override
        public boolean hasNext() {
            return emitted < 10;
        }

        @Override
        public Integer next() {
            return emitted++;
        }
    }, 4);

    // Adds each z value to the next value read from the input registered under "x".
    ComputeCollectorFunc<Iterator<Integer>, Integer> addCachedX = new ComputeCollectorFunc<Iterator<Integer>, Integer>() {

        private DataPartitionConsumer<Integer> cachedValues;

        @Override
        public void prepare(TSetContext context) {
            // Look up the consumer of the input that is attached below via addInput("x", ...).
            this.cachedValues = (DataPartitionConsumer<Integer>) context.getInput("x").getConsumer();
        }

        @Override
        public void compute(Iterator<Integer> zIterator, RecordCollector<Integer> collector) {
            while (zIterator.hasNext()) {
                // Read x before z, matching the left-to-right order of the sum.
                int x = cachedValues.next();
                int z = zIterator.next();
                collector.collect(x + z);
            }
        }
    };
    ComputeTSet<Integer> summed = zSource.direct().compute(addCachedX);
    summed.addInput("x", cachedX);
    summed.direct().forEach(total -> LOG.info("(x * 5) + 2 + z =" + total));
}
Also used : BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Iterator(java.util.Iterator) DataPartitionConsumer(edu.iu.dsc.tws.api.dataset.DataPartitionConsumer)

Example 17 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

Method execute of the class TSetCommunicationExample.

/**
 * Runs a small TSet pipeline: a source emitting 0..9 is multiplied by 5, increased by 2,
 * reduced by addition, and the reduced value is logged with the prefix {@code SUM=}.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
    LOG.info(String.format("Hello from worker %d", batchEnv.getWorkerID()));

    // Source yielding 0..9 (second argument is presumably the parallelism - confirm).
    SourceTSet<Integer> numbers = batchEnv.createSource(new SourceFunc<Integer>() {

        private int emitted = 0;

        @Override
        public boolean hasNext() {
            return emitted < 10;
        }

        @Override
        public Integer next() {
            return emitted++;
        }
    }, 4);

    // The chain is kept as a single expression; after the first compute the element type
    // is Object, so later stages cast back to int.
    numbers.direct().compute((values, out) ->
        values.forEachRemaining(v -> out.collect(v * 5))
    ).direct().compute((values, out) ->
        values.forEachRemaining(v -> out.collect((int) v + 2))
    ).reduce((left, right) -> (int) left + (int) right
    ).forEach(sum -> LOG.info("SUM=" + sum));
}
Also used : Twister2Job(edu.iu.dsc.tws.api.Twister2Job) Twister2Submitter(edu.iu.dsc.tws.rsched.job.Twister2Submitter) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) SourceFunc(edu.iu.dsc.tws.api.tset.fn.SourceFunc) Logger(java.util.logging.Logger) JobConfig(edu.iu.dsc.tws.api.JobConfig) Twister2Worker(edu.iu.dsc.tws.api.resource.Twister2Worker) Serializable(java.io.Serializable) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment)

Example 18 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

Method execute of the class TSetGatherExample.

/**
 * Builds and runs a source -> gather -> sink TSet graph.
 *
 * <p>The sink keeps the value of the last gathered tuple, stores it as the experiment output,
 * logs it, and then runs the GATHER verification. A verification failure is only logged.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    super.execute(workerEnv);
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

    // set the parallelism of source to task stage 0
    int sourceParallelism = jobParameters.getTaskStages().get(0);
    // Stage 1 is read but not used further in this method.
    int sinkParallelism = jobParameters.getTaskStages().get(1);

    SourceTSet<int[]> source = batchEnv.createSource(new TestBaseSource(), sourceParallelism).setName("Source");
    GatherTLink<int[]> gather = source.gather();

    SinkFunc<Iterator<Tuple<Integer, int[]>>> verifyingSink = tuples -> {
        // Walk the whole iterator; only the value of the final tuple is retained.
        int[] lastValue = null;
        while (tuples.hasNext()) {
            Tuple<Integer, int[]> tuple = tuples.next();
            lastValue = tuple.getValue();
        }
        experimentData.setOutput(lastValue);
        LOG.info("Results " + Arrays.toString(lastValue));
        try {
            verify(OperationNames.GATHER);
        } catch (VerificationException e) {
            LOG.info("Exception Message : " + e.getMessage());
        }
        return true;
    };
    SinkTSet<Iterator<Tuple<Integer, int[]>>> sink = gather.sink(verifyingSink);
    batchEnv.run(sink);
}
Also used : Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Arrays(java.util.Arrays) Iterator(java.util.Iterator) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) VerificationException(edu.iu.dsc.tws.examples.verification.VerificationException) GatherTLink(edu.iu.dsc.tws.tset.links.batch.GatherTLink) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) SinkTSet(edu.iu.dsc.tws.tset.sets.batch.SinkTSet) OperationNames(edu.iu.dsc.tws.api.compute.OperationNames) BaseTSetBatchWorker(edu.iu.dsc.tws.examples.tset.BaseTSetBatchWorker) Logger(java.util.logging.Logger) SinkFunc(edu.iu.dsc.tws.api.tset.fn.SinkFunc) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Iterator(java.util.Iterator) VerificationException(edu.iu.dsc.tws.examples.verification.VerificationException)

Example 19 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

Method execute of the class WordCount.

/**
 * Reads the {@code input} and {@code output} settings from the environment's config,
 * prepares Twister2 pipeline options bound to the batch environment, and delegates to
 * {@code runWordCount}.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

    // Both locations come from the config under the keys "input" and "output".
    Config jobConfig = batchEnv.getConfig();
    String inputLocation = jobConfig.getStringValue("input");
    String outputLocation = jobConfig.getStringValue("output");

    System.out.println("Rank " + batchEnv.getWorkerID());

    // Attach the TSet environment to the options and select the legacy Twister2 runner.
    Twister2PipelineOptions pipelineOptions = PipelineOptionsFactory.as(Twister2PipelineOptions.class);
    pipelineOptions.setTSetEnvironment(batchEnv);
    pipelineOptions.as(Twister2PipelineOptions.class).setRunner(Twister2LegacyRunner.class);

    runWordCount(pipelineOptions, inputLocation, outputLocation);
}
Also used : Twister2PipelineOptions(org.apache.beam.runners.twister2.Twister2PipelineOptions) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Config(edu.iu.dsc.tws.api.config.Config)

Example 20 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

Method execute of the class MinimalWordCount.

/**
 * Builds and runs a minimal Beam word-count pipeline using the Twister2 legacy runner.
 *
 * <p>The pipeline reads text lines, splits them on runs of non-letter characters, drops empty
 * tokens, counts occurrences per token, formats each count as {@code "word: count"} and
 * writes the result with the output prefix {@code wordcounts}. The method waits until the
 * pipeline run has finished.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
    System.out.println("Rank " + batchEnv.getWorkerID());

    Twister2PipelineOptions pipelineOptions = PipelineOptionsFactory.as(Twister2PipelineOptions.class);
    pipelineOptions.setTSetEnvironment(batchEnv);
    pipelineOptions.as(Twister2PipelineOptions.class).setRunner(Twister2LegacyRunner.class);

    // Create the Pipeline object with the options we defined above
    Pipeline pipeline = Pipeline.create(pipelineOptions);

    // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set
    // of input text files. TextIO.Read returns a PCollection where each element is one line from
    // the input text (a set of Shakespeare's texts).
    // This example reads a public data set consisting of the complete works of Shakespeare.
    pipeline
        .apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
        // Split every line into tokens on runs of non-letter characters.
        .apply(FlatMapElements.into(TypeDescriptors.strings())
            .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))
        // Splitting can yield empty tokens; discard them.
        .apply(Filter.by((String token) -> !token.isEmpty()))
        .apply(Count.perElement())
        // Render each (word, count) pair as text.
        .apply(MapElements.into(TypeDescriptors.strings())
            .via((KV<String, Long> entry) -> entry.getKey() + ": " + entry.getValue()))
        .apply(TextIO.write().to("wordcounts"));

    pipeline.run().waitUntilFinish();
}
Also used : Twister2PipelineOptions(org.apache.beam.runners.twister2.Twister2PipelineOptions) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment): 59, Config (edu.iu.dsc.tws.api.config.Config): 24, TSetEnvironment (edu.iu.dsc.tws.tset.env.TSetEnvironment): 24, JobConfig (edu.iu.dsc.tws.api.JobConfig): 23, WorkerEnvironment (edu.iu.dsc.tws.api.resource.WorkerEnvironment): 23, Logger (java.util.logging.Logger): 23, SourceTSet (edu.iu.dsc.tws.tset.sets.batch.SourceTSet): 22, HashMap (java.util.HashMap): 22, ResourceAllocator (edu.iu.dsc.tws.rsched.core.ResourceAllocator): 21, Iterator (java.util.Iterator): 21, Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple): 18, ComputeCollectorFunc (edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc): 12, ComputeFunc (edu.iu.dsc.tws.api.tset.fn.ComputeFunc): 12, TSetContext (edu.iu.dsc.tws.api.tset.TSetContext): 7, SinkTSet (edu.iu.dsc.tws.tset.sets.batch.SinkTSet): 6, Twister2Job (edu.iu.dsc.tws.api.Twister2Job): 5, MapFunc (edu.iu.dsc.tws.api.tset.fn.MapFunc): 5, SinkFunc (edu.iu.dsc.tws.api.tset.fn.SinkFunc): 5, Twister2Submitter (edu.iu.dsc.tws.rsched.job.Twister2Submitter): 5, ComputeTSet (edu.iu.dsc.tws.tset.sets.batch.ComputeTSet): 5