Search in sources :

Example 1 with TSetContext

use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.

the class KeyedCheckpointingExample method execute.

/**
 * Persists a keyed source, logs the persisted tuples, and then supplies the persisted set
 * as the side input named "in" to a compute that runs over a second keyed source.
 *
 * @param workerEnvironment worker environment used to initialise the checkpointing environment
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
    final int count = 5;
    BatchChkPntEnvironment env = TSetEnvironment.initCheckpointing(workerEnvironment);

    // first source: persisted, then logged tuple by tuple
    KeyedSourceTSet<String, Integer> firstSource = dummySource(env, count, 0);
    KeyedPersistedTSet<String, Integer> persisted = firstSource.keyedDirect().persist();
    persisted.keyedDirect().forEach(tuple -> LOG.info(tuple.toString()));

    // second source: paired element-by-element with the persisted tuples
    KeyedSourceTSet<String, Integer> secondSource = dummySource(env, count, 10);
    secondSource.keyedDirect().compute(new BaseComputeFunc<Iterator<Tuple<String, Integer>>, String>() {

        // consumer over the side input registered under the name "in"
        private DataPartitionConsumer<Tuple<String, Integer>> persistedTuples;

        @Override
        public void prepare(TSetContext ctx) {
            super.prepare(ctx);
            persistedTuples = (DataPartitionConsumer<Tuple<String, Integer>>) ctx.getInput("in").getConsumer();
        }

        @Override
        public String compute(Iterator<Tuple<String, Integer>> input) {
            StringBuilder joined = new StringBuilder();
            // stop as soon as either side runs out of elements
            while (input.hasNext() && persistedTuples.hasNext()) {
                Tuple<String, Integer> fromSource = input.next();
                Tuple<String, Integer> fromPersisted = persistedTuples.next();
                joined.append("(");
                joined.append(fromSource);
                joined.append(",");
                joined.append(fromPersisted);
                joined.append(") ");
            }
            return joined.toString();
        }
    }).addInput("in", persisted).direct().forEach(line -> LOG.info(line));
}
Also used : BaseComputeFunc(edu.iu.dsc.tws.api.tset.fn.BaseComputeFunc) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) BatchChkPntEnvironment(edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment) Iterator(java.util.Iterator) DataPartitionConsumer(edu.iu.dsc.tws.api.dataset.DataPartitionConsumer) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 2 with TSetContext

use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.

the class TSetAllGatherExample method execute.

/**
 * Runs an all-gather from a test source into a sink that logs the array whose key matches
 * the sink's own index and then triggers verification of the ALLGATHER operation.
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    super.execute(workerEnv);
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);

    // stage 0 gives the source parallelism; stage 1 is read but not used further in this method
    List<Integer> stages = jobParameters.getTaskStages();
    int sourceParallelism = stages.get(0);
    int sinkParallelism = stages.get(1);

    SourceTSet<int[]> source = env.createSource(new TestBaseSource(), sourceParallelism).setName("Source");
    AllGatherTLink<int[]> gathered = source.allGather();

    SinkTSet<Iterator<Tuple<Integer, int[]>>> sink = gathered.sink(new SinkFunc<Iterator<Tuple<Integer, int[]>>>() {

        private TSetContext context;

        @Override
        public void prepare(TSetContext ctx) {
            this.context = ctx;
        }

        @Override
        public boolean add(Iterator<Tuple<Integer, int[]>> value) {
            // TODO: check this! The iterator handed to setOutput below has already been
            // advanced by the search loop.
            int[] matched = new int[0];
            boolean found = false;
            while (!found && value.hasNext()) {
                Tuple<Integer, int[]> entry = value.next();
                if (entry.getKey().equals(context.getIndex())) {
                    matched = entry.getValue();
                    found = true;
                }
            }
            LOG.info("Task Id : " + context.getIndex() + " Results " + Arrays.toString(matched));
            experimentData.setOutput(value);
            try {
                verify(OperationNames.ALLGATHER);
            } catch (VerificationException ex) {
                LOG.info("Exception Message : " + ex.getMessage());
            }
            return true;
        }
    });
    env.run(sink);
}
Also used : BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Iterator(java.util.Iterator) VerificationException(edu.iu.dsc.tws.examples.verification.VerificationException) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 3 with TSetContext

use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.

the class PartitionExample method execute.

/**
 * Creates a row source with a two-field schema, sends each row to a randomly chosen
 * destination, and logs every row that is delivered.
 *
 * @param workerEnvironment worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnvironment);
    List<TField> fieldList = new ArrayList<>();
    fieldList.add(new TField("first", MessageTypes.INTEGER));
    fieldList.add(new TField("second", MessageTypes.DOUBLE));
    // NOTE(review): the trailing 4 is presumably the source parallelism - confirm
    RowSourceTSet src = env.createRowSource("row", new SourceFunc<Row>() {

        // number of rows handed out so far by this source instance
        private int count = 0;

        @Override
        public boolean hasNext() {
            // side-effect free: calling hasNext() repeatedly must not consume a row
            return count < 1000;
        }

        @Override
        public Row next() {
            count++;
            return new TwoRow(1, 4.1);
        }
    }, 4).withSchema(new RowSchema(fieldList));
    // NOTE(review): meaning of the trailing (4, 0) arguments is not visible here - confirm
    BatchRowTLink partition = src.partition(new PartitionFunc<Row>() {

        private List<Integer> targets;

        private Random random;

        // total number of rows routed by this partitioner instance
        private int c = 0;

        // rows routed per destination, keyed by destination id
        private Map<Integer, Integer> counts = new HashMap<>();

        @Override
        public void prepare(Set<Integer> sources, Set<Integer> destinations) {
            targets = new ArrayList<>(destinations);
            random = new Random();
            for (int t : targets) {
                counts.put(t, 0);
            }
        }

        @Override
        public int partition(int sourceIndex, Row val) {
            // Resolve the destination id first: counts is keyed by destination id, not by
            // the position inside targets, so the tally must be updated under the id.
            int target = targets.get(random.nextInt(targets.size()));
            counts.merge(target, 1, Integer::sum);
            c++;
            if (c == 1000) {
                LOG.info("COUNTS " + counts);
            }
            return target;
        }
    }, 4, 0);
    partition.forEach(new ApplyFunc<Row>() {

        private TSetContext ctx;

        // rows seen so far by this instance
        private int count;

        @Override
        public void prepare(TSetContext context) {
            ctx = context;
        }

        @Override
        public void apply(Row data) {
            LOG.info(ctx.getIndex() + " Data " + data.get(0) + ", " + data.get(1) + ", count " + count++);
        }
    });
}
Also used : RowSchema(edu.iu.dsc.tws.api.tset.schema.RowSchema) RowSourceTSet(edu.iu.dsc.tws.tset.sets.batch.row.RowSourceTSet) TField(edu.iu.dsc.tws.common.table.TField) HashMap(java.util.HashMap) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) ArrayList(java.util.ArrayList) SourceFunc(edu.iu.dsc.tws.api.tset.fn.SourceFunc) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Random(java.util.Random) TwoRow(edu.iu.dsc.tws.common.table.TwoRow) BatchRowTLink(edu.iu.dsc.tws.api.tset.link.batch.BatchRowTLink) Row(edu.iu.dsc.tws.common.table.Row) TwoRow(edu.iu.dsc.tws.common.table.TwoRow)

Example 4 with TSetContext

use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.

the class KeyedAddInputsExample method execute.

/**
 * Caches two keyed sources, joins the first against a lookup map built from the second
 * (added as the side input "input"), logs the joined strings once eagerly, and then
 * re-evaluates a lazy forEach over the same compute four times.
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    KeyedSourceTSet<String, Integer> src0 = dummyKeyedSource(env, COUNT, PARALLELISM);
    KeyedSourceTSet<String, Integer> src1 = dummyKeyedSourceOther(env, COUNT, PARALLELISM);
    KeyedCachedTSet<String, Integer> cache0 = src0.cache();
    KeyedCachedTSet<String, Integer> cache1 = src1.cache();
    ComputeTSet<String> comp = cache0.keyedDirect().compute(new BaseComputeCollectorFunc<Iterator<Tuple<String, Integer>>, String>() {

        // key -> value lookup filled from the side input named "input"
        private Map<String, Integer> input1 = new HashMap<>();

        @Override
        public void prepare(TSetContext ctx) {
            super.prepare(ctx);
            // populate the hashmap with values from the input
            DataPartitionConsumer<Tuple<String, Integer>> part = (DataPartitionConsumer<Tuple<String, Integer>>) getInput("input").getConsumer();
            while (part.hasNext()) {
                Tuple<String, Integer> next = part.next();
                input1.put(next.getKey(), next.getValue());
            }
        }

        @Override
        public void compute(Iterator<Tuple<String, Integer>> input, RecordCollector<String> output) {
            while (input.hasNext()) {
                Tuple<String, Integer> next = input.next();
                // the lookup yields null when the side input has no entry for this key
                output.collect(next.getKey() + " -> " + next.getValue() + ", " + input1.get(next.getKey()));
            }
        }
    }).addInput("input", cache1);
    comp.direct().forEach(i -> LOG.info("comp: " + i));
    LOG.info("Test lazy cache!");
    ComputeTSet<Object> forEach = comp.direct().lazyForEach(i -> LOG.info("comp-lazy: " + i));
    for (int i = 0; i < 4; i++) {
        LOG.info("iter: " + i);
        env.eval(forEach);
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Restore the interrupt status instead of discarding it so that the code
            // running this worker can still observe the interruption. The loop is not
            // cut short, so the number of eval calls stays the same.
            Thread.currentThread().interrupt();
            LOG.info("Interrupted while pausing between evaluations: " + e.getMessage());
        }
    }
    env.finishEval(forEach);
}
Also used : RecordCollector(edu.iu.dsc.tws.api.tset.fn.RecordCollector) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) BaseComputeCollectorFunc(edu.iu.dsc.tws.api.tset.fn.BaseComputeCollectorFunc) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Iterator(java.util.Iterator) DataPartitionConsumer(edu.iu.dsc.tws.api.dataset.DataPartitionConsumer) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 5 with TSetContext

use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.

the class TSetCachingExample method execute.

/**
 * Builds a source X, chains two computes over it (multiply by 5, then add 2), caches the
 * result, and adds the cached values element-wise to a second source Z through the side
 * input named "x".
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    LOG.info(String.format("Hello from worker %d", env.getWorkerID()));

    // X: each source instance emits the integers 0..9
    SourceTSet<Integer> sourceX = env.createSource(new SourceFunc<Integer>() {

        private int emitted = 0;

        @Override
        public boolean hasNext() {
            return emitted < 10;
        }

        @Override
        public Integer next() {
            return emitted++;
        }
    }, 4);

    ComputeTSet<Object> twoComputes = sourceX.direct().compute((values, collector) -> {
        values.forEachRemaining(v -> collector.collect(v * 5));
    }).direct().compute((values, collector) -> {
        values.forEachRemaining(v -> collector.collect((int) v + 2));
    });

    // when cache is called, twister2 will run everything up to this point and cache the result
    // into the memory. Cached TSets can be added as inputs for other TSets and operations.
    CachedTSet<Object> cached = twoComputes.cache();

    // Z: each source instance emits the integers 0..9
    SourceTSet<Integer> sourceZ = env.createSource(new SourceFunc<Integer>() {

        private int emitted = 0;

        @Override
        public boolean hasNext() {
            return emitted < 10;
        }

        @Override
        public Integer next() {
            return emitted++;
        }
    }, 4);

    ComputeTSet<Integer> calc = sourceZ.direct().compute(new ComputeCollectorFunc<Iterator<Integer>, Integer>() {

        // consumer over the side input registered under the name "x"
        private DataPartitionConsumer<Integer> xValues;

        @Override
        public void prepare(TSetContext context) {
            this.xValues = (DataPartitionConsumer<Integer>) context.getInput("x").getConsumer();
        }

        @Override
        public void compute(Iterator<Integer> zValues, RecordCollector<Integer> output) {
            // one x value is pulled for every z value; only the z side is checked for exhaustion
            while (zValues.hasNext()) {
                Integer x = xValues.next();
                Integer z = zValues.next();
                output.collect(x + z);
            }
        }
    });
    calc.addInput("x", cached);
    calc.direct().forEach(sum -> LOG.info("(x * 5) + 2 + z =" + sum));
}
Also used : BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Iterator(java.util.Iterator) DataPartitionConsumer(edu.iu.dsc.tws.api.dataset.DataPartitionConsumer)

Aggregations

TSetContext (edu.iu.dsc.tws.api.tset.TSetContext)9 BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment)6 Iterator (java.util.Iterator)6 Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)5 DataPartitionConsumer (edu.iu.dsc.tws.api.dataset.DataPartitionConsumer)4 HashMap (java.util.HashMap)3 Random (java.util.Random)3 BaseComputeFunc (edu.iu.dsc.tws.api.tset.fn.BaseComputeFunc)2 BatchChkPntEnvironment (edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment)2 ArrayList (java.util.ArrayList)2 BaseComputeCollectorFunc (edu.iu.dsc.tws.api.tset.fn.BaseComputeCollectorFunc)1 BaseSourceFunc (edu.iu.dsc.tws.api.tset.fn.BaseSourceFunc)1 RecordCollector (edu.iu.dsc.tws.api.tset.fn.RecordCollector)1 SourceFunc (edu.iu.dsc.tws.api.tset.fn.SourceFunc)1 BatchRowTLink (edu.iu.dsc.tws.api.tset.link.batch.BatchRowTLink)1 KeyedSchema (edu.iu.dsc.tws.api.tset.schema.KeyedSchema)1 RowSchema (edu.iu.dsc.tws.api.tset.schema.RowSchema)1 Row (edu.iu.dsc.tws.common.table.Row)1 TField (edu.iu.dsc.tws.common.table.TField)1 TwoRow (edu.iu.dsc.tws.common.table.TwoRow)1