Search in sources :

Example 36 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

The execute method of the class KGatherExample.

/**
 * Runs forEach, map and both compute variants over a keyed-gather link whose
 * keys are the source values modulo 10, then runs forEach over a second
 * keyed-gather link that uses byte[] keys.
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
    int firstValue = batchEnv.getWorkerID() * 100;
    SourceTSet<Integer> numbers = dummySource(batchEnv, firstValue, COUNT, PARALLELISM);
    KeyedGatherTLink<Integer, Integer> gathered = numbers.mapToTuple(i -> new Tuple<>(i % 10, i)).keyedGather();

    // forEach: log every key together with its gathered values
    LOG.info("test foreach");
    ApplyFunc<Tuple<Integer, Iterator<Integer>>> logGroup =
        group -> LOG.info(group.getKey() + " -> " + iterToString(group.getValue()));
    gathered.forEach(logGroup);

    // map: reduce each group to "key -> sum of values"
    LOG.info("test map");
    MapFunc<Tuple<Integer, Iterator<Integer>>, String> sumGroup = group -> {
        int total = 0;
        for (Iterator<Integer> values = group.getValue(); values.hasNext();) {
            total += values.next();
        }
        return group.getKey() + " -> " + total;
    };
    gathered.map(sumGroup).direct().forEach(line -> LOG.info("map: " + line));

    // compute: concatenate all groups seen by this task into one string
    LOG.info("test compute");
    ComputeFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String> concatGroups = groups -> {
        StringBuilder joined = new StringBuilder();
        while (groups.hasNext()) {
            Tuple<Integer, Iterator<Integer>> group = groups.next();
            joined.append(" [").append(group.getKey());
            joined.append(" -> ").append(iterToString(group.getValue()));
            joined.append("] ");
        }
        return joined.toString();
    };
    gathered.compute(concatGroups).direct().forEach(line -> LOG.info("compute: concat " + line));

    // compute with collector: emit one string per group
    LOG.info("test computec");
    ComputeCollectorFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String> emitGroups = (groups, collector) -> {
        while (groups.hasNext()) {
            Tuple<Integer, Iterator<Integer>> group = groups.next();
            collector.collect(group.getKey() + " -> " + iterToString(group.getValue()));
        }
    };
    gathered.compute(emitGroups).direct().forEach(line -> LOG.info("computec: " + line));

    // Same forEach check, but with byte[] keys and a constant value of 1
    SourceTSet<String> words = dummyStringSource(batchEnv, 25, PARALLELISM);
    KeyedGatherTLink<byte[], Integer> byteKeyed = words.mapToTuple(s -> new Tuple<>(s.getBytes(), 1)).keyedGather();
    LOG.info("test foreach");
    ApplyFunc<Tuple<byte[], Iterator<Integer>>> logByteGroup =
        group -> LOG.info(new String(group.getKey()) + " -> " + iterToString(group.getValue()));
    byteKeyed.forEach(logByteGroup);
}
Also used : Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Iterator(java.util.Iterator) ComputeCollectorFunc(edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) MapFunc(edu.iu.dsc.tws.api.tset.fn.MapFunc) KeyedGatherTLink(edu.iu.dsc.tws.tset.links.batch.KeyedGatherTLink) Logger(java.util.logging.Logger) JobConfig(edu.iu.dsc.tws.api.JobConfig) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) ComputeFunc(edu.iu.dsc.tws.api.tset.fn.ComputeFunc) ApplyFunc(edu.iu.dsc.tws.api.tset.fn.ApplyFunc) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 37 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

The execute method of the class KPartitionExample.

/**
 * Runs forEach, map and both compute variants over a keyed-partition link
 * whose keys are the source values modulo 10.
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
    int firstValue = batchEnv.getWorkerID() * 100;
    SourceTSet<Integer> numbers = dummySource(batchEnv, firstValue, COUNT, PARALLELISM);
    KeyedPartitionTLink<Integer, Integer> partitioned = numbers.mapToTuple(i -> new Tuple<>(i % 10, i)).keyedPartition(new LoadBalancePartitioner<>());

    // forEach: log each tuple as key_value
    LOG.info("test foreach");
    partitioned.forEach(pair -> LOG.info(pair.getKey() + "_" + pair.getValue()));

    // map: append a marker to each tuple's string form
    LOG.info("test map");
    partitioned.map(pair -> pair.toString() + "$$").direct().forEach(line -> LOG.info("map: " + line));

    // compute: concatenate every tuple seen by this task, space separated
    LOG.info("test compute");
    ComputeFunc<Iterator<Tuple<Integer, Integer>>, String> concatPairs = pairs -> {
        StringBuilder joined = new StringBuilder();
        while (pairs.hasNext()) {
            String text = pairs.next().toString();
            joined.append(text).append(" ");
        }
        return joined.toString();
    };
    partitioned.compute(concatPairs).direct().forEach(line -> LOG.info("compute: concat " + line));

    // compute with collector: emit each tuple's string form individually
    LOG.info("test computec");
    ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String> emitPairs = (pairs, collector) -> {
        while (pairs.hasNext()) {
            Tuple<Integer, Integer> pair = pairs.next();
            collector.collect(pair.toString());
        }
    };
    partitioned.compute(emitPairs).direct().forEach(line -> LOG.info("computec: " + line));
}
Also used : Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Iterator(java.util.Iterator) ComputeCollectorFunc(edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) LoadBalancePartitioner(edu.iu.dsc.tws.tset.fn.LoadBalancePartitioner) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) Logger(java.util.logging.Logger) JobConfig(edu.iu.dsc.tws.api.JobConfig) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) ComputeFunc(edu.iu.dsc.tws.api.tset.fn.ComputeFunc) KeyedPartitionTLink(edu.iu.dsc.tws.tset.links.batch.KeyedPartitionTLink) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 38 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

The execute method of the class PersistExample.

/**
 * Persists the output of three different links (direct, reduce, gather) built
 * on the same source and passes each persisted set to {@code runOps}.
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
    int firstValue = batchEnv.getWorkerID() * 100;
    SourceTSet<Integer> numbers = dummySource(batchEnv, firstValue, COUNT, PARALLELISM);

    // direct().persist() -- noted by the original author as having IterLink semantics
    PersistedTSet<Integer> persistedDirect = numbers.direct().persist();
    runOps(batchEnv, persistedDirect);

    // reduce().persist() -- noted by the original author as having SingleLink semantics
    LOG.info("test persist after reduce");
    PersistedTSet<Integer> persistedReduce = numbers.reduce(Integer::sum).persist();
    runOps(batchEnv, persistedReduce);

    // gather().persist() -- noted by the original author as a TupleValueIterLink
    LOG.info("test persist after gather");
    PersistedTSet<Integer> persistedGather = numbers.gather().persist();
    runOps(batchEnv, persistedGather);
}
Also used : BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment)

Example 39 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

The execute method of the class HelloTwister2.

/**
 * Initialises the batch environment and logs a greeting containing this
 * worker's ID.
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    int workerId = TSetEnvironment.initBatch(workerEnv).getWorkerID();
    String greeting = String.format("Hello from worker %d", workerId);
    LOG.info(greeting);
}
Also used : BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment)

Example 40 with BatchEnvironment

use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.

The execute method of the class TSetTeraSort.

/**
 * Generates random keyed byte data, routes it through a keyed gather with a
 * range partitioner on the two leading key bytes and a byte-array comparator,
 * and logs a warning whenever a task observes keys out of order.
 *
 * <p>The parallelism and the data size (in GB) are read from the environment's
 * config under {@code PARAM_PARALLELISM} and {@code PARAM_DATA_SIZE_GB}.
 *
 * @param workerEnv worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    final int parallelism = env.getConfig().getIntegerValue(PARAM_PARALLELISM);
    final int dataSize = env.getConfig().getIntegerValue(PARAM_DATA_SIZE_GB);
    KeyedSourceTSet<byte[], byte[]> keyedSource = env.createKeyedSource(new SourceFunc<Tuple<byte[], byte[]>>() {

        // Pre-generated random keys; one is consumed per emitted tuple
        private Queue<byte[]> keys = new LinkedList<>();

        // Single 90-byte payload shared by every emitted tuple
        private byte[] data = new byte[90];

        @Override
        public void prepare(TSetContext context) {
            Arrays.fill(data, (byte) 1);
            Random random = new Random();
            // Multiply in long: the previous int arithmetic (dataSize * 1024 * 1024 * 1024)
            // overflowed for any dataSize >= 2 and produced a zero or negative tuple count.
            // The divisor 100 presumably reflects 10-byte key + 90-byte value per tuple.
            int noOfTuples = (int) ((dataSize * 1024L * 1024L * 1024L * 1.0d) / parallelism / 100);
            if (context.getIndex() == 0) {
                LOG.info(noOfTuples + " tuples will be produced in each source");
            }
            for (int i = 0; i < noOfTuples; i++) {
                byte[] key = new byte[10];
                random.nextBytes(key);
                keys.add(key);
            }
        }

        @Override
        public boolean hasNext() {
            return !keys.isEmpty();
        }

        @Override
        public Tuple<byte[], byte[]> next() {
            return new Tuple<>(keys.poll(), data);
        }
    }, parallelism);
    keyedSource.keyedGather(new PartitionFunc<byte[]>() {

        // Number of distinct 16-bit key prefixes assigned to each destination
        protected int keysToOneTask;

        // Destination ids in ascending order, indexed by key-range bucket
        protected int[] destinationsList;

        @Override
        public void prepare(Set<Integer> sources, Set<Integer> destinations) {
            // considering only most significant bytes of array
            int totalPossibilities = 256 * 256;
            this.keysToOneTask = (int) Math.ceil(totalPossibilities / (double) destinations.size());
            this.destinationsList = new int[destinations.size()];
            int index = 0;
            for (int i : destinations) {
                destinationsList[index++] = i;
            }
            // Sorted so that increasing key ranges map to increasing destination ids
            Arrays.sort(this.destinationsList);
        }

        // Maps the first two key bytes (read as an unsigned 16-bit value) to a bucket index
        int getIndex(byte[] array) {
            int key = ((array[0] & 0xff) << 8) + (array[1] & 0xff);
            return key / keysToOneTask;
        }

        @Override
        public int partition(int sourceIndex, byte[] val) {
            return this.destinationsList[this.getIndex(val)];
        }

        @Override
        public void commit(int source, int partition) {
            // intentionally empty: nothing to do on commit
        }
    }, (left, right) -> ByteArrayComparator.getInstance().compare(left, right)).useDisk().forEach(new ApplyFunc<Tuple<byte[], Iterator<byte[]>>>() {

        // Key of the previously applied tuple; null until the first tuple arrives
        private byte[] previousKey;

        @Override
        public void apply(Tuple<byte[], Iterator<byte[]>> data) {
            if (previousKey != null) {
                // A positive result means the previous key sorts after the current one
                int compare = ByteArrayComparator.getInstance().compare(previousKey, data.getKey());
                if (compare > 0) {
                    LOG.warning("Unsorted keys detected. TeraSort has failed. " + compare);
                }
            }
            previousKey = data.getKey();
        }
    });
}
Also used : BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) LinkedList(java.util.LinkedList) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Random(java.util.Random) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Aggregations

BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment)59 Config (edu.iu.dsc.tws.api.config.Config)24 TSetEnvironment (edu.iu.dsc.tws.tset.env.TSetEnvironment)24 JobConfig (edu.iu.dsc.tws.api.JobConfig)23 WorkerEnvironment (edu.iu.dsc.tws.api.resource.WorkerEnvironment)23 Logger (java.util.logging.Logger)23 SourceTSet (edu.iu.dsc.tws.tset.sets.batch.SourceTSet)22 HashMap (java.util.HashMap)22 ResourceAllocator (edu.iu.dsc.tws.rsched.core.ResourceAllocator)21 Iterator (java.util.Iterator)21 Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)18 ComputeCollectorFunc (edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc)12 ComputeFunc (edu.iu.dsc.tws.api.tset.fn.ComputeFunc)12 TSetContext (edu.iu.dsc.tws.api.tset.TSetContext)7 SinkTSet (edu.iu.dsc.tws.tset.sets.batch.SinkTSet)6 Twister2Job (edu.iu.dsc.tws.api.Twister2Job)5 MapFunc (edu.iu.dsc.tws.api.tset.fn.MapFunc)5 SinkFunc (edu.iu.dsc.tws.api.tset.fn.SinkFunc)5 Twister2Submitter (edu.iu.dsc.tws.rsched.job.Twister2Submitter)5 ComputeTSet (edu.iu.dsc.tws.tset.sets.batch.ComputeTSet)5