Search in sources:

Example 86 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

The class KPartitionExample, method execute.

/**
 * Builds a keyed partition link from a dummy integer source and runs the
 * {@code forEach}, {@code map} and both {@code compute} variants on it,
 * logging whatever each operation produces.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
    // the start value handed to the source is derived from the worker id
    int offset = batchEnv.getWorkerID() * 100;
    SourceTSet<Integer> source = dummySource(batchEnv, offset, COUNT, PARALLELISM);

    // key every value by its remainder modulo 10, then partition by key
    KeyedPartitionTLink<Integer, Integer> partitioned = source
        .mapToTuple(value -> new Tuple<>(value % 10, value))
        .keyedPartition(new LoadBalancePartitioner<>());

    LOG.info("test foreach");
    partitioned.forEach(tuple -> LOG.info(tuple.getKey() + "_" + tuple.getValue()));

    LOG.info("test map");
    partitioned
        .map(tuple -> tuple.toString() + "$$")
        .direct()
        .forEach(mapped -> LOG.info("map: " + mapped));

    LOG.info("test compute");
    partitioned
        .compute((ComputeFunc<Iterator<Tuple<Integer, Integer>>, String>) tuples -> {
            // concatenate every tuple of the partition, each followed by a space
            StringBuilder joined = new StringBuilder();
            tuples.forEachRemaining(tuple -> joined.append(tuple.toString()).append(" "));
            return joined.toString();
        })
        .direct()
        .forEach(concat -> LOG.info("compute: concat " + concat));

    LOG.info("test computec");
    partitioned
        .compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String>) (tuples, collector) ->
            // emit one string per incoming tuple
            tuples.forEachRemaining(tuple -> collector.collect(tuple.toString())))
        .direct()
        .forEach(collected -> LOG.info("computec: " + collected));
}
Also used : Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Iterator(java.util.Iterator) ComputeCollectorFunc(edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) LoadBalancePartitioner(edu.iu.dsc.tws.tset.fn.LoadBalancePartitioner) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) Logger(java.util.logging.Logger) JobConfig(edu.iu.dsc.tws.api.JobConfig) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) ComputeFunc(edu.iu.dsc.tws.api.tset.fn.ComputeFunc) KeyedPartitionTLink(edu.iu.dsc.tws.tset.links.batch.KeyedPartitionTLink)

Example 87 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

The class TSetTeraSort, method execute.

/**
 * Runs a TeraSort-style job on the TSet API: a keyed source generates random
 * 10-byte keys paired with a shared 90-byte value, the tuples are gathered by
 * key using a range partitioner over the two leading key bytes, and the final
 * {@code forEach} logs a warning whenever a key arrives out of order.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
    final int parallelism = env.getConfig().getIntegerValue(PARAM_PARALLELISM);
    final int dataSize = env.getConfig().getIntegerValue(PARAM_DATA_SIZE_GB);
    KeyedSourceTSet<byte[], byte[]> keyedSource = env.createKeyedSource(new SourceFunc<Tuple<byte[], byte[]>>() {

        // keys generated in prepare(), handed out one at a time by next()
        private Queue<byte[]> keys = new LinkedList<>();

        // single value array shared by every emitted tuple
        private byte[] data = new byte[90];

        @Override
        public void prepare(TSetContext context) {
            Arrays.fill(data, (byte) 1);
            Random random = new Random();
            // The first multiplier is a long literal so the byte count is computed in
            // 64-bit arithmetic; with plain int literals the product overflowed for
            // dataSize >= 2 before it was widened to double.
            // Each tuple is a 10-byte key plus a 90-byte value, hence the division by 100.
            int noOfTuples = (int) ((dataSize * 1024L * 1024 * 1024 * 1.0d) / parallelism / 100);
            if (context.getIndex() == 0) {
                LOG.info(noOfTuples + " tuples will be produced in each source");
            }
            for (int i = 0; i < noOfTuples; i++) {
                byte[] key = new byte[10];
                random.nextBytes(key);
                keys.add(key);
            }
        }

        @Override
        public boolean hasNext() {
            return !keys.isEmpty();
        }

        @Override
        public Tuple<byte[], byte[]> next() {
            return new Tuple<>(keys.poll(), data);
        }
    }, parallelism);
    keyedSource.keyedGather(new PartitionFunc<byte[]>() {

        // number of distinct 16-bit key prefixes assigned to each destination
        protected int keysToOneTask;

        // destination ids in ascending order
        protected int[] destinationsList;

        @Override
        public void prepare(Set<Integer> sources, Set<Integer> destinations) {
            // considering only most significant bytes of array
            int totalPossibilities = 256 * 256;
            this.keysToOneTask = (int) Math.ceil(totalPossibilities / (double) destinations.size());
            this.destinationsList = new int[destinations.size()];
            int index = 0;
            for (int i : destinations) {
                destinationsList[index++] = i;
            }
            Arrays.sort(this.destinationsList);
        }

        // Combines the first two bytes into an unsigned 16-bit value and maps it
        // to a slot in destinationsList.
        int getIndex(byte[] array) {
            int key = ((array[0] & 0xff) << 8) + (array[1] & 0xff);
            return key / keysToOneTask;
        }

        @Override
        public int partition(int sourceIndex, byte[] val) {
            return this.destinationsList[this.getIndex(val)];
        }

        @Override
        public void commit(int source, int partition) {
        }
    }, (left, right) -> ByteArrayComparator.getInstance().compare(left, right)).useDisk().forEach(new ApplyFunc<Tuple<byte[], Iterator<byte[]>>>() {

        // key of the previously seen tuple; null until the first tuple arrives
        private byte[] previousKey;

        @Override
        public void apply(Tuple<byte[], Iterator<byte[]>> data) {
            if (previousKey != null) {
                int compare = ByteArrayComparator.getInstance().compare(previousKey, data.getKey());
                // a positive result means the previous key sorts after the current one
                if (compare > 0) {
                    LOG.warning("Unsorted keys detected. TeraSort has failed. " + compare);
                }
            }
            previousKey = data.getKey();
        }
    });
}
Also used : BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) LinkedList(java.util.LinkedList) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Random(java.util.Random) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 88 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

The class SetSchemaExample, method execute.

/**
 * Demonstrates attaching schemas to TSets. A small integer source is reduced,
 * mapped to strings and then to keyed tuples; at each stage the elements are
 * logged once without and once with an explicit schema, and every
 * {@code prepare} hook logs the input and output schema reported by its context.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

    SourceTSet<Integer> source = batchEnv.createSource(new BaseSourceFunc<Integer>() {

        // number of elements handed out so far
        private int emitted = 0;

        @Override
        public void prepare(TSetContext context) {
            super.prepare(context);
            LOG.info("schemas0: " + context.getInputSchema() + " -> " + context.getOutputSchema());
        }

        @Override
        public boolean hasNext() {
            // only true until the first element has been handed out
            return emitted == 0;
        }

        @Override
        public Integer next() {
            emitted += 1;
            return emitted;
        }
    }, 2).setName("src");

    source.direct().forEach(value -> LOG.info("out0: " + value));
    source.withSchema(PrimitiveSchemas.INTEGER)
        .direct()
        .forEach(value -> LOG.info("out1: " + value));

    ComputeTSet<String> asStrings = source.allReduce(Integer::sum).map(new BaseMapFunc<Integer, String>() {

        @Override
        public void prepare(TSetContext context) {
            super.prepare(context);
            LOG.info("schemas1: " + context.getInputSchema() + " -> " + context.getOutputSchema());
        }

        @Override
        public String map(Integer number) {
            return number.toString();
        }
    });

    asStrings.direct().forEach(text -> LOG.info("out2: " + text));
    asStrings.withSchema(PrimitiveSchemas.STRING)
        .direct()
        .forEach(text -> LOG.info("out3: " + text));

    KeyedTSet<String, Integer> keyedTuples = asStrings.mapToTuple(new BaseMapFunc<String, Tuple<String, Integer>>() {

        @Override
        public void prepare(TSetContext context) {
            super.prepare(context);
            LOG.info("schemas2: " + context.getInputSchema() + " -> " + context.getOutputSchema());
        }

        @Override
        public Tuple<String, Integer> map(String text) {
            // the string is the key, its parsed integer value is the value
            return new Tuple<>(text, Integer.parseInt(text));
        }
    });

    keyedTuples.keyedDirect().forEach(tuple -> LOG.info("out4: " + tuple));
    keyedTuples.withSchema(new KeyedSchema(MessageTypes.STRING, MessageTypes.INTEGER))
        .keyedDirect()
        .forEach(tuple -> LOG.info("out5: " + tuple));
}
Also used : KeyedSchema(edu.iu.dsc.tws.api.tset.schema.KeyedSchema) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) BaseSourceFunc(edu.iu.dsc.tws.api.tset.fn.BaseSourceFunc) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 89 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

The class KeyedPipeTLink, method persist.

/**
 * Persists this link's data. Without checkpointing this is simply
 * {@code doPersist()}. With checkpointing, a per-link flag variable records
 * whether the data has already been persisted: if it has, a checkpointed TSet
 * backed by a disk-reading source is created, added to the graph, run and
 * returned; otherwise the data is persisted and the flag is set and committed.
 *
 * @return the persisted TSet, or the checkpointed TSet when the flag is already set
 */
@Override
public KeyedPersistedTSet<K, V> persist() {
    // no checkpointing: nothing to record or restore
    if (!getTSetEnv().isCheckpointingEnabled()) {
        return doPersist();
    }

    // handling checkpointing
    final String flagName = this.getId() + "-persisted";
    final BatchChkPntEnvironment checkpointEnv = (BatchChkPntEnvironment) getTSetEnv();
    Boolean alreadyPersisted = checkpointEnv.initVariable(flagName, false);

    if (!alreadyPersisted) {
        // first pass: persist, then record and commit that fact
        KeyedPersistedTSet<K, V> persistedTSet = this.doPersist();
        checkpointEnv.updateVariable(flagName, true);
        checkpointEnv.commit();
        return persistedTSet;
    }

    // create a source function with the capability to read from disk
    DiskPartitionBackedSource<Tuple<K, V>> diskSource = new DiskPartitionBackedSource<>(this.getId());
    // hand the source fn to the checkpointed tset, which builds a source tset from it the
    // same way a persisted tset would; this keeps the creation order of tsets in the
    // checkpointed env intact
    KeyedCheckpointedTSet<K, V> restoredTSet =
        new KeyedCheckpointedTSet<>(getTSetEnv(), diskSource, this.getTargetParallelism(), getSchema());
    // adding checkpointed tset to the graph, so that the IDs would not change
    addChildToGraph(restoredTSet);
    // run only the checkpointed tset so that it would populate the inputs in the executor
    getTSetEnv().runOne(restoredTSet);
    return restoredTSet;
}
Also used : DiskPartitionBackedSource(edu.iu.dsc.tws.tset.sources.DiskPartitionBackedSource) KeyedCheckpointedTSet(edu.iu.dsc.tws.tset.sets.batch.KeyedCheckpointedTSet) BatchChkPntEnvironment(edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 90 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

The class SGatherExample, method compute.

/**
 * Sets up the gather example: forces a single target, builds the logical plan
 * and the {@code SGather} operation, initialises the per-source and gather
 * completion flags, installs the results verifier and finally starts one map
 * thread per source task on this worker.
 *
 * @param workerEnv worker environment supplying the communicator and plan inputs
 */
@Override
protected void compute(WorkerEnvironment workerEnv) {
    // this example works with exactly one target
    if (jobParameters.getTargets() != 1) {
        LOG.warning("Setting targets to 1. Found, " + jobParameters.getTargets());
        jobParameters.getTaskStages().set(1, 1);
    }

    LogicalPlanBuilder planBuilder =
        LogicalPlanBuilder.plan(jobParameters.getSources(), jobParameters.getTargets(), workerEnv);

    // create the communication
    gather = new SGather(workerEnv.getCommunicator(), planBuilder,
        MessageTypes.INTEGER_ARRAY, new FinalReduceReceiver());

    Set<Integer> localSources = planBuilder.getSourcesOnThisWorker();
    for (int taskId : localSources) {
        finishedSources.put(taskId, false);
    }

    // no sources here: nothing to wait for on the sending side
    if (localSources.isEmpty()) {
        sourcesDone = true;
    }

    // the first target is not among this worker's logical ids: mark the gather as done
    int firstTarget = planBuilder.getTargets().iterator().next();
    if (!logicalPlan.getLogicalIdsOfWorker(workerId).contains(firstTarget)) {
        gatherDone = true;
    }

    this.resultsVerifier = new ResultsVerifier<>(inputDataArray, (array, ignoredArgs) -> {
        // expected output: one (source index, input array) tuple per source
        List<Tuple<Integer, int[]>> expected = new ArrayList<>();
        for (int sourceIdx = 0; sourceIdx < planBuilder.getSources().size(); sourceIdx++) {
            expected.add(new Tuple<>(sourceIdx, array));
        }
        return expected.iterator();
    }, new IteratorComparator<>(
        new TupleComparator<>(IntComparator.getInstance(), IntArrayComparator.getInstance())));

    // now initialize the workers
    for (int taskId : localSources) {
        // the map thread where data is produced
        new Thread(new BenchWorker.MapWorker(taskId)).start();
    }
}
Also used : IntArrayComparator(edu.iu.dsc.tws.examples.verification.comparators.IntArrayComparator) TIMING_MESSAGE_RECV(edu.iu.dsc.tws.examples.utils.bench.BenchmarkConstants.TIMING_MESSAGE_RECV) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) BulkReceiver(edu.iu.dsc.tws.api.comms.BulkReceiver) Iterator(java.util.Iterator) Set(java.util.Set) LogicalPlanBuilder(edu.iu.dsc.tws.comms.utils.LogicalPlanBuilder) Config(edu.iu.dsc.tws.api.config.Config) Timing(edu.iu.dsc.tws.examples.utils.bench.Timing) MessageTypes(edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes) Logger(java.util.logging.Logger) SGather(edu.iu.dsc.tws.comms.stream.SGather) BenchWorker(edu.iu.dsc.tws.examples.comms.BenchWorker) TIMING_ALL_RECV(edu.iu.dsc.tws.examples.utils.bench.BenchmarkConstants.TIMING_ALL_RECV) ArrayList(java.util.ArrayList) TupleComparator(edu.iu.dsc.tws.examples.verification.comparators.TupleComparator) List(java.util.List) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) IteratorComparator(edu.iu.dsc.tws.examples.verification.comparators.IteratorComparator) IntComparator(edu.iu.dsc.tws.examples.verification.comparators.IntComparator) BenchmarkUtils(edu.iu.dsc.tws.examples.utils.bench.BenchmarkUtils) ResultsVerifier(edu.iu.dsc.tws.examples.verification.ResultsVerifier)

Aggregations

Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)98 Iterator (java.util.Iterator)38 List (java.util.List)35 Logger (java.util.logging.Logger)34 ArrayList (java.util.ArrayList)29 Config (edu.iu.dsc.tws.api.config.Config)27 WorkerEnvironment (edu.iu.dsc.tws.api.resource.WorkerEnvironment)24 Test (org.junit.Test)24 BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment)18 InMessage (edu.iu.dsc.tws.comms.dfw.InMessage)17 HashMap (java.util.HashMap)16 TSetEnvironment (edu.iu.dsc.tws.tset.env.TSetEnvironment)15 JobConfig (edu.iu.dsc.tws.api.JobConfig)14 MessageTypes (edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes)14 JoinedTuple (edu.iu.dsc.tws.api.comms.structs.JoinedTuple)14 ResourceAllocator (edu.iu.dsc.tws.rsched.core.ResourceAllocator)14 SourceTSet (edu.iu.dsc.tws.tset.sets.batch.SourceTSet)13 CommunicationContext (edu.iu.dsc.tws.api.comms.CommunicationContext)11 MessageType (edu.iu.dsc.tws.api.comms.messaging.types.MessageType)11 Comparator (java.util.Comparator)11