use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class KGatherExample method execute.
/**
 * Exercises the operations offered by a keyed-gather link.
 *
 * <p>Integers from {@code dummySource} are keyed by {@code i % 10} and gathered per key; the
 * resulting link is then run through {@code forEach}, {@code map} and both flavours of
 * {@code compute}, logging the outcome of each. Finally the same {@code forEach} check is
 * repeated with {@code byte[]} keys built from a string source.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  // Offset the generated values by worker so each worker feeds a different range.
  int start = env.getWorkerID() * 100;
  SourceTSet<Integer> src = dummySource(env, start, COUNT, PARALLELISM);
  KeyedGatherTLink<Integer, Integer> klink =
      src.mapToTuple(i -> new Tuple<>(i % 10, i)).keyedGather();

  LOG.info("test foreach");
  klink.forEach((ApplyFunc<Tuple<Integer, Iterator<Integer>>>) data ->
      LOG.info(data.getKey() + " -> " + iterToString(data.getValue())));

  LOG.info("test map");
  klink.map((MapFunc<Tuple<Integer, Iterator<Integer>>, String>) input -> {
    // Sum every value gathered under this key; fetch the iterator once and drain it.
    Iterator<Integer> values = input.getValue();
    int s = 0;
    while (values.hasNext()) {
      s += values.next();
    }
    return input.getKey() + " -> " + s;
  }).direct().forEach(s -> LOG.info("map: " + s));

  LOG.info("test compute");
  klink.compute((ComputeFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String>) input -> {
    // Concatenate all (key -> values) groups into a single string.
    StringBuilder s = new StringBuilder();
    while (input.hasNext()) {
      Tuple<Integer, Iterator<Integer>> next = input.next();
      s.append(" [").append(next.getKey()).append(" -> ")
          .append(iterToString(next.getValue())).append("] ");
    }
    return s.toString();
  }).direct().forEach(s -> LOG.info("compute: concat " + s));

  LOG.info("test computec");
  klink.compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String>)
      (input, output) -> {
        // Emit one output record per key group instead of a single concatenated string.
        while (input.hasNext()) {
          Tuple<Integer, Iterator<Integer>> next = input.next();
          output.collect(next.getKey() + " -> " + iterToString(next.getValue()));
        }
      }).direct().forEach(s -> LOG.info("computec: " + s));

  // Test byte[] key value pairs for KeyedGather
  SourceTSet<String> srcString = dummyStringSource(env, 25, PARALLELISM);
  // Encode and decode the keys with an explicit charset: the bytes are produced in one place and
  // turned back into text in another, so relying on the platform default charset could garble
  // the keys if the JVM defaults differ.
  KeyedGatherTLink<byte[], Integer> keyedGatherLink = srcString
      .mapToTuple(s -> new Tuple<>(s.getBytes(java.nio.charset.StandardCharsets.UTF_8), 1))
      .keyedGather();

  LOG.info("test foreach");
  keyedGatherLink.forEach((ApplyFunc<Tuple<byte[], Iterator<Integer>>>) data ->
      LOG.info(new String(data.getKey(), java.nio.charset.StandardCharsets.UTF_8)
          + " -> " + iterToString(data.getValue())));
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class KPartitionExample method execute.
/**
 * Exercises the operations offered by a keyed-partition link.
 *
 * <p>Integers from {@code dummySource} are keyed by {@code n % 10} and partitioned with a
 * {@code LoadBalancePartitioner}; the link is then run through {@code forEach}, {@code map}
 * and both flavours of {@code compute}, logging the outcome of each.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  final BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  final int firstValue = batchEnv.getWorkerID() * 100;
  final SourceTSet<Integer> numbers = dummySource(batchEnv, firstValue, COUNT, PARALLELISM);
  final KeyedPartitionTLink<Integer, Integer> partitioned = numbers
      .mapToTuple(n -> new Tuple<>(n % 10, n))
      .keyedPartition(new LoadBalancePartitioner<>());

  LOG.info("test foreach");
  partitioned.forEach(pair -> LOG.info(pair.getKey() + "_" + pair.getValue()));

  LOG.info("test map");
  partitioned.map(pair -> pair.toString() + "$$")
      .direct()
      .forEach(line -> LOG.info("map: " + line));

  LOG.info("test compute");
  partitioned.compute((ComputeFunc<Iterator<Tuple<Integer, Integer>>, String>) tuples -> {
    // Join the string form of every tuple, each followed by a single space.
    final StringBuilder joined = new StringBuilder();
    tuples.forEachRemaining(pair -> joined.append(pair.toString()).append(" "));
    return joined.toString();
  }).direct().forEach(line -> LOG.info("compute: concat " + line));

  LOG.info("test computec");
  // Collector variant: emit one record per tuple rather than one concatenated string.
  partitioned.compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String>)
      (tuples, collector) ->
          tuples.forEachRemaining(pair -> collector.collect(pair.toString()))
  ).direct().forEach(line -> LOG.info("computec: " + line));
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class PersistExample method execute.
/**
 * Persists the output of three different links built on the same source and runs
 * {@code runOps} on each persisted TSet.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  final BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  final SourceTSet<Integer> numbers =
      dummySource(batchEnv, batchEnv.getWorkerID() * 100, COUNT, PARALLELISM);

  // direct().persist() -- noted by the original author as having IterLink semantics
  final PersistedTSet<Integer> afterDirect = numbers.direct().persist();
  runOps(batchEnv, afterDirect);

  // reduce().persist() -- noted by the original author as having SingleLink semantics
  LOG.info("test persist after reduce");
  final PersistedTSet<Integer> afterReduce = numbers.reduce(Integer::sum).persist();
  runOps(batchEnv, afterReduce);

  // gather().persist() -- noted by the original author as a TupleValueIterLink
  LOG.info("test persist after gather");
  final PersistedTSet<Integer> afterGather = numbers.gather().persist();
  runOps(batchEnv, afterGather);
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class HelloTwister2 method execute.
/**
 * Initialises the batch TSet environment for this worker and logs a greeting that
 * contains the worker's ID.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  final int workerId = TSetEnvironment.initBatch(workerEnv).getWorkerID();
  LOG.info(String.format("Hello from worker %d", workerId));
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class TSetTeraSort method execute.
/**
 * Runs a TeraSort-style job on the TSet API.
 *
 * <p>A keyed source generates tuples with a random 10-byte key and a fixed 90-byte value. The
 * tuples are keyed-gathered using a range partitioner over the two leading key bytes and a
 * byte-array comparator, and the gathered output is checked for key order: a warning is logged
 * whenever a key compares lower than the key seen immediately before it.
 *
 * <p>Parallelism and total data size (in GB) are read from the environment config via
 * {@code PARAM_PARALLELISM} and {@code PARAM_DATA_SIZE_GB}.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  final int parallelism = env.getConfig().getIntegerValue(PARAM_PARALLELISM);
  final int dataSize = env.getConfig().getIntegerValue(PARAM_DATA_SIZE_GB);

  KeyedSourceTSet<byte[], byte[]> keyedSource =
      env.createKeyedSource(new SourceFunc<Tuple<byte[], byte[]>>() {
        private Queue<byte[]> keys = new LinkedList<>();
        // A single 90-byte value array; the same instance is handed out with every tuple.
        private byte[] data = new byte[90];

        /** Fills the value array and pre-generates every random key this source will emit. */
        @Override
        public void prepare(TSetContext context) {
          Arrays.fill(data, (byte) 1);
          Random random = new Random();
          // Promote to long before multiplying: in int arithmetic dataSize * 1024^3 overflows
          // for any dataSize >= 2, which produced a wrong (even negative) tuple count.
          long totalBytes = dataSize * 1024L * 1024L * 1024L;
          // Each tuple is a 10-byte key plus a 90-byte value, hence the division by 100.
          int noOfTuples = (int) ((totalBytes * 1.0d) / parallelism / 100);
          if (context.getIndex() == 0) {
            LOG.info(noOfTuples + " tuples will be produced in each source");
          }
          for (int i = 0; i < noOfTuples; i++) {
            byte[] key = new byte[10];
            random.nextBytes(key);
            keys.add(key);
          }
        }

        @Override
        public boolean hasNext() {
          return !keys.isEmpty();
        }

        @Override
        public Tuple<byte[], byte[]> next() {
          return new Tuple<>(keys.poll(), data);
        }
      }, parallelism);

  keyedSource.keyedGather(new PartitionFunc<byte[]>() {
    // Number of distinct two-byte key prefixes assigned to each destination.
    protected int keysToOneTask;
    // Destination ids in ascending order; indexed by key range.
    protected int[] destinationsList;

    /** Splits the 65536 possible two-byte prefixes evenly across the sorted destinations. */
    @Override
    public void prepare(Set<Integer> sources, Set<Integer> destinations) {
      // considering only most significant bytes of array
      int totalPossibilities = 256 * 256;
      this.keysToOneTask = (int) Math.ceil(totalPossibilities / (double) destinations.size());
      this.destinationsList = new int[destinations.size()];
      int index = 0;
      for (int i : destinations) {
        destinationsList[index++] = i;
      }
      // Sorted so that lower key ranges map to lower destination ids.
      Arrays.sort(this.destinationsList);
    }

    /** Maps the unsigned value of the first two key bytes to a destination slot. */
    int getIndex(byte[] array) {
      int key = ((array[0] & 0xff) << 8) + (array[1] & 0xff);
      return key / keysToOneTask;
    }

    @Override
    public int partition(int sourceIndex, byte[] val) {
      return this.destinationsList[this.getIndex(val)];
    }

    @Override
    public void commit(int source, int partition) {
      // Nothing to do on commit.
    }
  }, (left, right) -> ByteArrayComparator.getInstance().compare(left, right))
      .useDisk()
      .forEach(new ApplyFunc<Tuple<byte[], Iterator<byte[]>>>() {
        // Key of the previously applied tuple; null until the first tuple arrives.
        private byte[] previousKey;

        /** Warns when the incoming key sorts before the previously seen key. */
        @Override
        public void apply(Tuple<byte[], Iterator<byte[]>> data) {
          if (previousKey != null) {
            int compare = ByteArrayComparator.getInstance().compare(previousKey, data.getKey());
            if (compare > 0) {
              LOG.warning("Unsorted keys detected. TeraSort has failed. " + compare);
            }
          }
          previousKey = data.getKey();
        }
      });
}
Aggregations