Usage of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 (by DSC-SPIDAL):
class KPartitionExample, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  // Demonstrates the keyed-partition link: the same link is consumed four
  // different ways (forEach, map, compute, compute-with-collector).
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  int start = env.getWorkerID() * 100;
  SourceTSet<Integer> source = dummySource(env, start, COUNT, PARALLELISM);

  // Key each value by (value % 10) and spread the keys with a load-balancing partitioner.
  KeyedPartitionTLink<Integer, Integer> partitioned =
      source.mapToTuple(i -> new Tuple<>(i % 10, i)).keyedPartition(new LoadBalancePartitioner<>());

  LOG.info("test foreach");
  partitioned.forEach(t -> LOG.info(t.getKey() + "_" + t.getValue()));

  LOG.info("test map");
  partitioned.map(i -> i.toString() + "$$").direct().forEach(s -> LOG.info("map: " + s));

  LOG.info("test compute");
  // Concatenate every tuple arriving at a task into one space-separated string.
  partitioned.compute((ComputeFunc<Iterator<Tuple<Integer, Integer>>, String>) input -> {
    StringBuilder joined = new StringBuilder();
    while (input.hasNext()) {
      joined.append(input.next().toString()).append(" ");
    }
    return joined.toString();
  }).direct().forEach(s -> LOG.info("compute: concat " + s));

  LOG.info("test computec");
  // Collector variant: emit each tuple individually instead of one joined string.
  partitioned.compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String>) (input, output) -> {
    while (input.hasNext()) {
      output.collect(input.next().toString());
    }
  }).direct().forEach(s -> LOG.info("computec: " + s));
}
Usage of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 (by DSC-SPIDAL):
class TSetTeraSort, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  // TeraSort over TSets: generate 100-byte records (10-byte random key +
  // 90-byte constant payload), range-partition on the key's two most
  // significant bytes, gather sorted, and verify global key order.
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  final int parallelism = env.getConfig().getIntegerValue(PARAM_PARALLELISM);
  final int dataSize = env.getConfig().getIntegerValue(PARAM_DATA_SIZE_GB);

  KeyedSourceTSet<byte[], byte[]> keyedSource = env.createKeyedSource(new SourceFunc<Tuple<byte[], byte[]>>() {
    private Queue<byte[]> keys = new LinkedList<>();
    private byte[] data = new byte[90];

    @Override
    public void prepare(TSetContext context) {
      Arrays.fill(data, (byte) 1);
      Random random = new Random();
      // BUGFIX: force long arithmetic with 1024L. The previous expression
      // dataSize * 1024 * 1024 * 1024 was evaluated entirely in int before the
      // promotion to double, so any dataSize >= 2 (GB) overflowed int
      // (2 * 2^30 > Integer.MAX_VALUE) and produced a negative tuple count.
      // Each record is 100 bytes (10-byte key + 90-byte value).
      int noOfTuples = (int) ((dataSize * 1024L * 1024 * 1024 * 1.0d) / parallelism / 100);
      if (context.getIndex() == 0) {
        LOG.info(noOfTuples + " tuples will be produced in each source");
      }
      for (int i = 0; i < noOfTuples; i++) {
        byte[] key = new byte[10];
        random.nextBytes(key);
        keys.add(key);
      }
    }

    @Override
    public boolean hasNext() {
      return !keys.isEmpty();
    }

    @Override
    public Tuple<byte[], byte[]> next() {
      // payload array is shared across tuples; it is write-once (filled in prepare)
      return new Tuple<>(keys.poll(), data);
    }
  }, parallelism);

  // Range partitioner: destination i receives a contiguous slice of the
  // 16-bit key-prefix space, so a per-destination sort yields a global sort.
  keyedSource.keyedGather(new PartitionFunc<byte[]>() {
    protected int keysToOneTask;
    protected int[] destinationsList;

    @Override
    public void prepare(Set<Integer> sources, Set<Integer> destinations) {
      // considering only most significant bytes of array
      int totalPossibilities = 256 * 256;
      this.keysToOneTask = (int) Math.ceil(totalPossibilities / (double) destinations.size());
      this.destinationsList = new int[destinations.size()];
      int index = 0;
      for (int i : destinations) {
        destinationsList[index++] = i;
      }
      // ascending destination order is required for range partitioning
      Arrays.sort(this.destinationsList);
    }

    // Map the two most significant key bytes (big-endian) to a slice index.
    int getIndex(byte[] array) {
      int key = ((array[0] & 0xff) << 8) + (array[1] & 0xff);
      return key / keysToOneTask;
    }

    @Override
    public int partition(int sourceIndex, byte[] val) {
      return this.destinationsList[this.getIndex(val)];
    }

    @Override
    public void commit(int source, int partition) {
      // no per-message state to commit
    }
  }, (left, right) -> ByteArrayComparator.getInstance().compare(left, right)).useDisk().forEach(new ApplyFunc<Tuple<byte[], Iterator<byte[]>>>() {
    private byte[] previousKey;

    @Override
    public void apply(Tuple<byte[], Iterator<byte[]>> data) {
      // verify keys arrive in non-decreasing order; warn on any inversion
      if (previousKey != null) {
        int compare = ByteArrayComparator.getInstance().compare(previousKey, data.getKey());
        if (compare > 0) {
          LOG.warning("Unsorted keys detected. TeraSort has failed. " + compare);
        }
      }
      previousKey = data.getKey();
    }
  });
}
Usage of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 (by DSC-SPIDAL):
class SetSchemaExample, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  // Exercises withSchema() at each stage of a pipeline: source -> map -> keyed,
  // logging the input/output schemas seen inside each prepare() call.
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);

  // Each of the 2 source instances emits exactly one value (1).
  SourceTSet<Integer> source = env.createSource(new BaseSourceFunc<Integer>() {
    private int i = 0;

    @Override
    public void prepare(TSetContext ctx) {
      super.prepare(ctx);
      LOG.info("schemas0: " + ctx.getInputSchema() + " -> " + ctx.getOutputSchema());
    }

    @Override
    public boolean hasNext() {
      return i == 0;
    }

    @Override
    public Integer next() {
      return ++i;
    }
  }, 2).setName("src");

  source.direct().forEach(ii -> LOG.info("out0: " + ii));
  // Same source, now with an explicit integer schema attached.
  source.withSchema(PrimitiveSchemas.INTEGER).direct().forEach(ii -> LOG.info("out1: " + ii));

  // All-reduce the values, then stringify the sum.
  ComputeTSet<String> stringified = source.allReduce(Integer::sum).map(new BaseMapFunc<Integer, String>() {
    @Override
    public void prepare(TSetContext ctx) {
      super.prepare(ctx);
      LOG.info("schemas1: " + ctx.getInputSchema() + " -> " + ctx.getOutputSchema());
    }

    @Override
    public String map(Integer input) {
      return input.toString();
    }
  });

  stringified.direct().forEach(ii -> LOG.info("out2: " + ii));
  stringified.withSchema(PrimitiveSchemas.STRING).direct().forEach(ii -> LOG.info("out3: " + ii));

  // Re-key the strings as (string, parsed-int) tuples.
  KeyedTSet<String, Integer> keyedSet = stringified.mapToTuple(new BaseMapFunc<String, Tuple<String, Integer>>() {
    @Override
    public void prepare(TSetContext ctx) {
      super.prepare(ctx);
      LOG.info("schemas2: " + ctx.getInputSchema() + " -> " + ctx.getOutputSchema());
    }

    @Override
    public Tuple<String, Integer> map(String input) {
      return new Tuple<>(input, Integer.parseInt(input));
    }
  });

  keyedSet.keyedDirect().forEach(ii -> LOG.info("out4: " + ii));
  // Explicit keyed schema (String key, Integer value).
  keyedSet.withSchema(new KeyedSchema(MessageTypes.STRING, MessageTypes.INTEGER)).keyedDirect().forEach(ii -> LOG.info("out5: " + ii));
}
Usage of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 (by DSC-SPIDAL):
class KeyedPipeTLink, method persist.
@Override
public KeyedPersistedTSet<K, V> persist() {
  // Without checkpointing, persisting is a plain doPersist().
  if (!getTSetEnv().isCheckpointingEnabled()) {
    return doPersist();
  }

  // handling checkpointing
  BatchChkPntEnvironment chkEnv = (BatchChkPntEnvironment) getTSetEnv();
  String persistVariableName = this.getId() + "-persisted";
  Boolean alreadyPersisted = chkEnv.initVariable(persistVariableName, false);

  if (!alreadyPersisted) {
    // First execution: persist normally, then record that fact in the
    // checkpoint store so a restart can skip the recomputation.
    KeyedPersistedTSet<K, V> storable = this.doPersist();
    chkEnv.updateVariable(persistVariableName, true);
    chkEnv.commit();
    return storable;
  }

  // Restarted execution: data is already on disk.
  // create a source function with the capability to read from disk
  DiskPartitionBackedSource<Tuple<K, V>> sourceFn = new DiskPartitionBackedSource<>(this.getId());
  // pass the source fn to the checkpointed tset (that would create a source tset from the
  // source function, the same way as a persisted tset. This preserves the order of tsets
  // that are being created in the checkpointed env)
  KeyedCheckpointedTSet<K, V> checkTSet = new KeyedCheckpointedTSet<>(getTSetEnv(), sourceFn, this.getTargetParallelism(), getSchema());
  // adding checkpointed tset to the graph, so that the IDs would not change
  addChildToGraph(checkTSet);
  // run only the checkpointed tset so that it would populate the inputs in the executor
  getTSetEnv().runOne(checkTSet);
  return checkTSet;
}
Usage of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 (by DSC-SPIDAL):
class SGatherExample, method compute.
@Override
protected void compute(WorkerEnvironment workerEnv) {
  // Sets up the SGather benchmark on this worker: builds the logical plan,
  // creates the gather operation, registers a results verifier, and starts
  // one producer thread per source task hosted on this worker.
  // NOTE(review): mutates fields declared outside this chunk (gather,
  // finishedSources, sourcesDone, gatherDone, resultsVerifier) and reads
  // jobParameters, logicalPlan, workerId, inputDataArray — confirm against
  // the enclosing class.
  if (jobParameters.getTargets() != 1) {
    // This example supports exactly one gather target; force stage 1 to size 1.
    LOG.warning("Setting targets to 1. Found, " + jobParameters.getTargets());
    jobParameters.getTaskStages().set(1, 1);
  }
  LogicalPlanBuilder logicalPlanBuilder = LogicalPlanBuilder.plan(jobParameters.getSources(), jobParameters.getTargets(), workerEnv);
  // create the communication
  gather = new SGather(workerEnv.getCommunicator(), logicalPlanBuilder, MessageTypes.INTEGER_ARRAY, new FinalReduceReceiver());
  Set<Integer> tasksOfExecutor = logicalPlanBuilder.getSourcesOnThisWorker();
  // track completion per local source task
  for (int t : tasksOfExecutor) {
    finishedSources.put(t, false);
  }
  if (tasksOfExecutor.size() == 0) {
    // no local sources — nothing for this worker to produce
    sourcesDone = true;
  }
  // only the worker hosting the single target participates in receiving
  if (!logicalPlan.getLogicalIdsOfWorker(workerId).contains(logicalPlanBuilder.getTargets().iterator().next())) {
    gatherDone = true;
  }
  // expected result: one (source-index, inputDataArray) tuple per source
  this.resultsVerifier = new ResultsVerifier<>(inputDataArray, (dataArray, args) -> {
    List<Tuple<Integer, int[]>> listOfArrays = new ArrayList<>();
    for (int i = 0; i < logicalPlanBuilder.getSources().size(); i++) {
      listOfArrays.add(new Tuple<>(i, dataArray));
    }
    return listOfArrays.iterator();
  }, new IteratorComparator<>(new TupleComparator<>(IntComparator.getInstance(), IntArrayComparator.getInstance())));
  // now initialize the workers
  for (int t : tasksOfExecutor) {
    // the map thread where data is produced
    Thread mapThread = new Thread(new BenchWorker.MapWorker(t));
    mapThread.start();
  }
}
Aggregations