Use of edu.iu.dsc.tws.api.dataset.DataPartitionConsumer in project twister2 by DSC-SPIDAL.
From the class BufferedCollectionPartition, method getConsumer:
@Override
public DataPartitionConsumer<T> getConsumer() {
  final Iterator<T> inMemoryIterator = this.dataList.iterator();
  final Iterator<Path> fileIterator = this.filesList.iterator();
  final Iterator<byte[]> buffersIterator = this.buffers.iterator();
  return new DataPartitionConsumer<T>() {
    // frames already loaded from a temp file, served before moving to the next source
    private Queue<byte[]> bufferFromDisk = new LinkedList<>();

    @Override
    public boolean hasNext() {
      return inMemoryIterator.hasNext() || fileIterator.hasNext()
          || buffersIterator.hasNext() || !bufferFromDisk.isEmpty();
    }

    @Override
    public T next() {
      if (!this.bufferFromDisk.isEmpty()) {
        return (T) dataType.getDataPacker().unpackFromByteArray(this.bufferFromDisk.poll());
      } else if (inMemoryIterator.hasNext()) {
        return inMemoryIterator.next();
      } else if (fileIterator.hasNext()) {
        Path nextFile = fileIterator.next();
        // try-with-resources closes the stream even on failure (the original leaked it)
        try (DataInputStream reader = new DataInputStream(fileSystem.open(nextFile))) {
          long noOfFrames = reader.readLong();
          for (long i = 0; i < noOfFrames; i++) {
            int size = reader.readInt();
            byte[] data = new byte[size];
            int readSoFar = 0;
            while (readSoFar < size) {
              int readSize = reader.read(data, readSoFar, data.length - readSoFar);
              if (readSize == -1) {
                throw new Twister2RuntimeException("Reached the EOF unexpectedly");
              }
              readSoFar += readSize;
            }
            this.bufferFromDisk.add(data);
          }
          return next();
        } catch (IOException e) {
          throw new Twister2RuntimeException(
              "Failed to read value from the temp file : " + nextFile.toString(), e);
        }
      } else if (buffersIterator.hasNext()) {
        return (T) dataType.getDataPacker().unpackFromByteArray(buffersIterator.next());
      }
      throw new Twister2RuntimeException("No more frames available in this partition");
    }
  };
}
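A caller drains the partition through the consumer's hasNext/next pair; disk-backed frames are paged in transparently by the logic above. A minimal usage sketch, assuming a hypothetical BufferedCollectionPartition<String> named partition that already holds data:

DataPartitionConsumer<String> consumer = partition.getConsumer();
while (consumer.hasNext()) {
  // values arrive in order: in-memory list, spilled files, then raw buffers
  String value = consumer.next();
  System.out.println(value);
}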
Use of edu.iu.dsc.tws.api.dataset.DataPartitionConsumer in project twister2 by DSC-SPIDAL.
From the class KeyedCheckpointingExample, method execute:
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  BatchChkPntEnvironment env = TSetEnvironment.initCheckpointing(workerEnvironment);
  int count = 5;
  KeyedSourceTSet<String, Integer> src = dummySource(env, count, 0);
  // persist the first source so it can be fed into the next computation as an input
  KeyedPersistedTSet<String, Integer> persist = src.keyedDirect().persist();
  persist.keyedDirect().forEach(i -> LOG.info(i.toString()));

  KeyedSourceTSet<String, Integer> src1 = dummySource(env, count, 10);
  src1.keyedDirect().compute(new BaseComputeFunc<Iterator<Tuple<String, Integer>>, String>() {
    private DataPartitionConsumer<Tuple<String, Integer>> in;

    @Override
    public void prepare(TSetContext ctx) {
      super.prepare(ctx);
      // fetch the persisted TSet registered below under the name "in"
      in = (DataPartitionConsumer<Tuple<String, Integer>>) ctx.getInput("in").getConsumer();
    }

    @Override
    public String compute(Iterator<Tuple<String, Integer>> input) {
      // pair each incoming tuple with the next tuple from the persisted input
      StringBuilder out = new StringBuilder();
      while (input.hasNext() && in.hasNext()) {
        Tuple<String, Integer> t = input.next();
        Tuple<String, Integer> next = in.next();
        out.append("(").append(t).append(",").append(next).append(") ");
      }
      return out.toString();
    }
  }).addInput("in", persist).direct().forEach(i -> LOG.info(i));
}
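The dummySource helper is not shown on this page. A plausible sketch that matches the signature used above — hypothetical, and it assumes the environment exposes a createKeyedSource counterpart to the createSource call seen in TSetCachingExample below:

private KeyedSourceTSet<String, Integer> dummySource(BatchChkPntEnvironment env,
                                                     int count, int init) {
  return env.createKeyedSource(new SourceFunc<Tuple<String, Integer>>() {
    private int i = init;

    @Override
    public boolean hasNext() {
      return i < init + count;
    }

    @Override
    public Tuple<String, Integer> next() {
      Tuple<String, Integer> t = new Tuple<>("key-" + i, i);
      i++;
      return t;
    }
  }, 1);  // parallelism is a guess; the original helper may differ
}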
Use of edu.iu.dsc.tws.api.dataset.DataPartitionConsumer in project twister2 by DSC-SPIDAL.
From the class KeyedAddInputsExample, method execute:
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  KeyedSourceTSet<String, Integer> src0 = dummyKeyedSource(env, COUNT, PARALLELISM);
  KeyedSourceTSet<String, Integer> src1 = dummyKeyedSourceOther(env, COUNT, PARALLELISM);
  KeyedCachedTSet<String, Integer> cache0 = src0.cache();
  KeyedCachedTSet<String, Integer> cache1 = src1.cache();

  ComputeTSet<String> comp = cache0.keyedDirect().compute(
      new BaseComputeCollectorFunc<Iterator<Tuple<String, Integer>>, String>() {
        private Map<String, Integer> input1 = new HashMap<>();

        @Override
        public void prepare(TSetContext ctx) {
          super.prepare(ctx);
          // populate the hashmap with values from the input
          DataPartitionConsumer<Tuple<String, Integer>> part =
              (DataPartitionConsumer<Tuple<String, Integer>>) getInput("input").getConsumer();
          while (part.hasNext()) {
            Tuple<String, Integer> next = part.next();
            input1.put(next.getKey(), next.getValue());
          }
        }

        @Override
        public void compute(Iterator<Tuple<String, Integer>> input,
                            RecordCollector<String> output) {
          // join the two cached sources on their keys
          while (input.hasNext()) {
            Tuple<String, Integer> next = input.next();
            output.collect(next.getKey() + " -> " + next.getValue()
                + ", " + input1.get(next.getKey()));
          }
        }
      }).addInput("input", cache1);

  comp.direct().forEach(i -> LOG.info("comp: " + i));

  LOG.info("Test lazy cache!");
  ComputeTSet<Object> forEach = comp.direct().lazyForEach(i -> LOG.info("comp-lazy: " + i));
  for (int i = 0; i < 4; i++) {
    LOG.info("iter: " + i);
    env.eval(forEach);
    try {
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }
  env.finishEval(forEach);
}
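The tail of this example contrasts eager and lazy evaluation. Condensed, the pattern it demonstrates is:

// Eager: forEach builds and runs the graph immediately.
comp.direct().forEach(i -> LOG.info(i));

// Lazy: lazyForEach only builds the plan; each env.eval(plan) runs it once,
// and env.finishEval(plan) finishes the lazy execution after the last run.
ComputeTSet<Object> plan = comp.direct().lazyForEach(i -> LOG.info(i));
env.eval(plan);
env.finishEval(plan);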
Use of edu.iu.dsc.tws.api.dataset.DataPartitionConsumer in project twister2 by DSC-SPIDAL.
From the class TSetCachingExample, method execute:
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  LOG.info(String.format("Hello from worker %d", env.getWorkerID()));

  SourceTSet<Integer> sourceX = env.createSource(new SourceFunc<Integer>() {
    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < 10;
    }

    @Override
    public Integer next() {
      return count++;
    }
  }, 4);

  ComputeTSet<Object> twoComputes = sourceX.direct().compute((itr, c) -> {
    itr.forEachRemaining(i -> {
      c.collect(i * 5);
    });
  }).direct().compute((itr, c) -> {
    itr.forEachRemaining(i -> {
      c.collect((int) i + 2);
    });
  });

  CachedTSet<Object> cached = twoComputes.cache();
  // when cache is called, twister2 will run everything up to this point and cache the
  // result in memory. Cached TSets can be added as inputs for other TSets and operations.

  SourceTSet<Integer> sourceZ = env.createSource(new SourceFunc<Integer>() {
    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < 10;
    }

    @Override
    public Integer next() {
      return count++;
    }
  }, 4);

  ComputeTSet<Integer> calc = sourceZ.direct().compute(
      new ComputeCollectorFunc<Iterator<Integer>, Integer>() {
        private DataPartitionConsumer<Integer> xValues;

        @Override
        public void prepare(TSetContext context) {
          this.xValues = (DataPartitionConsumer<Integer>) context.getInput("x").getConsumer();
        }

        @Override
        public void compute(Iterator<Integer> zValues, RecordCollector<Integer> output) {
          // zip the cached x pipeline with the z source, element by element
          while (zValues.hasNext()) {
            output.collect(xValues.next() + zValues.next());
          }
        }
      });
  calc.addInput("x", cached);
  calc.direct().forEach(i -> {
    LOG.info("(x * 5) + 2 + z = " + i);
  });
}
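For reference: each parallel instance of both sources emits 0 through 9, so the i-th value collected per instance works out to (i * 5) + 2 + i = 6i + 2, i.e. 2, 8, 14, ..., 56, assuming the direct links preserve element order.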
Use of edu.iu.dsc.tws.api.dataset.DataPartitionConsumer in project twister2 by DSC-SPIDAL.
From the class AddInputsExample, method execute:
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  // source with 25..29
  SourceTSet<Integer> baseSrc = dummySourceOther(env, COUNT, PARALLELISM);
  // source with 0..4
  SourceTSet<Integer> src = dummySource(env, COUNT, PARALLELISM);
  CachedTSet<Integer> srcCache = src.direct().cache().setName("src");
  CachedTSet<Integer> baseSrcCache = baseSrc.direct().cache().setName("baseSrc");

  // make src an input of baseSrc, then add the two element-wise
  CachedTSet<Integer> out = baseSrcCache.direct().compute(
      new BaseComputeCollectorFunc<Iterator<Integer>, Integer>() {
        @Override
        public void compute(Iterator<Integer> input, RecordCollector<Integer> collector) {
          DataPartitionConsumer<Integer> c1 =
              (DataPartitionConsumer<Integer>) getInput("src-input").getConsumer();
          while (input.hasNext() && c1.hasNext()) {
            collector.collect(input.next() + c1.next());
          }
        }
      }).addInput("src-input", srcCache).lazyCache();

  for (int i = 0; i < 4; i++) {
    LOG.info("iter: " + i);
    // evaluate the lazy cache and write the result back into baseSrcCache
    env.evalAndUpdate(out, baseSrcCache);
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }
  baseSrcCache.direct().forEach(l -> LOG.info(l.toString()));
}
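The dummySource and dummySourceOther helpers are not shown on this page. Given the comments above (0..4 and 25..29), a plausible sketch — hypothetical, reusing the createSource call demonstrated in TSetCachingExample — is:

private SourceTSet<Integer> dummySource(BatchEnvironment env, int count, int parallelism) {
  return env.createSource(new SourceFunc<Integer>() {
    private int i = 0;

    @Override
    public boolean hasNext() {
      return i < count;
    }

    @Override
    public Integer next() {
      return i++;  // emits 0..count-1
    }
  }, parallelism);
}

// dummySourceOther would be identical but offset, emitting 25..25+count-1.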