use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
the class KeyedCheckpointingExample method execute.
/**
 * Keyed checkpointing example.
 *
 * <p>Builds a keyed source, persists it and logs every persisted tuple. A second keyed
 * source is then run through a compute function that receives the persisted TSet as the
 * named input {@code "in"}; the function pairs elements from both sides for as long as
 * both still have data and the resulting string is logged.
 *
 * @param workerEnvironment the worker environment used to initialise checkpointing
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  BatchChkPntEnvironment env = TSetEnvironment.initCheckpointing(workerEnvironment);
  int count = 5;

  // first pipeline: source -> persist -> log each tuple
  KeyedSourceTSet<String, Integer> firstSource = dummySource(env, count, 0);
  KeyedPersistedTSet<String, Integer> persist = firstSource.keyedDirect().persist();
  persist.keyedDirect().forEach(tuple -> LOG.info(tuple.toString()));

  // second pipeline: source -> compute (with the persisted data as side input) -> log
  KeyedSourceTSet<String, Integer> secondSource = dummySource(env, count, 10);
  secondSource.keyedDirect().compute(
      new BaseComputeFunc<Iterator<Tuple<String, Integer>>, String>() {
        // consumer over the TSet registered under the input name "in"
        private DataPartitionConsumer<Tuple<String, Integer>> sideInput;

        @Override
        public void prepare(TSetContext ctx) {
          super.prepare(ctx);
          sideInput =
              (DataPartitionConsumer<Tuple<String, Integer>>) ctx.getInput("in").getConsumer();
        }

        @Override
        public String compute(Iterator<Tuple<String, Integer>> input) {
          StringBuilder joined = new StringBuilder();
          while (true) {
            // stop as soon as either side is exhausted (main input is checked first)
            if (!input.hasNext() || !sideInput.hasNext()) {
              break;
            }
            Tuple<String, Integer> left = input.next();
            Tuple<String, Integer> right = sideInput.next();
            joined.append("(");
            joined.append(left);
            joined.append(",");
            joined.append(right);
            joined.append(") ");
          }
          return joined.toString();
        }
      }).addInput("in", persist).direct().forEach(line -> LOG.info(line));
}
use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
the class TSetCheckptExample method execute.
/**
 * Checkpointing example: creates an integer source, chains two compute steps, persists
 * the result and finally logs the sum of all persisted values.
 *
 * <p>Both timing log lines report elapsed wall-clock time in milliseconds.
 *
 * @param workerEnvironment the worker environment used to initialise checkpointing
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  BatchChkPntEnvironment env = TSetEnvironment.initCheckpointing(workerEnvironment);
  LOG.info(String.format("Hello from worker %d", env.getWorkerID()));

  // source emitting 0..9999, created with parallelism 4
  SourceTSet<Integer> sourceX = env.createSource(new SourceFunc<Integer>() {
    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < 10000;
    }

    @Override
    public Integer next() {
      return count++;
    }
  }, 4);

  long t1 = System.currentTimeMillis();
  // first compute multiplies by 5, second adds 2
  ComputeTSet<Object> twoComputes = sourceX.direct().compute((itr, c) -> {
    itr.forEachRemaining(i -> {
      c.collect(i * 5);
    });
  }).direct().compute((itr, c) -> {
    itr.forEachRemaining(i -> {
      c.collect((int) i + 2);
    });
  });
  // NOTE(review): per the persist() note below, the work is performed when persist() is
  // called, so this figure presumably covers only building the chain - confirm.
  LOG.info("Time for two computes : " + (System.currentTimeMillis() - t1) + " ms");

  t1 = System.currentTimeMillis();
  PersistedTSet<Object> persist = twoComputes.persist();
  // Reported in milliseconds, like the measurement above. Integer-dividing by 1000 with
  // no unit label made sub-second runs print a bare 0.
  LOG.info("Time for persist : " + (System.currentTimeMillis() - t1) + " ms");

  // When persist() is called, twister2 performs all the computations/communication
  // up to this point and persists the result to disk.
  // This makes previous data garbage collectible and frees some memory.
  // If persist() is called in a checkpointing enabled job, this will create
  // a snapshot at this point and will start straightaway from this point if the
  // job is restarted.
  // Similar to CachedTSets, PersistedTSets can be added as inputs for other TSets and
  // operations.
  persist.reduce((i1, i2) -> {
    return (int) i1 + (int) i2;
  }).forEach(i -> {
    LOG.info("SUM=" + i);
  });
}
use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
the class KeyedBatchIteratorLinkWrapper method persist.
/**
 * Persists the data flowing through this link.
 *
 * <p>With checkpointing disabled this simply delegates to {@code doPersist()}. With
 * checkpointing enabled, a per-link flag named {@code <id>-persisted} decides between
 * persisting now (and committing the flag) and re-reading previously persisted data
 * through a disk-backed source.
 *
 * @return the persisted TSet, or a checkpointed TSet backed by the data on disk
 */
@Override
public KeyedPersistedTSet<K, V> persist() {
  // checkpointing is off: nothing to track
  if (!getTSetEnv().isCheckpointingEnabled()) {
    return doPersist();
  }

  String flagName = this.getId() + "-persisted";
  BatchChkPntEnvironment checkpointEnv = (BatchChkPntEnvironment) getTSetEnv();
  Boolean alreadyPersisted = checkpointEnv.initVariable(flagName, false);

  if (!alreadyPersisted) {
    // first time through: persist, then record and commit that fact
    KeyedPersistedTSet<K, V> freshlyPersisted = this.doPersist();
    checkpointEnv.updateVariable(flagName, true);
    checkpointEnv.commit();
    return freshlyPersisted;
  }

  // source function with the capability to read from disk
  DiskPartitionBackedSource<Tuple<K, V>> diskSource =
      new DiskPartitionBackedSource<>(this.getId());

  // the checkpointed tset builds a source tset from the source function, the same way a
  // persisted tset would; this preserves the order of tsets created in the checkpointed env
  KeyedCheckpointedTSet<K, V> restored = new KeyedCheckpointedTSet<>(
      getTSetEnv(), diskSource, this.getTargetParallelism(), getSchema());

  // register it in the graph so that the IDs would not change
  addChildToGraph(restored);

  // run only the checkpointed tset so that it populates the inputs in the executor
  getTSetEnv().runOne(restored);
  return restored;
}
use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
the class RowBatchTLinkImpl method persist.
/**
 * Similar to cache, but stores data on disk rather than in memory.
 *
 * <p>With checkpointing disabled this simply delegates to {@code doPersist()}. With
 * checkpointing enabled, a per-link flag named {@code <id>-persisted} decides between
 * persisting now (and committing the flag) and re-reading previously persisted rows
 * through a disk-backed source.
 *
 * @return the persisted TSet, or a checkpointed TSet backed by the data on disk
 */
public StorableTBase<Row> persist() {
  // checkpointing is off: nothing to track
  if (!getTSetEnv().isCheckpointingEnabled()) {
    return doPersist();
  }

  String flagName = this.getId() + "-persisted";
  BatchChkPntEnvironment checkpointEnv = (BatchChkPntEnvironment) getTSetEnv();
  Boolean alreadyPersisted = checkpointEnv.initVariable(flagName, false);

  if (!alreadyPersisted) {
    // first time through: persist, then record and commit that fact
    StorableTBase<Row> freshlyPersisted = this.doPersist();
    checkpointEnv.updateVariable(flagName, true);
    checkpointEnv.commit();
    return freshlyPersisted;
  }

  // source function with the capability to read from disk
  DiskPartitionBackedSource<Row> diskSource = new DiskPartitionBackedSource<>(this.getId());

  // the checkpointed tset builds a source tset from the source function, the same way a
  // persisted tset would; this preserves the order of tsets created in the checkpointed env
  CheckpointedTSet<Row> restored = new CheckpointedTSet<>(
      getTSetEnv(), diskSource, this.getTargetParallelism(), getSchema());

  // register it in the graph so that the IDs would not change
  addChildToGraph(restored);

  // run only the checkpointed tset so that it populates the inputs in the executor
  getTSetEnv().runOne(restored);
  return restored;
}
use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
the class BatchTLinkImpl method persist.
/**
 * Similar to cache, but stores data on disk rather than in memory.
 *
 * <p>With checkpointing disabled this simply delegates to {@code doPersist()}. With
 * checkpointing enabled, a per-link flag named {@code <id>-persisted} decides between
 * persisting now (and committing the flag) and re-reading previously persisted data
 * through a disk-backed source.
 *
 * @return the persisted TSet, or a checkpointed TSet backed by the data on disk
 */
@Override
public StorableTBase<T0> persist() {
  // checkpointing is off: nothing to track
  if (!getTSetEnv().isCheckpointingEnabled()) {
    return doPersist();
  }

  String flagName = this.getId() + "-persisted";
  BatchChkPntEnvironment checkpointEnv = (BatchChkPntEnvironment) getTSetEnv();
  Boolean alreadyPersisted = checkpointEnv.initVariable(flagName, false);

  if (!alreadyPersisted) {
    // first time through: persist, then record and commit that fact
    StorableTBase<T0> freshlyPersisted = this.doPersist();
    checkpointEnv.updateVariable(flagName, true);
    checkpointEnv.commit();
    return freshlyPersisted;
  }

  // source function with the capability to read from disk
  DiskPartitionBackedSource<T0> diskSource = new DiskPartitionBackedSource<>(this.getId());

  // the checkpointed tset builds a source tset from the source function, the same way a
  // persisted tset would; this preserves the order of tsets created in the checkpointed env
  CheckpointedTSet<T0> restored = new CheckpointedTSet<>(
      getTSetEnv(), diskSource, this.getTargetParallelism(), getSchema());

  // register it in the graph so that the IDs would not change
  addChildToGraph(restored);

  // run only the checkpointed tset so that it populates the inputs in the executor
  getTSetEnv().runOne(restored);
  return restored;
}
Aggregations