Use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.
Class SortJoinUtils, method innerJoin.
/**
 * This util can be used to perform disk based inner join operations.
 *
 * <p>The returned iterator is lazy: one joined tuple is prepared ahead of time and the
 * next one is computed on every call to {@code next()}. When the keys of the current left
 * and right tuples are equal, the iterator first pairs the following left tuples with the
 * current right tuple, then pairs the following right tuples with the current left tuple,
 * restoring each input iterator to its restore point afterwards.
 *
 * <p>NOTE(review): the merge step only ever advances the side that compares smaller, so
 * both inputs are presumably expected to be sorted by key in {@code comparator} order;
 * confirm against callers.
 *
 * @param leftIt restorable iterator over the left relation
 * @param rightIt restorable iterator over the right relation
 * @param comparator compares the keys of a left tuple and a right tuple
 * @return an iterator over the joined tuples; its {@code next()} returns {@code null}
 *     once {@code hasNext()} is {@code false}
 */
public static Iterator<JoinedTuple> innerJoin(RestorableIterator<Tuple<?, ?>> leftIt, RestorableIterator<Tuple<?, ?>> rightIt, KeyComparatorWrapper comparator) {
  return new Iterator<JoinedTuple>() {

    // the joined tuple that the next call to next() will hand out, null when exhausted
    private JoinedTuple nextJoinTuple;

    private Tuple currentLeft;

    private Tuple currentRight;

    // backup variables will hold a Tuple temporarily if we had to call .next()
    // once during the join operation before creating an iterator restore point.
    private Tuple backedUpLeft;

    private Tuple backedUpRight;

    // flags to mark the required side of iteration
    private boolean shouldDoLeftIterations = false;

    private boolean shouldDoRightIterations = false;

    // keeps the no of iterations done on each side of the relationship while keeping the
    // other side constant
    private int leftIterations = 0;

    private int rightIterations = 0;

    /**
     * Pairs the next left tuple with the current right tuple, if their keys match.
     *
     * @return the joined tuple, or null if left iterations are not scheduled or are over
     */
    private JoinedTuple doLeftIteration() {
      if (!shouldDoLeftIterations) {
        return null;
      }
      JoinedTuple jtFromLeftIt = null;
      if (leftIt.hasNext()) {
        Tuple l = leftIt.next();
        // the first tuple read in this round is kept as the next left candidate
        if (this.leftIterations == 0) {
          this.backedUpLeft = l;
        }
        if (comparator.compare(l, this.currentRight) == 0) {
          if (this.leftIterations == 0) {
            leftIt.createRestorePoint();
          }
          this.leftIterations++;
          jtFromLeftIt = new JoinedTuple<>(l.getKey(), l.getValue(), this.currentRight.getValue());
        }
      }
      /*
       if this is the end of left iteration(jtFromLeftIt == null), configure the right iterations
       to run next and restore left iterator
       */
      if (jtFromLeftIt == null) {
        this.leftIterations = 0;
        this.shouldDoLeftIterations = false;
        this.shouldDoRightIterations = true;
        if (leftIt.hasRestorePoint()) {
          leftIt.restore();
          leftIt.clearRestorePoint();
        }
      }
      return jtFromLeftIt;
    }

    /**
     * Pairs the current left tuple with the next right tuple, if their keys match.
     *
     * @return the joined tuple, or null if right iterations are not scheduled or are over
     */
    private JoinedTuple doRightIteration() {
      if (!shouldDoRightIterations) {
        return null;
      }
      JoinedTuple jtFromRightIt = null;
      if (rightIt.hasNext()) {
        Tuple r = rightIt.next();
        // the first tuple read in this round is kept as the next right candidate
        if (this.rightIterations == 0) {
          this.backedUpRight = r;
        }
        if (comparator.compare(this.currentLeft, r) == 0) {
          if (this.rightIterations == 0) {
            rightIt.createRestorePoint();
          }
          this.rightIterations++;
          jtFromRightIt = new JoinedTuple<>(r.getKey(), this.currentLeft.getValue(), r.getValue());
        }
      }
      /*
       if this is the end of right iteration(jtFromRightIt == null), stop the right iterations
       and restore right iterator
       */
      if (jtFromRightIt == null) {
        this.rightIterations = 0;
        this.shouldDoRightIterations = false;
        if (rightIt.hasRestorePoint()) {
          rightIt.restore();
          rightIt.clearRestorePoint();
        }
      }
      return jtFromRightIt;
    }

    /**
     * Computes the next joined tuple into nextJoinTuple, leaving it null if there is none.
     */
    private void makeNextJoinTuple() {
      nextJoinTuple = this.doLeftIteration();
      if (nextJoinTuple == null) {
        nextJoinTuple = this.doRightIteration();
      }
      while (nextJoinTuple == null && (this.backedUpLeft != null || leftIt.hasNext()) && (this.backedUpRight != null || rightIt.hasNext())) {
        this.currentLeft = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
        // we used the backup, so setting to null
        this.backedUpLeft = null;
        this.currentRight = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
        this.backedUpRight = null;
        // still we don't need left or right iterations at this point
        this.shouldDoLeftIterations = false;
        this.shouldDoRightIterations = false;
        // compare once and reuse the result for all three branches
        int comparison = comparator.compare(this.currentLeft, this.currentRight);
        if (comparison == 0) {
          this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), this.currentRight.getValue());
          // schedule to run the left iteration next.
          // Left iteration at the end will schedule right iteration
          this.shouldDoLeftIterations = true;
          break;
        } else if (comparison < 0) {
          // left key is smaller: advance left, keep the current right for the next round
          if (leftIt.hasNext()) {
            this.backedUpLeft = leftIt.next();
          }
          this.backedUpRight = this.currentRight;
        } else {
          // right key is smaller: advance right, keep the current left for the next round
          if (rightIt.hasNext()) {
            this.backedUpRight = rightIt.next();
          }
          this.backedUpLeft = this.currentLeft;
        }
      }
    }

    {
      // start by creating the first join tuple
      this.makeNextJoinTuple();
    }

    @Override
    public boolean hasNext() {
      return nextJoinTuple != null;
    }

    @Override
    public JoinedTuple next() {
      // hand out the prepared tuple and eagerly prepare the following one
      JoinedTuple current = nextJoinTuple;
      this.makeNextJoinTuple();
      return current;
    }
  };
}
Use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.
Class SortJoinUtilsTest, method innerJoinWithDiskBasedListComparision.
/**
 * This test compares the results of in memory and disk based inner joins.
 * Purpose is to verify the accuracy of disk based inner join
 *
 * <p>The disk based side is produced by {@code SortJoinUtils.joinWithCache} with
 * {@code JoinType.INNER}; the in memory result is the expected reference. Only the keys
 * of the joined tuples are compared, position by position, after sorting the reference
 * list by key. Input data is random and unseeded, so every run uses different tuples.
 */
@Test
public void innerJoinWithDiskBasedListComparision() {
  List<Tuple> left = new ArrayList<>();
  List<Tuple> right = new ArrayList<>();
  Random random = new Random();
  // 100 tuples per side with keys in [0, 10) so that every key repeats many times
  for (int i = 0; i < 100; i++) {
    left.add(Tuple.of(random.nextInt(10), random.nextInt()));
    right.add(Tuple.of(random.nextInt(10), random.nextInt()));
  }
  FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : left) {
    byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
    fsk1.add(tuple.getKey(), data, data.length);
    fsk1.run();
  }
  FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : right) {
    byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
    fsk2.add(tuple.getKey(), data, data.length);
    fsk2.run();
  }
  CommonThreadPool.init(Config.newBuilder().build());
  fsk1.switchToReading();
  fsk2.switchToReading();
  Iterator iterator = SortJoinUtils.joinWithCache((RestorableIterator) fsk1.readIterator(), (RestorableIterator) fsk2.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare), CommunicationContext.JoinType.INNER, Config.newBuilder().build());
  List<Object> objects = SortJoinUtils.innerJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
  objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));
  int i = 0;
  while (iterator.hasNext()) {
    JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
    // fail with a readable message instead of an IndexOutOfBoundsException
    Assert.assertTrue("disk based join produced more tuples than the in memory join", i < objects.size());
    JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
    // JUnit expects (expected, actual): the in memory result is the reference
    Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
  }
  // the disk based join must not produce fewer tuples either
  Assert.assertEquals(objects.size(), i);
}
Use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.
Class SortJoinUtilsTest, method leftOuterJoinComparision.
/**
 * This test compares the results of in memory and disk based left outer joins.
 * Purpose is to verify the accuracy of disk based left outer join
 *
 * <p>The in memory result is the expected reference. Only the keys of the joined tuples
 * are compared, position by position, after sorting the reference list by key. Input
 * data is random and unseeded, so every run uses different tuples.
 */
@Test
public void leftOuterJoinComparision() {
  List<Tuple> left = new ArrayList<>();
  List<Tuple> right = new ArrayList<>();
  Random random = new Random();
  // 100 tuples per side with keys in [0, 10) so that every key repeats many times
  for (int i = 0; i < 100; i++) {
    left.add(Tuple.of(random.nextInt(10), random.nextInt()));
    right.add(Tuple.of(random.nextInt(10), random.nextInt()));
  }
  FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : left) {
    byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
    fsk1.add(tuple.getKey(), data, data.length);
    fsk1.run();
  }
  FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : right) {
    byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
    fsk2.add(tuple.getKey(), data, data.length);
    fsk2.run();
  }
  CommonThreadPool.init(Config.newBuilder().build());
  fsk1.switchToReading();
  fsk2.switchToReading();
  Iterator iterator = SortJoinUtils.leftOuterJoin((RestorableIterator) fsk1.readIterator(), (RestorableIterator) fsk2.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare));
  List<Object> objects = SortJoinUtils.leftOuterJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
  objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));
  int i = 0;
  while (iterator.hasNext()) {
    JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
    // fail with a readable message instead of an IndexOutOfBoundsException
    Assert.assertTrue("disk based join produced more tuples than the in memory join", i < objects.size());
    JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
    // JUnit expects (expected, actual): the in memory result is the reference
    Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
  }
  // the disk based join must not produce fewer tuples either
  Assert.assertEquals(objects.size(), i);
}
Use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.
Class HadoopTSet, method execute.
/**
 * Builds and runs a batch TSet job that reads text records through a Hadoop
 * {@code TextInputFormat} source and logs every record.
 *
 * <p>The input directory ("/input4") and the source parallelism (4) are fixed in this
 * method.
 */
@Override
public void execute(Config config, JobAPI.Job job, IWorkerController workerController, IPersistentVolume persistentVolume, IVolatileVolume volatileVolume) {
  // looked up as in the original flow; the value is not used further in this method
  int workerId = workerController.getWorkerInfo().getWorkerID();
  WorkerEnvironment workerEnv = WorkerEnvironment.init(config, job, workerController, persistentVolume, volatileVolume);
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

  // Hadoop configuration: HDFS config directory as a resource plus the input directory
  Configuration hadoopConf = new Configuration();
  Path hdfsConfigDir = new Path(HdfsDataContext.getHdfsConfigDirectory(config));
  hadoopConf.addResource(hdfsConfigDir);
  hadoopConf.set(TextInputFormat.INPUT_DIR, "/input4");

  // turns each (key, value) record into a single "key : value" line
  MapFunc<Tuple<LongWritable, Text>, String> toLine = new MapFunc<Tuple<LongWritable, Text>, String>() {
    @Override
    public String map(Tuple<LongWritable, Text> record) {
      return record.getKey().toString() + " : " + record.getValue().toString();
    }
  };
  SourceTSet<String> source = batchEnv.createHadoopSource(hadoopConf, TextInputFormat.class, 4, toLine);

  // drains the incoming iterator, logging every line
  SinkFunc<Iterator<String>> logEachLine = lines -> {
    lines.forEachRemaining(line -> LOG.info("Received value: " + line));
    return true;
  };
  SinkTSet<Iterator<String>> sink = source.direct().sink(logEachLine);
  batchEnv.run(sink);
}
Use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.
Class BranchingExample, method execute.
/**
 * Builds a branching batch TSet graph: one source is mapped into two keyed sets, the two
 * are inner joined, the joined tuples are formatted as strings, and that formatted set
 * is unioned with a prefixed copy of itself before every string is logged.
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  int parallelism = 2;
  SourceTSet<Integer> numbers = dummySource(batchEnv, COUNT, parallelism).setName("src0");

  // both branches key by parity; the right branch shifts the value by one
  KeyedTSet<Integer, Integer> leftSide = numbers.mapToTuple(n -> new Tuple<>(n % 2, n)).setName("left");
  KeyedTSet<Integer, Integer> rightSide = numbers.mapToTuple(n -> new Tuple<>(n % 2, n + 1)).setName("right");

  JoinTLink<Integer, Integer, Integer> joined = leftSide.join(rightSide, CommunicationContext.JoinType.INNER, Integer::compareTo).setName("join");

  // "(key leftValue rightValue)" for every joined tuple
  ComputeTSet<String> formatted = joined.map(jt -> "(" + jt.getKey() + " " + jt.getLeftValue() + " " + jt.getRightValue() + ")").setName("map***");
  // second branch off the formatted set, marking each string with a prefix
  ComputeTSet<String> prefixed = formatted.direct().map(text -> "###" + text).setName("map@@");

  ComputeTSet<String> merged = formatted.union(prefixed).setName("union");
  merged.direct().forEach(line -> LOG.info(line));
}
Aggregations