use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class SortJoinUtilsTest method leftOuterJoinComparision.
/**
 * Cross-checks the disk based left outer join against the in-memory left outer join.
 * Both joins are fed the same randomly generated relations; the keys of the joined tuples
 * must agree position by position, and the disk based join must yield exactly as many
 * tuples as the in-memory one.
 */
@Test
public void leftOuterJoinComparision() {
  Random rnd = new Random();
  List<Tuple> left = new ArrayList<>();
  List<Tuple> right = new ArrayList<>();
  // 100 tuples per relation, keys drawn from [0, 10) so that keys repeat on both sides
  int remaining = 100;
  while (remaining-- > 0) {
    left.add(Tuple.of(rnd.nextInt(10), rnd.nextInt()));
    right.add(Tuple.of(rnd.nextInt(10), rnd.nextInt()));
  }

  // feed the left relation to its own merger, value packed as bytes
  FSKeyedSortedMerger2 leftMerger = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (int idx = 0; idx < left.size(); idx++) {
    Tuple current = left.get(idx);
    byte[] packed = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) current.getValue());
    leftMerger.add(current.getKey(), packed, packed.length);
    leftMerger.run();
  }

  // same for the right relation
  FSKeyedSortedMerger2 rightMerger = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (int idx = 0; idx < right.size(); idx++) {
    Tuple current = right.get(idx);
    byte[] packed = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) current.getValue());
    rightMerger.add(current.getKey(), packed, packed.length);
    rightMerger.run();
  }

  CommonThreadPool.init(Config.newBuilder().build());
  leftMerger.switchToReading();
  rightMerger.switchToReading();

  // join computed from the mergers' read iterators
  Iterator diskJoined = SortJoinUtils.leftOuterJoin((RestorableIterator) leftMerger.readIterator(), (RestorableIterator) rightMerger.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare));
  // reference join computed from the in-memory lists, ordered by key for comparison
  List<Object> memoryJoined = SortJoinUtils.leftOuterJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
  memoryJoined.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));

  // only the keys are compared, position by position
  int compared = 0;
  for (; diskJoined.hasNext(); compared++) {
    JoinedTuple fromDisk = (JoinedTuple) diskJoined.next();
    JoinedTuple fromMemory = (JoinedTuple) memoryJoined.get(compared);
    Assert.assertEquals(fromDisk.getKey(), fromMemory.getKey());
  }
  Assert.assertEquals(compared, memoryJoined.size());
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class JoinTestUtils method getFullOuterJoined.
/**
 * Builds a fixed list of seven {@link JoinedTuple} rows, including one row with a null key and
 * null right value and one row with a null left value.
 * NOTE(review): presumably the expected full outer join output for the test relations defined
 * elsewhere in this class - confirm against the callers.
 *
 * @return a new mutable list holding the rows in the order they are added below
 */
public static List<Object> getFullOuterJoined() {
  final List<Object> fullOuterJoined = new ArrayList<>();
  fullOuterJoined.add(new JoinedTuple(34, "Robinson", "Clerical"));
  fullOuterJoined.add(new JoinedTuple(33, "Jones", "Engineering"));
  fullOuterJoined.add(new JoinedTuple(34, "Smith", "Clerical"));
  // row without a key and without a right side value
  fullOuterJoined.add(new JoinedTuple(null, "Williams", null));
  fullOuterJoined.add(new JoinedTuple(33, "Heisenberg", "Engineering"));
  fullOuterJoined.add(new JoinedTuple(31, "Rafferty", "Sales"));
  // row without a left side value
  fullOuterJoined.add(new JoinedTuple(35, null, "Marketing"));
  return fullOuterJoined;
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class JoinTestUtils method getRightOuterJoined.
/**
 * Builds a fixed list of six {@link JoinedTuple} rows, the last of which has a null left value.
 * NOTE(review): presumably the expected right outer join output for the test relations defined
 * elsewhere in this class - confirm against the callers.
 *
 * @return a new mutable list holding the rows in the order they are added below
 */
public static List<Object> getRightOuterJoined() {
  final List<Object> rightOuterJoined = new ArrayList<>();
  rightOuterJoined.add(new JoinedTuple(34, "Smith", "Clerical"));
  rightOuterJoined.add(new JoinedTuple(33, "Jones", "Engineering"));
  rightOuterJoined.add(new JoinedTuple(34, "Robinson", "Clerical"));
  rightOuterJoined.add(new JoinedTuple(33, "Heisenberg", "Engineering"));
  rightOuterJoined.add(new JoinedTuple(31, "Rafferty", "Sales"));
  // row without a left side value
  rightOuterJoined.add(new JoinedTuple(35, null, "Marketing"));
  return rightOuterJoined;
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class HashJoinUtils method join.
/**
 * Hash join of two keyed relations, produced lazily by the returned iterator.
 *
 * <p>One relation (the hashing relation) is loaded into an in-memory hash keyed by tuple key
 * for as long as the JVM's free memory stays above 10% of its total memory; the other relation
 * (the probing relation) is streamed against that hash. If the hashing relation is not fully
 * consumed in one pass, the hash is cleared and rebuilt from the remaining hashing tuples and
 * the probing relation is reset and streamed again.
 *
 * <p>For {@code JoinType.LEFT} the right relation is hashed and the left one probes; for every
 * other join type the left relation is hashed and the right one probes. A probing tuple without
 * a match yields {@code (key, value, null)} for LEFT, {@code (key, null, value)} for RIGHT and
 * nothing for any other join type.
 *
 * <p>NOTE(review): when more than one hashing pass is needed, a probing tuple that has no match
 * in the current partial hash produces an outer tuple in that pass, so LEFT/RIGHT joins can
 * emit such tuples once per pass. This is left unchanged here.
 *
 * @param leftIt   left relation
 * @param rightIt  right relation
 * @param joinType join type; decides which relation is hashed and how unmatched probing tuples
 *                 are handled
 * @param keyType  key type handed to the {@code THashMap} that stores the hashed relation
 * @return iterator over the joined tuples; the first tuple is computed during this call
 * @throws Twister2RuntimeException if the hashing relation still has tuples but none could be
 *         loaded because free memory is not above the lower bound
 */
public static Iterator<JoinedTuple> join(ResettableIterator<Tuple<?, ?>> leftIt,
                                         ResettableIterator<Tuple<?, ?>> rightIt,
                                         CommunicationContext.JoinType joinType,
                                         MessageType keyType) {
  // choosing hashing and probing relations
  // if inner join:
  //    hashing = left
  //    probing = right
  // if left join:
  //    hashing = right
  //    probing = left
  // if right join:
  //    hashing = left
  //    probing = right
  final boolean leftJoin = joinType.equals(CommunicationContext.JoinType.LEFT);
  final ResettableIterator<Tuple<?, ?>> hashingRelation = leftJoin ? rightIt : leftIt;
  final ResettableIterator<Tuple<?, ?>> probingRelation = leftJoin ? leftIt : rightIt;

  // set the memory limits based on the heap allocation
  final double lowerMemoryBound = Runtime.getRuntime().totalMemory() * 0.1;

  return new Iterator<JoinedTuple>() {

    private boolean hashingDone;
    private Map<Object, List> keyHash = new THashMap<>(keyType);

    // always keep the nextJoinTuple in memory. hasNext() will use this field
    private JoinedTuple nextJoinTuple;

    // when iterating over the probing relation, current element
    // (which has been returned by next()) will be kept in memory since it should be combined
    // with all the tuples in leftListForCurrentKey. But this has to be done on demand, on next()
    // call of joined iterator.
    private Tuple<?, ?> currentProbingTuple;

    // list of values from the hashing relation that match the key of currentProbingTuple
    private List leftListForCurrentKey;

    // keeping the index of leftListForCurrentKey
    private int leftListIndex;

    {
      // initially do hashing & probing.
      // This block must stay below every field declaration: instance initializers and field
      // initializers run in textual order, so a field initializer placed after this block
      // would overwrite the probing state set up here (previously leftListIndex was reset to 0
      // after the first tuple had been produced, which emitted that tuple twice).
      doHashing();
      doProbing();
    }

    /**
     * This method will perform following actions in order
     * <ol>
     *   <li>Clear existing HashMap</li>
     *   <li>Create HashMap from the hashingRelation till it hit the memory limits</li>
     *   <li>Determine whether the hashingRelation is fully consumed</li>
     * </ol>
     */
    private void doHashing() {
      this.keyHash.clear();

      // building the hash, as long as memory permits
      while (Runtime.getRuntime().freeMemory() > lowerMemoryBound && hashingRelation.hasNext()) {
        Tuple<?, ?> nextLeft = hashingRelation.next();
        keyHash.computeIfAbsent(nextLeft.getKey(), k -> new ArrayList()).add(nextLeft.getValue());
      }

      // determine whether hashRelation is fully consumed
      hashingDone = !hashingRelation.hasNext();

      if (!hashingDone && this.keyHash.isEmpty()) {
        // problem!. We have cleared the old hash, yet there's no free memory available to proceed
        throw new Twister2RuntimeException("Couldn't progress due to memory limitations." + "Available free memory : " + Runtime.getRuntime().freeMemory() + ", Expected free memory : " + lowerMemoryBound);
      }
    }

    /**
     * This method should be guaranteed to create a {@link JoinedTuple}. If a tuple can't be
     * created, caller should determine that before calling this method.
     * Additionally, this method clears the probing state once every hashed value for the
     * current probing tuple has been combined.
     */
    private void progressProbing() {
      Object key = this.currentProbingTuple.getKey();

      // we have interchanged original iterators based on the join type.
      // that should be taken into consideration when creating the JoinedTuple
      Object hashedValue = leftListForCurrentKey.get(leftListIndex);
      Object probingValue = this.currentProbingTuple.getValue();
      Object left = leftJoin ? probingValue : hashedValue;
      Object right = leftJoin ? hashedValue : probingValue;

      this.nextJoinTuple = JoinedTuple.of(key, left, right);

      leftListIndex++;

      // if end of the list has reached, reset everything!
      if (leftListIndex == leftListForCurrentKey.size()) {
        currentProbingTuple = null;
        leftListForCurrentKey = null;
        leftListIndex = 0;
      }
    }

    /**
     * This method iterates through the probing relation until the next joined tuple has been
     * produced or the join is exhausted.
     */
    private void doProbing() {
      // if there is a non null nextJoinTuple, no need of proceeding
      while (this.nextJoinTuple == null) {
        if (this.currentProbingTuple == null) {
          // not in the middle of combining a hashed list: fetch the next probing tuple
          if (probingRelation.hasNext()) {
            this.currentProbingTuple = probingRelation.next();
            this.leftListForCurrentKey = this.keyHash.get(currentProbingTuple.getKey());
            if (this.leftListForCurrentKey == null) {
              // handle left and right joins here
              if (joinType.equals(CommunicationContext.JoinType.LEFT)) {
                this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(), currentProbingTuple.getValue(), null);
              } else if (joinType.equals(CommunicationContext.JoinType.RIGHT)) {
                this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(), null, currentProbingTuple.getValue());
              }
              // any join : We are done with currentProbingTuple
              this.currentProbingTuple = null;
            } else {
              progressProbing();
            }
          } else {
            // probing iterator has reached to an end for current HashMap.
            if (!hashingDone) {
              // clear current hash and reset the probing iterator
              doHashing();
              probingRelation.reset();
            } else {
              // end of join operation. Yay!
              break;
            }
          }
        } else {
          // still in the middle of combining the hashed list of the current probing tuple
          progressProbing();
        }
      }
    }

    @Override
    public boolean hasNext() {
      return this.nextJoinTuple != null;
    }

    @Override
    public JoinedTuple next() {
      if (!hasNext()) {
        throw new Twister2RuntimeException("Join operation has reached to an end. " + "Use hasNext() to check the status.");
      }
      JoinedTuple currentJoinTuple = nextJoinTuple;
      nextJoinTuple = null;

      // create the next JoinTuple before returning
      doProbing();
      return currentJoinTuple;
    }
  };
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class SortJoinUtils method outerJoin.
/**
 * Sort-merge outer join of the left and right relation using the tuple key.
 *
 * <p>Both input lists are sorted in place with {@code comparator} and then merged. Tuples whose
 * keys compare equal are combined pairwise: every left tuple of an equal-key run is joined with
 * every right tuple of the matching run. A left tuple without a match is emitted with a null
 * right value when {@code outerJoinType.includeLeft()} is true; a right tuple without a match
 * is emitted with a null left value when {@code outerJoinType.includeRight()} is true.
 *
 * @param leftRelation  left relation; sorted in place by this call
 * @param rightRelation right relation; sorted in place by this call
 * @param comparator    ordering used both for sorting and for matching tuples
 * @param outerJoinType decides whether unmatched left and/or right tuples are emitted
 * @return the joined tuples as a new list of {@link JoinedTuple}
 */
private static List<Object> outerJoin(List<Tuple> leftRelation,
                                      List<Tuple> rightRelation,
                                      KeyComparatorWrapper comparator,
                                      CommunicationContext.JoinType outerJoinType) {
  int leftIndex = 0;
  int rightIndex = 0;

  leftRelation.sort(comparator);
  rightRelation.sort(comparator);

  List<Object> outPut = new ArrayList<>();
  while (leftIndex < leftRelation.size() && rightIndex < rightRelation.size()) {
    Tuple left = leftRelation.get(leftIndex);
    Tuple right = rightRelation.get(rightIndex);

    int order = comparator.compare(left, right);
    if (order == 0) {
      // find the end (exclusive) of the run of left tuples that match the current right tuple
      int leftEnd = leftIndex + 1;
      while (leftEnd < leftRelation.size()
          && comparator.compare(leftRelation.get(leftEnd), right) == 0) {
        leftEnd++;
      }

      // find the end (exclusive) of the run of right tuples that match the current left tuple
      int rightEnd = rightIndex + 1;
      while (rightEnd < rightRelation.size()
          && comparator.compare(left, rightRelation.get(rightEnd)) == 0) {
        rightEnd++;
      }

      // Emit the full cross product of the two runs. Previously only the first right tuple was
      // combined with every left tuple and the first left tuple with every right tuple, which
      // dropped pairs whenever both runs held more than one tuple. Iterating rights in the
      // outer loop keeps the earlier output order for the one-to-many and many-to-one cases.
      for (int r = rightIndex; r < rightEnd; r++) {
        Tuple matchedRight = rightRelation.get(r);
        for (int l = leftIndex; l < leftEnd; l++) {
          Tuple matchedLeft = leftRelation.get(l);
          outPut.add(new JoinedTuple<>(matchedLeft.getKey(), matchedLeft.getValue(),
              matchedRight.getValue()));
        }
      }

      leftIndex = leftEnd;
      rightIndex = rightEnd;
    } else if (order < 0) {
      // left tuple has no partner on the right
      if (outerJoinType.includeLeft()) {
        outPut.add(new JoinedTuple<>(left.getKey(), left.getValue(), null));
      }
      leftIndex++;
    } else {
      // right tuple has no partner on the left
      if (outerJoinType.includeRight()) {
        outPut.add(new JoinedTuple<>(right.getKey(), null, right.getValue()));
      }
      rightIndex++;
    }
  }

  // whatever remains on either side has no partner
  while (leftIndex < leftRelation.size() && outerJoinType.includeLeft()) {
    Tuple left = leftRelation.get(leftIndex);
    outPut.add(new JoinedTuple<>(left.getKey(), left.getValue(), null));
    leftIndex++;
  }

  while (rightIndex < rightRelation.size() && outerJoinType.includeRight()) {
    Tuple right = rightRelation.get(rightIndex);
    outPut.add(new JoinedTuple<>(right.getKey(), null, right.getValue()));
    rightIndex++;
  }
  return outPut;
}
Aggregations