Use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.
The class HashJoinUtilsTest, method leftJoinDiskTest.
/**
 * Left join over two disk-backed mergers.
 *
 * <p>The left relation holds every key in {@code [0, noOfTuples)} with value 1; the right
 * relation holds a random subset of those keys with value 2. Every joined tuple must carry the
 * left value, and the right value must be 2 when the key exists on the right and {@code null}
 * otherwise. Finally, every left key must have been seen.</p>
 */
@Test
public void leftJoinDiskTest() {
  int noOfTuples = 1000;
  Random random = new Random(System.currentTimeMillis());
  List<Integer> keys1 = new ArrayList<>();
  List<Integer> keys2 = new ArrayList<>();
  for (int i = 0; i < noOfTuples; i++) {
    keys1.add(i);
    if (random.nextBoolean()) {
      keys2.add(i);
    }
  }
  Collections.shuffle(keys1);
  Collections.shuffle(keys2);

  FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left",
      MessageTypes.INTEGER, MessageTypes.INTEGER);
  FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right",
      MessageTypes.INTEGER, MessageTypes.INTEGER);

  // Serialized payloads: every left tuple has value 1, every right tuple has value 2.
  byte[] leftValueBytes = ByteBuffer.wrap(new byte[4]).putInt(1).array();
  byte[] rightValueBytes = ByteBuffer.wrap(new byte[4]).putInt(2).array();

  try {
    for (Integer key : keys1) {
      fsMerger1.add(key, leftValueBytes, Integer.BYTES);
      fsMerger1.run();
    }
    for (Integer key : keys2) {
      fsMerger2.add(key, rightValueBytes, Integer.BYTES);
      fsMerger2.run();
    }
    fsMerger1.switchToReading();
    fsMerger2.switchToReading();

    ResettableIterator it1 = fsMerger1.readIterator();
    ResettableIterator it2 = fsMerger2.readIterator();
    Iterator<JoinedTuple> iterator = HashJoinUtils.leftJoin(it1, it2, MessageTypes.INTEGER);

    Set<Integer> keysReceived = new HashSet<>();
    Set<Integer> rightKeysLookup = new HashSet<>(keys2);
    while (iterator.hasNext()) {
      JoinedTuple joinedTuple = iterator.next();
      Assert.assertEquals(1, joinedTuple.getLeftValue());
      if (rightKeysLookup.contains(joinedTuple.getKey())) {
        Assert.assertEquals(2, joinedTuple.getRightValue());
      } else {
        Assert.assertNull(joinedTuple.getRightValue());
      }
      keysReceived.add((Integer) joinedTuple.getKey());
    }
    Assert.assertEquals(noOfTuples, keysReceived.size());
  } finally {
    // Clean up even when an assertion fails; the other join tests in this class reuse the same
    // "/tmp" folder and operation names, so leftovers from a failed run should not linger.
    fsMerger1.clean();
    fsMerger2.clean();
  }
}
Use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.
The class HashJoinUtils, method join.
/**
 * Disk based hash join supporting inner, left and right joins.
 *
 * <p>One relation (the "hashing" relation) is loaded into an in-memory hash table for as long
 * as free heap stays above 10% of the JVM's current total memory. The other relation (the
 * "probing" relation) is streamed against that table. When the probing relation is exhausted
 * and the hashing relation still has tuples left, the table is rebuilt from the next chunk and
 * the probing relation is reset and scanned again.</p>
 *
 * <p>For {@code LEFT} the right relation is hashed and the left one probes; for every other
 * join type the left relation is hashed and the right one probes. A probing tuple with no
 * match produces a tuple with a {@code null} opposite side for {@code LEFT} and {@code RIGHT},
 * and produces nothing for any other join type.</p>
 *
 * <p>NOTE(review): the unmatched-tuple decision is made against the currently loaded chunk
 * only. If the hashing relation needs more than one chunk, an outer join looks like it would
 * emit a null-sided tuple once per chunk for probing tuples whose match lives in a different
 * chunk (or nowhere) - confirm whether callers rely on single-chunk hashing.</p>
 *
 * @param leftIt resettable iterator over the left relation
 * @param rightIt resettable iterator over the right relation
 * @param joinType join type; only {@code LEFT} and {@code RIGHT} get special handling
 * @param keyType key type handed to the {@code THashMap} that backs the hash table
 * @return a lazy iterator over the joined tuples; the first tuple is computed eagerly
 * @throws Twister2RuntimeException if not even one tuple can be hashed under the memory bound
 */
public static Iterator<JoinedTuple> join(ResettableIterator<Tuple<?, ?>> leftIt,
                                         ResettableIterator<Tuple<?, ?>> rightIt,
                                         CommunicationContext.JoinType joinType,
                                         MessageType keyType) {
  final boolean isLeftJoin = joinType.equals(CommunicationContext.JoinType.LEFT);
  final boolean isRightJoin = joinType.equals(CommunicationContext.JoinType.RIGHT);

  // choosing hashing and probing relations
  //   left join          : hashing = right, probing = left
  //   inner / right join : hashing = left,  probing = right
  final ResettableIterator<Tuple<?, ?>> hashingRelation = isLeftJoin ? rightIt : leftIt;
  final ResettableIterator<Tuple<?, ?>> probingRelation = isLeftJoin ? leftIt : rightIt;

  // set the memory limits based on the heap allocation
  final double lowerMemoryBound = Runtime.getRuntime().totalMemory() * 0.1;

  return new Iterator<JoinedTuple>() {

    // ---------------------------------------------------------------------------------------
    // All fields are declared BEFORE the instance initializer below. Java runs field
    // initializers and initializer blocks in textual order, so a field initializer placed
    // after the block would overwrite state that the initial doProbing() call has produced.
    // ---------------------------------------------------------------------------------------

    private boolean hashingDone;
    private Map<Object, List> keyHash = new THashMap<>(keyType);

    // always keep the nextJoinTuple in memory. hasNext() will use this field
    private JoinedTuple nextJoinTuple;

    // current element of the probing relation. It is kept because it has to be combined with
    // every entry of leftListForCurrentKey, one entry per next() call of the joined iterator.
    private Tuple<?, ?> currentProbingTuple;

    // values from the hashing relation that share the key of currentProbingTuple
    private List leftListForCurrentKey;

    // position of the next unconsumed entry in leftListForCurrentKey
    private int leftListIndex = 0;

    {
      // initially do hashing & probing, so that hasNext() is accurate right away
      doHashing();
      doProbing();
    }

    /**
     * This method will perform following actions in order
     * <ol>
     * <li>Clear existing HashMap</li>
     * <li>Create HashMap from the hashingRelation till it hit the memory limits</li>
     * <li>Determine whether the hashingRelation is fully consumed</li>
     * </ol>
     */
    private void doHashing() {
      this.keyHash.clear();

      // building the hash, as long as memory permits
      while (Runtime.getRuntime().freeMemory() > lowerMemoryBound
          && hashingRelation.hasNext()) {
        Tuple<?, ?> hashingTuple = hashingRelation.next();
        keyHash.computeIfAbsent(hashingTuple.getKey(), k -> new ArrayList())
            .add(hashingTuple.getValue());
      }

      // determine whether hashRelation is fully consumed
      hashingDone = !hashingRelation.hasNext();

      if (!hashingDone && this.keyHash.isEmpty()) {
        // problem!. We have cleared the old hash, yet there's no free memory available
        // to proceed
        throw new Twister2RuntimeException("Couldn't progress due to memory limitations. "
            + "Available free memory : " + Runtime.getRuntime().freeMemory()
            + ", Expected free memory : " + lowerMemoryBound);
      }
    }

    /**
     * This method should be guaranteed to create a {@link JoinedTuple}. If a tuple can't be
     * created, caller should determine that before calling this method.
     * Additionally, this method clears the per-tuple state once every hashed value for
     * currentProbingTuple has been combined.
     */
    private void progressProbing() {
      Object key = this.currentProbingTuple.getKey();
      Object probingValue = this.currentProbingTuple.getValue();
      Object hashedValue = leftListForCurrentKey.get(leftListIndex);

      // we have interchanged original iterators based on the join type.
      // that should be taken into consideration when creating the JoinedTuple
      Object left = isLeftJoin ? probingValue : hashedValue;
      Object right = isLeftJoin ? hashedValue : probingValue;
      this.nextJoinTuple = JoinedTuple.of(key, left, right);

      leftListIndex++;

      // if end of the list has reached, reset everything!
      if (leftListIndex == leftListForCurrentKey.size()) {
        currentProbingTuple = null;
        leftListForCurrentKey = null;
        leftListIndex = 0;
      }
    }

    /**
     * Advances the probing relation until the next {@link JoinedTuple} is available in
     * nextJoinTuple, or until both relations are exhausted.
     */
    private void doProbing() {
      // if there is a non null nextJoinTuple, no need of proceeding
      while (this.nextJoinTuple == null) {
        if (this.currentProbingTuple != null) {
          // still in the middle of combining the hashed list of the current probing tuple
          progressProbing();
          continue;
        }

        if (probingRelation.hasNext()) {
          this.currentProbingTuple = probingRelation.next();
          this.leftListForCurrentKey = this.keyHash.get(currentProbingTuple.getKey());
          if (this.leftListForCurrentKey == null) {
            // no match in the current hash : handle left and right joins here
            if (isLeftJoin) {
              this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(),
                  currentProbingTuple.getValue(), null);
            } else if (isRightJoin) {
              this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(),
                  null, currentProbingTuple.getValue());
            }
            // any join : We are done with currentProbingTuple
            this.currentProbingTuple = null;
          } else {
            progressProbing();
          }
        } else if (!hashingDone) {
          // probing relation has reached its end for the current HashMap :
          // load the next chunk of the hashing relation and rescan the probing relation
          doHashing();
          probingRelation.reset();
        } else {
          // end of join operation. Yay!
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return this.nextJoinTuple != null;
    }

    @Override
    public JoinedTuple next() {
      if (!hasNext()) {
        throw new Twister2RuntimeException("Join operation has reached to an end. "
            + "Use hasNext() to check the status.");
      }
      JoinedTuple currentJoinTuple = nextJoinTuple;
      nextJoinTuple = null;

      // create the next JoinTuple before returning
      doProbing();
      return currentJoinTuple;
    }
  };
}
Use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.
The class HashJoinUtilsTest, method innerJoinDiskTest.
/**
 * Inner join over two disk-backed mergers that hold the same key set.
 *
 * <p>Both relations contain every key in {@code [0, noOfTuples)}; left tuples carry value 1
 * and right tuples carry value 2. Every joined tuple must carry both values, and every key
 * must be seen.</p>
 */
@Test
public void innerJoinDiskTest() {
  int noOfTuples = 1000;
  List<Integer> keys1 = new ArrayList<>();
  List<Integer> keys2 = new ArrayList<>();
  for (int i = 0; i < noOfTuples; i++) {
    keys1.add(i);
    keys2.add(i);
  }
  Collections.shuffle(keys1);
  Collections.shuffle(keys2);

  FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left",
      MessageTypes.INTEGER, MessageTypes.INTEGER);
  FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right",
      MessageTypes.INTEGER, MessageTypes.INTEGER);

  // Serialized payloads: every left tuple has value 1, every right tuple has value 2.
  byte[] leftValueBytes = ByteBuffer.wrap(new byte[4]).putInt(1).array();
  byte[] rightValueBytes = ByteBuffer.wrap(new byte[4]).putInt(2).array();

  try {
    for (int i = 0; i < noOfTuples; i++) {
      fsMerger1.add(keys1.get(i), leftValueBytes, Integer.BYTES);
      fsMerger2.add(keys2.get(i), rightValueBytes, Integer.BYTES);
      fsMerger1.run();
      fsMerger2.run();
    }
    fsMerger1.switchToReading();
    fsMerger2.switchToReading();

    ResettableIterator it1 = fsMerger1.readIterator();
    ResettableIterator it2 = fsMerger2.readIterator();
    Iterator<JoinedTuple> iterator = HashJoinUtils.innerJoin(it1, it2, MessageTypes.INTEGER);

    Set<Integer> keysReceived = new HashSet<>();
    while (iterator.hasNext()) {
      JoinedTuple joinedTuple = iterator.next();
      Assert.assertEquals(1, joinedTuple.getLeftValue());
      Assert.assertEquals(2, joinedTuple.getRightValue());
      keysReceived.add((Integer) joinedTuple.getKey());
    }
    Assert.assertEquals(noOfTuples, keysReceived.size());
  } finally {
    // Clean up even when an assertion fails; the other join tests in this class reuse the same
    // "/tmp" folder and operation names, so leftovers from a failed run should not linger.
    fsMerger1.clean();
    fsMerger2.clean();
  }
}
Use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.
The class HashJoinUtilsTest, method rightJoinDiskTest.
/**
 * Right join over two disk-backed mergers.
 *
 * <p>The right relation holds every key in {@code [0, noOfTuples)} with value 2; the left
 * relation holds a random subset of those keys with value 1. Every joined tuple must carry the
 * right value, and the left value must be 1 when the key exists on the left and {@code null}
 * otherwise. Finally, every right key must have been seen.</p>
 *
 * <p>The left side is deliberately the sparse one: if the right keys were a subset of the left
 * keys, every right tuple would find a match and the null-left branch below would never run.</p>
 */
@Test
public void rightJoinDiskTest() {
  int noOfTuples = 1000;
  Random random = new Random(System.currentTimeMillis());
  List<Integer> keys1 = new ArrayList<>();
  List<Integer> keys2 = new ArrayList<>();
  for (int i = 0; i < noOfTuples; i++) {
    if (random.nextBoolean()) {
      keys1.add(i);
    }
    keys2.add(i);
  }
  Collections.shuffle(keys1);
  Collections.shuffle(keys2);

  FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left",
      MessageTypes.INTEGER, MessageTypes.INTEGER);
  FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right",
      MessageTypes.INTEGER, MessageTypes.INTEGER);

  // Serialized payloads: every left tuple has value 1, every right tuple has value 2.
  byte[] leftValueBytes = ByteBuffer.wrap(new byte[4]).putInt(1).array();
  byte[] rightValueBytes = ByteBuffer.wrap(new byte[4]).putInt(2).array();

  try {
    for (Integer key : keys1) {
      fsMerger1.add(key, leftValueBytes, Integer.BYTES);
      fsMerger1.run();
    }
    for (Integer key : keys2) {
      fsMerger2.add(key, rightValueBytes, Integer.BYTES);
      fsMerger2.run();
    }
    fsMerger1.switchToReading();
    fsMerger2.switchToReading();

    ResettableIterator it1 = fsMerger1.readIterator();
    ResettableIterator it2 = fsMerger2.readIterator();
    Iterator<JoinedTuple> iterator = HashJoinUtils.rightJoin(it1, it2, MessageTypes.INTEGER);

    Set<Integer> keysReceived = new HashSet<>();
    Set<Integer> leftKeyLookup = new HashSet<>(keys1);
    while (iterator.hasNext()) {
      JoinedTuple joinedTuple = iterator.next();
      Assert.assertEquals(2, joinedTuple.getRightValue());
      if (leftKeyLookup.contains(joinedTuple.getKey())) {
        Assert.assertEquals(1, joinedTuple.getLeftValue());
      } else {
        Assert.assertNull(joinedTuple.getLeftValue());
      }
      keysReceived.add((Integer) joinedTuple.getKey());
    }
    Assert.assertEquals(keys2.size(), keysReceived.size());
  } finally {
    // Clean up even when an assertion fails; the other join tests in this class reuse the same
    // "/tmp" folder and operation names, so leftovers from a failed run should not linger.
    fsMerger1.clean();
    fsMerger2.clean();
  }
}
Aggregations