use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class SortJoinUtils method outerJoin.
/**
 * This util can be used to perform disk based outer join operations.
 *
 * <p>The join walks both iterators like a merge: the two current tuples are compared with
 * {@code comparator} and whichever side compares smaller is advanced, so both inputs must be
 * ordered consistently with {@code comparator}. Tuples without a partner on the other side are
 * emitted with a {@code null} value for the missing side when {@code outerJoinType} includes
 * their side. This also covers the tuples that remain on one side after the other side has
 * been fully consumed (or was empty from the start).
 *
 * <p>NOTE(review): no iterator restore points are created here, so when several tuples share a
 * key on BOTH sides, not every left/right pairing appears to be produced - confirm whether
 * duplicate keys on both sides are expected by callers.
 *
 * @param leftIt iterator over the left relation
 * @param rightIt iterator over the right relation
 * @param comparator compares two tuples by key
 * @param outerJoinType decides whether unmatched left and/or right tuples are emitted
 * @return an iterator over the joined tuples; {@code next()} returns {@code null} once
 * {@code hasNext()} is {@code false}
 */
public static Iterator<JoinedTuple> outerJoin(RestorableIterator<Tuple<?, ?>> leftIt, RestorableIterator<Tuple<?, ?>> rightIt, KeyComparatorWrapper comparator, CommunicationContext.JoinType outerJoinType) {
  return new Iterator<JoinedTuple>() {

    // the tuple that the next call to next() will hand out, null when the join is exhausted
    private JoinedTuple nextJoinTuple;

    private Tuple currentLeft;
    private Tuple currentRight;

    // backup variables hold a Tuple that had to be read with .next() but has not been
    // joined or emitted yet; they are consumed before the underlying iterators
    private Tuple backedUpLeft;
    private Tuple backedUpRight;

    // flags to mark the required side of iteration
    private boolean shouldDoLeftIterations = false;
    private boolean shouldDoRightIterations = false;

    /**
     * While scheduled, pairs the next left tuple with the current right tuple.
     *
     * @return the joined tuple, or null when left iterations are not scheduled or have ended
     */
    private JoinedTuple doLeftIteration() {
      if (!shouldDoLeftIterations) {
        return null;
      }

      JoinedTuple jtFromLeftIt = null;
      if (leftIt.hasNext()) {
        Tuple l = leftIt.next();
        if (comparator.compare(l, this.currentRight) == 0) {
          jtFromLeftIt = new JoinedTuple<>(l.getKey(), l.getValue(), this.currentRight.getValue());
        } else {
          // belongs to a later key, keep it for the main loop
          this.backedUpLeft = l;
        }
      }

      // end of left iteration: schedule the right iterations to run next
      if (jtFromLeftIt == null) {
        this.shouldDoLeftIterations = false;
        this.shouldDoRightIterations = true;
      }
      return jtFromLeftIt;
    }

    /**
     * While scheduled, pairs the next right tuple with the current left tuple.
     *
     * @return the joined tuple, or null when right iterations are not scheduled or have ended
     */
    private JoinedTuple doRightIteration() {
      if (!shouldDoRightIterations) {
        return null;
      }

      JoinedTuple jtFromRightIt = null;
      if (rightIt.hasNext()) {
        Tuple l = rightIt.next();
        if (comparator.compare(this.currentLeft, l) == 0) {
          jtFromRightIt = new JoinedTuple<>(l.getKey(), this.currentLeft.getValue(), l.getValue());
        } else {
          // belongs to a later key, keep it for the main loop
          this.backedUpRight = l;
        }
      }

      // end of right iteration: nothing else is scheduled, the main loop takes over
      if (jtFromRightIt == null) {
        this.shouldDoRightIterations = false;
      }
      return jtFromRightIt;
    }

    private boolean leftAvailable() {
      return this.backedUpLeft != null || leftIt.hasNext();
    }

    private boolean rightAvailable() {
      return this.backedUpRight != null || rightIt.hasNext();
    }

    /**
     * Emits a tuple that can no longer find a partner because the opposite side is exhausted.
     * Only called when the main loop could not produce a tuple, i.e. at most one side still
     * has data.
     *
     * @return a one sided joined tuple, or null if nothing is left to emit for this join type
     */
    private JoinedTuple drainUnmatched() {
      if (outerJoinType.includeLeft() && leftAvailable()) {
        Tuple unmatched = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
        this.backedUpLeft = null;
        return new JoinedTuple<>(unmatched.getKey(), unmatched.getValue(), null);
      }
      if (outerJoinType.includeRight() && rightAvailable()) {
        Tuple unmatched = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
        this.backedUpRight = null;
        return new JoinedTuple<>(unmatched.getKey(), null, unmatched.getValue());
      }
      return null;
    }

    private void makeNextJoinTuple() {
      nextJoinTuple = this.doLeftIteration();
      if (nextJoinTuple == null) {
        nextJoinTuple = this.doRightIteration();
      }

      while (nextJoinTuple == null && leftAvailable() && rightAvailable()) {
        this.currentLeft = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
        // we used the backup, so setting to null
        this.backedUpLeft = null;

        this.currentRight = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
        this.backedUpRight = null;

        // still we don't need left or right iterations at this point
        this.shouldDoLeftIterations = false;
        this.shouldDoRightIterations = false;

        int comparison = comparator.compare(this.currentLeft, this.currentRight);
        if (comparison == 0) {
          this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), this.currentRight.getValue());
          // schedule to run the left iteration next.
          // Left iteration at the end will schedule right iteration
          this.shouldDoLeftIterations = true;
          break;
        } else if (comparison < 0) {
          // left key has no partner on the right
          if (outerJoinType.includeLeft()) {
            this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), null);
          }
          if (leftIt.hasNext()) {
            this.backedUpLeft = leftIt.next();
          }
          // the right tuple has not been joined yet, keep it for the next round
          this.backedUpRight = this.currentRight;
        } else {
          // right key has no partner on the left
          if (outerJoinType.includeRight()) {
            this.nextJoinTuple = new JoinedTuple<>(this.currentRight.getKey(), null, this.currentRight.getValue());
          }
          if (rightIt.hasNext()) {
            this.backedUpRight = rightIt.next();
          }
          // the left tuple has not been joined yet, keep it for the next round
          this.backedUpLeft = this.currentLeft;
        }
      }

      // one side ran dry: whatever is left on the other side can not match anything anymore,
      // but an outer join still has to report it
      if (nextJoinTuple == null) {
        nextJoinTuple = this.drainUnmatched();
      }
    }

    {
      // start by creating the first join tuple
      this.makeNextJoinTuple();
    }

    @Override
    public boolean hasNext() {
      return nextJoinTuple != null;
    }

    @Override
    public JoinedTuple next() {
      JoinedTuple current = nextJoinTuple;
      this.makeNextJoinTuple();
      return current;
    }
  };
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class SortJoinUtils method innerJoin.
/**
* This util can be used to perform disk based inner join operations.
*
* The returned iterator always holds the next joined tuple pre-computed: it is built once when
* the iterator is created and again on every call to next(). Two tuples are joined when
* the comparator reports 0 for them; the side that compares smaller is advanced otherwise, so
* both inputs are expected to be ordered consistently with the comparator.
*
* @param leftIt restorable iterator over the left relation
* @param rightIt restorable iterator over the right relation
* @param comparator compares two tuples by key
* @return iterator over the joined tuples; next() returns null once hasNext() is false
*/
public static Iterator<JoinedTuple> innerJoin(RestorableIterator<Tuple<?, ?>> leftIt, RestorableIterator<Tuple<?, ?>> rightIt, KeyComparatorWrapper comparator) {
return new Iterator<JoinedTuple>() {
// the tuple that the next call to next() will return, null when the join is exhausted
private JoinedTuple nextJoinTuple;
// the pair of tuples most recently compared by the main loop in makeNextJoinTuple()
private Tuple currentLeft;
private Tuple currentRight;
// backup variables will hold a Tuple temporarily if we had to call .next()
// once during the join operation before creating an iterator restore point.
// The main loop consumes a backup before reading from the underlying iterator.
private Tuple backedUpLeft;
private Tuple backedUpRight;
// flags to mark the required side of iteration
private boolean shouldDoLeftIterations = false;
private boolean shouldDoRightIterations = false;
// keeps the no of iterations done on each side of the relationship while keeping the
// other side constant
private int leftIterations = 0;
private int rightIterations = 0;
/**
* While left iterations are scheduled, joins the next left tuple with the current right tuple.
* Returns null when left iterations are not scheduled or when they have just ended.
*/
private JoinedTuple doLeftIteration() {
if (!shouldDoLeftIterations) {
return null;
}
JoinedTuple jtFromLeftIt = null;
if (leftIt.hasNext()) {
Tuple l = leftIt.next();
// the first tuple read in this run is always remembered as the left backup,
// whether or not it matches the current right tuple
if (this.leftIterations == 0) {
this.backedUpLeft = l;
}
if (comparator.compare(l, this.currentRight) == 0) {
// first match of this run: mark the position so the left iterator can be rewound later
// NOTE(review): exact position captured by createRestorePoint() is defined by
// RestorableIterator, which is not visible here
if (this.leftIterations == 0) {
leftIt.createRestorePoint();
}
this.leftIterations++;
jtFromLeftIt = new JoinedTuple<>(l.getKey(), l.getValue(), this.currentRight.getValue());
}
}
/*
if this is the end of left iteration(jtFromLeftIt == null), configure the right iterations
to run next and restore left iterator
*/
if (jtFromLeftIt == null) {
this.leftIterations = 0;
this.shouldDoLeftIterations = false;
this.shouldDoRightIterations = true;
if (leftIt.hasRestorePoint()) {
leftIt.restore();
leftIt.clearRestorePoint();
}
}
return jtFromLeftIt;
}
/**
* While right iterations are scheduled, joins the current left tuple with the next right tuple.
* Returns null when right iterations are not scheduled or when they have just ended.
*/
private JoinedTuple doRightIteration() {
if (!shouldDoRightIterations) {
return null;
}
JoinedTuple jtFromRightIt = null;
if (rightIt.hasNext()) {
Tuple l = rightIt.next();
// the first tuple read in this run is always remembered as the right backup,
// whether or not it matches the current left tuple
if (this.rightIterations == 0) {
this.backedUpRight = l;
}
if (comparator.compare(this.currentLeft, l) == 0) {
// first match of this run: mark the position so the right iterator can be rewound later
if (this.rightIterations == 0) {
rightIt.createRestorePoint();
}
this.rightIterations++;
jtFromRightIt = new JoinedTuple<>(l.getKey(), this.currentLeft.getValue(), l.getValue());
}
}
/*
if this is the end of right iteration(jtFromRightIt == null), stop the right iterations
and restore the right iterator if a restore point exists. Nothing else is scheduled here,
so the main loop in makeNextJoinTuple() continues from the backed up tuples.
*/
if (jtFromRightIt == null) {
this.rightIterations = 0;
this.shouldDoRightIterations = false;
if (rightIt.hasRestorePoint()) {
rightIt.restore();
rightIt.clearRestorePoint();
}
}
return jtFromRightIt;
}
/**
* Computes the tuple that the next call to next() will return and stores it in nextJoinTuple.
* Pending left iterations are tried first, then pending right iterations, then the main loop.
* nextJoinTuple stays null when one of the sides has no more tuples to offer.
*/
private void makeNextJoinTuple() {
nextJoinTuple = this.doLeftIteration();
if (nextJoinTuple == null) {
nextJoinTuple = this.doRightIteration();
}
// main loop: runs only while both sides can still provide a tuple (backup or iterator)
while (nextJoinTuple == null && (this.backedUpLeft != null || leftIt.hasNext()) && (this.backedUpRight != null || rightIt.hasNext())) {
this.currentLeft = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
// we used the backup, so setting to null
this.backedUpLeft = null;
this.currentRight = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
this.backedUpRight = null;
// still we don't need left or right iterations at this point
this.shouldDoLeftIterations = false;
this.shouldDoRightIterations = false;
if (comparator.compare(this.currentLeft, this.currentRight) == 0) {
this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), this.currentRight.getValue());
// schedule to run the left iteration next.
// Left iteration at the end will schedule right iteration
this.shouldDoLeftIterations = true;
break;
} else if (comparator.compare(this.currentLeft, this.currentRight) < 0) {
// left compares smaller: advance the left side and keep the current right tuple
// for the next round (inner join, so the unmatched left tuple is dropped)
if (leftIt.hasNext()) {
this.backedUpLeft = leftIt.next();
}
this.backedUpRight = this.currentRight;
} else {
// right compares smaller: advance the right side and keep the current left tuple
// for the next round (inner join, so the unmatched right tuple is dropped)
if (rightIt.hasNext()) {
this.backedUpRight = rightIt.next();
}
this.backedUpLeft = this.currentLeft;
}
}
}
{
// start by creating the first join tuple
this.makeNextJoinTuple();
}
@Override
public boolean hasNext() {
return nextJoinTuple != null;
}
// hands out the pre-computed tuple and immediately computes the following one;
// returns null (does not throw) when called after hasNext() has become false
@Override
public JoinedTuple next() {
JoinedTuple current = nextJoinTuple;
this.makeNextJoinTuple();
return current;
}
};
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class HashJoinUtilsTest method leftJoinDiskTest.
/**
 * Runs a left join over two disk backed mergers and checks the joined tuples.
 * The left side holds every key in [0, 1000) with value 1, the right side holds a random
 * subset of those keys with value 2. Every joined tuple must carry the left value, carry the
 * right value only when its key exists on the right side, and all left keys must show up.
 */
@Test
public void leftJoinDiskTest() {
  final int tupleCount = 1000;
  Random rnd = new Random(System.currentTimeMillis());

  List<Integer> leftKeys = new ArrayList<>();
  List<Integer> rightKeys = new ArrayList<>();
  for (int key = 0; key < tupleCount; key++) {
    leftKeys.add(key);
    // roughly half of the keys also get a partner on the right side
    if (rnd.nextBoolean()) {
      rightKeys.add(key);
    }
  }
  Collections.shuffle(leftKeys);
  Collections.shuffle(rightKeys);

  FSKeyedMerger leftMerger = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
  FSKeyedMerger rightMerger = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);

  // serialized values: 1 for every left tuple, 2 for every right tuple
  byte[] leftPayload = ByteBuffer.wrap(new byte[4]).putInt(1).array();
  byte[] rightPayload = ByteBuffer.wrap(new byte[4]).putInt(2).array();

  for (Integer key : leftKeys) {
    leftMerger.add(key, leftPayload, Integer.BYTES);
    leftMerger.run();
  }
  for (Integer key : rightKeys) {
    rightMerger.add(key, rightPayload, Integer.BYTES);
    rightMerger.run();
  }

  leftMerger.switchToReading();
  rightMerger.switchToReading();

  ResettableIterator leftSource = leftMerger.readIterator();
  ResettableIterator rightSource = rightMerger.readIterator();
  Iterator<JoinedTuple> joined = HashJoinUtils.leftJoin(leftSource, rightSource, MessageTypes.INTEGER);

  Set<Integer> seenKeys = new HashSet<>();
  Set<Integer> rightKeyLookup = new HashSet<>(rightKeys);
  while (joined.hasNext()) {
    JoinedTuple tuple = joined.next();
    Assert.assertEquals(1, tuple.getLeftValue());
    boolean hasRightPartner = rightKeyLookup.contains(tuple.getKey());
    if (hasRightPartner) {
      Assert.assertEquals(2, tuple.getRightValue());
    } else {
      Assert.assertNull(tuple.getRightValue());
    }
    seenKeys.add((Integer) tuple.getKey());
  }

  // a left join must not lose any left key
  Assert.assertEquals(tupleCount, seenKeys.size());

  leftMerger.clean();
  rightMerger.clean();
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class SortJoinUtilsTest method getRightOuterJoined.
/**
 * Builds the expected result of the right outer join used by the tests in this class.
 * The last entry (key 35) has no left value, i.e. it only exists on the right side.
 *
 * @return a new mutable list holding the expected joined tuples
 */
private List<Object> getRightOuterJoined() {
  List<Object> rightOuterJoined = new ArrayList<>();

  // keys that are present on both sides
  rightOuterJoined.add(new JoinedTuple(34, "Smith", "Clerical"));
  rightOuterJoined.add(new JoinedTuple(33, "Jones", "Engineering"));
  rightOuterJoined.add(new JoinedTuple(34, "Robinson", "Clerical"));
  rightOuterJoined.add(new JoinedTuple(33, "Heisenberg", "Engineering"));
  rightOuterJoined.add(new JoinedTuple(31, "Rafferty", "Sales"));

  // right side only
  rightOuterJoined.add(new JoinedTuple(35, null, "Marketing"));

  return rightOuterJoined;
}
use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.
the class SortJoinUtilsTest method innerJoinWithDiskBasedListComparision.
/**
 * This test compares the results of in memory and disk based inner joins.
 * Purpose is to verify the accuracy of disk based inner join.
 *
 * <p>Both sides get 100 tuples with random keys in [0, 10). The in memory result is sorted by
 * key and acts as the expected result; the disk based iterator is then walked in parallel and
 * must report the same key at every position and the same number of tuples. Only keys are
 * compared, values are not.
 */
@Test
public void innerJoinWithDiskBasedListComparision() {
  List<Tuple> left = new ArrayList<>();
  List<Tuple> right = new ArrayList<>();
  Random random = new Random();
  for (int i = 0; i < 100; i++) {
    left.add(Tuple.of(random.nextInt(10), random.nextInt()));
    right.add(Tuple.of(random.nextInt(10), random.nextInt()));
  }

  // spill the left relation to disk
  FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : left) {
    byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
    fsk1.add(tuple.getKey(), data, data.length);
    fsk1.run();
  }

  // spill the right relation to disk
  FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : right) {
    byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
    fsk2.add(tuple.getKey(), data, data.length);
    fsk2.run();
  }

  CommonThreadPool.init(Config.newBuilder().build());
  fsk1.switchToReading();
  fsk2.switchToReading();

  // actual: disk based join
  Iterator iterator = SortJoinUtils.joinWithCache((RestorableIterator) fsk1.readIterator(), (RestorableIterator) fsk2.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare), CommunicationContext.JoinType.INNER, Config.newBuilder().build());

  // expected: in memory join, ordered by key to line up with the disk based output
  List<Object> objects = SortJoinUtils.innerJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
  objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));

  int i = 0;
  while (iterator.hasNext()) {
    JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
    JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
    // JUnit expects (expected, actual): the in memory join is the reference
    Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
  }
  // the disk based join must have produced exactly as many tuples as the in memory join
  Assert.assertEquals(objects.size(), i);
}
Aggregations