Search in sources :

Example 1 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtils method outerJoin.

/**
 * Lazily performs a merge-style outer join (left, right or full, as selected by
 * {@code outerJoinType}) over two tuple iterators.
 *
 * <p>The merge logic always advances the side whose current tuple compares smaller, so it
 * assumes both iterators yield tuples ordered by key according to {@code comparator}.
 *
 * <p>Unmatched tuples are emitted with {@code null} in place of the missing side's value,
 * but only for the sides enabled by {@code outerJoinType.includeLeft()} /
 * {@code outerJoinType.includeRight()}. This includes tuples that remain on one side after
 * the other side has been fully consumed.
 *
 * <p>NOTE(review): unlike {@code innerJoin}, no restore points are used here. When a key
 * repeats on both sides, only pairs that involve the first left or the first right tuple
 * of that key run are produced. Confirm whether this is intended.
 *
 * @param leftIt iterator over the left relation
 * @param rightIt iterator over the right relation
 * @param comparator compares a left tuple with a right tuple; 0 means the keys match
 * @param outerJoinType decides whether unmatched left and/or right tuples are emitted
 * @return an iterator producing the joined tuples; {@code next()} returns {@code null}
 *         once {@code hasNext()} is {@code false}
 */
public static Iterator<JoinedTuple> outerJoin(RestorableIterator<Tuple<?, ?>> leftIt, RestorableIterator<Tuple<?, ?>> rightIt, KeyComparatorWrapper comparator, CommunicationContext.JoinType outerJoinType) {
    return new Iterator<JoinedTuple>() {

        // the tuple that the next call to next() will return, null when the join is finished
        private JoinedTuple nextJoinTuple;

        private Tuple currentLeft;

        private Tuple currentRight;

        // backup variables hold a tuple that has already been pulled from its iterator
        // but has not been consumed by the merge loop yet
        private Tuple backedUpLeft;

        private Tuple backedUpRight;

        // flags to mark the required side of iteration
        private boolean shouldDoLeftIterations = false;

        private boolean shouldDoRightIterations = false;

        /**
         * While left iterations are scheduled, pairs the next left tuple with the current
         * right tuple if their keys match. A non-matching left tuple is kept in
         * {@code backedUpLeft}.
         *
         * @return the joined tuple, or null if left iterations are not scheduled or are over
         */
        private JoinedTuple doLeftIteration() {
            if (!shouldDoLeftIterations) {
                return null;
            }
            JoinedTuple jtFromLeftIt = null;
            if (leftIt.hasNext()) {
                Tuple l = leftIt.next();
                if (comparator.compare(l, this.currentRight) == 0) {
                    jtFromLeftIt = new JoinedTuple<>(l.getKey(), l.getValue(), this.currentRight.getValue());
                } else {
                    this.backedUpLeft = l;
                }
            }
            // end of the left iterations (nothing produced): schedule the right iterations
            if (jtFromLeftIt == null) {
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = true;
            }
            return jtFromLeftIt;
        }

        /**
         * While right iterations are scheduled, pairs the next right tuple with the current
         * left tuple if their keys match. A non-matching right tuple is kept in
         * {@code backedUpRight}.
         *
         * @return the joined tuple, or null if right iterations are not scheduled or are over
         */
        private JoinedTuple doRightIteration() {
            if (!shouldDoRightIterations) {
                return null;
            }
            JoinedTuple jtFromRightIt = null;
            if (rightIt.hasNext()) {
                Tuple r = rightIt.next();
                if (comparator.compare(this.currentLeft, r) == 0) {
                    jtFromRightIt = new JoinedTuple<>(r.getKey(), this.currentLeft.getValue(), r.getValue());
                } else {
                    this.backedUpRight = r;
                }
            }
            // end of the right iterations (nothing produced): fall back to the merge loop
            if (jtFromRightIt == null) {
                this.shouldDoRightIterations = false;
            }
            return jtFromRightIt;
        }

        /**
         * Called only when the merge loop could not produce a tuple, i.e. at least one side
         * is exhausted. Emits the remaining tuples of the other side with a null partner,
         * provided the join type includes that side.
         *
         * @return the next unmatched tuple, or null if there is nothing left to emit
         */
        private JoinedTuple drainUnmatched() {
            boolean leftRemaining = this.backedUpLeft != null || leftIt.hasNext();
            boolean rightRemaining = this.backedUpRight != null || rightIt.hasNext();
            if (leftRemaining && !rightRemaining && outerJoinType.includeLeft()) {
                Tuple l = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
                this.backedUpLeft = null;
                return new JoinedTuple<>(l.getKey(), l.getValue(), null);
            }
            if (rightRemaining && !leftRemaining && outerJoinType.includeRight()) {
                Tuple r = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
                this.backedUpRight = null;
                return new JoinedTuple<>(r.getKey(), null, r.getValue());
            }
            return null;
        }

        /**
         * Computes the tuple that the next call to {@code next()} will return and stores it
         * in {@code nextJoinTuple}; null marks the end of the join.
         */
        private void makeNextJoinTuple() {
            nextJoinTuple = this.doLeftIteration();
            if (nextJoinTuple == null) {
                nextJoinTuple = this.doRightIteration();
            }
            while (nextJoinTuple == null && (this.backedUpLeft != null || leftIt.hasNext()) && (this.backedUpRight != null || rightIt.hasNext())) {
                this.currentLeft = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
                // we used the backup, so setting to null
                this.backedUpLeft = null;
                this.currentRight = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
                this.backedUpRight = null;
                // still we don't need left or right iterations at this point
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = false;
                int order = comparator.compare(this.currentLeft, this.currentRight);
                if (order == 0) {
                    this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), this.currentRight.getValue());
                    // schedule to run the left iteration next.
                    // Left iteration at the end will schedule right iteration
                    this.shouldDoLeftIterations = true;
                    break;
                } else if (order < 0) {
                    // left tuple has no partner on the right
                    if (outerJoinType.includeLeft()) {
                        this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), null);
                    }
                    if (leftIt.hasNext()) {
                        this.backedUpLeft = leftIt.next();
                    }
                    // the right tuple has not been consumed, keep it for the next round
                    this.backedUpRight = this.currentRight;
                } else {
                    // right tuple has no partner on the left
                    if (outerJoinType.includeRight()) {
                        this.nextJoinTuple = new JoinedTuple<>(this.currentRight.getKey(), null, this.currentRight.getValue());
                    }
                    if (rightIt.hasNext()) {
                        this.backedUpRight = rightIt.next();
                    }
                    // the left tuple has not been consumed, keep it for the next round
                    this.backedUpLeft = this.currentLeft;
                }
            }
            // one side is exhausted: the rest of the other side is unmatched by definition
            if (nextJoinTuple == null) {
                nextJoinTuple = this.drainUnmatched();
            }
        }

        {
            // start by creating the first join tuple
            this.makeNextJoinTuple();
        }

        @Override
        public boolean hasNext() {
            return nextJoinTuple != null;
        }

        @Override
        public JoinedTuple next() {
            JoinedTuple current = nextJoinTuple;
            this.makeNextJoinTuple();
            return current;
        }
    };
}
Also used : RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 2 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtils method innerJoin.

/**
 * This util can be used to perform disk based inner join operations.
 *
 * <p>Returns a lazy iterator that merges the two inputs and emits a {@link JoinedTuple} for
 * each pair of tuples that {@code comparator} reports as equal. The merge loop advances the
 * side whose current tuple compares smaller, so it assumes both iterators yield tuples
 * ordered by key according to {@code comparator}.
 *
 * <p>After a matching pair is found, further matches are searched first on the left side
 * (keeping the right tuple fixed) and then on the right side (keeping the left tuple fixed).
 * Restore points on the iterators are used to rewind each side once its run is over.
 *
 * @param leftIt iterator over the left relation
 * @param rightIt iterator over the right relation
 * @param comparator compares a left tuple with a right tuple; 0 means the keys match
 * @return an iterator producing the joined tuples; {@code next()} returns {@code null}
 *         once {@code hasNext()} is {@code false}
 */
public static Iterator<JoinedTuple> innerJoin(RestorableIterator<Tuple<?, ?>> leftIt, RestorableIterator<Tuple<?, ?>> rightIt, KeyComparatorWrapper comparator) {
    return new Iterator<JoinedTuple>() {

        // the tuple that the next call to next() will return, null when the join is finished
        private JoinedTuple nextJoinTuple;

        private Tuple currentLeft;

        private Tuple currentRight;

        // backup variables hold a tuple that has already been pulled from its iterator
        // and that the merge loop should use before pulling a new one
        private Tuple backedUpLeft;

        private Tuple backedUpRight;

        // flags to mark the required side of iteration
        private boolean shouldDoLeftIterations = false;

        private boolean shouldDoRightIterations = false;

        // keeps the no of matches found so far on each side of the relationship while keeping
        // the other side constant; reset to 0 when the run on that side ends
        private int leftIterations = 0;

        private int rightIterations = 0;

        /**
         * While left iterations are scheduled, pulls the next left tuple and pairs it with
         * the current right tuple if their keys match.
         *
         * @return the joined tuple, or null if left iterations are not scheduled or are over
         */
        private JoinedTuple doLeftIteration() {
            if (!shouldDoLeftIterations) {
                return null;
            }
            JoinedTuple jtFromLeftIt = null;
            if (leftIt.hasNext()) {
                Tuple l = leftIt.next();
                // first tuple pulled in this run: remember it so the merge loop resumes from it
                if (this.leftIterations == 0) {
                    this.backedUpLeft = l;
                }
                if (comparator.compare(l, this.currentRight) == 0) {
                    // first match of this run: mark the iterator position so it can be rewound
                    // NOTE(review): the exact position captured by createRestorePoint() is
                    // defined by RestorableIterator, which is not visible here - confirm
                    if (this.leftIterations == 0) {
                        leftIt.createRestorePoint();
                    }
                    this.leftIterations++;
                    jtFromLeftIt = new JoinedTuple<>(l.getKey(), l.getValue(), this.currentRight.getValue());
                }
            }
            /*
             * If this is the end of the left iterations (jtFromLeftIt == null), schedule the
             * right iterations to run next and rewind the left iterator to its restore point,
             * if one was created.
             */
            if (jtFromLeftIt == null) {
                this.leftIterations = 0;
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = true;
                if (leftIt.hasRestorePoint()) {
                    leftIt.restore();
                    leftIt.clearRestorePoint();
                }
            }
            return jtFromLeftIt;
        }

        /**
         * While right iterations are scheduled, pulls the next right tuple and pairs it with
         * the current left tuple if their keys match.
         *
         * @return the joined tuple, or null if right iterations are not scheduled or are over
         */
        private JoinedTuple doRightIteration() {
            if (!shouldDoRightIterations) {
                return null;
            }
            JoinedTuple jtFromRightIt = null;
            if (rightIt.hasNext()) {
                Tuple l = rightIt.next();
                // first tuple pulled in this run: remember it so the merge loop resumes from it
                if (this.rightIterations == 0) {
                    this.backedUpRight = l;
                }
                if (comparator.compare(this.currentLeft, l) == 0) {
                    // first match of this run: mark the iterator position so it can be rewound
                    if (this.rightIterations == 0) {
                        rightIt.createRestorePoint();
                    }
                    this.rightIterations++;
                    jtFromRightIt = new JoinedTuple<>(l.getKey(), this.currentLeft.getValue(), l.getValue());
                }
            }
            /*
             * If this is the end of the right iterations (jtFromRightIt == null), stop them
             * and rewind the right iterator to its restore point, if one was created.
             * The merge loop takes over from here.
             */
            if (jtFromRightIt == null) {
                this.rightIterations = 0;
                this.shouldDoRightIterations = false;
                if (rightIt.hasRestorePoint()) {
                    rightIt.restore();
                    rightIt.clearRestorePoint();
                }
            }
            return jtFromRightIt;
        }

        /**
         * Computes the tuple that the next call to next() will return and stores it in
         * nextJoinTuple; null marks the end of the join. Scheduled left/right iterations
         * are tried first, then the merge loop searches for the next matching pair.
         */
        private void makeNextJoinTuple() {
            nextJoinTuple = this.doLeftIteration();
            if (nextJoinTuple == null) {
                nextJoinTuple = this.doRightIteration();
            }
            // runs until a match is found or either side has neither a backup nor more tuples
            while (nextJoinTuple == null && (this.backedUpLeft != null || leftIt.hasNext()) && (this.backedUpRight != null || rightIt.hasNext())) {
                this.currentLeft = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
                // we used the backup, so setting to null
                this.backedUpLeft = null;
                this.currentRight = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
                this.backedUpRight = null;
                // still we don't need left or right iterations at this point
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = false;
                if (comparator.compare(this.currentLeft, this.currentRight) == 0) {
                    this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), this.currentRight.getValue());
                    // schedule to run the left iteration next.
                    // Left iteration at the end will schedule right iteration
                    this.shouldDoLeftIterations = true;
                    break;
                } else if (comparator.compare(this.currentLeft, this.currentRight) < 0) {
                    // left is behind: advance the left side and keep the right tuple for the next round
                    if (leftIt.hasNext()) {
                        this.backedUpLeft = leftIt.next();
                    }
                    this.backedUpRight = this.currentRight;
                } else {
                    // right is behind: advance the right side and keep the left tuple for the next round
                    if (rightIt.hasNext()) {
                        this.backedUpRight = rightIt.next();
                    }
                    this.backedUpLeft = this.currentLeft;
                }
            }
        }

        {
            // start by creating the first join tuple
            this.makeNextJoinTuple();
        }

        @Override
        public boolean hasNext() {
            return nextJoinTuple != null;
        }

        // returns the precomputed tuple and eagerly computes the following one
        @Override
        public JoinedTuple next() {
            JoinedTuple current = nextJoinTuple;
            this.makeNextJoinTuple();
            return current;
        }
    };
}
Also used : RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 3 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class HashJoinUtilsTest method leftJoinDiskTest.

/**
 * Verifies the disk based hash left join: every left key must appear in the result,
 * with the right value present only for keys that also exist on the right side.
 *
 * <p>The left relation holds keys 0..noOfTuples-1 (value 1); the right relation holds a
 * random subset of those keys (value 2). The files created by the mergers are cleaned up
 * even when an assertion fails.
 */
@Test
public void leftJoinDiskTest() {
    int noOfTuples = 1000;
    Random random = new Random(System.currentTimeMillis());
    List<Integer> keys1 = new ArrayList<>();
    List<Integer> keys2 = new ArrayList<>();
    for (int i = 0; i < noOfTuples; i++) {
        keys1.add(i);
        if (random.nextBoolean()) {
            keys2.add(i);
        }
    }
    Collections.shuffle(keys1);
    Collections.shuffle(keys2);
    FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
    FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
    try {
        // serialized values: every left tuple carries 1, every right tuple carries 2
        byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
        byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();
        for (Integer key : keys1) {
            fsMerger1.add(key, leftValue, Integer.BYTES);
            fsMerger1.run();
        }
        for (Integer key : keys2) {
            fsMerger2.add(key, rightValue, Integer.BYTES);
            fsMerger2.run();
        }
        fsMerger1.switchToReading();
        fsMerger2.switchToReading();
        ResettableIterator it1 = fsMerger1.readIterator();
        ResettableIterator it2 = fsMerger2.readIterator();
        Iterator<JoinedTuple> iterator = HashJoinUtils.leftJoin(it1, it2, MessageTypes.INTEGER);
        Set<Integer> keysReceived = new HashSet<>();
        Set<Integer> rightKeysLookup = new HashSet<>(keys2);
        while (iterator.hasNext()) {
            JoinedTuple joinedTuple = iterator.next();
            Assert.assertEquals(1, joinedTuple.getLeftValue());
            if (rightKeysLookup.contains(joinedTuple.getKey())) {
                Assert.assertEquals(2, joinedTuple.getRightValue());
            } else {
                Assert.assertNull(joinedTuple.getRightValue());
            }
            keysReceived.add((Integer) joinedTuple.getKey());
        }
        // a left join must return every left key exactly as many distinct keys as were added
        Assert.assertEquals(noOfTuples, keysReceived.size());
    } finally {
        // always remove the on-disk state, also when an assertion above failed
        try {
            fsMerger1.clean();
        } finally {
            fsMerger2.clean();
        }
    }
}
Also used : Random(java.util.Random) FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtilsTest method getRightOuterJoined.

/**
 * Builds the expected result of the right outer join used by the tests.
 *
 * @return a mutable list of six joined tuples; the last one (key 35) has no left value
 */
private List<Object> getRightOuterJoined() {
    List<Object> rightOuterJoined = new ArrayList<>();
    // tuples that have a value on both sides
    rightOuterJoined.add(new JoinedTuple(34, "Smith", "Clerical"));
    rightOuterJoined.add(new JoinedTuple(33, "Jones", "Engineering"));
    rightOuterJoined.add(new JoinedTuple(34, "Robinson", "Clerical"));
    rightOuterJoined.add(new JoinedTuple(33, "Heisenberg", "Engineering"));
    rightOuterJoined.add(new JoinedTuple(31, "Rafferty", "Sales"));
    // right-only tuple: the left value is null
    rightOuterJoined.add(new JoinedTuple(35, null, "Marketing"));
    return rightOuterJoined;
}
Also used : ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 5 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtilsTest method innerJoinWithDiskBasedListComparision.

/**
 * This test compares the results of in memory and disk based inner joins.
 * Purpose is to verify the accuracy of disk based inner join.
 *
 * <p>Both relations hold 100 tuples with random keys in [0, 10). The in-memory result is
 * sorted by key and treated as the expected value; only the keys are compared, position by
 * position, and the number of tuples produced by both joins must match.
 */
@Test
public void innerJoinWithDiskBasedListComparision() {
    List<Tuple> left = new ArrayList<>();
    List<Tuple> right = new ArrayList<>();
    Random random = new Random();
    for (int i = 0; i < 100; i++) {
        left.add(Tuple.of(random.nextInt(10), random.nextInt()));
        right.add(Tuple.of(random.nextInt(10), random.nextInt()));
    }
    // random operation names keep the on-disk state of concurrent runs apart
    FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : left) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk1.add(tuple.getKey(), data, data.length);
        fsk1.run();
    }
    FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : right) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk2.add(tuple.getKey(), data, data.length);
        fsk2.run();
    }
    CommonThreadPool.init(Config.newBuilder().build());
    fsk1.switchToReading();
    fsk2.switchToReading();
    // actual: disk based join
    Iterator iterator = SortJoinUtils.joinWithCache((RestorableIterator) fsk1.readIterator(), (RestorableIterator) fsk2.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare), CommunicationContext.JoinType.INNER, Config.newBuilder().build());
    // expected: in-memory join, sorted by key to match the order of the disk based join
    List<Object> objects = SortJoinUtils.innerJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
    objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));
    int i = 0;
    while (iterator.hasNext()) {
        JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
        // fail with a readable message instead of an IndexOutOfBoundsException
        Assert.assertTrue("disk based join produced more tuples than the in-memory join", i < objects.size());
        JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
        // JUnit expects (expected, actual)
        Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
    }
    Assert.assertEquals(objects.size(), i);
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) CommonThreadPool(edu.iu.dsc.tws.api.util.CommonThreadPool) Random(java.util.Random) Test(org.junit.Test) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Config(edu.iu.dsc.tws.api.config.Config) UUID(java.util.UUID) MessageTypes(edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes) Logger(java.util.logging.Logger) ArrayList(java.util.ArrayList) List(java.util.List) Comparator(java.util.Comparator) Assert(org.junit.Assert) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Comparator(java.util.Comparator) Random(java.util.Random) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Test(org.junit.Test)

Aggregations

JoinedTuple (edu.iu.dsc.tws.api.comms.structs.JoinedTuple)22 ArrayList (java.util.ArrayList)20 Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)11 Iterator (java.util.Iterator)9 RestorableIterator (edu.iu.dsc.tws.comms.shuffle.RestorableIterator)8 Test (org.junit.Test)8 List (java.util.List)7 Random (java.util.Random)7 CommunicationContext (edu.iu.dsc.tws.api.comms.CommunicationContext)6 Logger (java.util.logging.Logger)6 MessageTypes (edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes)5 Config (edu.iu.dsc.tws.api.config.Config)5 CommonThreadPool (edu.iu.dsc.tws.api.util.CommonThreadPool)5 FSKeyedSortedMerger2 (edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2)5 Comparator (java.util.Comparator)5 UUID (java.util.UUID)5 Assert (org.junit.Assert)5 ResettableIterator (edu.iu.dsc.tws.comms.shuffle.ResettableIterator)4 FSKeyedMerger (edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger)3 HashSet (java.util.HashSet)3