Search in sources :

Example 1 with ResettableIterator

use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.

the class HashJoinUtilsTest method leftJoinDiskTest.

/**
 * Checks {@code HashJoinUtils.leftJoin} on two file-backed relations.
 * <p>
 * The left relation holds every key in {@code [0, noOfTuples)} with value 1; the right
 * relation holds a random subset of those keys with value 2. Every left key must show up
 * in the result, with the right value being 2 when the key exists on the right and
 * {@code null} otherwise.
 */
@Test
public void leftJoinDiskTest() {
    int noOfTuples = 1000;
    Random random = new Random(System.currentTimeMillis());
    List<Integer> keys1 = new ArrayList<>();
    List<Integer> keys2 = new ArrayList<>();
    for (int i = 0; i < noOfTuples; i++) {
        keys1.add(i);
        // roughly half of the left keys also exist on the right side
        if (random.nextBoolean()) {
            keys2.add(i);
        }
    }
    Collections.shuffle(keys1);
    Collections.shuffle(keys2);
    FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
    FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
    try {
        // serialized value payloads: every left tuple carries 1, every right tuple carries 2
        byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
        byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();
        for (int i = 0; i < keys1.size(); i++) {
            fsMerger1.add(keys1.get(i), leftValue, Integer.BYTES);
            fsMerger1.run();
        }
        for (int i = 0; i < keys2.size(); i++) {
            fsMerger2.add(keys2.get(i), rightValue, Integer.BYTES);
            fsMerger2.run();
        }
        fsMerger1.switchToReading();
        fsMerger2.switchToReading();
        ResettableIterator it1 = fsMerger1.readIterator();
        ResettableIterator it2 = fsMerger2.readIterator();
        Iterator<JoinedTuple> iterator = HashJoinUtils.leftJoin(it1, it2, MessageTypes.INTEGER);
        Set<Integer> keysReceived = new HashSet<>();
        Set<Integer> rightKeysLookup = new HashSet<>(keys2);
        while (iterator.hasNext()) {
            JoinedTuple joinedTuple = iterator.next();
            Assert.assertEquals(1, joinedTuple.getLeftValue());
            if (rightKeysLookup.contains(joinedTuple.getKey())) {
                Assert.assertEquals(2, joinedTuple.getRightValue());
            } else {
                Assert.assertNull(joinedTuple.getRightValue());
            }
            keysReceived.add((Integer) joinedTuple.getKey());
        }
        Assert.assertEquals(noOfTuples, keysReceived.size());
    } finally {
        // clean both mergers even when an assertion above fails, so a failed run
        // does not skip cleanup (presumably clean() removes what was written under /tmp)
        try {
            fsMerger1.clean();
        } finally {
            fsMerger2.clean();
        }
    }
}
Also used : Random(java.util.Random) FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 2 with ResettableIterator

use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.

the class HashJoinUtils method join.

/**
 * Disk based hash join. Despite the historical "inner join" wording this method also
 * serves left and right outer joins, selected through {@code joinType}.
 * <p>
 * One relation (the hashing relation) is loaded into an in-memory hash map for as long
 * as the free heap stays above 10% of the total heap; the other relation (the probing
 * relation) is then scanned against that map. If the hashing relation was not fully
 * consumed, the map is rebuilt from its remaining tuples and the probing relation is
 * reset and scanned again.
 * <p>
 * NOTE(review): for LEFT/RIGHT joins an unmatched probing tuple is emitted with a
 * {@code null} side on every pass in which its key is missing from the current hash map.
 * When more than one hashing pass is needed this can yield extra null-padded tuples —
 * confirm whether that is intended.
 *
 * @param leftIt   resettable iterator over the left relation
 * @param rightIt  resettable iterator over the right relation
 * @param joinType join type; {@code LEFT} hashes the right relation and probes with the
 *                 left one, every other type hashes the left and probes with the right
 * @param keyType  key type handed to the {@code THashMap} used as the hash table
 * @return an iterator that produces the joined tuples on demand
 * @throws Twister2RuntimeException if no hashing tuple can be loaded under the
 *                                  memory bound while the hashing relation still has data
 */
public static Iterator<JoinedTuple> join(ResettableIterator<Tuple<?, ?>> leftIt, ResettableIterator<Tuple<?, ?>> rightIt, CommunicationContext.JoinType joinType, MessageType keyType) {
    // choosing hashing and probing relations
    // if inner join:
    // hashing = left
    // probing = right
    // if left join:
    // hashing = right
    // probing = left
    // if right join:
    // hashing = left
    // probing = right
    final ResettableIterator<Tuple<?, ?>> hashingRelation = joinType.equals(CommunicationContext.JoinType.LEFT) ? rightIt : leftIt;
    final ResettableIterator<Tuple<?, ?>> probingRelation = joinType.equals(CommunicationContext.JoinType.LEFT) ? leftIt : rightIt;
    // set the memory limits based on the heap allocation
    final double lowerMemoryBound = Runtime.getRuntime().totalMemory() * 0.1;
    return new Iterator<JoinedTuple>() {

        // true once the hashing relation has been fully consumed
        private boolean hashingDone;

        // hash table of the current chunk: key -> all values of the hashing relation
        private final Map<Object, List<Object>> keyHash = new THashMap<>(keyType);

        // always keep the nextJoinTuple in memory. hasNext() will use this field
        private JoinedTuple nextJoinTuple;

        // when iterating over the probing relation, the current element
        // (which has been returned by next()) is kept in memory since it has to be combined
        // with all the values in leftListForCurrentKey. This is done on demand, on the
        // next() call of the joined iterator.
        private Tuple<?, ?> currentProbingTuple;

        // values from the hashing relation that match the key of currentProbingTuple
        private List<Object> leftListForCurrentKey;

        // index of the next element of leftListForCurrentKey to combine
        private int leftListIndex;

        {
            // Initially do hashing & probing. This block MUST stay below every field
            // declaration: instance initializers run in textual order, so a field
            // initializer placed after it (previously "leftListIndex = 0") would wipe out
            // the cursor advanced by this first doProbing() and make the first joined
            // tuple come out twice for keys with several hashed values.
            doHashing();
            doProbing();
        }

        /**
         * This method will perform following actions in order
         * <ol>
         *   <li>Clear existing HashMap</li>
         *   <li>Create HashMap from the hashingRelation till it hit the memory limits</li>
         *   <li>Determine whether the hashingRelation is fully consumed</li>
         * </ol>
         */
        private void doHashing() {
            this.keyHash.clear();
            // building the hash, as long as memory permits
            while (Runtime.getRuntime().freeMemory() > lowerMemoryBound && hashingRelation.hasNext()) {
                Tuple<?, ?> nextLeft = hashingRelation.next();
                keyHash.computeIfAbsent(nextLeft.getKey(), k -> new ArrayList<>()).add(nextLeft.getValue());
            }
            // determine whether hashRelation is fully consumed
            hashingDone = !hashingRelation.hasNext();
            if (!hashingDone && this.keyHash.isEmpty()) {
                // problem!. We have cleared the old hash, yet there's no free memory available to proceed
                throw new Twister2RuntimeException("Couldn't progress due to memory limitations." + "Available free memory : " + Runtime.getRuntime().freeMemory() + ", Expected free memory : " + lowerMemoryBound);
            }
        }

        /**
         * Builds the next {@link JoinedTuple} from currentProbingTuple and the element of
         * leftListForCurrentKey at leftListIndex. The caller must make sure both are set
         * and that the index is in range before calling this method.
         * Once the last element of the list has been combined, the probing state is
         * cleared so that the next probing tuple gets fetched.
         */
        private void progressProbing() {
            Object key = this.currentProbingTuple.getKey();
            // we have interchanged original iterators based on the join type.
            // that should be taken into consideration when creating the JoinedTuple
            Object left = joinType.equals(CommunicationContext.JoinType.LEFT) ? this.currentProbingTuple.getValue() : leftListForCurrentKey.get(leftListIndex);
            Object right = joinType.equals(CommunicationContext.JoinType.LEFT) ? leftListForCurrentKey.get(leftListIndex) : this.currentProbingTuple.getValue();
            this.nextJoinTuple = JoinedTuple.of(key, left, right);
            leftListIndex++;
            // if end of the list has reached, reset everything!
            if (leftListIndex == leftListForCurrentKey.size()) {
                currentProbingTuple = null;
                leftListForCurrentKey = null;
                leftListIndex = 0;
            }
        }

        /**
         * Advances through the probing relation until the next joined tuple is available
         * in nextJoinTuple, or until both relations are exhausted (nextJoinTuple stays null).
         */
        private void doProbing() {
            // if there is a non null nextJoinTuple, no need of proceeding
            while (this.nextJoinTuple == null) {
                if (this.currentProbingTuple == null) {
                    // nothing pending for the previous probing tuple, fetch a new one
                    if (probingRelation.hasNext()) {
                        this.currentProbingTuple = probingRelation.next();
                        this.leftListForCurrentKey = this.keyHash.get(currentProbingTuple.getKey());
                        if (this.leftListForCurrentKey == null) {
                            // handle left and right joins here
                            if (joinType.equals(CommunicationContext.JoinType.LEFT)) {
                                this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(), currentProbingTuple.getValue(), null);
                            } else if (joinType.equals(CommunicationContext.JoinType.RIGHT)) {
                                this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(), null, currentProbingTuple.getValue());
                            }
                            // any join : We are done with currentProbingTuple
                            this.currentProbingTuple = null;
                        } else {
                            progressProbing();
                        }
                    } else {
                        // probing iterator has reached to an end for current HashMap.
                        if (!hashingDone) {
                            // clear current hash and reset the probing iterator
                            doHashing();
                            probingRelation.reset();
                        } else {
                            // end of join operation. Yay!
                            break;
                        }
                    }
                } else {
                    // still in the middle of combining the hashed list of the current tuple
                    progressProbing();
                }
            }
        }

        @Override
        public boolean hasNext() {
            return this.nextJoinTuple != null;
        }

        @Override
        public JoinedTuple next() {
            if (!hasNext()) {
                throw new Twister2RuntimeException("Join operation has reached to an end. " + "Use hasNext() to check the status.");
            }
            JoinedTuple currentJoinTuple = nextJoinTuple;
            nextJoinTuple = null;
            // create the next JoinTuple before returning
            doProbing();
            return currentJoinTuple;
        }
    };
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) List(java.util.List) Iterator(java.util.Iterator) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) MessageType(edu.iu.dsc.tws.api.comms.messaging.types.MessageType) Map(java.util.Map) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Logger(java.util.logging.Logger) Collections(java.util.Collections) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) ArrayList(java.util.ArrayList)

Example 3 with ResettableIterator

use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.

the class HashJoinUtilsTest method innerJoinDiskTest.

/**
 * Checks {@code HashJoinUtils.innerJoin} on two file-backed relations.
 * <p>
 * Both relations hold every key in {@code [0, noOfTuples)}; left tuples carry value 1 and
 * right tuples carry value 2. Every joined tuple must have both values, and every key
 * must show up in the result.
 */
@Test
public void innerJoinDiskTest() {
    int noOfTuples = 1000;
    List<Integer> keys1 = new ArrayList<>();
    List<Integer> keys2 = new ArrayList<>();
    for (int i = 0; i < noOfTuples; i++) {
        keys1.add(i);
        keys2.add(i);
    }
    Collections.shuffle(keys1);
    Collections.shuffle(keys2);
    FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
    FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
    try {
        // serialized value payloads: every left tuple carries 1, every right tuple carries 2
        byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
        byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();
        for (int i = 0; i < noOfTuples; i++) {
            fsMerger1.add(keys1.get(i), leftValue, Integer.BYTES);
            fsMerger2.add(keys2.get(i), rightValue, Integer.BYTES);
            fsMerger1.run();
            fsMerger2.run();
        }
        fsMerger1.switchToReading();
        fsMerger2.switchToReading();
        ResettableIterator it1 = fsMerger1.readIterator();
        ResettableIterator it2 = fsMerger2.readIterator();
        Iterator<JoinedTuple> iterator = HashJoinUtils.innerJoin(it1, it2, MessageTypes.INTEGER);
        Set<Integer> keysReceived = new HashSet<>();
        while (iterator.hasNext()) {
            JoinedTuple joinedTuple = iterator.next();
            Assert.assertEquals(1, joinedTuple.getLeftValue());
            Assert.assertEquals(2, joinedTuple.getRightValue());
            keysReceived.add((Integer) joinedTuple.getKey());
        }
        Assert.assertEquals(noOfTuples, keysReceived.size());
    } finally {
        // clean both mergers even when an assertion above fails, so a failed run
        // does not skip cleanup (presumably clean() removes what was written under /tmp)
        try {
            fsMerger1.clean();
        } finally {
            fsMerger2.clean();
        }
    }
}
Also used : FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with ResettableIterator

use of edu.iu.dsc.tws.comms.shuffle.ResettableIterator in project twister2 by DSC-SPIDAL.

the class HashJoinUtilsTest method rightJoinDiskTest.

/**
 * Checks {@code HashJoinUtils.rightJoin} on two file-backed relations.
 * <p>
 * The left relation holds every key in {@code [0, noOfTuples)} with value 1; the right
 * relation holds a random subset of those keys with value 2. Every right key must show
 * up in the result with right value 2; the left value must be 1 when the key exists on
 * the left and {@code null} otherwise.
 */
@Test
public void rightJoinDiskTest() {
    int noOfTuples = 1000;
    Random random = new Random(System.currentTimeMillis());
    List<Integer> keys1 = new ArrayList<>();
    List<Integer> keys2 = new ArrayList<>();
    for (int i = 0; i < noOfTuples; i++) {
        keys1.add(i);
        // roughly half of the left keys also exist on the right side
        if (random.nextBoolean()) {
            keys2.add(i);
        }
    }
    Collections.shuffle(keys1);
    Collections.shuffle(keys2);
    FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
    FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
    try {
        // serialized value payloads: every left tuple carries 1, every right tuple carries 2
        byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
        byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();
        for (int i = 0; i < keys1.size(); i++) {
            fsMerger1.add(keys1.get(i), leftValue, Integer.BYTES);
            fsMerger1.run();
        }
        for (int i = 0; i < keys2.size(); i++) {
            fsMerger2.add(keys2.get(i), rightValue, Integer.BYTES);
            fsMerger2.run();
        }
        fsMerger1.switchToReading();
        fsMerger2.switchToReading();
        ResettableIterator it1 = fsMerger1.readIterator();
        ResettableIterator it2 = fsMerger2.readIterator();
        Iterator<JoinedTuple> iterator = HashJoinUtils.rightJoin(it1, it2, MessageTypes.INTEGER);
        Set<Integer> keysReceived = new HashSet<>();
        Set<Integer> leftKeyLookup = new HashSet<>(keys1);
        while (iterator.hasNext()) {
            JoinedTuple joinedTuple = iterator.next();
            Assert.assertEquals(2, joinedTuple.getRightValue());
            if (leftKeyLookup.contains(joinedTuple.getKey())) {
                Assert.assertEquals(1, joinedTuple.getLeftValue());
            } else {
                Assert.assertNull(joinedTuple.getLeftValue());
            }
            keysReceived.add((Integer) joinedTuple.getKey());
        }
        Assert.assertEquals(keys2.size(), keysReceived.size());
    } finally {
        // clean both mergers even when an assertion above fails, so a failed run
        // does not skip cleanup (presumably clean() removes what was written under /tmp)
        try {
            fsMerger1.clean();
        } finally {
            fsMerger2.clean();
        }
    }
}
Also used : Random(java.util.Random) FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

JoinedTuple (edu.iu.dsc.tws.api.comms.structs.JoinedTuple)4 ResettableIterator (edu.iu.dsc.tws.comms.shuffle.ResettableIterator)4 ArrayList (java.util.ArrayList)4 FSKeyedMerger (edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger)3 HashSet (java.util.HashSet)3 Test (org.junit.Test)3 Random (java.util.Random)2 CommunicationContext (edu.iu.dsc.tws.api.comms.CommunicationContext)1 MessageType (edu.iu.dsc.tws.api.comms.messaging.types.MessageType)1 Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)1 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)1 Collections (java.util.Collections)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Logger (java.util.logging.Logger)1