Search in sources :

Example 6 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtilsTest method leftOuterJoinComparision.

/**
 * Compares the left outer join computed from two in-memory lists with the
 * left outer join computed from two {@link FSKeyedSortedMerger2} instances
 * (writing under "/tmp") that were fed the same tuples.
 *
 * <p>Both relations hold 100 tuples with random integer keys in [0, 10) and
 * random integer values. Only the keys of the joined tuples and the total
 * number of joined tuples are compared; the joined values are not checked.
 */
@Test
public void leftOuterJoinComparision() {
    List<Tuple> left = new ArrayList<>();
    List<Tuple> right = new ArrayList<>();
    Random random = new Random();
    for (int i = 0; i < 100; i++) {
        left.add(Tuple.of(random.nextInt(10), random.nextInt()));
        right.add(Tuple.of(random.nextInt(10), random.nextInt()));
    }
    // feed the left relation into the first merger
    FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : left) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk1.add(tuple.getKey(), data, data.length);
        fsk1.run();
    }
    // feed the right relation into the second merger
    FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : right) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk2.add(tuple.getKey(), data, data.length);
        fsk2.run();
    }
    CommonThreadPool.init(Config.newBuilder().build());
    fsk1.switchToReading();
    fsk2.switchToReading();
    // join under test: iterator based join over the two mergers
    Iterator iterator = SortJoinUtils.leftOuterJoin((RestorableIterator) fsk1.readIterator(), (RestorableIterator) fsk2.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare));
    // reference result: list based join, ordered by key so it can be walked in lock step
    List<Object> objects = SortJoinUtils.leftOuterJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
    objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));
    int i = 0;
    while (iterator.hasNext()) {
        JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
        // fail with a readable message instead of an IndexOutOfBoundsException
        Assert.assertTrue("Disk based join produced more tuples than the in memory join", i < objects.size());
        JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
        // expected value (in memory result) goes first, actual (disk based result) second
        Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
    }
    Assert.assertEquals(objects.size(), i);
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) CommonThreadPool(edu.iu.dsc.tws.api.util.CommonThreadPool) Random(java.util.Random) Test(org.junit.Test) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Config(edu.iu.dsc.tws.api.config.Config) UUID(java.util.UUID) MessageTypes(edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes) Logger(java.util.logging.Logger) ArrayList(java.util.ArrayList) List(java.util.List) Comparator(java.util.Comparator) Assert(org.junit.Assert) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Comparator(java.util.Comparator) Random(java.util.Random) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Test(org.junit.Test)

Example 7 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class JoinTestUtils method getFullOuterJoined.

/**
 * Builds a fixed, mutable list of seven {@link JoinedTuple}s of the form
 * (key, left value, right value).
 *
 * <p>Presumably this is the expected full outer join of the test fixture's
 * two relations (names on the left, departments on the right) - confirm
 * against the fixture data. One row has no key and no right value, another
 * has no left value.
 *
 * @return a new list on every call
 */
public static List<Object> getFullOuterJoined() {
    final List<Object> joinedRows = new ArrayList<>();

    joinedRows.add(new JoinedTuple<>(34, "Robinson", "Clerical"));
    joinedRows.add(new JoinedTuple<>(33, "Jones", "Engineering"));
    joinedRows.add(new JoinedTuple<>(34, "Smith", "Clerical"));
    // left side only: null key and null right value
    joinedRows.add(new JoinedTuple<>(null, "Williams", null));
    joinedRows.add(new JoinedTuple<>(33, "Heisenberg", "Engineering"));
    joinedRows.add(new JoinedTuple<>(31, "Rafferty", "Sales"));
    // right side only: null left value
    joinedRows.add(new JoinedTuple<>(35, null, "Marketing"));

    return joinedRows;
}
Also used : ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 8 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class JoinTestUtils method getRightOuterJoined.

/**
 * Builds a fixed, mutable list of six {@link JoinedTuple}s of the form
 * (key, left value, right value).
 *
 * <p>Presumably this is the expected right outer join of the test fixture's
 * two relations (names on the left, departments on the right) - confirm
 * against the fixture data. The last row has no left value.
 *
 * @return a new list on every call
 */
public static List<Object> getRightOuterJoined() {
    final List<Object> joinedRows = new ArrayList<>();

    joinedRows.add(new JoinedTuple<>(34, "Smith", "Clerical"));
    joinedRows.add(new JoinedTuple<>(33, "Jones", "Engineering"));
    joinedRows.add(new JoinedTuple<>(34, "Robinson", "Clerical"));
    joinedRows.add(new JoinedTuple<>(33, "Heisenberg", "Engineering"));
    joinedRows.add(new JoinedTuple<>(31, "Rafferty", "Sales"));
    // right side only: null left value
    joinedRows.add(new JoinedTuple<>(35, null, "Marketing"));

    return joinedRows;
}
Also used : ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 9 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class HashJoinUtils method join.

/**
 * Hash join over two resettable iterators. One relation is loaded into an
 * in-memory hash (the "hashing" relation) while the other one is streamed
 * against it (the "probing" relation).
 *
 * <p>Roles of the two inputs:
 * <ul>
 *   <li>{@code LEFT} join: hashing = right, probing = left</li>
 *   <li>any other join type: hashing = left, probing = right</li>
 * </ul>
 * A probing tuple without a match in the current hash is emitted with a
 * {@code null} counterpart for {@code LEFT} and {@code RIGHT} joins and is
 * dropped for every other join type.
 *
 * <p>The hash is built only while the JVM reports more free memory than 10% of
 * {@code Runtime.totalMemory()}. If the hashing relation is not exhausted by
 * then, the probing relation is fully scanned against the partial hash, the
 * hash is rebuilt from the remaining hashing tuples, and the probing relation
 * is reset and scanned again.
 *
 * <p>NOTE(review): with more than one hashing pass, a probing tuple whose key
 * is absent from the <em>current</em> partial hash yields a null-padded tuple
 * in that pass for {@code LEFT}/{@code RIGHT} joins, even if it matches in
 * another pass - confirm whether this is intended.
 *
 * @param leftIt   left relation
 * @param rightIt  right relation
 * @param joinType join type, selects the hashing/probing roles and the handling
 *                 of unmatched probing tuples
 * @param keyType  key type, handed to the {@code THashMap} that backs the hash
 * @return iterator over the joined tuples; the first joined tuple is computed
 *         eagerly when this method is called
 * @throws Twister2RuntimeException if no tuple at all can be hashed because of
 *                                  the free memory limit
 */
public static Iterator<JoinedTuple> join(ResettableIterator<Tuple<?, ?>> leftIt, ResettableIterator<Tuple<?, ?>> rightIt, CommunicationContext.JoinType joinType, MessageType keyType) {
    // choosing hashing and probing relations
    // if inner join:
    // hashing = left
    // probing = right
    // if left join:
    // hashing = right
    // probing = left
    // if right join:
    // hashing = left
    // probing = right
    final ResettableIterator<Tuple<?, ?>> hashingRelation = joinType.equals(CommunicationContext.JoinType.LEFT) ? rightIt : leftIt;
    final ResettableIterator<Tuple<?, ?>> probingRelation = joinType.equals(CommunicationContext.JoinType.LEFT) ? leftIt : rightIt;
    // set the memory limits based on the heap allocation
    final double lowerMemoryBound = Runtime.getRuntime().totalMemory() * 0.1;
    return new Iterator<JoinedTuple>() {

        // true once the hashing relation has been fully consumed
        private boolean hashingDone;

        // key -> values of the hashing relation loaded in the current pass
        private Map<Object, List> keyHash = new THashMap<>(keyType);

        // always keep the nextJoinTuple in memory. hasNext() will use this field
        private JoinedTuple nextJoinTuple;

        // when iterating over the probing relation, current element
        // (which has been returned by next()) will be kept in memory since it should be combined
        // with all the tuples in leftListForCurrentKey. But this has to be done on demand, on next()
        // call of joined iterator.
        private Tuple<?, ?> currentProbingTuple;

        // list of values from the hashing relation
        // that matches with the currentProbingTuple
        private List leftListForCurrentKey;

        // keeping the index of leftListForCurrentKey
        private int leftListIndex = 0;

        // Instance initializers and field initializers run in textual order, so
        // this block must come after every field declaration above. Otherwise
        // "leftListIndex = 0" would run after the first probe and reset the
        // index, emitting the first joined tuple twice.
        {
            // initially do hashing & probing
            doHashing();
            doProbing();
        }

        /**
         * This method will perform following actions in order
         * <ol>
         *   <li>Clear existing HashMap</li>
         *   <li>Create HashMap from the hashingRelation till it hit the memory limits</li>
         *   <li>Determine whether the hashingRelation is fully consumed</li>
         * </ol>
         */
        private void doHashing() {
            this.keyHash.clear();
            // building the hash, as long as memory permits
            while (Runtime.getRuntime().freeMemory() > lowerMemoryBound && hashingRelation.hasNext()) {
                Tuple<?, ?> nextLeft = hashingRelation.next();
                keyHash.computeIfAbsent(nextLeft.getKey(), k -> new ArrayList()).add(nextLeft.getValue());
            }
            // determine whether hashRelation is fully consumed
            hashingDone = !hashingRelation.hasNext();
            if (!hashingDone && this.keyHash.isEmpty()) {
                // problem!. We have cleared the old hash, yet there's no free memory available to proceed
                throw new Twister2RuntimeException("Couldn't progress due to memory limitations." + "Available free memory : " + Runtime.getRuntime().freeMemory() + ", Expected free memory : " + lowerMemoryBound);
            }
        }

        /**
         * This method should be guaranteed to create a {@link JoinedTuple}. If a tuple can't be
         * created, caller should determine that before calling this method.
         * Additionally, this method should clear everything if everything related to
         * currentProbingTuple is processed.
         */
        private void progressProbing() {
            Object key = this.currentProbingTuple.getKey();
            // we have interchanged original iterators based on the join type.
            // that should be taken into consideration when creating the JoinedTuple
            Object left = joinType.equals(CommunicationContext.JoinType.LEFT) ? this.currentProbingTuple.getValue() : leftListForCurrentKey.get(leftListIndex);
            Object right = joinType.equals(CommunicationContext.JoinType.LEFT) ? leftListForCurrentKey.get(leftListIndex) : this.currentProbingTuple.getValue();
            this.nextJoinTuple = JoinedTuple.of(key, left, right);
            leftListIndex++;
            // if end of the list has reached, reset everything!
            if (leftListIndex == leftListForCurrentKey.size()) {
                currentProbingTuple = null;
                leftListForCurrentKey = null;
                leftListIndex = 0;
            }
        }

        /**
         * This method iterates through the probing relation until the next
         * {@link JoinedTuple} is available or the join is finished.
         */
        private void doProbing() {
            // if there is a non null nextJoinTuple, no need of proceeding
            while (this.nextJoinTuple == null) {
                // a null currentProbingTuple means the previous probing tuple has been
                // combined with its whole hashed list, so a new probing tuple is needed
                if (this.currentProbingTuple == null) {
                    if (probingRelation.hasNext()) {
                        this.currentProbingTuple = probingRelation.next();
                        this.leftListForCurrentKey = this.keyHash.get(currentProbingTuple.getKey());
                        if (this.leftListForCurrentKey == null) {
                            // handle left and right joins here
                            if (joinType.equals(CommunicationContext.JoinType.LEFT)) {
                                this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(), currentProbingTuple.getValue(), null);
                            } else if (joinType.equals(CommunicationContext.JoinType.RIGHT)) {
                                this.nextJoinTuple = JoinedTuple.of(currentProbingTuple.getKey(), null, currentProbingTuple.getValue());
                            }
                            // any join : We are done with currentProbingTuple
                            this.currentProbingTuple = null;
                        } else {
                            progressProbing();
                        }
                    } else {
                        // probing iterator has reached to an end for current HashMap.
                        if (!hashingDone) {
                            // clear current hash and reset the probing iterator
                            doHashing();
                            probingRelation.reset();
                        } else {
                            // end of join operation. Yay!
                            break;
                        }
                    }
                } else {
                    // still in the middle of combining the current probing tuple with its hashed list
                    progressProbing();
                }
            }
        }

        @Override
        public boolean hasNext() {
            return this.nextJoinTuple != null;
        }

        @Override
        public JoinedTuple next() {
            if (!hasNext()) {
                throw new Twister2RuntimeException("Join operation has reached to an end. " + "Use hasNext() to check the status.");
            }
            JoinedTuple currentJoinTuple = nextJoinTuple;
            nextJoinTuple = null;
            // create the next JoinTuple before returning
            doProbing();
            return currentJoinTuple;
        }
    };
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) List(java.util.List) Iterator(java.util.Iterator) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) MessageType(edu.iu.dsc.tws.api.comms.messaging.types.MessageType) Map(java.util.Map) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Logger(java.util.logging.Logger) Collections(java.util.Collections) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) ArrayList(java.util.ArrayList) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ArrayList(java.util.ArrayList) Iterator(java.util.Iterator) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 10 with JoinedTuple

use of edu.iu.dsc.tws.api.comms.structs.JoinedTuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtils method outerJoin.

/**
 * Sort-merge outer join of the left and right relation using the tuple key.
 *
 * <p>Both input lists are sorted in place with {@code comparator} before they
 * are merged. Tuples with equal keys are combined into the full cross product
 * of the two equal-key runs. Tuples without a partner are emitted with a
 * {@code null} counterpart when {@code outerJoinType.includeLeft()} (unmatched
 * left tuples) or {@code outerJoinType.includeRight()} (unmatched right tuples)
 * allows it; otherwise they are dropped.
 *
 * @param leftRelation  left relation, sorted in place by this method
 * @param rightRelation right relation, sorted in place by this method
 * @param comparator    orders and matches tuples
 * @param outerJoinType decides which unmatched side(s) are kept
 * @return the joined tuples as {@link JoinedTuple} instances
 */
private static List<Object> outerJoin(List<Tuple> leftRelation, List<Tuple> rightRelation, KeyComparatorWrapper comparator, CommunicationContext.JoinType outerJoinType) {
    int leftIndex = 0;
    int rightIndex = 0;
    leftRelation.sort(comparator);
    rightRelation.sort(comparator);
    List<Object> outPut = new ArrayList<>();
    while (leftIndex < leftRelation.size() && rightIndex < rightRelation.size()) {
        Tuple left = leftRelation.get(leftIndex);
        Tuple right = rightRelation.get(rightIndex);
        int order = comparator.compare(left, right);
        if (order == 0) {
            // find the end (exclusive) of the run of left tuples sharing this key
            int leftEnd = leftIndex + 1;
            while (leftEnd < leftRelation.size() && comparator.compare(leftRelation.get(leftEnd), right) == 0) {
                leftEnd++;
            }
            // find the end (exclusive) of the run of right tuples sharing this key
            int rightEnd = rightIndex + 1;
            while (rightEnd < rightRelation.size() && comparator.compare(left, rightRelation.get(rightEnd)) == 0) {
                rightEnd++;
            }
            // Emit every left/right combination of the two runs. Pairing only the
            // first tuple of each run with the other run would lose combinations
            // when both sides hold duplicates of the key.
            for (int r = rightIndex; r < rightEnd; r++) {
                Tuple matchedRight = rightRelation.get(r);
                for (int l = leftIndex; l < leftEnd; l++) {
                    Tuple matchedLeft = leftRelation.get(l);
                    outPut.add(new JoinedTuple<>(matchedLeft.getKey(), matchedLeft.getValue(), matchedRight.getValue()));
                }
            }
            leftIndex = leftEnd;
            rightIndex = rightEnd;
        } else if (order < 0) {
            // left key has no partner on the right
            if (outerJoinType.includeLeft()) {
                outPut.add(new JoinedTuple<>(left.getKey(), left.getValue(), null));
            }
            leftIndex++;
        } else {
            // right key has no partner on the left
            if (outerJoinType.includeRight()) {
                outPut.add(new JoinedTuple<>(right.getKey(), null, right.getValue()));
            }
            rightIndex++;
        }
    }
    // whatever remains on either side cannot have a partner anymore
    while (leftIndex < leftRelation.size() && outerJoinType.includeLeft()) {
        Tuple left = leftRelation.get(leftIndex);
        outPut.add(new JoinedTuple<>(left.getKey(), left.getValue(), null));
        leftIndex++;
    }
    while (rightIndex < rightRelation.size() && outerJoinType.includeRight()) {
        Tuple right = rightRelation.get(rightIndex);
        outPut.add(new JoinedTuple<>(right.getKey(), null, right.getValue()));
        rightIndex++;
    }
    return outPut;
}
Also used : ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Aggregations

JoinedTuple (edu.iu.dsc.tws.api.comms.structs.JoinedTuple)22 ArrayList (java.util.ArrayList)20 Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)11 Iterator (java.util.Iterator)9 RestorableIterator (edu.iu.dsc.tws.comms.shuffle.RestorableIterator)8 Test (org.junit.Test)8 List (java.util.List)7 Random (java.util.Random)7 CommunicationContext (edu.iu.dsc.tws.api.comms.CommunicationContext)6 Logger (java.util.logging.Logger)6 MessageTypes (edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes)5 Config (edu.iu.dsc.tws.api.config.Config)5 CommonThreadPool (edu.iu.dsc.tws.api.util.CommonThreadPool)5 FSKeyedSortedMerger2 (edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2)5 Comparator (java.util.Comparator)5 UUID (java.util.UUID)5 Assert (org.junit.Assert)5 ResettableIterator (edu.iu.dsc.tws.comms.shuffle.ResettableIterator)4 FSKeyedMerger (edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger)3 HashSet (java.util.HashSet)3