Search in sources :

Example 41 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class KeyedSerializerTest method testBuildLargeDoubleMessage.

/**
 * Builds keyed data with a {@code DOUBLE} key and a {@code DOUBLE_ARRAY} value, passes it
 * through {@code keyedSingleValueCase} and checks that the tuple read back from the resulting
 * {@link InMessage} carries the same key and the same array contents as the input.
 *
 * <p>NOTE(review): the meaning of the literals (800, 10 buffers, size 1000) is defined by the
 * helper methods, which are not visible here; presumably the payload is sized so that it
 * spans several buffers - confirm against the helpers.
 */
@Test
public void testBuildLargeDoubleMessage() {
    int numBuffers = 10;
    int size = 1000;
    MessageType type = MessageTypes.DOUBLE_ARRAY;
    Object data = createKeyedData(800, type, MessageTypes.DOUBLE);
    InMessage inMessage = keyedSingleValueCase(numBuffers, size, type, MessageTypes.DOUBLE, data);
    Tuple actual = (Tuple) inMessage.getDeserializedData();
    Tuple expected = (Tuple) data;
    // JUnit takes (expected, actual, delta); keeping that order makes failure messages
    // report the original input as "expected" and the deserialized value as "actual".
    Assert.assertEquals((double) expected.getKey(), (double) actual.getKey(), 0.1);
    Assert.assertArrayEquals((double[]) expected.getValue(), (double[]) actual.getValue(), 0.01);
}
Also used : InMessage(edu.iu.dsc.tws.comms.dfw.InMessage) MessageType(edu.iu.dsc.tws.api.comms.messaging.types.MessageType) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Test(org.junit.Test)

Example 42 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class FSKeyedSortedMergerTest method testStart.

/**
 * Adds {@code noOfKeys} integer keys to {@code fsMerger}, each with {@code dataForEachKey}
 * copies of the same packed int array, then reads everything back and verifies that
 * keys arrive in non-decreasing order, that no key is repeated, that every array has the
 * original length, and that each key carries the expected number of arrays.
 */
@Test
public void testStart() throws Exception {
    final int dataLength = 1024;
    final int noOfKeys = 1000;
    final int dataForEachKey = 10;

    // Every value is the same array of ones, packed once and reused for all insertions.
    int[] ones = new int[dataLength];
    Arrays.fill(ones, 1);
    byte[] packed = IntegerArrayPacker.getInstance().packToByteArray(ones);

    for (int key = 0; key < noOfKeys; key++) {
        for (int copy = 0; copy < dataForEachKey; copy++) {
            fsMerger.add(key, packed, packed.length);
        }
        // run() is invoked once per key, after all of that key's values have been added
        fsMerger.run();
    }
    fsMerger.switchToReading();

    Set<Integer> seenKeys = new HashSet<>();
    int previousKey = 0;
    int keysRead = 0;
    Iterator<Object> tuples = fsMerger.readIterator();
    while (tuples.hasNext()) {
        Tuple entry = (Tuple) tuples.next();
        int key = (int) entry.getKey();

        // keys must never go backwards
        if (key < previousKey) {
            Assert.fail("Wrong order");
        }
        previousKey = key;

        // add() returns false when the key was already present
        if (!seenKeys.add(key)) {
            Assert.fail("Duplicate value");
        }

        // data check: the value of each tuple is itself an iterator over int arrays
        int arraysForKey = 0;
        Iterator<?> values = (Iterator<?>) entry.getValue();
        while (values.hasNext()) {
            int[] unpacked = (int[]) values.next();
            if (unpacked.length != dataLength) {
                Assert.fail("Data sizes mismatch");
            }
            arraysForKey++;
        }
        if (arraysForKey != dataForEachKey) {
            Assert.fail("Invalid amount of data arrays for key");
        }
        keysRead++;
    }

    if (keysRead != noOfKeys) {
        Assert.fail("Count =  " + keysRead);
    }
}
Also used : Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 43 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class HashJoinUtils method innerJoin.

/**
 * Hash-based inner join of two tuple lists.
 *
 * <p>The left relation is grouped by key into a {@code THashMap}; the right relation is then
 * scanned and, for every left tuple sharing the right tuple's key, one {@link JoinedTuple}
 * of (key, left value, right value) is produced. Right tuples without a matching left key
 * contribute nothing.
 *
 * @param leftRelation  tuples of the left side
 * @param rightRelation tuples of the right side
 * @param messageType   handed to the {@code THashMap} constructor
 *                      (presumably the key type used for hashing - confirm)
 * @return the joined tuples, in right-relation scan order
 */
public static List<Object> innerJoin(List<Tuple> leftRelation, List<Tuple> rightRelation, MessageType messageType) {
    // Build phase: bucket every left tuple under its key.
    Map<Object, List<Tuple>> leftByKey = new THashMap<>(messageType);
    for (Tuple left : leftRelation) {
        List<Tuple> bucket = leftByKey.computeIfAbsent(left.getKey(), key -> new ArrayList<>());
        bucket.add(left);
    }

    // Probe phase: pair each right tuple with every left tuple in its bucket.
    List<Object> result = new ArrayList<>();
    for (Tuple right : rightRelation) {
        List<Tuple> matches = leftByKey.getOrDefault(right.getKey(), Collections.emptyList());
        for (Tuple left : matches) {
            result.add(JoinedTuple.of(left.getKey(), left.getValue(), right.getValue()));
        }
    }
    return result;
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) List(java.util.List) Iterator(java.util.Iterator) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) MessageType(edu.iu.dsc.tws.api.comms.messaging.types.MessageType) Map(java.util.Map) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Logger(java.util.logging.Logger) Collections(java.util.Collections) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 44 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class HashJoinUtils method rightOuterJoin.

/**
 * Hash-based right outer join of two tuple lists.
 *
 * <p>The left relation is grouped by key into a {@code THashMap}. Each right tuple is then
 * paired with every left tuple sharing its key; a right tuple with no left partner is still
 * emitted, with {@code null} in the left-value position.
 *
 * @param leftRelation  tuples of the left side
 * @param rightRelation tuples of the right side; every one of them appears in the result
 * @param messageType   handed to the {@code THashMap} constructor
 *                      (presumably the key type used for hashing - confirm)
 * @return the joined tuples, in right-relation scan order
 */
public static List<Object> rightOuterJoin(List<Tuple> leftRelation, List<Tuple> rightRelation, MessageType messageType) {
    // Build phase: bucket every left tuple under its key.
    Map<Object, List<Tuple>> leftByKey = new THashMap<>(messageType);
    for (Tuple left : leftRelation) {
        List<Tuple> bucket = leftByKey.computeIfAbsent(left.getKey(), key -> new ArrayList<>());
        bucket.add(left);
    }

    // Probe phase: every right tuple produces at least one output row.
    List<Object> result = new ArrayList<>();
    for (Tuple right : rightRelation) {
        List<Tuple> matches = leftByKey.getOrDefault(right.getKey(), Collections.emptyList());
        if (matches.isEmpty()) {
            // no left partner: keep the right tuple, leave the left value empty
            result.add(JoinedTuple.of(right.getKey(), null, right.getValue()));
        } else {
            for (Tuple left : matches) {
                result.add(JoinedTuple.of(left.getKey(), left.getValue(), right.getValue()));
            }
        }
    }
    return result;
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) List(java.util.List) Iterator(java.util.Iterator) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) MessageType(edu.iu.dsc.tws.api.comms.messaging.types.MessageType) Map(java.util.Map) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Logger(java.util.logging.Logger) Collections(java.util.Collections) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 45 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtils method outerJoin.

/**
 * Merges two tuple streams into an outer join, producing one {@link JoinedTuple} at a time.
 *
 * <p>Tuples whose keys compare equal yield {@code (key, leftValue, rightValue)}. A tuple
 * without a partner on the other side is emitted with {@code null} in the missing position,
 * but only if {@code outerJoinType} includes that side; otherwise it is skipped. This holds
 * for unmatched tuples that remain after the other input has run out as well.
 *
 * <p>The merge always advances the side with the smaller key, so it assumes both iterators
 * deliver tuples in the order defined by {@code comparator}.
 *
 * <p>NOTE(review): neither iterator is rewound anywhere in this method. When the same key
 * occurs several times on BOTH sides, only the pairings with the first left and first right
 * tuple of that key are produced (e.g. left [k,k] x right [k,k] yields three tuples, not
 * four). Confirm whether restore points of {@code RestorableIterator} were meant to be used.
 *
 * @param leftIt        left input
 * @param rightIt       right input
 * @param comparator    compares the keys of a left and a right tuple
 * @param outerJoinType decides, via {@code includeLeft()} / {@code includeRight()}, which
 *                      unmatched tuples are kept
 * @return an iterator over the joined tuples; the first tuple is computed when the iterator
 *         is created and each following one during the preceding {@code next()} call
 */
public static Iterator<JoinedTuple> outerJoin(RestorableIterator<Tuple<?, ?>> leftIt, RestorableIterator<Tuple<?, ?>> rightIt, KeyComparatorWrapper comparator, CommunicationContext.JoinType outerJoinType) {
    return new Iterator<JoinedTuple>() {

        // tuple handed out by the next call to next(); null once the join is exhausted
        private JoinedTuple nextJoinTuple;

        // the pair of tuples most recently compared by the merge loop
        private Tuple currentLeft;

        private Tuple currentRight;

        // backup variables hold a Tuple that was already pulled from its iterator
        // but has not been joined or emitted yet
        private Tuple backedUpLeft;

        private Tuple backedUpRight;

        // flags to mark the required side of iteration
        private boolean shouldDoLeftIterations = false;

        private boolean shouldDoRightIterations = false;

        /**
         * After a key match: pairs further left tuples with the matched right tuple for as
         * long as their keys still compare equal. Returns null when the run ends.
         */
        private JoinedTuple doLeftIteration() {
            if (!shouldDoLeftIterations) {
                return null;
            }
            JoinedTuple jtFromLeftIt = null;
            if (leftIt.hasNext()) {
                Tuple l = leftIt.next();
                if (comparator.compare(l, this.currentRight) == 0) {
                    jtFromLeftIt = new JoinedTuple<>(l.getKey(), l.getValue(), this.currentRight.getValue());
                } else {
                    // first left tuple with a different key: keep it for the merge loop
                    this.backedUpLeft = l;
                }
            }
            // end of the left run (jtFromLeftIt == null): hand over to the right iterations
            if (jtFromLeftIt == null) {
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = true;
            }
            return jtFromLeftIt;
        }

        /**
         * After the left run: pairs further right tuples with the matched left tuple for as
         * long as their keys still compare equal. Returns null when the run ends.
         */
        private JoinedTuple doRightIteration() {
            if (!shouldDoRightIterations) {
                return null;
            }
            JoinedTuple jtFromRightIt = null;
            if (rightIt.hasNext()) {
                Tuple r = rightIt.next();
                if (comparator.compare(this.currentLeft, r) == 0) {
                    jtFromRightIt = new JoinedTuple<>(r.getKey(), this.currentLeft.getValue(), r.getValue());
                } else {
                    // first right tuple with a different key: keep it for the merge loop
                    this.backedUpRight = r;
                }
            }
            // end of the right run (jtFromRightIt == null): the merge loop takes over again
            if (jtFromRightIt == null) {
                this.shouldDoRightIterations = false;
            }
            return jtFromRightIt;
        }

        /**
         * Called once the merge loop cannot continue because one input is exhausted. Every
         * tuple still pending on the other input has no partner, so it is emitted with a
         * null counterpart if the join type includes its side. Returns null when nothing
         * is left to emit.
         */
        private JoinedTuple drainUnmatched() {
            if (outerJoinType.includeLeft() && (this.backedUpLeft != null || leftIt.hasNext())) {
                Tuple left = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
                this.backedUpLeft = null;
                return new JoinedTuple<>(left.getKey(), left.getValue(), null);
            }
            if (outerJoinType.includeRight() && (this.backedUpRight != null || rightIt.hasNext())) {
                Tuple right = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
                this.backedUpRight = null;
                return new JoinedTuple<>(right.getKey(), null, right.getValue());
            }
            return null;
        }

        private void makeNextJoinTuple() {
            // first continue a run of equal keys started by an earlier match, if any
            nextJoinTuple = this.doLeftIteration();
            if (nextJoinTuple == null) {
                nextJoinTuple = this.doRightIteration();
            }
            // merge step: runs while both sides still have a tuple to offer
            while (nextJoinTuple == null && (this.backedUpLeft != null || leftIt.hasNext()) && (this.backedUpRight != null || rightIt.hasNext())) {
                this.currentLeft = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
                // we used the backup, so setting to null
                this.backedUpLeft = null;
                this.currentRight = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
                this.backedUpRight = null;
                // still we don't need left or right iterations at this point
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = false;
                int order = comparator.compare(this.currentLeft, this.currentRight);
                if (order == 0) {
                    this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), this.currentRight.getValue());
                    // schedule to run the left iteration next.
                    // Left iteration at the end will schedule right iteration
                    this.shouldDoLeftIterations = true;
                    break;
                } else if (order < 0) {
                    // left key is smaller: the left tuple has no partner
                    if (outerJoinType.includeLeft()) {
                        this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), null);
                    }
                    if (leftIt.hasNext()) {
                        this.backedUpLeft = leftIt.next();
                    }
                    // the right tuple is still unmatched, look at it again next round
                    this.backedUpRight = this.currentRight;
                } else {
                    // right key is smaller: the right tuple has no partner
                    if (outerJoinType.includeRight()) {
                        this.nextJoinTuple = new JoinedTuple<>(this.currentRight.getKey(), null, this.currentRight.getValue());
                    }
                    if (rightIt.hasNext()) {
                        this.backedUpRight = rightIt.next();
                    }
                    // the left tuple is still unmatched, look at it again next round
                    this.backedUpLeft = this.currentLeft;
                }
            }
            // One side has run out; whatever is pending on the other side is unmatched and
            // must not be lost for an outer join.
            if (nextJoinTuple == null) {
                nextJoinTuple = this.drainUnmatched();
            }
        }

        {
            // start by creating the first join tuple
            this.makeNextJoinTuple();
        }

        @Override
        public boolean hasNext() {
            return nextJoinTuple != null;
        }

        @Override
        public JoinedTuple next() {
            if (nextJoinTuple == null) {
                // Iterator contract: signal exhaustion instead of handing out null
                throw new java.util.NoSuchElementException("No more joined tuples");
            }
            JoinedTuple current = nextJoinTuple;
            this.makeNextJoinTuple();
            return current;
        }
    };
}
Also used : RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Aggregations

Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple)98 Iterator (java.util.Iterator)38 List (java.util.List)35 Logger (java.util.logging.Logger)34 ArrayList (java.util.ArrayList)29 Config (edu.iu.dsc.tws.api.config.Config)27 WorkerEnvironment (edu.iu.dsc.tws.api.resource.WorkerEnvironment)24 Test (org.junit.Test)24 BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment)18 InMessage (edu.iu.dsc.tws.comms.dfw.InMessage)17 HashMap (java.util.HashMap)16 TSetEnvironment (edu.iu.dsc.tws.tset.env.TSetEnvironment)15 JobConfig (edu.iu.dsc.tws.api.JobConfig)14 MessageTypes (edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes)14 JoinedTuple (edu.iu.dsc.tws.api.comms.structs.JoinedTuple)14 ResourceAllocator (edu.iu.dsc.tws.rsched.core.ResourceAllocator)14 SourceTSet (edu.iu.dsc.tws.tset.sets.batch.SourceTSet)13 CommunicationContext (edu.iu.dsc.tws.api.comms.CommunicationContext)11 MessageType (edu.iu.dsc.tws.api.comms.messaging.types.MessageType)11 Comparator (java.util.Comparator)11