Search in sources:

Example 1 with FSKeyedMerger

use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.

the class DPartitionBatchFinalReceiver method initMergers.

/**
 * (Re)creates one {@link Shuffle} merger and one empty finished-source set for every target in
 * {@code expIds}; intended to be called after each refresh.
 *
 * <p>The merger implementation is chosen per target:
 * <ul>
 *   <li>no key type on the partition: {@link FSMerger};</li>
 *   <li>key type and a comparator: {@link FSKeyedSortedMerger2};</li>
 *   <li>key type without a comparator: {@link FSKeyedMerger}.</li>
 * </ul>
 *
 * @param maxBytesInMemory    passed to every merger as its in-memory byte limit
 * @param maxRecordsInMemory  passed to the non-sorted mergers as their in-memory record limit
 * @param maxFileSize         passed to the sorted merger only
 * @param parallelIOAllowance passed to the sorted merger only
 */
private void initMergers(long maxBytesInMemory, long maxRecordsInMemory, long maxFileSize, int parallelIOAllowance) {
    for (Integer target : expIds.keySet()) {
        // Spread targets over the configured shuffle directories using the task's index
        // within its node, wrapping around when there are more tasks than directories.
        int taskIndexInNode = partition.getLogicalPlan().getIndexOfTaskInNode(target);
        int directorySlot = taskIndexInNode % this.shuffleDirectories.size();
        String targetDirectory = this.shuffleDirectories.get(directorySlot);

        final Shuffle merger;
        if (partition.getKeyType() == null) {
            // Non-keyed data.
            merger = new FSMerger(
                maxBytesInMemory,
                maxRecordsInMemory,
                targetDirectory,
                DFWIOUtils.getOperationName(target, partition, refresh),
                partition.getDataType());
        } else if (comparator != null) {
            // Keyed data with a comparator supplied.
            merger = new FSKeyedSortedMerger2(
                maxBytesInMemory,
                maxFileSize,
                targetDirectory,
                DFWIOUtils.getOperationName(target, partition, refresh),
                partition.getKeyType(),
                partition.getDataType(),
                comparator,
                target,
                groupByKey,
                parallelIOAllowance);
        } else {
            // Keyed data without a comparator.
            merger = new FSKeyedMerger(
                maxBytesInMemory,
                maxRecordsInMemory,
                targetDirectory,
                DFWIOUtils.getOperationName(target, partition, refresh),
                partition.getKeyType(),
                partition.getDataType());
        }

        sortedMergers.put(target, merger);
        finishedSources.put(target, new HashSet<>());
    }
}
Also used : FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) Shuffle(edu.iu.dsc.tws.comms.shuffle.Shuffle) FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) FSMerger(edu.iu.dsc.tws.comms.shuffle.FSMerger)

Example 2 with FSKeyedMerger

use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.

the class HashJoinUtilsTest method leftJoinDiskTest.

/**
 * Left-joins the contents of two {@link FSKeyedMerger} instances and checks the joined output.
 *
 * <p>The left merger receives every key in {@code [0, noOfTuples)} with the value 1; the right
 * merger receives a random subset of those keys with the value 2. Every joined tuple must carry
 * the left value, must carry the right value exactly when its key was added to the right side
 * (and {@code null} otherwise), and the distinct keys seen must number {@code noOfTuples}.
 *
 * <p>Both mergers are built with in-memory limits of 0 bytes / 0 records; presumably that is
 * what pushes the data through disk, as the test name suggests (confirm against FSKeyedMerger).
 */
@Test
public void leftJoinDiskTest() {
    int noOfTuples = 1000;
    // Seeded from the clock, so the right-side key subset differs between runs.
    Random random = new Random(System.currentTimeMillis());
    List<Integer> keys1 = new ArrayList<>();
    List<Integer> keys2 = new ArrayList<>();
    for (int i = 0; i < noOfTuples; i++) {
        keys1.add(i);
        if (random.nextBoolean()) {
            keys2.add(i);
        }
    }
    Collections.shuffle(keys1);
    Collections.shuffle(keys2);
    FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
    FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
    try {
        // Serialized values: every left tuple holds 1, every right tuple holds 2.
        byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
        byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();
        for (Integer key : keys1) {
            fsMerger1.add(key, leftValue, Integer.BYTES);
            // run() is called after every add; presumably it lets the merger process what it
            // has buffered so far - confirm against FSKeyedMerger.
            fsMerger1.run();
        }
        for (Integer key : keys2) {
            fsMerger2.add(key, rightValue, Integer.BYTES);
            fsMerger2.run();
        }
        fsMerger1.switchToReading();
        fsMerger2.switchToReading();
        ResettableIterator it1 = fsMerger1.readIterator();
        ResettableIterator it2 = fsMerger2.readIterator();
        Iterator<JoinedTuple> iterator = HashJoinUtils.leftJoin(it1, it2, MessageTypes.INTEGER);
        Set<Integer> keysReceived = new HashSet<>();
        Set<Integer> rightKeysLookup = new HashSet<>(keys2);
        while (iterator.hasNext()) {
            JoinedTuple joinedTuple = iterator.next();
            Assert.assertEquals(1, joinedTuple.getLeftValue());
            if (rightKeysLookup.contains(joinedTuple.getKey())) {
                Assert.assertEquals(2, joinedTuple.getRightValue());
            } else {
                Assert.assertNull(joinedTuple.getRightValue());
            }
            keysReceived.add((Integer) joinedTuple.getKey());
        }
        // A left join must emit every left key, matched or not.
        Assert.assertEquals(noOfTuples, keysReceived.size());
    } finally {
        // Clean both mergers even when an assertion or a merger call above throws; the nested
        // finally ensures the second clean() still runs if the first one fails.
        try {
            fsMerger1.clean();
        } finally {
            fsMerger2.clean();
        }
    }
}
Also used : Random(java.util.Random) FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with FSKeyedMerger

use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.

the class DKGatherBatchFinalReceiver method init.

/**
 * Initializes the receiver: delegates to the superclass, reads the shuffle limits from the
 * configuration, registers one {@link Shuffle} merger per expected target, and finally
 * initializes the bulk receiver with the set of targets.
 *
 * <p>A {@link FSKeyedSortedMerger2} is used when {@code sorted} is set, otherwise a
 * {@link FSKeyedMerger}.
 *
 * @param cfg         configuration the shuffle limits are read from
 * @param op          the data flow operation, forwarded to the superclass
 * @param expectedIds map whose keys are the targets to create mergers for
 */
@Override
public void init(Config cfg, DataFlowOperation op, Map<Integer, List<Integer>> expectedIds) {
    super.init(cfg, op, expectedIds);

    final long memoryByteLimit = CommunicationContext.getShuffleMaxBytesInMemory(cfg);
    final long memoryRecordLimit = CommunicationContext.getShuffleMaxRecordsInMemory(cfg);
    final long shuffleFileSize = CommunicationContext.getShuffleFileSize(cfg);
    final int ioParallelism = CommunicationContext.getParallelIOAllowance(cfg);

    for (Integer target : expectedIds.keySet()) {
        Shuffle merger = sorted
            ? new FSKeyedSortedMerger2(
                memoryByteLimit,
                shuffleFileSize,
                shuffleDirectory,
                getOperationName(target),
                dataFlowOperation.getKeyType(),
                dataFlowOperation.getDataType(),
                comparator,
                target,
                this.groupByKey,
                ioParallelism)
            : new FSKeyedMerger(
                memoryByteLimit,
                memoryRecordLimit,
                shuffleDirectory,
                getOperationName(target),
                dataFlowOperation.getKeyType(),
                dataFlowOperation.getDataType());
        sortedMergers.put(target, merger);
    }

    this.bulkReceiver.init(cfg, expectedIds.keySet());
}
Also used : FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) Shuffle(edu.iu.dsc.tws.comms.shuffle.Shuffle) FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger)

Example 4 with FSKeyedMerger

use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.

the class HashJoinUtilsTest method innerJoinDiskTest.

/**
 * Inner-joins the contents of two {@link FSKeyedMerger} instances and checks the joined output.
 *
 * <p>Both mergers receive every key in {@code [0, noOfTuples)} (each in its own shuffled
 * order); left tuples hold the value 1 and right tuples hold the value 2. Every joined tuple
 * must carry both values, and the distinct keys seen must number {@code noOfTuples}.
 *
 * <p>Both mergers are built with in-memory limits of 0 bytes / 0 records; presumably that is
 * what pushes the data through disk, as the test name suggests (confirm against FSKeyedMerger).
 */
@Test
public void innerJoinDiskTest() {
    int noOfTuples = 1000;
    List<Integer> keys1 = new ArrayList<>();
    List<Integer> keys2 = new ArrayList<>();
    for (int i = 0; i < noOfTuples; i++) {
        keys1.add(i);
        keys2.add(i);
    }
    Collections.shuffle(keys1);
    Collections.shuffle(keys2);
    FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
    FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
    try {
        // Serialized values: every left tuple holds 1, every right tuple holds 2.
        byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
        byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();
        for (int i = 0; i < noOfTuples; i++) {
            fsMerger1.add(keys1.get(i), leftValue, Integer.BYTES);
            fsMerger2.add(keys2.get(i), rightValue, Integer.BYTES);
            // run() is called after every add; presumably it lets the merger process what it
            // has buffered so far - confirm against FSKeyedMerger.
            fsMerger1.run();
            fsMerger2.run();
        }
        fsMerger1.switchToReading();
        fsMerger2.switchToReading();
        ResettableIterator it1 = fsMerger1.readIterator();
        ResettableIterator it2 = fsMerger2.readIterator();
        Iterator<JoinedTuple> iterator = HashJoinUtils.innerJoin(it1, it2, MessageTypes.INTEGER);
        Set<Integer> keysReceived = new HashSet<>();
        while (iterator.hasNext()) {
            JoinedTuple joinedTuple = iterator.next();
            Assert.assertEquals(1, joinedTuple.getLeftValue());
            Assert.assertEquals(2, joinedTuple.getRightValue());
            keysReceived.add((Integer) joinedTuple.getKey());
        }
        // Both sides hold the same key set, so every key must appear in the join.
        Assert.assertEquals(noOfTuples, keysReceived.size());
    } finally {
        // Clean both mergers even when an assertion or a merger call above throws; the nested
        // finally ensures the second clean() still runs if the first one fails.
        try {
            fsMerger1.clean();
        } finally {
            fsMerger2.clean();
        }
    }
}
Also used : FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with FSKeyedMerger

use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.

the class HashJoinUtilsTest method rightJoinDiskTest.

/**
 * Right-joins the contents of two {@link FSKeyedMerger} instances and checks the joined output.
 *
 * <p>The left merger receives every key in {@code [0, noOfTuples)} with the value 1; the right
 * merger receives a random subset of those keys with the value 2. Every joined tuple must carry
 * the right value, must carry the left value when its key was added to the left side (and
 * {@code null} otherwise), and the distinct keys seen must number {@code keys2.size()}.
 *
 * <p>Both mergers are built with in-memory limits of 0 bytes / 0 records; presumably that is
 * what pushes the data through disk, as the test name suggests (confirm against FSKeyedMerger).
 */
@Test
public void rightJoinDiskTest() {
    int noOfTuples = 1000;
    // Seeded from the clock, so the right-side key subset differs between runs.
    Random random = new Random(System.currentTimeMillis());
    List<Integer> keys1 = new ArrayList<>();
    List<Integer> keys2 = new ArrayList<>();
    for (int i = 0; i < noOfTuples; i++) {
        keys1.add(i);
        if (random.nextBoolean()) {
            keys2.add(i);
        }
    }
    Collections.shuffle(keys1);
    Collections.shuffle(keys2);
    FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
    FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
    try {
        // Serialized values: every left tuple holds 1, every right tuple holds 2.
        byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
        byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();
        for (Integer key : keys1) {
            fsMerger1.add(key, leftValue, Integer.BYTES);
            // run() is called after every add; presumably it lets the merger process what it
            // has buffered so far - confirm against FSKeyedMerger.
            fsMerger1.run();
        }
        for (Integer key : keys2) {
            fsMerger2.add(key, rightValue, Integer.BYTES);
            fsMerger2.run();
        }
        fsMerger1.switchToReading();
        fsMerger2.switchToReading();
        ResettableIterator it1 = fsMerger1.readIterator();
        ResettableIterator it2 = fsMerger2.readIterator();
        Iterator<JoinedTuple> iterator = HashJoinUtils.rightJoin(it1, it2, MessageTypes.INTEGER);
        Set<Integer> keysReceived = new HashSet<>();
        Set<Integer> leftKeyLookup = new HashSet<>(keys1);
        while (iterator.hasNext()) {
            JoinedTuple joinedTuple = iterator.next();
            Assert.assertEquals(2, joinedTuple.getRightValue());
            if (leftKeyLookup.contains(joinedTuple.getKey())) {
                Assert.assertEquals(1, joinedTuple.getLeftValue());
            } else {
                Assert.assertNull(joinedTuple.getLeftValue());
            }
            keysReceived.add((Integer) joinedTuple.getKey());
        }
        // A right join must emit every right key, matched or not.
        Assert.assertEquals(keys2.size(), keysReceived.size());
    } finally {
        // Clean both mergers even when an assertion or a merger call above throws; the nested
        // finally ensures the second clean() still runs if the first one fails.
        try {
            fsMerger1.clean();
        } finally {
            fsMerger2.clean();
        }
    }
}
Also used : Random(java.util.Random) FSKeyedMerger(edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) ResettableIterator(edu.iu.dsc.tws.comms.shuffle.ResettableIterator) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

FSKeyedMerger (edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger): 5, JoinedTuple (edu.iu.dsc.tws.api.comms.structs.JoinedTuple): 3, ResettableIterator (edu.iu.dsc.tws.comms.shuffle.ResettableIterator): 3, ArrayList (java.util.ArrayList): 3, HashSet (java.util.HashSet): 3, Test (org.junit.Test): 3, FSKeyedSortedMerger2 (edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2): 2, Shuffle (edu.iu.dsc.tws.comms.shuffle.Shuffle): 2, Random (java.util.Random): 2, FSMerger (edu.iu.dsc.tws.comms.shuffle.FSMerger): 1