Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.
From the class DPartitionBatchFinalReceiver, method initMergers.
/**
 * Creates a fresh {@code Shuffle} merger for every target in {@code expIds} and resets the
 * set of finished sources for that target. This happens after each refresh.
 *
 * <p>The merger type follows the partition configuration: no key type yields an
 * {@code FSMerger}; a key type plus a comparator yields an {@code FSKeyedSortedMerger2};
 * a key type without a comparator yields an {@code FSKeyedMerger}.
 *
 * @param maxBytesInMemory handed to every merger as its first constructor argument
 * @param maxRecordsInMemory handed to the two non-sorting mergers only
 * @param maxFileSize handed to the sorted keyed merger only
 * @param parallelIOAllowance handed to the sorted keyed merger only
 */
private void initMergers(long maxBytesInMemory, long maxRecordsInMemory, long maxFileSize, int parallelIOAllowance) {
  for (Integer target : expIds.keySet()) {
    // The directory is selected by the task's index within its node, modulo the directory count.
    int taskIndexInNode = partition.getLogicalPlan().getIndexOfTaskInNode(target);
    String directory = this.shuffleDirectories.get(taskIndexInNode % this.shuffleDirectories.size());

    Shuffle merger;
    if (partition.getKeyType() == null) {
      // Non-keyed data.
      merger = new FSMerger(
          maxBytesInMemory,
          maxRecordsInMemory,
          directory,
          DFWIOUtils.getOperationName(target, partition, refresh),
          partition.getDataType());
    } else if (comparator != null) {
      // Keyed data with a comparator available.
      merger = new FSKeyedSortedMerger2(
          maxBytesInMemory,
          maxFileSize,
          directory,
          DFWIOUtils.getOperationName(target, partition, refresh),
          partition.getKeyType(),
          partition.getDataType(),
          comparator,
          target,
          groupByKey,
          parallelIOAllowance);
    } else {
      // Keyed data without a comparator.
      merger = new FSKeyedMerger(
          maxBytesInMemory,
          maxRecordsInMemory,
          directory,
          DFWIOUtils.getOperationName(target, partition, refresh),
          partition.getKeyType(),
          partition.getDataType());
    }

    sortedMergers.put(target, merger);
    finishedSources.put(target, new HashSet<>());
  }
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.
From the class HashJoinUtilsTest, method leftJoinDiskTest.
/**
 * Left-joins two {@code FSKeyedMerger}s and verifies the joined tuples.
 *
 * <p>The left side holds every key in {@code [0, noOfTuples)} with value 1; the right side
 * holds a random subset of those keys with value 2. Every left key must appear in the
 * result, with a right value of 2 when the key is also on the right and {@code null}
 * otherwise. Both mergers are created with 0 for their in-memory byte and record limits
 * (presumably so the data is read back from disk, as the test name suggests).
 */
@Test
public void leftJoinDiskTest() {
  int noOfTuples = 1000;
  Random random = new Random(System.currentTimeMillis());
  List<Integer> keys1 = new ArrayList<>();
  List<Integer> keys2 = new ArrayList<>();
  for (int i = 0; i < noOfTuples; i++) {
    keys1.add(i);
    if (random.nextBoolean()) {
      keys2.add(i);
    }
  }
  Collections.shuffle(keys1);
  Collections.shuffle(keys2);

  FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
  FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
  try {
    // Serialized payloads: every left tuple carries 1, every right tuple carries 2.
    byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
    byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();

    for (Integer key : keys1) {
      fsMerger1.add(key, leftValue, Integer.BYTES);
      fsMerger1.run();
    }
    for (Integer key : keys2) {
      fsMerger2.add(key, rightValue, Integer.BYTES);
      fsMerger2.run();
    }

    fsMerger1.switchToReading();
    fsMerger2.switchToReading();
    ResettableIterator it1 = fsMerger1.readIterator();
    ResettableIterator it2 = fsMerger2.readIterator();

    Iterator<JoinedTuple> iterator = HashJoinUtils.leftJoin(it1, it2, MessageTypes.INTEGER);
    Set<Integer> keysReceived = new HashSet<>();
    Set<Integer> rightKeysLookup = new HashSet<>(keys2);
    while (iterator.hasNext()) {
      JoinedTuple joinedTuple = iterator.next();
      Assert.assertEquals(1, joinedTuple.getLeftValue());
      if (rightKeysLookup.contains(joinedTuple.getKey())) {
        Assert.assertEquals(2, joinedTuple.getRightValue());
      } else {
        Assert.assertNull(joinedTuple.getRightValue());
      }
      keysReceived.add((Integer) joinedTuple.getKey());
    }
    // A left join keeps every left key exactly once in the set of received keys.
    Assert.assertEquals(noOfTuples, keysReceived.size());
  } finally {
    // Clean up even when an assertion above fails, so a failed run does not skip cleanup
    // of whatever the mergers created under /tmp.
    fsMerger1.clean();
    fsMerger2.clean();
  }
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.
From the class DKGatherBatchFinalReceiver, method init.
/**
 * Initializes this receiver: delegates to the superclass, reads the shuffle limits from the
 * configuration, registers one merger per expected target in {@code sortedMergers}, and
 * finally initializes the bulk receiver with the set of targets.
 *
 * <p>When {@code sorted} is set, an {@code FSKeyedSortedMerger2} is created for each target;
 * otherwise an {@code FSKeyedMerger} is used.
 *
 * @param cfg configuration the shuffle limits are read from
 * @param op the data flow operation, forwarded to the superclass
 * @param expectedIds map whose keys are the targets to create mergers for
 */
@Override
public void init(Config cfg, DataFlowOperation op, Map<Integer, List<Integer>> expectedIds) {
  super.init(cfg, op, expectedIds);

  long memoryByteLimit = CommunicationContext.getShuffleMaxBytesInMemory(cfg);
  long memoryRecordLimit = CommunicationContext.getShuffleMaxRecordsInMemory(cfg);
  long fileByteLimit = CommunicationContext.getShuffleFileSize(cfg);
  int ioAllowance = CommunicationContext.getParallelIOAllowance(cfg);

  for (Integer target : expectedIds.keySet()) {
    String operationName = getOperationName(target);
    Shuffle merger;
    if (!sorted) {
      merger = new FSKeyedMerger(
          memoryByteLimit,
          memoryRecordLimit,
          shuffleDirectory,
          operationName,
          dataFlowOperation.getKeyType(),
          dataFlowOperation.getDataType());
    } else {
      merger = new FSKeyedSortedMerger2(
          memoryByteLimit,
          fileByteLimit,
          shuffleDirectory,
          operationName,
          dataFlowOperation.getKeyType(),
          dataFlowOperation.getDataType(),
          comparator,
          target,
          this.groupByKey,
          ioAllowance);
    }
    sortedMergers.put(target, merger);
  }

  this.bulkReceiver.init(cfg, expectedIds.keySet());
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.
From the class HashJoinUtilsTest, method innerJoinDiskTest.
/**
 * Inner-joins two {@code FSKeyedMerger}s that hold the same key set and verifies the result.
 *
 * <p>Both sides hold every key in {@code [0, noOfTuples)}, independently shuffled; left
 * tuples carry value 1 and right tuples carry value 2. Every joined tuple must have both
 * values, and every key must be received. Both mergers are created with 0 for their
 * in-memory byte and record limits (presumably so the data is read back from disk, as the
 * test name suggests).
 */
@Test
public void innerJoinDiskTest() {
  int noOfTuples = 1000;
  List<Integer> keys1 = new ArrayList<>();
  List<Integer> keys2 = new ArrayList<>();
  for (int i = 0; i < noOfTuples; i++) {
    keys1.add(i);
    keys2.add(i);
  }
  Collections.shuffle(keys1);
  Collections.shuffle(keys2);

  FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
  FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
  try {
    // Serialized payloads: every left tuple carries 1, every right tuple carries 2.
    byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
    byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();

    // Both sides are fed in lock-step, one tuple each per iteration.
    for (int i = 0; i < noOfTuples; i++) {
      fsMerger1.add(keys1.get(i), leftValue, Integer.BYTES);
      fsMerger2.add(keys2.get(i), rightValue, Integer.BYTES);
      fsMerger1.run();
      fsMerger2.run();
    }

    fsMerger1.switchToReading();
    fsMerger2.switchToReading();
    ResettableIterator it1 = fsMerger1.readIterator();
    ResettableIterator it2 = fsMerger2.readIterator();

    Iterator<JoinedTuple> iterator = HashJoinUtils.innerJoin(it1, it2, MessageTypes.INTEGER);
    Set<Integer> keysReceived = new HashSet<>();
    while (iterator.hasNext()) {
      JoinedTuple joinedTuple = iterator.next();
      Assert.assertEquals(1, joinedTuple.getLeftValue());
      Assert.assertEquals(2, joinedTuple.getRightValue());
      keysReceived.add((Integer) joinedTuple.getKey());
    }
    // Both sides contain all keys, so the join must produce every one of them.
    Assert.assertEquals(noOfTuples, keysReceived.size());
  } finally {
    // Clean up even when an assertion above fails, so a failed run does not skip cleanup
    // of whatever the mergers created under /tmp.
    fsMerger1.clean();
    fsMerger2.clean();
  }
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedMerger in project twister2 by DSC-SPIDAL.
From the class HashJoinUtilsTest, method rightJoinDiskTest.
/**
 * Right-joins two {@code FSKeyedMerger}s and verifies the joined tuples.
 *
 * <p>The left side holds every key in {@code [0, noOfTuples)} with value 1; the right side
 * holds a random subset of those keys with value 2. Every joined tuple must carry the
 * right value 2, and the number of distinct keys received must equal the number of right
 * keys. Both mergers are created with 0 for their in-memory byte and record limits
 * (presumably so the data is read back from disk, as the test name suggests).
 */
@Test
public void rightJoinDiskTest() {
  int noOfTuples = 1000;
  Random random = new Random(System.currentTimeMillis());
  List<Integer> keys1 = new ArrayList<>();
  List<Integer> keys2 = new ArrayList<>();
  for (int i = 0; i < noOfTuples; i++) {
    keys1.add(i);
    if (random.nextBoolean()) {
      keys2.add(i);
    }
  }
  Collections.shuffle(keys1);
  Collections.shuffle(keys2);

  FSKeyedMerger fsMerger1 = new FSKeyedMerger(0, 0, "/tmp", "op-left", MessageTypes.INTEGER, MessageTypes.INTEGER);
  FSKeyedMerger fsMerger2 = new FSKeyedMerger(0, 0, "/tmp", "op-right", MessageTypes.INTEGER, MessageTypes.INTEGER);
  try {
    // Serialized payloads: every left tuple carries 1, every right tuple carries 2.
    byte[] leftValue = ByteBuffer.wrap(new byte[4]).putInt(1).array();
    byte[] rightValue = ByteBuffer.wrap(new byte[4]).putInt(2).array();

    for (Integer key : keys1) {
      fsMerger1.add(key, leftValue, Integer.BYTES);
      fsMerger1.run();
    }
    for (Integer key : keys2) {
      fsMerger2.add(key, rightValue, Integer.BYTES);
      fsMerger2.run();
    }

    fsMerger1.switchToReading();
    fsMerger2.switchToReading();
    ResettableIterator it1 = fsMerger1.readIterator();
    ResettableIterator it2 = fsMerger2.readIterator();

    Iterator<JoinedTuple> iterator = HashJoinUtils.rightJoin(it1, it2, MessageTypes.INTEGER);
    Set<Integer> keysReceived = new HashSet<>();
    // NOTE: keys1 contains every generated key, so for keys drawn from the inputs the
    // lookup below always succeeds; the null branch guards against unexpected keys.
    Set<Integer> leftKeyLookup = new HashSet<>(keys1);
    while (iterator.hasNext()) {
      JoinedTuple joinedTuple = iterator.next();
      Assert.assertEquals(2, joinedTuple.getRightValue());
      if (leftKeyLookup.contains(joinedTuple.getKey())) {
        Assert.assertEquals(1, joinedTuple.getLeftValue());
      } else {
        Assert.assertNull(joinedTuple.getLeftValue());
      }
      keysReceived.add((Integer) joinedTuple.getKey());
    }
    // A right join keeps every right key exactly once in the set of received keys.
    Assert.assertEquals(keys2.size(), keysReceived.size());
  } finally {
    // Clean up even when an assertion above fails, so a failed run does not skip cleanup
    // of whatever the mergers created under /tmp.
    fsMerger1.clean();
    fsMerger2.clean();
  }
}
Aggregations