Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2 in project twister2 by DSC-SPIDAL.
Class DPartitionBatchFinalReceiver, method initMergers.
/**
 * (Re)creates one {@link Shuffle} per target listed in {@code expIds} and resets the
 * set of finished sources for that target. This runs after every refresh.
 *
 * <p>The merger type is chosen from the partition's key type and the comparator:
 * no key type gives an {@code FSMerger}, a key type without a comparator gives an
 * {@code FSKeyedMerger}, and a key type with a comparator gives an
 * {@code FSKeyedSortedMerger2}.
 *
 * @param maxBytesInMemory byte limit handed to every merger
 * @param maxRecordsInMemory record limit handed to the unsorted mergers
 * @param maxFileSize file size limit handed to the sorted merger
 * @param parallelIOAllowance parallel IO allowance handed to the sorted merger
 */
private void initMergers(long maxBytesInMemory, long maxRecordsInMemory, long maxFileSize, int parallelIOAllowance) {
  for (Integer target : expIds.keySet()) {
    // pick a shuffle directory based on the task's index within its node
    int directoryIndex = partition.getLogicalPlan().getIndexOfTaskInNode(target)
        % this.shuffleDirectories.size();
    String shuffleDirectory = this.shuffleDirectories.get(directoryIndex);
    String operationName = DFWIOUtils.getOperationName(target, partition, refresh);

    Shuffle merger;
    if (partition.getKeyType() == null) {
      // non-keyed data
      merger = new FSMerger(maxBytesInMemory, maxRecordsInMemory, shuffleDirectory,
          operationName, partition.getDataType());
    } else if (comparator == null) {
      // keyed data without an ordering
      merger = new FSKeyedMerger(maxBytesInMemory, maxRecordsInMemory, shuffleDirectory,
          operationName, partition.getKeyType(), partition.getDataType());
    } else {
      // keyed data ordered by the comparator
      merger = new FSKeyedSortedMerger2(maxBytesInMemory, maxFileSize, shuffleDirectory,
          operationName, partition.getKeyType(), partition.getDataType(), comparator,
          target, groupByKey, parallelIOAllowance);
    }

    sortedMergers.put(target, merger);
    finishedSources.put(target, new HashSet<>());
  }
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2 in project twister2 by DSC-SPIDAL.
Class SortJoinUtilsTest, method innerJoinWithDiskBasedListComparision.
/**
 * Compares the results of the in-memory and the disk based inner joins.
 * The purpose is to verify the accuracy of the disk based inner join.
 *
 * <p>Both relations hold 100 tuples with random keys in {@code [0, 10)}. Each relation
 * is written to its own {@link FSKeyedSortedMerger2}; the two read iterators are joined
 * with {@code SortJoinUtils.joinWithCache}. The keys it produces are compared, position
 * by position, with the keys of the in-memory join result sorted by key.
 */
@Test
public void innerJoinWithDiskBasedListComparision() {
  List<Tuple> left = new ArrayList<>();
  List<Tuple> right = new ArrayList<>();
  Random random = new Random();
  for (int i = 0; i < 100; i++) {
    left.add(Tuple.of(random.nextInt(10), random.nextInt()));
    right.add(Tuple.of(random.nextInt(10), random.nextInt()));
  }

  // first two arguments are the in-memory byte limit and the file size limit;
  // they are kept tiny, presumably so that the data is pushed to disk
  FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp",
      "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER,
      (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : left) {
    byte[] data = MessageTypes.INTEGER.getDataPacker()
        .packToByteArray((Integer) tuple.getValue());
    fsk1.add(tuple.getKey(), data, data.length);
    fsk1.run();
  }

  FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp",
      "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER,
      (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : right) {
    byte[] data = MessageTypes.INTEGER.getDataPacker()
        .packToByteArray((Integer) tuple.getValue());
    fsk2.add(tuple.getKey(), data, data.length);
    fsk2.run();
  }

  CommonThreadPool.init(Config.newBuilder().build());
  fsk1.switchToReading();
  fsk2.switchToReading();

  // disk based join (result under test)
  Iterator iterator = SortJoinUtils.joinWithCache(
      (RestorableIterator) fsk1.readIterator(),
      (RestorableIterator) fsk2.readIterator(),
      new KeyComparatorWrapper((Comparator<Integer>) Integer::compare),
      CommunicationContext.JoinType.INNER, Config.newBuilder().build());

  // in-memory join (reference result), ordered by key for the positional comparison
  List<Object> objects = SortJoinUtils.innerJoin(left, right,
      new KeyComparatorWrapper(Comparator.naturalOrder()));
  objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));

  int i = 0;
  while (iterator.hasNext()) {
    JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
    // fail with a readable message instead of an IndexOutOfBoundsException
    Assert.assertTrue("Disk based join produced more tuples than the in-memory join",
        i < objects.size());
    JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
    // expected (in-memory) value first, actual (disk based) value second
    Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
  }
  // every tuple of the reference result must have been matched
  Assert.assertEquals(objects.size(), i);
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2 in project twister2 by DSC-SPIDAL.
Class SortJoinUtilsTest, method leftOuterJoinComparision.
/**
 * Compares the results of the in-memory and the disk based left outer joins.
 * The purpose is to verify the accuracy of the disk based left outer join.
 *
 * <p>Both relations hold 100 tuples with random keys in {@code [0, 10)}. Each relation
 * is written to its own {@link FSKeyedSortedMerger2}; the two read iterators are joined
 * with the iterator based {@code SortJoinUtils.leftOuterJoin}. The keys it produces are
 * compared, position by position, with the keys of the in-memory join result sorted by
 * key.
 */
@Test
public void leftOuterJoinComparision() {
  List<Tuple> left = new ArrayList<>();
  List<Tuple> right = new ArrayList<>();
  Random random = new Random();
  for (int i = 0; i < 100; i++) {
    left.add(Tuple.of(random.nextInt(10), random.nextInt()));
    right.add(Tuple.of(random.nextInt(10), random.nextInt()));
  }

  // first two arguments are the in-memory byte limit and the file size limit;
  // they are kept tiny, presumably so that the data is pushed to disk
  FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp",
      "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER,
      (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : left) {
    byte[] data = MessageTypes.INTEGER.getDataPacker()
        .packToByteArray((Integer) tuple.getValue());
    fsk1.add(tuple.getKey(), data, data.length);
    fsk1.run();
  }

  FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp",
      "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER,
      (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : right) {
    byte[] data = MessageTypes.INTEGER.getDataPacker()
        .packToByteArray((Integer) tuple.getValue());
    fsk2.add(tuple.getKey(), data, data.length);
    fsk2.run();
  }

  CommonThreadPool.init(Config.newBuilder().build());
  fsk1.switchToReading();
  fsk2.switchToReading();

  // disk based join (result under test)
  Iterator iterator = SortJoinUtils.leftOuterJoin(
      (RestorableIterator) fsk1.readIterator(),
      (RestorableIterator) fsk2.readIterator(),
      new KeyComparatorWrapper((Comparator<Integer>) Integer::compare));

  // in-memory join (reference result), ordered by key for the positional comparison
  List<Object> objects = SortJoinUtils.leftOuterJoin(left, right,
      new KeyComparatorWrapper(Comparator.naturalOrder()));
  objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));

  int i = 0;
  while (iterator.hasNext()) {
    JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
    // fail with a readable message instead of an IndexOutOfBoundsException
    Assert.assertTrue("Disk based join produced more tuples than the in-memory join",
        i < objects.size());
    JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
    // expected (in-memory) value first, actual (disk based) value second
    Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
  }
  // every tuple of the reference result must have been matched
  Assert.assertEquals(objects.size(), i);
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2 in project twister2 by DSC-SPIDAL.
Class DKGatherBatchFinalReceiver, method init.
/**
 * Initializes the receiver: delegates to the parent, reads the shuffle limits from the
 * configuration, creates one {@link Shuffle} per expected target and finally initializes
 * the bulk receiver with the set of targets.
 *
 * @param cfg configuration the shuffle limits are read from
 * @param op the operation, forwarded to the parent initialization
 * @param expectedIds expected ids keyed by target; one merger is created per key
 */
@Override
public void init(Config cfg, DataFlowOperation op, Map<Integer, List<Integer>> expectedIds) {
  super.init(cfg, op, expectedIds);

  final long maxBytesInMemory = CommunicationContext.getShuffleMaxBytesInMemory(cfg);
  final long maxRecordsInMemory = CommunicationContext.getShuffleMaxRecordsInMemory(cfg);
  final long maxBytesToFile = CommunicationContext.getShuffleFileSize(cfg);
  final int parallelIOAllowance = CommunicationContext.getParallelIOAllowance(cfg);

  for (Integer target : expectedIds.keySet()) {
    Shuffle merger;
    if (!sorted) {
      // unsorted gather: plain keyed merger bounded by bytes and records in memory
      merger = new FSKeyedMerger(maxBytesInMemory, maxRecordsInMemory, shuffleDirectory,
          getOperationName(target), dataFlowOperation.getKeyType(),
          dataFlowOperation.getDataType());
    } else {
      // sorted gather: merger ordered by the comparator
      merger = new FSKeyedSortedMerger2(maxBytesInMemory, maxBytesToFile, shuffleDirectory,
          getOperationName(target), dataFlowOperation.getKeyType(),
          dataFlowOperation.getDataType(), comparator, target, this.groupByKey,
          parallelIOAllowance);
    }
    sortedMergers.put(target, merger);
  }

  this.bulkReceiver.init(cfg, expectedIds.keySet());
}
Use of edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2 in project twister2 by DSC-SPIDAL.
Class SortJoinUtilsTest, method fullOuterJoinComparision.
/**
 * Compares the results of the in-memory and the disk based full outer joins.
 * The purpose is to verify the accuracy of the disk based full outer join.
 *
 * <p>Both relations hold 100 tuples with random keys in {@code [0, 10)}. Each relation
 * is written to its own {@link FSKeyedSortedMerger2}; the two read iterators are joined
 * with the iterator based {@code SortJoinUtils.fullOuterJoin}. The keys it produces are
 * compared, position by position, with the keys of the in-memory join result sorted by
 * key.
 */
@Test
public void fullOuterJoinComparision() {
  List<Tuple> left = new ArrayList<>();
  List<Tuple> right = new ArrayList<>();
  Random random = new Random();
  for (int i = 0; i < 100; i++) {
    left.add(Tuple.of(random.nextInt(10), random.nextInt()));
    right.add(Tuple.of(random.nextInt(10), random.nextInt()));
  }

  // first two arguments are the in-memory byte limit and the file size limit;
  // they are kept tiny, presumably so that the data is pushed to disk
  FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp",
      "op-1-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER,
      (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : left) {
    byte[] data = MessageTypes.INTEGER.getDataPacker()
        .packToByteArray((Integer) tuple.getValue());
    fsk1.add(tuple.getKey(), data, data.length);
    fsk1.run();
  }

  FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp",
      "op-2-" + UUID.randomUUID().toString(), MessageTypes.INTEGER, MessageTypes.INTEGER,
      (Comparator<Integer>) Integer::compare, 0, false, 1);
  for (Tuple tuple : right) {
    byte[] data = MessageTypes.INTEGER.getDataPacker()
        .packToByteArray((Integer) tuple.getValue());
    fsk2.add(tuple.getKey(), data, data.length);
    fsk2.run();
  }

  CommonThreadPool.init(Config.newBuilder().build());
  fsk1.switchToReading();
  fsk2.switchToReading();

  // disk based join (result under test)
  Iterator iterator = SortJoinUtils.fullOuterJoin(
      (RestorableIterator) fsk1.readIterator(),
      (RestorableIterator) fsk2.readIterator(),
      new KeyComparatorWrapper((Comparator<Integer>) Integer::compare));

  // in-memory join (reference result), ordered by key for the positional comparison
  List<Object> objects = SortJoinUtils.fullOuterJoin(left, right,
      new KeyComparatorWrapper(Comparator.naturalOrder()));
  objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));

  int i = 0;
  while (iterator.hasNext()) {
    JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
    // fail with a readable message instead of an IndexOutOfBoundsException
    Assert.assertTrue("Disk based join produced more tuples than the in-memory join",
        i < objects.size());
    JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
    // expected (in-memory) value first, actual (disk based) value second
    Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
  }
  // every tuple of the reference result must have been matched
  Assert.assertEquals(objects.size(), i);
}
Aggregations