use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.
the class RunMergingFrameReaderTest method testOnlyOneRunShouldMerge.
/**
 * Builds a {@code RunMergingFrameReader} over input requested with one run and one frame
 * per run, then passes it to {@code testMergeSucceed}.
 *
 * <p>Uses a page size of 128 with record sizes between {@code pageSize / 10} and
 * {@code pageSize / 8}.
 *
 * @throws HyracksDataException if preparing the input or running the merge check fails
 */
@Test
public void testOnlyOneRunShouldMerge() throws HyracksDataException {
    final int pageSize = 128;
    final int runCount = 1;
    final int framesPerRun = 1;
    final int smallestRecord = pageSize / 10;
    final int largestRecord = pageSize / 8;

    IHyracksTaskContext ctx = testUtils.create(pageSize);

    // Handed empty to prepareRandomInputRunList; presumably populated by it (helper not shown here).
    List<Map<Integer, String>> expectedPerRun = new ArrayList<>(runCount);
    List<TestFrameReader> runReaders = new ArrayList<>(runCount);
    List<IFrame> inputFrames = new ArrayList<>(runCount);
    prepareRandomInputRunList(ctx, pageSize, runCount, framesPerRun, smallestRecord, largestRecord,
            runReaders, inputFrames, expectedPerRun);

    RunMergingFrameReader merger =
            new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields, Comparators, null, RecordDesc);
    testMergeSucceed(ctx, merger, expectedPerRun);
}
use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.
the class RunMergingFrameReaderTest method testRunFileReader.
/**
 * Feeds two batches of prepared frames through an {@code ExternalSortRunGenerator}, then
 * merges the generated run files with a {@code RunMergingFrameReader} and checks every
 * output frame with {@code assertFrameIsSorted}.
 *
 * <p>The first batch uses record sizes between {@code pageSize / 10} and {@code pageSize / 2};
 * the second batch uses records of exactly {@code pageSize}. Both batches record their
 * entries in the same key/value map.
 *
 * @throws HyracksDataException if run generation, reading, or merging fails
 */
@Test
public void testRunFileReader() throws HyracksDataException {
    int pageSize = 128;
    int numRuns = 4;
    int numFramesPerRun = 4;
    int minRecordSize = pageSize / 10;
    int maxRecordSize = pageSize / 2;
    IHyracksTaskContext ctx = testUtils.create(pageSize);
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null,
            ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, numFramesPerRun);

    runGenerator.open();
    Map<Integer, String> keyValuePair = new HashMap<>();
    List<IFrame> frameList = new ArrayList<>();

    // First batch: small-to-medium records.
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null,
            keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }

    // Second batch: every record is exactly one page in size.
    numFramesPerRun = 2;
    minRecordSize = pageSize;
    maxRecordSize = pageSize;
    frameList.clear();
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null,
            keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }
    runGenerator.close();

    // One input frame per generated run, sized to that run's maximum frame size.
    List<IFrame> inFrame = new ArrayList<>(runGenerator.getRuns().size());
    for (GeneratedRunFileReader run : runGenerator.getRuns()) {
        inFrame.add(new GroupVSizeFrame(ctx, run.getMaxFrameSize()));
    }
    // Let each run file reader not delete the run file when it is read and closed.
    for (GeneratedRunFileReader run : runGenerator.getRuns()) {
        PA.setValue(run, "deleteAfterClose", false);
    }
    matchResult(ctx, runGenerator.getRuns(), keyValuePair);

    List<IFrameReader> runs = new ArrayList<>();
    for (GeneratedRunFileReader run : runGenerator.getRuns()) {
        runs.add(run);
    }
    RunMergingFrameReader reader = new RunMergingFrameReader(ctx, runs, inFrame, SortFields, Comparators,
            null, RecordDesc);
    IFrame outFrame = new VSizeFrame(ctx);
    reader.open();
    try {
        while (reader.nextFrame(outFrame)) {
            assertFrameIsSorted(outFrame, Arrays.asList(keyValuePair));
        }
    } finally {
        // Close the reader even when a read or an assertion fails, so a failing run
        // does not leave the merging reader open.
        reader.close();
    }
    assertAllKeyValueIsConsumed(Arrays.asList(keyValuePair));
}
use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.
the class RunMergingFrameReaderTest method testOneLargeRunMerge.
/**
 * Builds a {@code RunMergingFrameReader} over input prepared in three rounds with growing
 * record sizes, then passes it to {@code testMergeSucceed}.
 *
 * <p>All three rounds use a page size of 64 and request two runs, accumulating into the same
 * reader, frame and key/value lists:
 * <ol>
 *   <li>1 frame per run, record sizes between {@code pageSize / 10} and {@code pageSize / 8};</li>
 *   <li>4 frames per run, record size exactly {@code pageSize};</li>
 *   <li>6 frames per run, record size exactly {@code pageSize * 2}.</li>
 * </ol>
 *
 * @throws HyracksDataException if preparing the input or running the merge check fails
 */
@Test
public void testOneLargeRunMerge() throws HyracksDataException {
    final int pageSize = 64;
    final int runsPerRound = 2;

    IHyracksTaskContext ctx = testUtils.create(pageSize);
    List<Map<Integer, String>> expectedPerRun = new ArrayList<>();
    List<TestFrameReader> runReaders = new ArrayList<>();
    List<IFrame> inputFrames = new ArrayList<>();

    // Round 1: records well below the page size.
    prepareRandomInputRunList(ctx, pageSize, runsPerRound, 1, pageSize / 10, pageSize / 8,
            runReaders, inputFrames, expectedPerRun);
    // Round 2: records of exactly one page.
    prepareRandomInputRunList(ctx, pageSize, runsPerRound, 4, pageSize, pageSize,
            runReaders, inputFrames, expectedPerRun);
    // Round 3: records of exactly two pages.
    prepareRandomInputRunList(ctx, pageSize, runsPerRound, 6, pageSize * 2, pageSize * 2,
            runReaders, inputFrames, expectedPerRun);

    RunMergingFrameReader merger =
            new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields, Comparators, null, RecordDesc);
    testMergeSucceed(ctx, merger, expectedPerRun);
}
use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.
the class RunMergingFrameReaderTest method testNormalRunMerge.
/**
 * Builds a {@code RunMergingFrameReader} over input requested with two runs of two frames
 * each, then passes it to {@code testMergeSucceed}.
 *
 * <p>Uses a page size of 128 with record sizes between {@code pageSize / 10} and
 * {@code pageSize / 8}.
 *
 * @throws HyracksDataException if preparing the input or running the merge check fails
 */
@Test
public void testNormalRunMerge() throws HyracksDataException {
    final int pageSize = 128;
    final int runCount = 2;
    final int framesPerRun = 2;
    final int smallestRecord = pageSize / 10;
    final int largestRecord = pageSize / 8;

    IHyracksTaskContext ctx = testUtils.create(pageSize);

    // Handed empty to prepareRandomInputRunList; presumably populated by it (helper not shown here).
    List<Map<Integer, String>> expectedPerRun = new ArrayList<>(runCount);
    List<TestFrameReader> runReaders = new ArrayList<>(runCount);
    List<IFrame> inputFrames = new ArrayList<>(runCount);
    prepareRandomInputRunList(ctx, pageSize, runCount, framesPerRun, smallestRecord, largestRecord,
            runReaders, inputFrames, expectedPerRun);

    RunMergingFrameReader merger =
            new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields, Comparators, null, RecordDesc);
    testMergeSucceed(ctx, merger, expectedPerRun);
}
use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.
the class RunMergingFrameReaderTest method testNormalRunMergeWithTopK.
/**
 * Repeats the two-run, two-frames-per-run merge with a {@code topK} argument passed to the
 * {@code RunMergingFrameReader} constructor.
 *
 * <p>{@code topK} ranges from 1 up to, but not including,
 * {@code pageSize * numRuns * numFramesPerRun / maxRecordSize / 2}. Each iteration creates a
 * fresh context and fresh input, and asserts that the count returned by
 * {@code testMergeSucceedInner} equals {@code topK} plus the combined size of the key/value
 * maps after that call.
 *
 * @throws HyracksDataException if preparing the input or running the merge check fails
 */
@Test
public void testNormalRunMergeWithTopK() throws HyracksDataException {
    final int pageSize = 128;
    final int runCount = 2;
    final int framesPerRun = 2;
    final int smallestRecord = pageSize / 10;
    final int largestRecord = pageSize / 8;
    // Exclusive upper bound for topK; none of its operands change inside the loop.
    final int topKLimit = pageSize * runCount * framesPerRun / largestRecord / 2;

    for (int topK = 1; topK < topKLimit; topK++) {
        IHyracksTaskContext ctx = testUtils.create(pageSize);
        List<Map<Integer, String>> expectedPerRun = new ArrayList<>(runCount);
        List<TestFrameReader> runReaders = new ArrayList<>(runCount);
        List<IFrame> inputFrames = new ArrayList<>(runCount);
        prepareRandomInputRunList(ctx, pageSize, runCount, framesPerRun, smallestRecord, largestRecord,
                runReaders, inputFrames, expectedPerRun);

        RunMergingFrameReader merger = new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields,
                Comparators, null, RecordDesc, topK);
        int totalCount = testMergeSucceedInner(ctx, merger, expectedPerRun);

        // Entries still present in the maps after the merge check has run.
        int leftoverCount = expectedPerRun.stream().mapToInt(Map::size).sum();
        assertEquals(topK + leftoverCount, totalCount);
    }
}
Aggregations