Use of org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame in the Apache AsterixDB project.
Class RunMergingFrameReaderTest, method testRunFileReader.
/**
 * Feeds two batches of generated records through an {@code ExternalSortRunGenerator}, checks the
 * produced runs with {@code matchResult}, and then merges those runs with a
 * {@code RunMergingFrameReader}, asserting that every output frame is sorted and that all
 * key/value pairs end up consumed.
 *
 * @throws HyracksDataException if run generation, reading, or merging fails
 */
@Test
public void testRunFileReader() throws HyracksDataException {
    final int pageSize = 128;
    final int numRuns = 4;
    final int framesPerRunFirstBatch = 4;
    final int framesPerRunSecondBatch = 2;

    IHyracksTaskContext ctx = testUtils.create(pageSize);
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null,
            ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, framesPerRunFirstBatch);
    runGenerator.open();

    Map<Integer, String> keyValuePair = new HashMap<>();
    List<IFrame> frameList = new ArrayList<>();

    // First batch: record size bounds are pageSize / 10 (min) and pageSize / 2 (max).
    prepareData(ctx, frameList, pageSize * framesPerRunFirstBatch * numRuns, pageSize / 10, pageSize / 2, null,
            keyValuePair);
    for (IFrame dataFrame : frameList) {
        runGenerator.nextFrame(dataFrame.getBuffer());
    }

    // Second batch: min and max record size are both a full page; the frame list is reused.
    frameList.clear();
    prepareData(ctx, frameList, pageSize * framesPerRunSecondBatch * numRuns, pageSize, pageSize, null,
            keyValuePair);
    for (IFrame dataFrame : frameList) {
        runGenerator.nextFrame(dataFrame.getBuffer());
    }
    runGenerator.close();

    List<GeneratedRunFileReader> generatedRuns = runGenerator.getRuns();

    // One input frame per run, sized to that run's maximum frame size.
    List<IFrame> inFrame = new ArrayList<>(generatedRuns.size());
    for (GeneratedRunFileReader generatedRun : generatedRuns) {
        inFrame.add(new GroupVSizeFrame(ctx, generatedRun.getMaxFrameSize()));
    }

    // Let each run file reader not delete the run file when it is read and closed,
    // because the runs are read by matchResult first and then again by the merger below.
    for (GeneratedRunFileReader generatedRun : generatedRuns) {
        PA.setValue(generatedRun, "deleteAfterClose", false);
    }
    matchResult(ctx, generatedRuns, keyValuePair);

    List<IFrameReader> runs = new ArrayList<>(generatedRuns);
    RunMergingFrameReader reader =
            new RunMergingFrameReader(ctx, runs, inFrame, SortFields, Comparators, null, RecordDesc);
    IFrame outFrame = new VSizeFrame(ctx);

    reader.open();
    while (reader.nextFrame(outFrame)) {
        assertFrameIsSorted(outFrame, Arrays.asList(keyValuePair));
    }
    reader.close();

    assertAllKeyValueIsConsumed(Arrays.asList(keyValuePair));
}
Use of org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame in the Apache AsterixDB project.
Class AbstractRunGeneratorTest, method matchResult.
/**
 * Hands the given runs to {@code assertReadSorted} together with a private copy of the expected
 * key/value pairs, a {@code GroupVSizeFrame} sized to the largest maximum frame size among the runs
 * (0 when there are no runs), and a {@code GroupFrameAccessor} built from the context's initial
 * frame size.
 *
 * @param ctx          task context used to create the frame and to obtain the initial frame size
 * @param runs         the generated run readers to check
 * @param keyValuePair expected key/value pairs; copied first, so this method passes only the copy on
 * @throws HyracksDataException if frame creation or the delegated check fails
 */
static void matchResult(IHyracksTaskContext ctx, List<GeneratedRunFileReader> runs, Map<Integer, String> keyValuePair) throws HyracksDataException {
    HashMap<Integer, String> expectedCopy = new HashMap<>(keyValuePair);

    // Find the biggest frame any run may need so a single frame can serve all of them.
    int largestFrameSize = 0;
    for (GeneratedRunFileReader reader : runs) {
        int candidate = reader.getMaxFrameSize();
        if (candidate > largestFrameSize) {
            largestFrameSize = candidate;
        }
    }

    GroupVSizeFrame sharedFrame = new GroupVSizeFrame(ctx, largestFrameSize);
    GroupFrameAccessor accessor = new GroupFrameAccessor(ctx.getInitialFrameSize(), RecordDesc);
    assertReadSorted(runs, accessor, sharedFrame, expectedCopy);
}
Use of org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame in the Apache AsterixDB project.
Class AbstractExternalSortRunMerger, method prepareFrames.
/**
 * Distributes spare memory across the partial runs and then makes {@code inFrames} line up
 * one-to-one with {@code partialRuns}.
 *
 * <p>When there is spare memory and more than one run, the spare memory is converted to whole
 * frames of the context's initial frame size. Every run's size is raised by an equal share, and the
 * first {@code extraFrames % runCount} runs receive one additional frame. Each new size is capped
 * at {@code FrameConstants.MAX_FRAMESIZE}.
 *
 * <p>Afterwards, surplus entries at the tail of {@code inFrames} are dropped, the remaining ones are
 * resized to their run's maximum frame size, and new frames are appended for runs that have none.
 *
 * @param extraFreeMem spare memory to hand out; nothing is distributed unless it is positive
 * @param inFrames     input frames, modified in place so that entry {@code i} matches run {@code i}
 * @param partialRuns  the runs about to be merged
 * @throws HyracksDataException if resizing or allocating a frame fails
 */
private void prepareFrames(int extraFreeMem, List<GroupVSizeFrame> inFrames, List<GeneratedRunFileReader> partialRuns) throws HyracksDataException {
    final int runCount = partialRuns.size();

    if (extraFreeMem > 0 && runCount > 1) {
        final int frameSize = ctx.getInitialFrameSize();
        final int extraFrames = extraFreeMem / frameSize;
        final int evenShare = (extraFrames / runCount) * frameSize;
        final int leftoverFrames = extraFrames % runCount;

        for (int idx = 0; idx < runCount; idx++) {
            final boolean getsLeftoverFrame = idx < leftoverFrames;
            if (!getsLeftoverFrame && evenShare <= 0) {
                // Past the leftover recipients with nothing to share: the rest stay unchanged.
                break;
            }
            GeneratedRunFileReader run = partialRuns.get(idx);
            int grownSize = run.getMaxFrameSize() + evenShare;
            if (getsLeftoverFrame) {
                grownSize += frameSize;
            }
            run.updateSize(Math.min(FrameConstants.MAX_FRAMESIZE, grownSize));
        }
    }

    // Drop frames beyond the number of runs.
    final int surplus = inFrames.size() - runCount;
    if (surplus > 0) {
        inFrames.subList(runCount, inFrames.size()).clear();
    }

    // Reuse the frames that already exist, then allocate the missing ones.
    final int reusableFrames = inFrames.size();
    for (int idx = 0; idx < runCount; idx++) {
        final int targetSize = partialRuns.get(idx).getMaxFrameSize();
        if (idx < reusableFrames) {
            inFrames.get(idx).resize(targetSize);
        } else {
            inFrames.add(new GroupVSizeFrame(ctx, targetSize));
        }
    }
}
Aggregations