Search in sources :

Example 1 with GroupVSizeFrame

use of org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame in project asterixdb by apache.

the class RunMergingFrameReaderTest method testRunFileReader.

/**
 * Feeds two batches of generated records into an {@code ExternalSortRunGenerator},
 * then merges the resulting runs through a {@code RunMergingFrameReader}. Each merged
 * frame is passed to {@code assertFrameIsSorted}, and once the reader is closed
 * {@code assertAllKeyValueIsConsumed} is invoked on the tracked key/value map.
 *
 * @throws HyracksDataException propagated from the run generator or the merging reader
 */
@Test
public void testRunFileReader() throws HyracksDataException {
    final int pageSize = 128;
    final int numRuns = 4;
    int numFramesPerRun = 4;
    int minRecordSize = pageSize / 10;
    int maxRecordSize = pageSize / 2;

    IHyracksTaskContext ctx = testUtils.create(pageSize);
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null, ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, numFramesPerRun);
    runGenerator.open();

    Map<Integer, String> keyValuePair = new HashMap<>();
    List<IFrame> frameList = new ArrayList<>();

    // First batch: record sizes range from a tenth of a page up to half a page.
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame smallRecordFrame : frameList) {
        runGenerator.nextFrame(smallRecordFrame.getBuffer());
    }

    // Second batch: minimum and maximum record size are both set to the page size.
    numFramesPerRun = 2;
    minRecordSize = pageSize;
    maxRecordSize = pageSize;
    frameList.clear();
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame pageSizedRecordFrame : frameList) {
        runGenerator.nextFrame(pageSizedRecordFrame.getBuffer());
    }
    runGenerator.close();

    List<GeneratedRunFileReader> generatedRuns = runGenerator.getRuns();

    // One input frame per run, sized to that run's maximum frame size.
    List<IFrame> inFrame = new ArrayList<>(generatedRuns.size());
    for (GeneratedRunFileReader generatedRun : generatedRuns) {
        inFrame.add(new GroupVSizeFrame(ctx, generatedRun.getMaxFrameSize()));
    }

    // Switch off delete-after-close on every run: the same runs are handed to
    // matchResult below and afterwards to the merging reader.
    for (GeneratedRunFileReader generatedRun : generatedRuns) {
        PA.setValue(generatedRun, "deleteAfterClose", false);
    }
    matchResult(ctx, generatedRuns, keyValuePair);

    List<IFrameReader> runs = new ArrayList<>();
    runs.addAll(generatedRuns);

    RunMergingFrameReader reader = new RunMergingFrameReader(ctx, runs, inFrame, SortFields, Comparators, null, RecordDesc);
    IFrame outFrame = new VSizeFrame(ctx);
    reader.open();
    while (reader.nextFrame(outFrame)) {
        assertFrameIsSorted(outFrame, Arrays.asList(keyValuePair));
    }
    reader.close();
    assertAllKeyValueIsConsumed(Arrays.asList(keyValuePair));
}
Also used : HashMap(java.util.HashMap) IFrame(org.apache.hyracks.api.comm.IFrame) ArrayList(java.util.ArrayList) GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader) IFrameReader(org.apache.hyracks.api.comm.IFrameReader) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ExternalSortRunGenerator(org.apache.hyracks.dataflow.std.sort.ExternalSortRunGenerator) GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader) Test(org.junit.Test)

Example 2 with GroupVSizeFrame

use of org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame in project asterixdb by apache.

the class AbstractRunGeneratorTest method matchResult.

/**
 * Calls {@code assertReadSorted} on the given runs using a single group frame sized to
 * the largest {@code getMaxFrameSize()} among them (0 when there are no runs).
 * A copy of {@code keyValuePair} is handed to the assertion rather than the map itself.
 *
 * @param ctx          task context used to create the frame and to obtain the initial frame size
 * @param runs         run file readers to check
 * @param keyValuePair expected key/value pairs; copied before being passed on
 * @throws HyracksDataException propagated from frame creation or from reading the runs
 */
static void matchResult(IHyracksTaskContext ctx, List<GeneratedRunFileReader> runs, Map<Integer, String> keyValuePair) throws HyracksDataException {
    // Find the largest frame size any run reports, starting from 0.
    int largestFrameSize = 0;
    for (GeneratedRunFileReader candidate : runs) {
        int candidateSize = candidate.getMaxFrameSize();
        if (candidateSize > largestFrameSize) {
            largestFrameSize = candidateSize;
        }
    }

    HashMap<Integer, String> expectedCopy = new HashMap<>(keyValuePair);
    GroupVSizeFrame sharedFrame = new GroupVSizeFrame(ctx, largestFrameSize);
    GroupFrameAccessor accessor = new GroupFrameAccessor(ctx.getInitialFrameSize(), RecordDesc);
    assertReadSorted(runs, accessor, sharedFrame, expectedCopy);
}
Also used : GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame) HashMap(java.util.HashMap) GroupFrameAccessor(org.apache.hyracks.dataflow.std.sort.util.GroupFrameAccessor) GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader)

Example 3 with GroupVSizeFrame

use of org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame in project asterixdb by apache.

the class AbstractExternalSortRunMerger method prepareFrames.

/**
 * Sizes the runs and their input frames before a merge.
 * <p>
 * When there is spare memory and more than one run, the spare memory is converted to
 * whole frames (of {@code ctx.getInitialFrameSize()} bytes) and shared out: every run
 * gets an equal number of frames, and the first {@code extraFrames % runCount} runs get
 * one additional frame. Each run's new size is passed to {@code updateSize}, capped at
 * {@code FrameConstants.MAX_FRAMESIZE}.
 * <p>
 * Afterwards {@code inFrames} is made to hold exactly one frame per run: surplus frames
 * are dropped, existing ones are resized to the run's max frame size, and missing ones
 * are created.
 *
 * @param extraFreeMem spare memory available for distribution; nothing is distributed when it is not positive
 * @param inFrames     input frames, modified in place to match {@code partialRuns} one-to-one
 * @param partialRuns  runs taking part in the merge
 * @throws HyracksDataException propagated from frame resizing or creation
 */
private void prepareFrames(int extraFreeMem, List<GroupVSizeFrame> inFrames, List<GeneratedRunFileReader> partialRuns) throws HyracksDataException {
    final int runCount = partialRuns.size();

    if (extraFreeMem > 0 && runCount > 1) {
        final int frameSize = ctx.getInitialFrameSize();
        final int spareFrames = extraFreeMem / frameSize;
        final int evenShare = (spareFrames / runCount) * frameSize;
        final int leftoverFrames = spareFrames % runCount;
        for (int r = 0; r < runCount; r++) {
            final boolean getsLeftoverFrame = r < leftoverFrames;
            // Runs past the leftover range are only touched when the even share is non-zero.
            if (!getsLeftoverFrame && evenShare <= 0) {
                break;
            }
            final int bonus = getsLeftoverFrame ? evenShare + frameSize : evenShare;
            GeneratedRunFileReader run = partialRuns.get(r);
            run.updateSize(Math.min(FrameConstants.MAX_FRAMESIZE, run.getMaxFrameSize() + bonus));
        }
    }

    // Drop input frames beyond the number of runs.
    final int frameCountBeforeTrim = inFrames.size();
    if (frameCountBeforeTrim > runCount) {
        inFrames.subList(runCount, frameCountBeforeTrim).clear();
    }

    // Reuse the frames that already exist; append new ones for the remaining runs.
    final int reusableFrames = inFrames.size();
    for (int r = 0; r < runCount; r++) {
        if (r < reusableFrames) {
            inFrames.get(r).resize(partialRuns.get(r).getMaxFrameSize());
        } else {
            inFrames.add(new GroupVSizeFrame(ctx, partialRuns.get(r).getMaxFrameSize()));
        }
    }
}
Also used : GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame)

Aggregations

GroupVSizeFrame (org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame): 3, HashMap (java.util.HashMap): 2, GeneratedRunFileReader (org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader): 2, ArrayList (java.util.ArrayList): 1, IFrame (org.apache.hyracks.api.comm.IFrame): 1, IFrameReader (org.apache.hyracks.api.comm.IFrameReader): 1, VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame): 1, IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext): 1, ExternalSortRunGenerator (org.apache.hyracks.dataflow.std.sort.ExternalSortRunGenerator): 1, RunMergingFrameReader (org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader): 1, GroupFrameAccessor (org.apache.hyracks.dataflow.std.sort.util.GroupFrameAccessor): 1, Test (org.junit.Test): 1