Search in sources :

Example 1 with RunMergingFrameReader

use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.

the class RunMergingFrameReaderTest method testOnlyOneRunShouldMerge.

/**
 * Merge scenario with exactly one input run made of one frame.
 *
 * The input is produced by {@code prepareRandomInputRunList} with a page size of 128 and
 * record-size arguments of {@code pageSize / 10} and {@code pageSize / 8}; the resulting
 * readers and frames are wrapped in a {@link RunMergingFrameReader} whose output is then
 * checked by {@code testMergeSucceed}.
 */
@Test
public void testOnlyOneRunShouldMerge() throws HyracksDataException {
    final int frameSize = 128;
    final int runCount = 1;
    final int framesPerRun = 1;
    final int smallestRecord = frameSize / 10;
    final int largestRecord = frameSize / 8;

    IHyracksTaskContext ctx = testUtils.create(frameSize);

    // Output collections filled in by prepareRandomInputRunList.
    List<TestFrameReader> runReaders = new ArrayList<>(runCount);
    List<IFrame> inputFrames = new ArrayList<>(runCount);
    List<Map<Integer, String>> expectedPerRun = new ArrayList<>(runCount);
    prepareRandomInputRunList(ctx, frameSize, runCount, framesPerRun, smallestRecord, largestRecord, runReaders, inputFrames, expectedPerRun);

    RunMergingFrameReader merger = new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields, Comparators, null, RecordDesc);
    testMergeSucceed(ctx, merger, expectedPerRun);
}
Also used : RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader) IFrame(org.apache.hyracks.api.comm.IFrame) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Example 2 with RunMergingFrameReader

use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.

the class RunMergingFrameReaderTest method testRunFileReader.

/**
 * Drives an {@link ExternalSortRunGenerator} with two batches of generated frames and then
 * merges the runs it produced through a {@link RunMergingFrameReader}.
 *
 * Steps, in order:
 * <ol>
 *   <li>feed a first batch built with record-size arguments {@code pageSize / 10} .. {@code pageSize / 2};</li>
 *   <li>feed a second batch built with record-size arguments of exactly {@code pageSize};</li>
 *   <li>close the generator and allocate one {@link GroupVSizeFrame} per generated run;</li>
 *   <li>set {@code deleteAfterClose} to false on every run, check the runs with
 *       {@code matchResult}, then merge them and check every output frame with
 *       {@code assertFrameIsSorted};</li>
 *   <li>finish with {@code assertAllKeyValueIsConsumed}.</li>
 * </ol>
 */
@Test
public void testRunFileReader() throws HyracksDataException {
    final int pageSize = 128;
    final int numRuns = 4;
    IHyracksTaskContext ctx = testUtils.create(pageSize);

    int framesPerRun = 4;
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null, ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, framesPerRun);
    runGenerator.open();

    Map<Integer, String> expectedPairs = new HashMap<>();
    List<IFrame> inputFrames = new ArrayList<>();

    // First batch: record-size arguments range from pageSize / 10 up to pageSize / 2.
    prepareData(ctx, inputFrames, pageSize * framesPerRun * numRuns, pageSize / 10, pageSize / 2, null, expectedPairs);
    for (IFrame input : inputFrames) {
        runGenerator.nextFrame(input.getBuffer());
    }

    // Second batch: half as many frames per run, record-size arguments fixed at pageSize.
    framesPerRun = 2;
    inputFrames.clear();
    prepareData(ctx, inputFrames, pageSize * framesPerRun * numRuns, pageSize, pageSize, null, expectedPairs);
    for (IFrame input : inputFrames) {
        runGenerator.nextFrame(input.getBuffer());
    }
    runGenerator.close();

    // One input frame per generated run, sized from that run's getMaxFrameSize().
    List<IFrame> mergeInputFrames = new ArrayList<>(runGenerator.getRuns().size());
    for (GeneratedRunFileReader generatedRun : runGenerator.getRuns()) {
        mergeInputFrames.add(new GroupVSizeFrame(ctx, generatedRun.getMaxFrameSize()));
    }

    // Keep each run file on disk after its reader is read and closed, since the runs are
    // handed to matchResult below and then to the merging reader.
    for (GeneratedRunFileReader generatedRun : runGenerator.getRuns()) {
        PA.setValue(generatedRun, "deleteAfterClose", false);
    }
    matchResult(ctx, runGenerator.getRuns(), expectedPairs);

    List<IFrameReader> runReaders = new ArrayList<>(runGenerator.getRuns());
    RunMergingFrameReader merger = new RunMergingFrameReader(ctx, runReaders, mergeInputFrames, SortFields, Comparators, null, RecordDesc);

    List<Map<Integer, String>> expectedAsList = Arrays.asList(expectedPairs);
    IFrame mergedFrame = new VSizeFrame(ctx);
    merger.open();
    while (merger.nextFrame(mergedFrame)) {
        assertFrameIsSorted(mergedFrame, expectedAsList);
    }
    merger.close();
    assertAllKeyValueIsConsumed(expectedAsList);
}
Also used : HashMap(java.util.HashMap) IFrame(org.apache.hyracks.api.comm.IFrame) ArrayList(java.util.ArrayList) GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader) IFrameReader(org.apache.hyracks.api.comm.IFrameReader) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ExternalSortRunGenerator(org.apache.hyracks.dataflow.std.sort.ExternalSortRunGenerator) GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader) Test(org.junit.Test)

Example 3 with RunMergingFrameReader

use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.

the class RunMergingFrameReaderTest method testOneLargeRunMerge.

/**
 * Merge scenario mixing runs prepared with increasingly large record-size arguments.
 *
 * {@code prepareRandomInputRunList} is invoked three times against the same reader, frame and
 * key/value lists, each time for {@code numRuns} runs with a page size of 64:
 * <ul>
 *   <li>1 frame per run, record-size arguments {@code pageSize / 10} .. {@code pageSize / 8};</li>
 *   <li>4 frames per run, record-size arguments of exactly {@code pageSize};</li>
 *   <li>6 frames per run, record-size arguments of exactly {@code pageSize * 2}.</li>
 * </ul>
 * Everything accumulated is then merged by one {@link RunMergingFrameReader} and checked by
 * {@code testMergeSucceed}.
 */
@Test
public void testOneLargeRunMerge() throws HyracksDataException {
    final int pageSize = 64;
    final int numRuns = 2;
    IHyracksTaskContext ctx = testUtils.create(pageSize);

    List<Map<Integer, String>> expectedPerRun = new ArrayList<>();
    List<TestFrameReader> runReaders = new ArrayList<>();
    List<IFrame> inputFrames = new ArrayList<>();

    // Each row is { framesPerRun, minRecordSize, maxRecordSize } for one preparation pass.
    final int[][] passes = {
            { 1, pageSize / 10, pageSize / 8 },
            { 4, pageSize, pageSize },
            { 6, pageSize * 2, pageSize * 2 } };
    for (int[] pass : passes) {
        prepareRandomInputRunList(ctx, pageSize, numRuns, pass[0], pass[1], pass[2], runReaders, inputFrames, expectedPerRun);
    }

    RunMergingFrameReader merger = new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields, Comparators, null, RecordDesc);
    testMergeSucceed(ctx, merger, expectedPerRun);
}
Also used : RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader) IFrame(org.apache.hyracks.api.comm.IFrame) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Example 4 with RunMergingFrameReader

use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.

the class RunMergingFrameReaderTest method testNormalRunMerge.

/**
 * Baseline merge scenario: two runs of two frames each.
 *
 * Input is produced by {@code prepareRandomInputRunList} with a page size of 128 and
 * record-size arguments of {@code pageSize / 10} and {@code pageSize / 8}. The prepared
 * readers and frames are merged by a {@link RunMergingFrameReader} and the result is checked
 * by {@code testMergeSucceed}.
 */
@Test
public void testNormalRunMerge() throws HyracksDataException {
    final int frameSize = 128;
    final int runCount = 2;
    final int framesPerRun = 2;
    final int smallestRecord = frameSize / 10;
    final int largestRecord = frameSize / 8;

    IHyracksTaskContext ctx = testUtils.create(frameSize);

    // Output collections filled in by prepareRandomInputRunList.
    List<TestFrameReader> runReaders = new ArrayList<>(runCount);
    List<IFrame> inputFrames = new ArrayList<>(runCount);
    List<Map<Integer, String>> expectedPerRun = new ArrayList<>(runCount);
    prepareRandomInputRunList(ctx, frameSize, runCount, framesPerRun, smallestRecord, largestRecord, runReaders, inputFrames, expectedPerRun);

    RunMergingFrameReader merger = new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields, Comparators, null, RecordDesc);
    testMergeSucceed(ctx, merger, expectedPerRun);
}
Also used : RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader) IFrame(org.apache.hyracks.api.comm.IFrame) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Example 5 with RunMergingFrameReader

use of org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader in project asterixdb by apache.

the class RunMergingFrameReaderTest method testNormalRunMergeWithTopK.

/**
 * Repeats the two-runs-by-two-frames merge scenario with a top-K limit passed to
 * {@link RunMergingFrameReader}.
 *
 * For every {@code topK} from 1 up to (but excluding)
 * {@code pageSize * numRuns * numFramesPerRun / maxRecordSize / 2}, a fresh context and fresh
 * input are prepared, the merge is run through {@code testMergeSucceedInner}, and the test
 * asserts that {@code topK} plus the total size of the key/value maps after the merge equals
 * the count returned by {@code testMergeSucceedInner}.
 */
@Test
public void testNormalRunMergeWithTopK() throws HyracksDataException {
    final int pageSize = 128;
    final int numRuns = 2;
    final int numFramesPerRun = 2;
    final int minRecordSize = pageSize / 10;
    final int maxRecordSize = pageSize / 8;
    // Exclusive upper bound for topK; evaluated once because none of its inputs change.
    final int topKLimit = pageSize * numRuns * numFramesPerRun / maxRecordSize / 2;

    int topK = 1;
    while (topK < topKLimit) {
        IHyracksTaskContext ctx = testUtils.create(pageSize);
        List<TestFrameReader> runReaders = new ArrayList<>(numRuns);
        List<IFrame> inputFrames = new ArrayList<>(numRuns);
        List<Map<Integer, String>> expectedPerRun = new ArrayList<>(numRuns);
        prepareRandomInputRunList(ctx, pageSize, numRuns, numFramesPerRun, minRecordSize, maxRecordSize, runReaders, inputFrames, expectedPerRun);

        RunMergingFrameReader merger = new RunMergingFrameReader(ctx, runReaders, inputFrames, SortFields, Comparators, null, RecordDesc, topK);
        int totalCount = testMergeSucceedInner(ctx, merger, expectedPerRun);

        // Sum the entries still present in the maps once the merge has been checked.
        int remainingCount = 0;
        for (Map<Integer, String> perRun : expectedPerRun) {
            remainingCount += perRun.size();
        }
        assertEquals(topK + remainingCount, totalCount);
        topK++;
    }
}
Also used : IFrame(org.apache.hyracks.api.comm.IFrame) ArrayList(java.util.ArrayList) RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)6 IFrame (org.apache.hyracks.api.comm.IFrame)6 RunMergingFrameReader (org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader)6 HashMap (java.util.HashMap)5 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)5 Test (org.junit.Test)5 Map (java.util.Map)4 TreeMap (java.util.TreeMap)4 IFrameReader (org.apache.hyracks.api.comm.IFrameReader)2 VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame)2 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)1 GeneratedRunFileReader (org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader)1 ExternalSortRunGenerator (org.apache.hyracks.dataflow.std.sort.ExternalSortRunGenerator)1 GroupVSizeFrame (org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame)1