Search in sources :

Example 1 with GeneratedRunFileReader

use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.

the class RunMergingFrameReaderTest method testRunFileReader.

/**
 * Feeds two batches of generated records (small records first, then page-sized
 * records) through an {@code ExternalSortRunGenerator}, checks every generated
 * run with {@code matchResult}, and then merges the same runs through a
 * {@code RunMergingFrameReader}, asserting that each merged frame is sorted and
 * that every generated key/value pair is consumed.
 *
 * @throws HyracksDataException if run generation, reading or merging fails
 */
@Test
public void testRunFileReader() throws HyracksDataException {
    int pageSize = 128;
    int numRuns = 4;
    int numFramesPerRun = 4;
    int minRecordSize = pageSize / 10;
    int maxRecordSize = pageSize / 2;
    IHyracksTaskContext ctx = testUtils.create(pageSize);
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null, ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, numFramesPerRun);
    runGenerator.open();
    Map<Integer, String> keyValuePair = new HashMap<>();
    List<IFrame> frameList = new ArrayList<>();
    // First batch: records between a tenth and a half of a page.
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }
    // Second batch: records of exactly one page each.
    numFramesPerRun = 2;
    minRecordSize = pageSize;
    maxRecordSize = pageSize;
    frameList.clear();
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }
    runGenerator.close();
    List<GeneratedRunFileReader> generatedRuns = runGenerator.getRuns();
    List<IFrame> inFrame = new ArrayList<>(generatedRuns.size());
    for (GeneratedRunFileReader run : generatedRuns) {
        // One input frame per run, sized to the largest frame that run reports.
        inFrame.add(new GroupVSizeFrame(ctx, run.getMaxFrameSize()));
        // The runs are handed to matchResult and then to the merging reader below,
        // so a run file must not be deleted when its reader is closed.
        PA.setValue(run, "deleteAfterClose", false);
    }
    matchResult(ctx, generatedRuns, keyValuePair);
    List<IFrameReader> runs = new ArrayList<IFrameReader>(generatedRuns);
    RunMergingFrameReader reader = new RunMergingFrameReader(ctx, runs, inFrame, SortFields, Comparators, null, RecordDesc);
    IFrame outFrame = new VSizeFrame(ctx);
    try {
        reader.open();
        while (reader.nextFrame(outFrame)) {
            assertFrameIsSorted(outFrame, Arrays.asList(keyValuePair));
        }
    } finally {
        // Close the merging reader even when an assertion or a read fails.
        reader.close();
    }
    assertAllKeyValueIsConsumed(Arrays.asList(keyValuePair));
}
Also used : HashMap(java.util.HashMap) IFrame(org.apache.hyracks.api.comm.IFrame) ArrayList(java.util.ArrayList) GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader) IFrameReader(org.apache.hyracks.api.comm.IFrameReader) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ExternalSortRunGenerator(org.apache.hyracks.dataflow.std.sort.ExternalSortRunGenerator) GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader) Test(org.junit.Test)

Example 2 with GeneratedRunFileReader

use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.

the class AbstractRunGeneratorTest method assertReadSorted.

/**
 * Reads every run from start to end and checks its contents with
 * {@code assertFTADataIsSorted}, then asserts that {@code keyValuePair} is empty.
 * Each run is closed after it is read, whether or not the check succeeded.
 *
 * @param runs         the run readers to verify; at least one is required
 * @param fta          accessor that is reset onto each frame that is read
 * @param frame        the frame each run is read into
 * @param keyValuePair the key/value pairs handed to the per-frame check
 * @throws HyracksDataException if opening, reading or closing a run fails
 */
static void assertReadSorted(List<GeneratedRunFileReader> runs, IFrameTupleAccessor fta, IFrame frame, Map<Integer, String> keyValuePair) throws HyracksDataException {
    assertTrue(!runs.isEmpty());
    for (GeneratedRunFileReader reader : runs) {
        try {
            reader.open();
            // The key tracking starts over for each run.
            int lastKey = Integer.MIN_VALUE;
            while (reader.nextFrame(frame)) {
                fta.reset(frame.getBuffer());
                lastKey = assertFTADataIsSorted(fta, keyValuePair, lastKey);
            }
        } finally {
            reader.close();
        }
    }
    assertTrue(keyValuePair.isEmpty());
}
Also used : GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader)

Example 3 with GeneratedRunFileReader

use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.

the class AbstractRunGeneratorTest method testMixedLargeRecords.

/**
 * Sorts a mix of ordinary records and one big object generated for
 * {@code frameLimit - 1} pages, then asserts that the largest frame size
 * reported by the resulting runs equals {@code pageSize * (frameLimit - 1)}.
 *
 * @throws HyracksDataException if sorting the records fails
 */
@Test
public void testMixedLargeRecords() throws HyracksDataException {
    final int pageSize = 128;
    final int frameLimit = 4;
    final int numRuns = 4;
    final int minRecordSize = 20;
    final int maxRecordSize = pageSize / 2;
    final int bigObjectFrames = frameLimit - 1;
    HashMap<Integer, String> specialPair = generateBigObject(pageSize, bigObjectFrames);
    List<GeneratedRunFileReader> sortedRuns = testSortRecords(pageSize, frameLimit, numRuns, minRecordSize, maxRecordSize, specialPair);
    int largestFrameSize = 0;
    for (GeneratedRunFileReader sortedRun : sortedRuns) {
        int frameSize = sortedRun.getMaxFrameSize();
        if (frameSize > largestFrameSize) {
            largestFrameSize = frameSize;
        }
    }
    assertTrue(largestFrameSize == pageSize * bigObjectFrames);
}
Also used : GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader) Test(org.junit.Test)

Example 4 with GeneratedRunFileReader

use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.

the class AbstractRunGeneratorTest method matchResult.

/**
 * Verifies the given runs against a copy of {@code keyValuePair} by delegating
 * to {@code assertReadSorted}; the caller's map is not passed on, only the copy.
 * The read frame is sized to the largest frame size reported by any run.
 *
 * @param ctx          task context used to create the read frame and to obtain the initial frame size
 * @param runs         the run readers to verify
 * @param keyValuePair the expected key/value pairs
 * @throws HyracksDataException if reading a run fails
 */
static void matchResult(IHyracksTaskContext ctx, List<GeneratedRunFileReader> runs, Map<Integer, String> keyValuePair) throws HyracksDataException {
    Map<Integer, String> expectedCopy = new HashMap<>(keyValuePair);
    int largestFrameSize = 0;
    for (GeneratedRunFileReader reader : runs) {
        int frameSize = reader.getMaxFrameSize();
        if (frameSize > largestFrameSize) {
            largestFrameSize = frameSize;
        }
    }
    GroupVSizeFrame readFrame = new GroupVSizeFrame(ctx, largestFrameSize);
    GroupFrameAccessor accessor = new GroupFrameAccessor(ctx.getInitialFrameSize(), RecordDesc);
    assertReadSorted(runs, accessor, readFrame, expectedCopy);
}
Also used : GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame) HashMap(java.util.HashMap) GroupFrameAccessor(org.apache.hyracks.dataflow.std.sort.util.GroupFrameAccessor) GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader)

Example 5 with GeneratedRunFileReader

use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.

the class AbstractExternalSortRunMerger method process.

/**
 * Produces the final sorted output on the writer obtained from {@code writer}.
 * <p>
 * When {@code runs} is empty, no merge is performed: whatever the sorter still
 * holds is flushed straight to the final writer. Otherwise the runs are merged in
 * a loop: each pass selects a group of runs; while further runs are still pending
 * the group is merged into an intermediate run file that is appended to
 * {@code runs}, and once nothing else is pending the selected group is merged
 * into the final writer.
 * <p>
 * On any exception the final writer (if created) is failed and the exception is
 * rethrown via {@code HyracksDataException.create}. In all cases the final writer
 * is closed and every reader left in {@code runs} is closed; failures while
 * closing a reader are logged at WARNING and not propagated.
 *
 * @throws HyracksDataException if flushing, merging or writing fails, or if a
 *         single selected run cannot be merged together with the remaining runs
 */
public void process() throws HyracksDataException {
    IFrameWriter finalWriter = null;
    try {
        if (runs.isEmpty()) {
            // No run files exist: skip merging and emit the sorter's content directly.
            finalWriter = prepareSkipMergingFinalResultWriter(writer);
            finalWriter.open();
            if (sorter != null) {
                try {
                    if (sorter.hasRemaining()) {
                        sorter.flush(finalWriter);
                    }
                } finally {
                    sorter.close();
                }
            }
        } else {
            /** recycle sort buffer */
            if (sorter != null) {
                sorter.close();
            }
            finalWriter = prepareFinalMergeResultWriter(writer);
            finalWriter.open();
            // NOTE(review): presumably one frame of the budget is kept for output — confirm.
            int maxMergeWidth = framesLimit - 1;
            inFrames = new ArrayList<>(maxMergeWidth);
            outputFrame = new VSizeFrame(ctx);
            List<GeneratedRunFileReader> partialRuns = new ArrayList<>(maxMergeWidth);
            // Runs at indexes [0, stop) form the current generation; all of them start out flagged available.
            int stop = runs.size();
            currentGenerationRunAvailable.set(0, stop);
            while (true) {
                // Pick the runs for this pass and set up input frames for them;
                // the exact meaning of unUsed is defined by selectPartialRuns (not visible here).
                int unUsed = selectPartialRuns(maxMergeWidth * ctx.getInitialFrameSize(), runs, partialRuns, currentGenerationRunAvailable, stop);
                prepareFrames(unUsed, inFrames, partialRuns);
                // Intermediate pass: current-generation runs are still flagged available,
                // or runs have already been appended beyond index stop.
                if (!currentGenerationRunAvailable.isEmpty() || stop < runs.size()) {
                    GeneratedRunFileReader reader;
                    if (partialRuns.size() == 1) {
                        if (!currentGenerationRunAvailable.isEmpty()) {
                            // Only one run was selected although others are still pending.
                            throw new HyracksDataException("The record is too big to put into the merging frame, please" + " allocate more sorting memory");
                        } else {
                            // A single run needs no merging; carry it over unchanged.
                            reader = partialRuns.get(0);
                        }
                    } else {
                        // Merge the selected runs into a new intermediate run file.
                        RunFileWriter mergeFileWriter = prepareIntermediateMergeRunFile();
                        IFrameWriter mergeResultWriter = prepareIntermediateMergeResultWriter(mergeFileWriter);
                        try {
                            mergeResultWriter.open();
                            merge(mergeResultWriter, partialRuns);
                        } catch (Throwable t) {
                            mergeResultWriter.fail();
                            throw t;
                        } finally {
                            mergeResultWriter.close();
                        }
                        reader = mergeFileWriter.createReader();
                    }
                    // The resulting run is appended after the current generation.
                    runs.add(reader);
                    if (currentGenerationRunAvailable.isEmpty()) {
                        if (LOGGER.isLoggable(Level.FINE)) {
                            LOGGER.fine("generated runs:" + stop);
                        }
                        // Current generation is used up: drop its runs from the list and
                        // make the appended runs the new current generation.
                        runs.subList(0, stop).clear();
                        currentGenerationRunAvailable.clear();
                        currentGenerationRunAvailable.set(0, runs.size());
                        stop = runs.size();
                    }
                } else {
                    if (LOGGER.isLoggable(Level.FINE)) {
                        LOGGER.fine("final runs:" + stop);
                    }
                    // Nothing else is pending: merge the selected runs into the final writer and finish.
                    merge(finalWriter, partialRuns);
                    break;
                }
            }
        }
    } catch (Exception e) {
        // Signal the failure to the final writer before propagating.
        if (finalWriter != null) {
            finalWriter.fail();
        }
        throw HyracksDataException.create(e);
    } finally {
        try {
            if (finalWriter != null) {
                finalWriter.close();
            }
        } finally {
            // Close every reader still in runs, even if closing the final writer threw.
            for (RunFileReader reader : runs) {
                try {
                    // close is idempotent.
                    reader.close();
                } catch (Exception e) {
                    // Best effort: log and keep closing the remaining readers.
                    if (LOGGER.isLoggable(Level.WARNING)) {
                        LOGGER.log(Level.WARNING, e.getMessage(), e);
                    }
                }
            }
        }
    }
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) ArrayList(java.util.ArrayList) GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader) RunFileReader(org.apache.hyracks.dataflow.common.io.RunFileReader) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) RunFileWriter(org.apache.hyracks.dataflow.common.io.RunFileWriter)

Aggregations

GeneratedRunFileReader (org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader)5 GroupVSizeFrame (org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame)2 Test (org.junit.Test)2 IFrame (org.apache.hyracks.api.comm.IFrame)1 IFrameReader (org.apache.hyracks.api.comm.IFrameReader)1 IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)1 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)1 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)1 RunFileReader (org.apache.hyracks.dataflow.common.io.RunFileReader)1 RunFileWriter (org.apache.hyracks.dataflow.common.io.RunFileWriter)1 ExternalSortRunGenerator (org.apache.hyracks.dataflow.std.sort.ExternalSortRunGenerator)1 RunMergingFrameReader (org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader)1 GroupFrameAccessor (org.apache.hyracks.dataflow.std.sort.util.GroupFrameAccessor)1