use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.
the class RunMergingFrameReaderTest method testRunFileReader.
/**
 * Feeds two batches of generated records into an {@link ExternalSortRunGenerator} (first
 * records between a tenth and a half of a page, then records of exactly one page), checks
 * the produced runs with {@code matchResult}, and finally merges all runs through a
 * {@link RunMergingFrameReader}, asserting that each merged frame is sorted and that every
 * generated key/value pair was consumed.
 */
@Test
public void testRunFileReader() throws HyracksDataException {
    final int pageSize = 128;
    final int numRuns = 4;
    final int smallBatchFramesPerRun = 4;
    final int largeBatchFramesPerRun = 2;

    IHyracksTaskContext ctx = testUtils.create(pageSize);
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null,
            ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, smallBatchFramesPerRun);
    runGenerator.open();

    Map<Integer, String> expected = new HashMap<>();
    List<IFrame> frames = new ArrayList<>();

    // First batch: records sized between pageSize / 10 and pageSize / 2.
    prepareData(ctx, frames, pageSize * smallBatchFramesPerRun * numRuns, pageSize / 10, pageSize / 2, null,
            expected);
    for (IFrame smallFrame : frames) {
        runGenerator.nextFrame(smallFrame.getBuffer());
    }

    // Second batch: every record is exactly one page long.
    frames.clear();
    prepareData(ctx, frames, pageSize * largeBatchFramesPerRun * numRuns, pageSize, pageSize, null, expected);
    for (IFrame largeFrame : frames) {
        runGenerator.nextFrame(largeFrame.getBuffer());
    }
    runGenerator.close();

    List<GeneratedRunFileReader> generatedRuns = runGenerator.getRuns();
    List<IFrame> mergeInputFrames = new ArrayList<>(generatedRuns.size());
    for (GeneratedRunFileReader generatedRun : generatedRuns) {
        // One input frame per run, sized to the largest frame that run contains.
        mergeInputFrames.add(new GroupVSizeFrame(ctx, generatedRun.getMaxFrameSize()));
        // Let each run file reader not delete the run file when it is read and closed,
        // because the runs are read once by matchResult and again by the merger below.
        PA.setValue(generatedRun, "deleteAfterClose", false);
    }
    matchResult(ctx, generatedRuns, expected);

    List<IFrameReader> readers = new ArrayList<>(generatedRuns.size());
    readers.addAll(generatedRuns);
    RunMergingFrameReader merger = new RunMergingFrameReader(ctx, readers, mergeInputFrames, SortFields,
            Comparators, null, RecordDesc);

    IFrame mergedFrame = new VSizeFrame(ctx);
    merger.open();
    while (merger.nextFrame(mergedFrame)) {
        assertFrameIsSorted(mergedFrame, Arrays.asList(expected));
    }
    merger.close();
    assertAllKeyValueIsConsumed(Arrays.asList(expected));
}
use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.
the class AbstractRunGeneratorTest method assertReadSorted.
/**
 * Reads each run from start to end and passes every frame to {@code assertFTADataIsSorted}.
 * Requires at least one run, and requires {@code keyValuePair} to be empty once all runs
 * have been read (presumably {@code assertFTADataIsSorted} removes the entries it matches).
 *
 * @param runs         run readers to verify; each is opened here and always closed again
 * @param fta          accessor that is reset onto every frame that is read
 * @param frame        frame the runs are read into
 * @param keyValuePair expected key/value pairs, handed to {@code assertFTADataIsSorted}
 * @throws HyracksDataException if opening, reading or closing a run fails
 */
static void assertReadSorted(List<GeneratedRunFileReader> runs, IFrameTupleAccessor fta, IFrame frame,
        Map<Integer, String> keyValuePair) throws HyracksDataException {
    assertTrue(!runs.isEmpty());
    for (GeneratedRunFileReader reader : runs) {
        try {
            reader.open();
            // The previous key restarts for every run: ordering is checked per run only.
            int lastKey = Integer.MIN_VALUE;
            while (reader.nextFrame(frame)) {
                fta.reset(frame.getBuffer());
                lastKey = assertFTADataIsSorted(fta, keyValuePair, lastKey);
            }
        } finally {
            reader.close();
        }
    }
    assertTrue(keyValuePair.isEmpty());
}
use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.
the class AbstractRunGeneratorTest method testMixedLargeRecords.
/**
 * Sorts ordinary records mixed with the big object produced by
 * {@code generateBigObject(pageSize, frameLimit - 1)} and asserts that the largest frame
 * size reported by any resulting run equals {@code pageSize * (frameLimit - 1)}.
 */
@Test
public void testMixedLargeRecords() throws HyracksDataException {
    final int pageSize = 128;
    final int frameLimit = 4;
    final int numRuns = 4;
    final int minRecordSize = 20;
    final int maxRecordSize = pageSize / 2;

    HashMap<Integer, String> specialPair = generateBigObject(pageSize, frameLimit - 1);
    List<GeneratedRunFileReader> sortedRuns =
            testSortRecords(pageSize, frameLimit, numRuns, minRecordSize, maxRecordSize, specialPair);

    int largestFrameSize = 0;
    for (GeneratedRunFileReader sortedRun : sortedRuns) {
        int candidate = sortedRun.getMaxFrameSize();
        if (candidate > largestFrameSize) {
            largestFrameSize = candidate;
        }
    }

    int expectedFrameSize = pageSize * (frameLimit - 1);
    assertTrue(largestFrameSize == expectedFrameSize);
}
use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.
the class AbstractRunGeneratorTest method matchResult.
/**
 * Verifies the given runs with {@code assertReadSorted}, using a frame sized to the largest
 * frame any run reports and a copy of {@code keyValuePair}, so the caller's map is not
 * handed to the check directly.
 *
 * @param ctx          task context used to create the frame and to obtain the initial frame size
 * @param runs         run readers to verify
 * @param keyValuePair expected key/value pairs; only a copy is passed on
 * @throws HyracksDataException if reading a run fails
 */
static void matchResult(IHyracksTaskContext ctx, List<GeneratedRunFileReader> runs,
        Map<Integer, String> keyValuePair) throws HyracksDataException {
    int largestFrameSize = 0;
    for (GeneratedRunFileReader candidate : runs) {
        int frameSize = candidate.getMaxFrameSize();
        if (frameSize > largestFrameSize) {
            largestFrameSize = frameSize;
        }
    }

    GroupVSizeFrame readFrame = new GroupVSizeFrame(ctx, largestFrameSize);
    GroupFrameAccessor accessor = new GroupFrameAccessor(ctx.getInitialFrameSize(), RecordDesc);
    Map<Integer, String> expectedCopy = new HashMap<>(keyValuePair);
    assertReadSorted(runs, accessor, readFrame, expectedCopy);
}
use of org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader in project asterixdb by apache.
the class AbstractExternalSortRunMerger method process.
/**
 * Writes the sorted result to the final writer, merging the generated run files first
 * when there are any.
 * <p>
 * If {@code runs} is empty, whatever remains in {@code sorter} (when it is non-null) is
 * flushed straight to the writer returned by {@code prepareSkipMergingFinalResultWriter}.
 * Otherwise the runs are merged in passes: each loop iteration selects a subset of runs and
 * either produces an intermediate run that is appended to {@code runs}, or, once nothing
 * is left to merge at an intermediate level, merges the selection into the final writer.
 * <p>
 * On any {@link Exception} the final writer (if already created) is failed. In all cases
 * the final writer is closed and every reader still in {@code runs} is closed afterwards.
 *
 * @throws HyracksDataException wrapping (via {@code HyracksDataException.create}) any
 *         exception raised while sorting, merging or writing
 */
public void process() throws HyracksDataException {
IFrameWriter finalWriter = null;
try {
if (runs.isEmpty()) {
// No run file was generated: skip merging and emit what the sorter still holds.
finalWriter = prepareSkipMergingFinalResultWriter(writer);
finalWriter.open();
if (sorter != null) {
try {
if (sorter.hasRemaining()) {
sorter.flush(finalWriter);
}
} finally {
// The sorter is closed even if flushing fails.
sorter.close();
}
}
} else {
/** recycle sort buffer */
if (sorter != null) {
sorter.close();
}
finalWriter = prepareFinalMergeResultWriter(writer);
finalWriter.open();
// NOTE(review): one frame fewer than framesLimit — presumably the remaining frame is
// kept for the output frame allocated below; confirm.
int maxMergeWidth = framesLimit - 1;
inFrames = new ArrayList<>(maxMergeWidth);
outputFrame = new VSizeFrame(ctx);
List<GeneratedRunFileReader> partialRuns = new ArrayList<>(maxMergeWidth);
// runs[0, stop) is the current generation; readers appended at index >= stop during
// this pass form the next generation.
int stop = runs.size();
// Mark every run of the current generation as available.
// NOTE(review): selectPartialRuns presumably clears the entries of the runs it picks — confirm.
currentGenerationRunAvailable.set(0, stop);
while (true) {
int unUsed = selectPartialRuns(maxMergeWidth * ctx.getInitialFrameSize(), runs, partialRuns, currentGenerationRunAvailable, stop);
prepareFrames(unUsed, inFrames, partialRuns);
// Intermediate step: runs of the current generation are still available, or at least
// one next-generation run has already been appended.
if (!currentGenerationRunAvailable.isEmpty() || stop < runs.size()) {
GeneratedRunFileReader reader;
if (partialRuns.size() == 1) {
// Only a single run was selected for this step.
if (!currentGenerationRunAvailable.isEmpty()) {
// Other runs are still available but none was selected alongside this one.
throw new HyracksDataException("The record is too big to put into the merging frame, please" + " allocate more sorting memory");
} else {
// It is the last run of this generation: carry it over unchanged instead of
// rewriting it through a merge.
reader = partialRuns.get(0);
}
} else {
// Merge the selected runs into a new intermediate run file.
RunFileWriter mergeFileWriter = prepareIntermediateMergeRunFile();
IFrameWriter mergeResultWriter = prepareIntermediateMergeResultWriter(mergeFileWriter);
try {
mergeResultWriter.open();
merge(mergeResultWriter, partialRuns);
} catch (Throwable t) {
// Signal the failure to the writer before rethrowing; close() still runs below.
mergeResultWriter.fail();
throw t;
} finally {
mergeResultWriter.close();
}
reader = mergeFileWriter.createReader();
}
// Append the result (merged or carried-over run) to the next generation.
runs.add(reader);
if (currentGenerationRunAvailable.isEmpty()) {
// The current generation is used up: drop its entries from the list and make the
// newly appended runs the current generation.
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.fine("generated runs:" + stop);
}
runs.subList(0, stop).clear();
currentGenerationRunAvailable.clear();
currentGenerationRunAvailable.set(0, runs.size());
stop = runs.size();
}
} else {
// Nothing remains to merge at an intermediate level: merge the selection into the
// final writer and finish.
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.fine("final runs:" + stop);
}
merge(finalWriter, partialRuns);
break;
}
}
}
} catch (Exception e) {
// Fail the final writer (if it was created) before propagating.
if (finalWriter != null) {
finalWriter.fail();
}
throw HyracksDataException.create(e);
} finally {
try {
if (finalWriter != null) {
finalWriter.close();
}
} finally {
// Close every reader still held in runs, even if closing the final writer threw.
for (RunFileReader reader : runs) {
try {
// close is idempotent.
reader.close();
} catch (Exception e) {
// Failures here are logged at WARNING and not propagated, so the remaining readers
// are still closed.
if (LOGGER.isLoggable(Level.WARNING)) {
LOGGER.log(Level.WARNING, e.getMessage(), e);
}
}
}
}
}
}
Aggregations