Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.
The class RunMergingFrameReaderTest, method testRunFileReader:
@Test
public void testRunFileReader() throws HyracksDataException {
    int pageSize = 128;
    int numRuns = 4;
    int numFramesPerRun = 4;
    int minRecordSize = pageSize / 10;
    int maxRecordSize = pageSize / 2;
    IHyracksTaskContext ctx = testUtils.create(pageSize);
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null, ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, numFramesPerRun);
    runGenerator.open();
    Map<Integer, String> keyValuePair = new HashMap<>();
    List<IFrame> frameList = new ArrayList<>();
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }
    numFramesPerRun = 2;
    minRecordSize = pageSize;
    maxRecordSize = pageSize;
    frameList.clear();
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }
    runGenerator.close();
    List<IFrame> inFrame = new ArrayList<>(runGenerator.getRuns().size());
    for (GeneratedRunFileReader max : runGenerator.getRuns()) {
        inFrame.add(new GroupVSizeFrame(ctx, max.getMaxFrameSize()));
    }
    // Let each run file reader not delete the run file when it is read and closed.
    for (GeneratedRunFileReader run : runGenerator.getRuns()) {
        PA.setValue(run, "deleteAfterClose", false);
    }
    matchResult(ctx, runGenerator.getRuns(), keyValuePair);
    List<IFrameReader> runs = new ArrayList<>();
    for (GeneratedRunFileReader run : runGenerator.getRuns()) {
        runs.add(run);
    }
    RunMergingFrameReader reader = new RunMergingFrameReader(ctx, runs, inFrame, SortFields, Comparators, null, RecordDesc);
    IFrame outFrame = new VSizeFrame(ctx);
    reader.open();
    while (reader.nextFrame(outFrame)) {
        assertFrameIsSorted(outFrame, Arrays.asList(keyValuePair));
    }
    reader.close();
    assertAllKeyValueIsConsumed(Arrays.asList(keyValuePair));
}
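The merging loop above relies on the fact that a VSizeFrame starts at the task's initial frame size and grows on demand when a larger frame is read into it. As a minimal sketch of that pattern in isolation, the hypothetical helper below drains any IFrameReader (such as the RunMergingFrameReader above) through a single reusable VSizeFrame and counts the tuples it yields; the class and method names, and the idea of counting tuples, are assumptions made for illustration and are not part of the test.

import org.apache.hyracks.api.comm.IFrame;
import org.apache.hyracks.api.comm.IFrameReader;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;

// Hypothetical helper: drains an IFrameReader into one reusable VSizeFrame and counts the tuples produced.
public final class FrameDrainSketch {

    private FrameDrainSketch() {
    }

    public static long countTuples(IHyracksTaskContext ctx, IFrameReader reader, RecordDescriptor recordDesc)
            throws HyracksDataException {
        IFrame outFrame = new VSizeFrame(ctx); // starts at the initial frame size and grows as needed
        FrameTupleAccessor accessor = new FrameTupleAccessor(recordDesc);
        long count = 0;
        reader.open();
        try {
            while (reader.nextFrame(outFrame)) {
                accessor.reset(outFrame.getBuffer());
                count += accessor.getTupleCount();
            }
        } finally {
            reader.close();
        }
        return count;
    }
}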
Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.
The class HashSpillableTableFactory, method buildSpillableTable:
@Override
public ISpillableTable buildSpillableTable(final IHyracksTaskContext ctx, int suggestTableSize, long inputDataBytesSize, final int[] keyFields, final IBinaryComparator[] comparators, final INormalizedKeyComputer firstKeyNormalizerFactory, IAggregatorDescriptorFactory aggregateFactory, RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor, final int framesLimit, final int seed) throws HyracksDataException {
    final int tableSize = suggestTableSize;
    // For the output, we need to have at least one frame.
    if (framesLimit < MIN_FRAME_LIMT) {
        throw new HyracksDataException("The given frame limit is too small to partition the data.");
    }
    final int[] intermediateResultKeys = new int[keyFields.length];
    for (int i = 0; i < keyFields.length; i++) {
        intermediateResultKeys[i] = i;
    }
    final FrameTuplePairComparator ftpcInputCompareToAggregate = new FrameTuplePairComparator(keyFields, intermediateResultKeys, comparators);
    final ITuplePartitionComputer tpc = new FieldHashPartitionComputerFamily(keyFields, hashFunctionFamilies).createPartitioner(seed);
    // For calculating the hash value of the already aggregated tuples (not incoming tuples).
    // This computer is required to calculate the hash value of an aggregated tuple
    // while doing the garbage collection work on the hash table.
    final ITuplePartitionComputer tpcIntermediate = new FieldHashPartitionComputerFamily(intermediateResultKeys, hashFunctionFamilies).createPartitioner(seed);
    final IAggregatorDescriptor aggregator = aggregateFactory.createAggregator(ctx, inRecordDescriptor, outRecordDescriptor, keyFields, intermediateResultKeys, null);
    final AggregateState aggregateState = aggregator.createAggregateStates();
    final ArrayTupleBuilder stateTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    //TODO(jf) research on the optimized partition size
    long memoryBudget = Math.max(MIN_DATA_TABLE_FRAME_LIMT + MIN_HASH_TABLE_FRAME_LIMT, framesLimit - OUTPUT_FRAME_LIMT - MIN_HASH_TABLE_FRAME_LIMT);
    final int numPartitions = getNumOfPartitions(inputDataBytesSize / ctx.getInitialFrameSize(), memoryBudget);
    final int entriesPerPartition = (int) Math.ceil(1.0 * tableSize / numPartitions);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("created hashtable, table size:" + tableSize + " file size:" + inputDataBytesSize + " #partitions:" + numPartitions);
    }
    final ArrayTupleBuilder outputTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    return new ISpillableTable() {

        private final TuplePointer pointer = new TuplePointer();

        private final BitSet spilledSet = new BitSet(numPartitions);

        // This frame pool will be shared by both data table and hash table.
        private final IDeallocatableFramePool framePool = new DeallocatableFramePool(ctx, framesLimit * ctx.getInitialFrameSize());

        // buffer manager for hash table
        private final ISimpleFrameBufferManager bufferManagerForHashTable = new FramePoolBackedFrameBufferManager(framePool);

        private final ISerializableTable hashTableForTuplePointer = new SerializableHashTable(tableSize, ctx, bufferManagerForHashTable);

        // buffer manager for data table
        final IPartitionedTupleBufferManager bufferManager = new VPartitionTupleBufferManager(PreferToSpillFullyOccupiedFramePolicy.createAtMostOneFrameForSpilledPartitionConstrain(spilledSet), numPartitions, framePool);

        final ITuplePointerAccessor bufferAccessor = bufferManager.getTuplePointerAccessor(outRecordDescriptor);

        private final PreferToSpillFullyOccupiedFramePolicy spillPolicy = new PreferToSpillFullyOccupiedFramePolicy(bufferManager, spilledSet);

        private final FrameTupleAppender outputAppender = new FrameTupleAppender(new VSizeFrame(ctx));

        @Override
        public void close() throws HyracksDataException {
            hashTableForTuplePointer.close();
            aggregator.close();
        }

        @Override
        public void clear(int partition) throws HyracksDataException {
            for (int p = getFirstEntryInHashTable(partition); p < getLastEntryInHashTable(partition); p++) {
                hashTableForTuplePointer.delete(p);
            }
            // Checks whether the garbage collection is required and conducts a garbage collection if so.
            if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
                int numberOfFramesReclaimed = hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Garbage Collection on Hash table is done. Deallocated frames:" + numberOfFramesReclaimed);
                }
            }
            bufferManager.clearPartition(partition);
        }

        private int getPartition(int entryInHashTable) {
            return entryInHashTable / entriesPerPartition;
        }

        private int getFirstEntryInHashTable(int partition) {
            return partition * entriesPerPartition;
        }

        private int getLastEntryInHashTable(int partition) {
            return Math.min(tableSize, (partition + 1) * entriesPerPartition);
        }

        @Override
        public boolean insert(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            for (int i = 0; i < hashTableForTuplePointer.getTupleCount(entryInHashTable); i++) {
                hashTableForTuplePointer.getTuplePointer(entryInHashTable, i, pointer);
                bufferAccessor.reset(pointer);
                int c = ftpcInputCompareToAggregate.compare(accessor, tIndex, bufferAccessor);
                if (c == 0) {
                    aggregateExistingTuple(accessor, tIndex, bufferAccessor, pointer.getTupleIndex());
                    return true;
                }
            }
            return insertNewAggregateEntry(entryInHashTable, accessor, tIndex);
        }

        /**
         * Inserts a new aggregate entry into the data table and hash table.
         * This insertion must be an atomic operation. We cannot have a partial success or failure.
         * So, if an insertion succeeds on the data table and the same insertion on the hash table fails, then
         * we need to revert the effect of data table insertion.
         */
        private boolean insertNewAggregateEntry(int entryInHashTable, IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            initStateTupleBuilder(accessor, tIndex);
            int pid = getPartition(entryInHashTable);
            // Insertion to the data table
            if (!bufferManager.insertTuple(pid, stateTupleBuilder.getByteArray(), stateTupleBuilder.getFieldEndOffsets(), 0, stateTupleBuilder.getSize(), pointer)) {
                return false;
            }
            // Insertion to the hash table
            if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
                // To preserve the atomicity of this method, we need to undo the effect
                // of the above bufferManager.insertTuple() call since the given insertion has failed.
                bufferManager.cancelInsertTuple(pid);
                return false;
            }
            return true;
        }

        private void initStateTupleBuilder(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            stateTupleBuilder.reset();
            for (int k = 0; k < keyFields.length; k++) {
                stateTupleBuilder.addField(accessor, tIndex, keyFields[k]);
            }
            aggregator.init(stateTupleBuilder, accessor, tIndex, aggregateState);
        }

        private void aggregateExistingTuple(IFrameTupleAccessor accessor, int tIndex, ITuplePointerAccessor bufferAccessor, int tupleIndex) throws HyracksDataException {
            aggregator.aggregate(accessor, tIndex, bufferAccessor, tupleIndex, aggregateState);
        }

        @Override
        public int flushFrames(int partition, IFrameWriter writer, AggregateType type) throws HyracksDataException {
            int count = 0;
            for (int hashEntryPid = getFirstEntryInHashTable(partition); hashEntryPid < getLastEntryInHashTable(partition); hashEntryPid++) {
                count += hashTableForTuplePointer.getTupleCount(hashEntryPid);
                for (int tid = 0; tid < hashTableForTuplePointer.getTupleCount(hashEntryPid); tid++) {
                    hashTableForTuplePointer.getTuplePointer(hashEntryPid, tid, pointer);
                    bufferAccessor.reset(pointer);
                    outputTupleBuilder.reset();
                    for (int k = 0; k < intermediateResultKeys.length; k++) {
                        outputTupleBuilder.addField(bufferAccessor.getBuffer().array(), bufferAccessor.getAbsFieldStartOffset(intermediateResultKeys[k]), bufferAccessor.getFieldLength(intermediateResultKeys[k]));
                    }
                    boolean hasOutput = false;
                    switch (type) {
                        case PARTIAL:
                            hasOutput = aggregator.outputPartialResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                        case FINAL:
                            hasOutput = aggregator.outputFinalResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                    }
                    if (hasOutput && !outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                        outputAppender.write(writer, true);
                        if (!outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                            throw new HyracksDataException("The output item is too large to be fit into a frame.");
                        }
                    }
                }
            }
            outputAppender.write(writer, true);
            spilledSet.set(partition);
            return count;
        }

        @Override
        public int getNumPartitions() {
            return bufferManager.getNumPartitions();
        }

        @Override
        public int findVictimPartition(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            int partition = getPartition(entryInHashTable);
            return spillPolicy.selectVictimPartition(partition);
        }
    };
}
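The flushFrames() method above uses a common Hyracks idiom: build a tuple in an ArrayTupleBuilder, try to append it to a FrameTupleAppender backed by a VSizeFrame, and if the frame is full, push the frame to the downstream writer and retry once. The hypothetical sketch below isolates that idiom; it uses the plain append(...) overload (the factory uses appendSkipEmptyField(...)), and the class and method names are made up for illustration.

import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;

// Hypothetical helper illustrating the append / flush / retry idiom used in flushFrames() above.
public final class AppendFlushSketch {

    private AppendFlushSketch() {
    }

    // Appends one built tuple to the appender's frame; on a full frame, flushes to the writer and retries once.
    public static void appendOrFlush(FrameTupleAppender appender, ArrayTupleBuilder tb, IFrameWriter writer)
            throws HyracksDataException {
        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
            appender.write(writer, true); // push the full frame downstream and clear it for reuse
            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                // Even an empty frame cannot hold the tuple, so it can never fit.
                throw new HyracksDataException("The tuple is too large to fit into a single frame.");
            }
        }
    }

    // Usage sketch: create the appender over a VSizeFrame and flush the final partial frame at the end.
    public static void writeAll(IHyracksTaskContext ctx, Iterable<ArrayTupleBuilder> tuples, IFrameWriter writer)
            throws HyracksDataException {
        FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
        for (ArrayTupleBuilder tb : tuples) {
            appendOrFlush(appender, tb, writer);
        }
        appender.write(writer, true); // flush whatever remains in the last frame
    }
}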
Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.
The class ResultWriterOperatorDescriptor, method createPushRuntime:
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final IDatasetPartitionManager dpm = ctx.getDatasetPartitionManager();
    final IFrame frame = new VSizeFrame(ctx);
    final FrameOutputStream frameOutputStream = new FrameOutputStream(ctx.getInitialFrameSize());
    frameOutputStream.reset(frame, true);
    PrintStream printStream = new PrintStream(frameOutputStream);
    final RecordDescriptor outRecordDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
    final IResultSerializer resultSerializer = resultSerializerFactory.createResultSerializer(outRecordDesc, printStream);
    final FrameTupleAccessor frameTupleAccessor = new FrameTupleAccessor(outRecordDesc);
    return new AbstractUnaryInputSinkOperatorNodePushable() {

        private IFrameWriter datasetPartitionWriter;

        private boolean failed = false;

        @Override
        public void open() throws HyracksDataException {
            try {
                datasetPartitionWriter = dpm.createDatasetPartitionWriter(ctx, rsId, ordered, asyncMode, partition, nPartitions);
                datasetPartitionWriter.open();
                resultSerializer.init();
            } catch (HyracksException e) {
                throw HyracksDataException.create(e);
            }
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            frameTupleAccessor.reset(buffer);
            for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
                resultSerializer.appendTuple(frameTupleAccessor, tIndex);
                if (!frameOutputStream.appendTuple()) {
                    frameOutputStream.flush(datasetPartitionWriter);
                    resultSerializer.appendTuple(frameTupleAccessor, tIndex);
                    frameOutputStream.appendTuple();
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            failed = true;
            datasetPartitionWriter.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            try {
                if (!failed && frameOutputStream.getTupleCount() > 0) {
                    frameOutputStream.flush(datasetPartitionWriter);
                }
            } catch (Exception e) {
                datasetPartitionWriter.fail();
                throw e;
            } finally {
                datasetPartitionWriter.close();
            }
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("{ ");
            sb.append("\"rsId\": \"").append(rsId).append("\", ");
            sb.append("\"ordered\": ").append(ordered).append(", ");
            sb.append("\"asyncMode\": ").append(asyncMode).append(" }");
            return sb.toString();
        }
    };
}
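The result writer pairs a VSizeFrame with a FrameOutputStream and a PrintStream: the serializer prints into the stream, appendTuple() records the buffered bytes as one tuple in the frame, and when that fails the frame is flushed and the tuple is re-serialized into the now-empty frame. The hypothetical sketch below isolates that pattern for plain text records; the class and method names and the Iterable<String> input are assumptions made for illustration, and the import paths follow the usual Hyracks packages.

import java.io.PrintStream;

import org.apache.hyracks.api.comm.IFrame;
import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.FrameOutputStream;

// Hypothetical sketch of the serialize / appendTuple / flush-and-retry pattern used above.
public final class TextResultWriterSketch {

    private TextResultWriterSketch() {
    }

    public static void writeRecords(IHyracksTaskContext ctx, Iterable<String> records, IFrameWriter writer)
            throws HyracksDataException {
        IFrame frame = new VSizeFrame(ctx);
        FrameOutputStream frameOutputStream = new FrameOutputStream(ctx.getInitialFrameSize());
        frameOutputStream.reset(frame, true);
        PrintStream printStream = new PrintStream(frameOutputStream);
        for (String record : records) {
            printStream.print(record);
            if (!frameOutputStream.appendTuple()) {
                // The current frame is full: hand it to the writer, then re-serialize the same record.
                frameOutputStream.flush(writer);
                printStream.print(record);
                frameOutputStream.appendTuple();
            }
        }
        if (frameOutputStream.getTupleCount() > 0) {
            frameOutputStream.flush(writer); // flush the final, partially filled frame
        }
    }
}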
Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.
The class OptimizedHybridHashJoin, method loadSpilledPartitionToMem:
private boolean loadSpilledPartitionToMem(int pid, RunFileWriter wr) throws HyracksDataException {
    RunFileReader r = wr.createReader();
    try {
        r.open();
        if (reloadBuffer == null) {
            reloadBuffer = new VSizeFrame(ctx);
        }
        while (r.nextFrame(reloadBuffer)) {
            accessorBuild.reset(reloadBuffer.getBuffer());
            for (int tid = 0; tid < accessorBuild.getTupleCount(); tid++) {
                if (bufferManager.insertTuple(pid, accessorBuild, tid, tempPtr)) {
                    continue;
                }
                // If the insertion failed for some reason (e.g. due to fragmentation),
                // we need to clear the occupied frames.
                bufferManager.clearPartition(pid);
                return false;
            }
        }
        // Closes and deletes the run file if it is already loaded into memory.
        r.setDeleteAfterClose(true);
    } finally {
        r.close();
    }
    spilledStatus.set(pid, false);
    buildRFWriters[pid] = null;
    return true;
}
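loadSpilledPartitionToMem reuses a single VSizeFrame (reloadBuffer) across the whole run, letting it grow once to the largest frame in the file instead of allocating a buffer per read. The hypothetical helper below shows the same read-back loop in isolation, merely counting the tuples of a spilled run; the class and method names and the counting are assumptions made for illustration.

import org.apache.hyracks.api.comm.IFrame;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import org.apache.hyracks.dataflow.common.io.RunFileReader;
import org.apache.hyracks.dataflow.common.io.RunFileWriter;

// Hypothetical helper: reads a spilled run back through one reusable VSizeFrame and counts its tuples.
public final class RunReloadSketch {

    private RunReloadSketch() {
    }

    public static int countSpilledTuples(IHyracksTaskContext ctx, RunFileWriter runWriter,
            RecordDescriptor recordDesc) throws HyracksDataException {
        RunFileReader reader = runWriter.createReader();
        IFrame reloadBuffer = new VSizeFrame(ctx); // grows if the run holds frames larger than the initial size
        FrameTupleAccessor accessor = new FrameTupleAccessor(recordDesc);
        int count = 0;
        reader.open();
        try {
            while (reader.nextFrame(reloadBuffer)) {
                accessor.reset(reloadBuffer.getBuffer());
                count += accessor.getTupleCount();
            }
            reader.setDeleteAfterClose(true); // the run has been fully consumed, so remove the file on close
        } finally {
            reader.close();
        }
        return count;
    }
}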
Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.
The class ConstantTupleSourceOperatorNodePushable, method initialize:
@Override
public void initialize() throws HyracksDataException {
    FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
    if (fieldSlots != null && tupleData != null && tupleSize > 0)
        appender.append(fieldSlots, tupleData, 0, tupleSize);
    writer.open();
    try {
        appender.write(writer, false);
    } catch (Throwable th) {
        writer.fail();
        throw new HyracksDataException(th);
    } finally {
        writer.close();
    }
}
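The natural question for this operator is where fieldSlots, tupleData, and tupleSize come from. They are typically produced by an ArrayTupleBuilder, as in the hypothetical sketch below; the two integer fields and the builder usage are assumptions chosen only to show the shape of the data, not code from the operator itself.

import java.io.IOException;

import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;

// Hypothetical sketch: building the (fieldSlots, tupleData, tupleSize) triple consumed by the pushable above.
public final class ConstantTupleSketch {

    private ConstantTupleSketch() {
    }

    public static void main(String[] args) throws IOException {
        ArrayTupleBuilder tb = new ArrayTupleBuilder(2); // a constant tuple with two fields (assumed)
        tb.reset();
        tb.getDataOutput().writeInt(42); // field 0
        tb.addFieldEndOffset();
        tb.getDataOutput().writeLong(7L); // field 1
        tb.addFieldEndOffset();
        int[] fieldSlots = tb.getFieldEndOffsets(); // end offset of each field
        byte[] tupleData = tb.getByteArray(); // the concatenated field data
        int tupleSize = tb.getSize(); // total number of data bytes
        System.out.println(fieldSlots.length + " fields, " + tupleSize + " bytes");
    }
}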