Use of org.apache.hyracks.api.comm.IFrameTupleAccessor in project asterixdb by apache: class NestedPlansAccumulatingAggregatorFactory, method createAggregator.
@Override
public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDesc,
        RecordDescriptor outRecordDescriptor, int[] keys, int[] partialKeys) throws HyracksDataException {
    final AggregatorOutput outputWriter = new AggregatorOutput(subplans, keyFieldIdx.length, decorFieldIdx.length);
    final NestedTupleSourceRuntime[] pipelines = new NestedTupleSourceRuntime[subplans.length];
    for (int i = 0; i < subplans.length; i++) {
        pipelines[i] = (NestedTupleSourceRuntime) assemblePipeline(subplans[i], outputWriter, ctx);
    }
    return new IAggregatorDescriptor() {

        @Override
        public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
                AggregateState state) throws HyracksDataException {
            ArrayTupleBuilder tb = outputWriter.getTupleBuilder();
            tb.reset();
            for (int i = 0; i < keyFieldIdx.length; ++i) {
                tb.addField(accessor, tIndex, keyFieldIdx[i]);
            }
            for (int i = 0; i < decorFieldIdx.length; ++i) {
                tb.addField(accessor, tIndex, decorFieldIdx[i]);
            }
            for (int i = 0; i < pipelines.length; ++i) {
                pipelines[i].open();
            }
            // aggregate the first tuple
            for (int i = 0; i < pipelines.length; i++) {
                pipelines[i].writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
                int stateTupleIndex, AggregateState state) throws HyracksDataException {
            for (int i = 0; i < pipelines.length; i++) {
                pipelines[i].writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        @Override
        public boolean outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor stateAccessor,
                int tIndex, AggregateState state) throws HyracksDataException {
            for (int i = 0; i < pipelines.length; i++) {
                outputWriter.setInputIdx(i);
                pipelines[i].close();
            }
            // outputWriter.writeTuple(appender);
            tupleBuilder.reset();
            ArrayTupleBuilder tb = outputWriter.getTupleBuilder();
            byte[] data = tb.getByteArray();
            int[] fieldEnds = tb.getFieldEndOffsets();
            int start = 0;
            int offset;
            for (int i = 0; i < fieldEnds.length; i++) {
                if (i > 0) {
                    start = fieldEnds[i - 1];
                }
                offset = fieldEnds[i] - start;
                tupleBuilder.addField(data, start, offset);
            }
            return true;
        }

        @Override
        public AggregateState createAggregateStates() {
            return new AggregateState();
        }

        @Override
        public void reset() {
        }

        @Override
        public boolean outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
                AggregateState state) throws HyracksDataException {
            throw new IllegalStateException("this method should not be called");
        }

        @Override
        public void close() {
        }
    };
}
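For context, a minimal sketch of how a caller (for example, a group-by operator) might drive the returned IAggregatorDescriptor over one group of tuples; the helper name, the tuple-index bounds, and the output field count are assumptions for illustration, not part of the factory above:

// Hypothetical driver loop for one group; not part of the factory above.
private void aggregateOneGroup(IAggregatorDescriptor agg, IFrameTupleAccessor accessor, int firstTuple,
        int lastTuple, int outputFieldCount) throws HyracksDataException {
    AggregateState state = agg.createAggregateStates();
    ArrayTupleBuilder tb = new ArrayTupleBuilder(outputFieldCount);
    // init() copies the group keys and pushes the first tuple through the nested plans.
    agg.init(tb, accessor, firstTuple, state);
    for (int t = firstTuple + 1; t <= lastTuple; t++) {
        // This accumulating aggregator ignores the state accessor arguments.
        agg.aggregate(accessor, t, null, -1, state);
    }
    // outputFinalResult() closes the pipelines and copies the assembled result into tb.
    agg.outputFinalResult(tb, accessor, lastTuple, state);
}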
Use of org.apache.hyracks.api.comm.IFrameTupleAccessor in project asterixdb by apache: class NestedPlansRunningAggregatorFactory, method createAggregator.
/* (non-Javadoc)
 * @see org.apache.hyracks.dataflow.std.group.IAggregatorDescriptorFactory#createAggregator(org.apache.hyracks.api.context.IHyracksTaskContext, org.apache.hyracks.api.dataflow.value.RecordDescriptor, org.apache.hyracks.api.dataflow.value.RecordDescriptor, int[], int[])
 */
@Override
public IAggregatorDescriptor createAggregator(final IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
        RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults,
        final IFrameWriter writer) throws HyracksDataException {
    final RunningAggregatorOutput outputWriter = new RunningAggregatorOutput(ctx, subplans, keyFieldIdx.length,
            decorFieldIdx.length, writer);
    final NestedTupleSourceRuntime[] pipelines = new NestedTupleSourceRuntime[subplans.length];
    for (int i = 0; i < subplans.length; i++) {
        pipelines[i] = (NestedTupleSourceRuntime) assemblePipeline(subplans[i], outputWriter, ctx);
    }
    final ArrayTupleBuilder gbyTb = outputWriter.getGroupByTupleBuilder();
    return new IAggregatorDescriptor() {

        @Override
        public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
                AggregateState state) throws HyracksDataException {
            for (int i = 0; i < pipelines.length; ++i) {
                pipelines[i].open();
            }
            gbyTb.reset();
            for (int i = 0; i < keyFieldIdx.length; ++i) {
                gbyTb.addField(accessor, tIndex, keyFieldIdx[i]);
            }
            for (int i = 0; i < decorFieldIdx.length; ++i) {
                gbyTb.addField(accessor, tIndex, decorFieldIdx[i]);
            }
            // aggregate the first tuple
            for (int i = 0; i < pipelines.length; i++) {
                outputWriter.setInputIdx(i);
                pipelines[i].writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
                int stateTupleIndex, AggregateState state) throws HyracksDataException {
            for (int i = 0; i < pipelines.length; i++) {
                outputWriter.setInputIdx(i);
                pipelines[i].writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        @Override
        public boolean outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
                AggregateState state) throws HyracksDataException {
            for (int i = 0; i < pipelines.length; ++i) {
                outputWriter.setInputIdx(i);
                pipelines[i].close();
            }
            return false;
        }

        @Override
        public AggregateState createAggregateStates() {
            return new AggregateState();
        }

        @Override
        public void reset() {
        }

        @Override
        public boolean outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
                AggregateState state) throws HyracksDataException {
            throw new IllegalStateException("this method should not be called");
        }

        @Override
        public void close() {
        }
    };
}
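Unlike the accumulating variant above, this running aggregator emits its results through the IFrameWriter handed to createAggregator as the pipelines close, which is why outputFinalResult returns false: nothing is copied back into the caller's tuple builder. A hedged sketch of the wiring difference; the helper and all of its parameters are assumptions for illustration:

// Hypothetical helper showing the wiring difference; all parameters are assumptions.
private IAggregatorDescriptor wireRunningAggregator(NestedPlansRunningAggregatorFactory factory,
        IHyracksTaskContext ctx, RecordDescriptor inDesc, RecordDescriptor outDesc, int[] keyFields,
        int[] partialKeys, IFrameWriter downstream) throws HyracksDataException {
    // The writer is fixed at creation time; outputFinalResult() later closes the
    // nested pipelines, which flush their results to this writer directly.
    return factory.createAggregator(ctx, inDesc, outDesc, keyFields, partialKeys, downstream);
}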
Use of org.apache.hyracks.api.comm.IFrameTupleAccessor in project asterixdb by apache: class SerializableAggregatorDescriptorFactory, method createAggregator.
@Override
public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
        RecordDescriptor outRecordDescriptor, int[] keyFields, final int[] keyFieldsInPartialResults)
        throws HyracksDataException {
    final int[] keys = keyFields;

    /**
     * one IAggregatorDescriptor instance per Gby operator
     */
    return new IAggregatorDescriptor() {
        private FrameTupleReference ftr = new FrameTupleReference();
        private ISerializedAggregateEvaluator[] aggs = new ISerializedAggregateEvaluator[aggFactories.length];
        private int offsetFieldIndex = keys.length;
        private int[] stateFieldLength = new int[aggFactories.length];

        @Override
        public AggregateState createAggregateStates() {
            return new AggregateState();
        }

        @Override
        public void init(ArrayTupleBuilder tb, IFrameTupleAccessor accessor, int tIndex, AggregateState state)
                throws HyracksDataException {
            DataOutput output = tb.getDataOutput();
            ftr.reset(accessor, tIndex);
            for (int i = 0; i < aggs.length; i++) {
                int begin = tb.getSize();
                if (aggs[i] == null) {
                    aggs[i] = aggFactories[i].createAggregateEvaluator(ctx);
                }
                aggs[i].init(output);
                tb.addFieldEndOffset();
                stateFieldLength[i] = tb.getSize() - begin;
            }
            // doing initial aggregate
            ftr.reset(accessor, tIndex);
            for (int i = 0; i < aggs.length; i++) {
                byte[] data = tb.getByteArray();
                int prevFieldPos = i + keys.length - 1;
                int start = prevFieldPos >= 0 ? tb.getFieldEndOffsets()[prevFieldPos] : 0;
                aggs[i].step(ftr, data, start, stateFieldLength[i]);
            }
        }

        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
                int stateTupleIndex, AggregateState state) throws HyracksDataException {
            ftr.reset(accessor, tIndex);
            int stateTupleStart = stateAccessor.getTupleStartOffset(stateTupleIndex);
            int fieldSlotLength = stateAccessor.getFieldSlotsLength();
            for (int i = 0; i < aggs.length; i++) {
                byte[] data = stateAccessor.getBuffer().array();
                int start = stateAccessor.getFieldStartOffset(stateTupleIndex, i + keys.length) + stateTupleStart
                        + fieldSlotLength;
                aggs[i].step(ftr, data, start, stateFieldLength[i]);
            }
        }

        @Override
        public boolean outputPartialResult(ArrayTupleBuilder tb, IFrameTupleAccessor stateAccessor, int tIndex,
                AggregateState state) throws HyracksDataException {
            byte[] data = stateAccessor.getBuffer().array();
            int startOffset = stateAccessor.getTupleStartOffset(tIndex);
            int aggFieldOffset = stateAccessor.getFieldStartOffset(tIndex, offsetFieldIndex);
            int refOffset = startOffset + stateAccessor.getFieldSlotsLength() + aggFieldOffset;
            int start = refOffset;
            for (int i = 0; i < aggs.length; i++) {
                aggs[i].finishPartial(data, start, stateFieldLength[i], tb.getDataOutput());
                start += stateFieldLength[i];
                tb.addFieldEndOffset();
            }
            return true;
        }

        @Override
        public boolean outputFinalResult(ArrayTupleBuilder tb, IFrameTupleAccessor stateAccessor, int tIndex,
                AggregateState state) throws HyracksDataException {
            byte[] data = stateAccessor.getBuffer().array();
            int startOffset = stateAccessor.getTupleStartOffset(tIndex);
            int aggFieldOffset = stateAccessor.getFieldStartOffset(tIndex, offsetFieldIndex);
            int refOffset = startOffset + stateAccessor.getFieldSlotsLength() + aggFieldOffset;
            int start = refOffset;
            for (int i = 0; i < aggs.length; i++) {
                aggs[i].finish(data, start, stateFieldLength[i], tb.getDataOutput());
                start += stateFieldLength[i];
                tb.addFieldEndOffset();
            }
            return true;
        }

        @Override
        public void reset() {
        }

        @Override
        public void close() {
            reset();
        }
    };
}
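The state tuple produced by this descriptor lays out the group keys first, followed by one serialized state field per aggregate evaluator; both aggregate() and the two output methods locate each state by adding the tuple start offset, the field-slot length, and the field's start offset. A minimal sketch of that arithmetic, with the helper name and parameters assumed for illustration:

// Hypothetical helper mirroring the offset arithmetic used in aggregate() above.
private int stateFieldStart(IFrameTupleAccessor stateAccessor, int stateTupleIndex, int numKeys, int aggIndex) {
    int tupleStart = stateAccessor.getTupleStartOffset(stateTupleIndex);
    int slots = stateAccessor.getFieldSlotsLength(); // bytes occupied by the per-field offset slots
    // Aggregate state fields sit right after the group keys in the state tuple.
    return tupleStart + slots + stateAccessor.getFieldStartOffset(stateTupleIndex, numKeys + aggIndex);
}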
Use of org.apache.hyracks.api.comm.IFrameTupleAccessor in project asterixdb by apache: class HashSpillableTableFactory, method buildSpillableTable.
@Override
public ISpillableTable buildSpillableTable(final IHyracksTaskContext ctx, int suggestTableSize,
        long inputDataBytesSize, final int[] keyFields, final IBinaryComparator[] comparators,
        final INormalizedKeyComputer firstKeyNormalizerFactory, IAggregatorDescriptorFactory aggregateFactory,
        RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor, final int framesLimit,
        final int seed) throws HyracksDataException {
    final int tableSize = suggestTableSize;
    // For the output, we need to have at least one frame.
    if (framesLimit < MIN_FRAME_LIMT) {
        throw new HyracksDataException("The given frame limit is too small to partition the data.");
    }
    final int[] intermediateResultKeys = new int[keyFields.length];
    for (int i = 0; i < keyFields.length; i++) {
        intermediateResultKeys[i] = i;
    }
    final FrameTuplePairComparator ftpcInputCompareToAggregate =
            new FrameTuplePairComparator(keyFields, intermediateResultKeys, comparators);
    final ITuplePartitionComputer tpc =
            new FieldHashPartitionComputerFamily(keyFields, hashFunctionFamilies).createPartitioner(seed);
    // For calculating the hash value of already aggregated tuples (not incoming tuples).
    // This computer is required to compute the hash value of an aggregated tuple
    // while doing garbage collection work on the hash table.
    final ITuplePartitionComputer tpcIntermediate =
            new FieldHashPartitionComputerFamily(intermediateResultKeys, hashFunctionFamilies)
                    .createPartitioner(seed);
    final IAggregatorDescriptor aggregator = aggregateFactory.createAggregator(ctx, inRecordDescriptor,
            outRecordDescriptor, keyFields, intermediateResultKeys, null);
    final AggregateState aggregateState = aggregator.createAggregateStates();
    final ArrayTupleBuilder stateTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    //TODO(jf) research on the optimized partition size
    long memoryBudget = Math.max(MIN_DATA_TABLE_FRAME_LIMT + MIN_HASH_TABLE_FRAME_LIMT,
            framesLimit - OUTPUT_FRAME_LIMT - MIN_HASH_TABLE_FRAME_LIMT);
    final int numPartitions = getNumOfPartitions(inputDataBytesSize / ctx.getInitialFrameSize(), memoryBudget);
    final int entriesPerPartition = (int) Math.ceil(1.0 * tableSize / numPartitions);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("created hashtable, table size:" + tableSize + " file size:" + inputDataBytesSize
                + " #partitions:" + numPartitions);
    }
    final ArrayTupleBuilder outputTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    return new ISpillableTable() {

        private final TuplePointer pointer = new TuplePointer();
        private final BitSet spilledSet = new BitSet(numPartitions);
        // This frame pool will be shared by both the data table and the hash table.
        private final IDeallocatableFramePool framePool =
                new DeallocatableFramePool(ctx, framesLimit * ctx.getInitialFrameSize());
        // buffer manager for hash table
        private final ISimpleFrameBufferManager bufferManagerForHashTable =
                new FramePoolBackedFrameBufferManager(framePool);
        private final ISerializableTable hashTableForTuplePointer =
                new SerializableHashTable(tableSize, ctx, bufferManagerForHashTable);
        // buffer manager for data table
        final IPartitionedTupleBufferManager bufferManager = new VPartitionTupleBufferManager(
                PreferToSpillFullyOccupiedFramePolicy.createAtMostOneFrameForSpilledPartitionConstrain(spilledSet),
                numPartitions, framePool);
        final ITuplePointerAccessor bufferAccessor = bufferManager.getTuplePointerAccessor(outRecordDescriptor);
        private final PreferToSpillFullyOccupiedFramePolicy spillPolicy =
                new PreferToSpillFullyOccupiedFramePolicy(bufferManager, spilledSet);
        private final FrameTupleAppender outputAppender = new FrameTupleAppender(new VSizeFrame(ctx));

        @Override
        public void close() throws HyracksDataException {
            hashTableForTuplePointer.close();
            aggregator.close();
        }

        @Override
        public void clear(int partition) throws HyracksDataException {
            for (int p = getFirstEntryInHashTable(partition); p < getLastEntryInHashTable(partition); p++) {
                hashTableForTuplePointer.delete(p);
            }
            // Checks whether garbage collection is required and conducts a garbage collection if so.
            if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
                int numberOfFramesReclaimed =
                        hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Garbage Collection on Hash table is done. Deallocated frames:"
                            + numberOfFramesReclaimed);
                }
            }
            bufferManager.clearPartition(partition);
        }

        private int getPartition(int entryInHashTable) {
            return entryInHashTable / entriesPerPartition;
        }

        private int getFirstEntryInHashTable(int partition) {
            return partition * entriesPerPartition;
        }

        private int getLastEntryInHashTable(int partition) {
            return Math.min(tableSize, (partition + 1) * entriesPerPartition);
        }

        @Override
        public boolean insert(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            for (int i = 0; i < hashTableForTuplePointer.getTupleCount(entryInHashTable); i++) {
                hashTableForTuplePointer.getTuplePointer(entryInHashTable, i, pointer);
                bufferAccessor.reset(pointer);
                int c = ftpcInputCompareToAggregate.compare(accessor, tIndex, bufferAccessor);
                if (c == 0) {
                    aggregateExistingTuple(accessor, tIndex, bufferAccessor, pointer.getTupleIndex());
                    return true;
                }
            }
            return insertNewAggregateEntry(entryInHashTable, accessor, tIndex);
        }

        /**
         * Inserts a new aggregate entry into the data table and the hash table.
         * This insertion must be an atomic operation; we cannot have a partial success or failure.
         * So, if an insertion succeeds on the data table and the same insertion on the hash table fails,
         * we need to revert the effect of the data table insertion.
         */
        private boolean insertNewAggregateEntry(int entryInHashTable, IFrameTupleAccessor accessor, int tIndex)
                throws HyracksDataException {
            initStateTupleBuilder(accessor, tIndex);
            int pid = getPartition(entryInHashTable);
            // Insertion into the data table
            if (!bufferManager.insertTuple(pid, stateTupleBuilder.getByteArray(),
                    stateTupleBuilder.getFieldEndOffsets(), 0, stateTupleBuilder.getSize(), pointer)) {
                return false;
            }
            // Insertion into the hash table
            if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
                // To preserve the atomicity of this method, we need to undo the effect
                // of the above bufferManager.insertTuple() call since the given insertion has failed.
                bufferManager.cancelInsertTuple(pid);
                return false;
            }
            return true;
        }

        private void initStateTupleBuilder(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            stateTupleBuilder.reset();
            for (int k = 0; k < keyFields.length; k++) {
                stateTupleBuilder.addField(accessor, tIndex, keyFields[k]);
            }
            aggregator.init(stateTupleBuilder, accessor, tIndex, aggregateState);
        }

        private void aggregateExistingTuple(IFrameTupleAccessor accessor, int tIndex,
                ITuplePointerAccessor bufferAccessor, int tupleIndex) throws HyracksDataException {
            aggregator.aggregate(accessor, tIndex, bufferAccessor, tupleIndex, aggregateState);
        }

        @Override
        public int flushFrames(int partition, IFrameWriter writer, AggregateType type) throws HyracksDataException {
            int count = 0;
            for (int hashEntryPid = getFirstEntryInHashTable(partition);
                    hashEntryPid < getLastEntryInHashTable(partition); hashEntryPid++) {
                count += hashTableForTuplePointer.getTupleCount(hashEntryPid);
                for (int tid = 0; tid < hashTableForTuplePointer.getTupleCount(hashEntryPid); tid++) {
                    hashTableForTuplePointer.getTuplePointer(hashEntryPid, tid, pointer);
                    bufferAccessor.reset(pointer);
                    outputTupleBuilder.reset();
                    for (int k = 0; k < intermediateResultKeys.length; k++) {
                        outputTupleBuilder.addField(bufferAccessor.getBuffer().array(),
                                bufferAccessor.getAbsFieldStartOffset(intermediateResultKeys[k]),
                                bufferAccessor.getFieldLength(intermediateResultKeys[k]));
                    }
                    boolean hasOutput = false;
                    switch (type) {
                        case PARTIAL:
                            hasOutput = aggregator.outputPartialResult(outputTupleBuilder, bufferAccessor,
                                    pointer.getTupleIndex(), aggregateState);
                            break;
                        case FINAL:
                            hasOutput = aggregator.outputFinalResult(outputTupleBuilder, bufferAccessor,
                                    pointer.getTupleIndex(), aggregateState);
                            break;
                    }
                    if (hasOutput && !outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(),
                            outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                        outputAppender.write(writer, true);
                        if (!outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(),
                                outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                            throw new HyracksDataException("The output item is too large to fit into a frame.");
                        }
                    }
                }
            }
            outputAppender.write(writer, true);
            spilledSet.set(partition);
            return count;
        }

        @Override
        public int getNumPartitions() {
            return bufferManager.getNumPartitions();
        }

        @Override
        public int findVictimPartition(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            int partition = getPartition(entryInHashTable);
            return spillPolicy.selectVictimPartition(partition);
        }
    };
}
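A hedged sketch of the insert-or-spill loop a caller such as an external group-by might run against the returned ISpillableTable, using only the interface methods shown above; the helper, the spill writer, and the flush-then-clear retry policy are assumptions for illustration:

// Hypothetical caller loop; the spill writer and retry policy are assumptions.
private void insertOrSpill(ISpillableTable table, IFrameTupleAccessor accessor, int tIndex,
        IFrameWriter spillWriter) throws HyracksDataException {
    while (!table.insert(accessor, tIndex)) {
        // Out of budgeted frames: flush one partition's groups as partial
        // aggregates, release its memory, and retry the insertion.
        int victim = table.findVictimPartition(accessor, tIndex);
        table.flushFrames(victim, spillWriter, AggregateType.PARTIAL);
        table.clear(victim);
    }
}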
Use of org.apache.hyracks.api.comm.IFrameTupleAccessor in project asterixdb by apache: class AvgFieldGroupAggregatorFactory, method createAggregator.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hyracks.dataflow.std.aggregations.IFieldAggregateDescriptorFactory
 * #createAggregator(org.apache.hyracks.api.context.IHyracksTaskContext,
 * org.apache.hyracks.api.dataflow.value.RecordDescriptor,
 * org.apache.hyracks.api.dataflow.value.RecordDescriptor)
 */
@Override
public IFieldAggregateDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
        RecordDescriptor outRecordDescriptor) throws HyracksDataException {
    return new IFieldAggregateDescriptor() {

        @Override
        public void reset() {
        }

        @Override
        public void outputPartialResult(DataOutput fieldOutput, byte[] data, int offset, AggregateState state)
                throws HyracksDataException {
            int sum, count;
            if (!useObjectState) {
                sum = IntegerPointable.getInteger(data, offset);
                count = IntegerPointable.getInteger(data, offset + 4);
            } else {
                Integer[] fields = (Integer[]) state.state;
                sum = fields[0];
                count = fields[1];
            }
            try {
                fieldOutput.writeInt(sum);
                fieldOutput.writeInt(count);
            } catch (IOException e) {
                throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
            }
        }

        @Override
        public void outputFinalResult(DataOutput fieldOutput, byte[] data, int offset, AggregateState state)
                throws HyracksDataException {
            int sum, count;
            if (!useObjectState) {
                sum = IntegerPointable.getInteger(data, offset);
                count = IntegerPointable.getInteger(data, offset + 4);
            } else {
                Integer[] fields = (Integer[]) state.state;
                sum = fields[0];
                count = fields[1];
            }
            try {
                fieldOutput.writeFloat((float) sum / count);
            } catch (IOException e) {
                throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
            }
        }

        @Override
        public void init(IFrameTupleAccessor accessor, int tIndex, DataOutput fieldOutput, AggregateState state)
                throws HyracksDataException {
            int sum = 0;
            int count = 0;
            int tupleOffset = accessor.getTupleStartOffset(tIndex);
            int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
            sum += IntegerPointable.getInteger(accessor.getBuffer().array(),
                    tupleOffset + accessor.getFieldSlotsLength() + fieldStart);
            count += 1;
            if (!useObjectState) {
                try {
                    fieldOutput.writeInt(sum);
                    fieldOutput.writeInt(count);
                } catch (IOException e) {
                    throw new HyracksDataException("I/O exception when initializing the aggregator.");
                }
            } else {
                state.state = new Integer[] { sum, count };
            }
        }

        @Override
        public void close() {
            // TODO Auto-generated method stub
        }

        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, byte[] data, int offset,
                AggregateState state) throws HyracksDataException {
            int sum = 0, count = 0;
            int tupleOffset = accessor.getTupleStartOffset(tIndex);
            int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
            sum += IntegerPointable.getInteger(accessor.getBuffer().array(),
                    tupleOffset + accessor.getFieldSlotsLength() + fieldStart);
            count += 1;
            if (!useObjectState) {
                ByteBuffer buf = ByteBuffer.wrap(data);
                sum += buf.getInt(offset);
                count += buf.getInt(offset + 4);
                buf.putInt(offset, sum);
                buf.putInt(offset + 4, count);
            } else {
                Integer[] fields = (Integer[]) state.state;
                sum += fields[0];
                count += fields[1];
                state.state = new Integer[] { sum, count };
            }
        }

        @Override
        public boolean needsObjectState() {
            return useObjectState;
        }

        @Override
        public boolean needsBinaryState() {
            return !useObjectState;
        }

        @Override
        public AggregateState createState() {
            return new AggregateState(new Integer[] { 0, 0 });
        }
    };
}
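In binary-state mode, this aggregator packs its state into 8 bytes: a running int sum followed by an int tuple count. A minimal sketch of decoding that state, with the helper name, byte array, and offset assumed for illustration; the division matches outputFinalResult above:

// Hypothetical decoder for the 8-byte binary avg state written above.
private float decodeAvgState(byte[] stateBytes, int offset) {
    int sum = IntegerPointable.getInteger(stateBytes, offset); // bytes [offset, offset+4): running sum
    int count = IntegerPointable.getInteger(stateBytes, offset + 4); // bytes [offset+4, offset+8): tuple count
    return (float) sum / count; // same computation as outputFinalResult()
}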