Use of org.apache.hyracks.dataflow.common.io.RunFileWriter in project asterixdb by apache.
The class ExternalGroupBuildOperatorNodePushable, method close().
@Override
public void close() throws HyracksDataException {
    if (isFailed && state.getRuns() != null) {
        for (RunFileWriter run : state.getRuns()) {
            if (run != null) {
                run.erase();
            }
        }
    } else {
        externalGroupBy.flushSpilledPartitions();
        ctx.setStateObject(state);
        if (LOGGER.isLoggable(Level.FINE)) {
            int numOfPartition = state.getSpillableTable().getNumPartitions();
            int numOfSpilledPart = 0;
            for (int i = 0; i < numOfPartition; i++) {
                if (state.getSpilledNumTuples()[i] > 0) {
                    numOfSpilledPart++;
                }
            }
            LOGGER.fine("level 0:" + "build with " + numOfPartition + " partitions"
                    + ", spilled " + numOfSpilledPart + " partitions");
        }
    }
    state = null;
    externalGroupBy = null;
}
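The failure path above hinges on RunFileWriter.erase() removing the spilled file from disk. The following is a minimal, hypothetical sketch (not from the AsterixDB source) of that write-then-erase lifecycle; the spillFrame helper, the "sketchRun" file prefix, and the frame argument are assumptions for illustration.

import java.nio.ByteBuffer;

import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
import org.apache.hyracks.dataflow.common.io.RunFileWriter;

// Hypothetical helper: spill one frame to a fresh run file, erasing the
// file if the write fails (mirrors the cleanup in close() above).
static RunFileWriter spillFrame(IHyracksTaskContext ctx, ByteBuffer frame) throws HyracksDataException {
    FileReference file = ctx.createManagedWorkspaceFile("sketchRun");
    RunFileWriter run = new RunFileWriter(file, ctx.getIoManager());
    run.open();
    try {
        run.nextFrame(frame); // append the frame to the on-disk run
        run.close();          // keep the file so a reader can consume it later
        return run;
    } catch (HyracksDataException e) {
        run.erase();          // failure path: delete the backing file
        throw e;
    }
}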
Use of org.apache.hyracks.dataflow.common.io.RunFileWriter in project asterixdb by apache.
The class ExternalGroupWriteOperatorNodePushable, method initialize().
@Override
public void initialize() throws HyracksDataException {
    ExternalGroupState aggState = (ExternalGroupState) ctx.getStateObject(stateId);
    ISpillableTable table = aggState.getSpillableTable();
    RunFileWriter[] partitionRuns = aggState.getRuns();
    int[] numberOfTuples = aggState.getSpilledNumTuples();
    try {
        writer.open();
        // Level 0 was used at the build stage, so merging starts at level 1.
        doPass(table, partitionRuns, numberOfTuples, writer, 1);
    } catch (Exception e) {
        try {
            for (RunFileWriter run : generatedRuns) {
                run.erase();
            }
        } finally {
            writer.fail();
        }
        throw e;
    } finally {
        writer.close();
    }
}
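The erase-on-failure loop above discards runs already generated at this level; on the success path, each run is instead consumed through a delete-on-close reader, so its file disappears as soon as it is drained. A minimal sketch of that consumption pattern, assuming a task context and an already-closed run (drainRun itself is a hypothetical name):

import org.apache.hyracks.api.comm.IFrameReader;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.io.RunFileWriter;

// Hypothetical helper: read every frame of a spilled run; the
// delete-on-close reader removes the file once it is fully drained.
static void drainRun(IHyracksTaskContext ctx, RunFileWriter run) throws HyracksDataException {
    IFrameReader reader = run.createDeleteOnCloseReader();
    reader.open();
    try {
        VSizeFrame frame = new VSizeFrame(ctx);
        while (reader.nextFrame(frame)) {
            // process frame.getBuffer() here, e.g. re-insert into a new table
        }
    } finally {
        reader.close(); // delete-on-close: the run file is reclaimed here
    }
}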
Use of org.apache.hyracks.dataflow.common.io.RunFileWriter in project asterixdb by apache.
The class ExternalGroupWriteOperatorNodePushable, method doPass().
private void doPass(ISpillableTable table, RunFileWriter[] runs, int[] numOfTuples, IFrameWriter writer, int level)
        throws HyracksDataException {
    assert table.getNumPartitions() == runs.length;
    for (int i = 0; i < runs.length; i++) {
        if (runs[i] == null) {
            table.flushFrames(i, writer, AggregateType.FINAL);
        }
    }
    table.close();
    for (int i = 0; i < runs.length; i++) {
        if (runs[i] != null) {
            // Calculates the hash table size (# of unique hash values) based on the budget and a tuple size.
            int memoryBudgetInBytes = ctx.getInitialFrameSize() * frameLimit;
            int groupByColumnsCount = mergeGroupFields.length;
            int hashTableCardinality = ExternalGroupOperatorDescriptor.calculateGroupByTableCardinality(
                    memoryBudgetInBytes, groupByColumnsCount, ctx.getInitialFrameSize());
            hashTableCardinality = (int) Math.min(hashTableCardinality, numOfTuples[i]);
            ISpillableTable partitionTable = spillableTableFactory.buildSpillableTable(ctx, hashTableCardinality,
                    runs[i].getFileSize(), mergeGroupFields, groupByComparators, nmkComputer,
                    mergeAggregatorFactory, partialAggRecordDesc, outRecordDesc, frameLimit, level);
            RunFileWriter[] runFileWriters = new RunFileWriter[partitionTable.getNumPartitions()];
            int[] sizeInTuplesNextLevel =
                    buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters);
            for (int idFile = 0; idFile < runFileWriters.length; idFile++) {
                if (runFileWriters[idFile] != null) {
                    generatedRuns.add(runFileWriters[idFile]);
                }
            }
            if (LOGGER.isLoggable(Level.FINE)) {
                int numOfSpilledPart = 0;
                for (int x = 0; x < numOfTuples.length; x++) {
                    if (numOfTuples[x] > 0) {
                        numOfSpilledPart++;
                    }
                }
                LOGGER.fine("level " + level + ":" + "build with " + numOfTuples.length + " partitions"
                        + ", spilled " + numOfSpilledPart + " partitions");
            }
            doPass(partitionTable, runFileWriters, sizeInTuplesNextLevel, writer, level + 1);
        }
    }
}
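To make the sizing arithmetic concrete: with an assumed 32 KB initial frame size and a frame limit of 128, the budget is 4 MiB, and the cardinality estimate is clamped to the partition's actual spilled-tuple count so a small run never gets an oversized hash table. The numbers below are illustrative only, not taken from any real job:

// Illustrative values only; the real ones come from ctx and the job config.
int frameSize = 32 * 1024;                        // assumed initial frame size
int frameLimit = 128;                             // assumed operator frame budget
int memoryBudgetInBytes = frameSize * frameLimit; // 4,194,304 bytes (4 MiB)
// Suppose the budget-derived estimate is 100,000 groups, but this partition's
// run spilled only 1,500 tuples; the Math.min clamp keeps the table small.
int estimatedCardinality = 100_000; // stands in for calculateGroupByTableCardinality(...)
int spilledTuples = 1_500;
int hashTableCardinality = Math.min(estimatedCardinality, spilledTuples); // == 1,500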
Use of org.apache.hyracks.dataflow.common.io.RunFileWriter in project asterixdb by apache.
The class ExternalGroupBuildOperatorNodePushable, method open().
@Override
public void open() throws HyracksDataException {
    state = new ExternalGroupState(ctx.getJobletContext().getJobId(), stateId);
    ISpillableTable table = spillableTableFactory.buildSpillableTable(ctx, tableSize, fileSize, keyFields,
            comparators, firstNormalizerComputer, aggregatorFactory, inRecordDescriptor, outRecordDescriptor,
            framesLimit, 0);
    RunFileWriter[] runFileWriters = new RunFileWriter[table.getNumPartitions()];
    this.externalGroupBy = new ExternalHashGroupBy(this, table, runFileWriters, inRecordDescriptor);
    state.setSpillableTable(table);
    state.setRuns(runFileWriters);
    state.setSpilledNumTuples(externalGroupBy.getSpilledNumTuples());
}
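Between open() and close(), the pushable's nextFrame() (not included in this listing) simply hands each incoming frame to the hash group-by. A plausible sketch of that omitted step, assuming the same externalGroupBy field:

// Sketch of the omitted nextFrame(): every incoming frame is routed to
// ExternalHashGroupBy.insert(), which spills partitions when memory runs out.
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
    externalGroupBy.insert(buffer);
}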
Use of org.apache.hyracks.dataflow.common.io.RunFileWriter in project asterixdb by apache.
The class ExternalHashGroupBy, method insert().
public void insert(ByteBuffer buffer) throws HyracksDataException {
    accessor.reset(buffer);
    int tupleCount = accessor.getTupleCount();
    for (int i = 0; i < tupleCount; i++) {
        if (!table.insert(accessor, i)) {
            do {
                int partition = table.findVictimPartition(accessor, i);
                if (partition < 0) {
                    throw new HyracksDataException("Failed to insert a new buffer into the aggregate operator!");
                }
                RunFileWriter writer = getPartitionWriterOrCreateOneIfNotExist(partition);
                flushPartitionToRun(partition, writer);
            } while (!table.insert(accessor, i));
        }
    }
}
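The listing does not show getPartitionWriterOrCreateOneIfNotExist. A hypothetical reconstruction is sketched below; the runWriters array, the ctx field, and the workspace-file creation are assumptions, and the real ExternalHashGroupBy may obtain its writers through a factory instead:

// Hypothetical sketch of the lazy writer creation used by insert() above;
// runWriters and ctx are assumed fields, not necessarily the real ones.
private RunFileWriter getPartitionWriterOrCreateOneIfNotExist(int partition) throws HyracksDataException {
    if (runWriters[partition] == null) {
        FileReference file = ctx.createManagedWorkspaceFile(ExternalHashGroupBy.class.getSimpleName());
        runWriters[partition] = new RunFileWriter(file, ctx.getIoManager());
        runWriters[partition].open();
    }
    return runWriters[partition];
}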