Use of org.apache.drill.exec.physical.impl.aggregate.SpilledRecordBatch in project drill by apache.

From the class HashJoinBatch, method innerNext(). When the hash join has spilled partitions to disk, this method wraps each partition's spill file in a SpilledRecordBatch and replays it as the build-side (and, if present, probe-side) "incoming" batch:
@Override
public IterOutcome innerNext() {
  if (wasKilled) {
    // We have received a cancel signal. We need to stop processing.
    cleanup();
    return IterOutcome.NONE;
  }

  prefetchFirstBuildBatch();

  if (rightUpstream.isError()) {
    // We need to terminate.
    return rightUpstream;
  }

  try {
    /*
     * If we are here for the first time, execute the build phase of the hash
     * join and set up the runtime-generated class for the probe side.
     */
    if (state == BatchState.FIRST) {
      // Build the hash table, using the build side record batches.
      IterOutcome buildExecuteTermination = executeBuildPhase();

      if (buildExecuteTermination != null) {
        // We need to terminate.
        return buildExecuteTermination;
      }

      buildComplete = true;

      if (isRowKeyJoin) {
        // Discard the first left batch which was fetched by buildSchema,
        // and get the new one based on the row key join.
        leftUpstream = next(left);
      }

      // Update the hash table related stats for the operator.
      updateStats();
    }

    // Try to probe and project, or recursively handle a spilled partition.
    // If there are build-side rows, or if this is a left/full outer join:
    if (!buildSideIsEmpty.booleanValue() || joinIsLeftOrFull) {
      prefetchFirstProbeBatch();

      if (leftUpstream.isError() ||
          (leftUpstream == NONE && !joinIsRightOrFull)) {
        // We need to terminate.
        return leftUpstream;
      }

      if (!buildSideIsEmpty.booleanValue() || !probeSideIsEmpty.booleanValue()) {
        if (state == BatchState.FIRST) {
          // Initialize various settings for the probe side.
          hashJoinProbe.setupHashJoinProbe(probeBatch, this, joinType, semiJoin,
              leftUpstream, partitions, spilledState.getCycle(), container,
              spilledInners, buildSideIsEmpty.booleanValue(), numPartitions,
              rightHVColPosition);
        }

        // Allocate the memory for the vectors in the output container.
        batchMemoryManager.allocateVectors(container);
        hashJoinProbe.setTargetOutputCount(batchMemoryManager.getOutputRowCount());

        outputRecords = hashJoinProbe.probeAndProject();
        container.setValueCount(outputRecords);

        batchMemoryManager.updateOutgoingStats(outputRecords);
        RecordBatchStats.logRecordBatchStats(RecordBatchIOType.OUTPUT, this,
            getRecordBatchStatsContext());

        /*
         * We are here because of one of the following:
         * 1. We completed processing all the records and we are done.
         * 2. We filled up the outgoing batch to the maximum and need to
         *    return upstream.
         * In either case, build the output container's schema and return.
         */
        if (outputRecords > 0 || state == BatchState.FIRST) {
          state = BatchState.NOT_FIRST;
          return IterOutcome.OK;
        }
      }

      // (In case we need to start processing spilled partitions.)
      for (HashPartition partn : partitions) {
        partn.cleanup(false); // clean, but do not delete the spill files !!
      }

      if (!buildSideIsEmpty.booleanValue()) {
        // The "while" is only used for skipping; see "continue" below.
        while (!spilledState.isEmpty()) {
          // Get the next (previously) spilled partition to handle as incoming.
          HashJoinSpilledPartition currSp = spilledState.getNextSpilledPartition();

          if (currSp.outerSpilledBatches == 0 && !joinIsRightOrFull) {
            continue;
          }

          // Create a BUILD-side "incoming" out of the inner spill file of
          // that partition.
          buildBatch = new SpilledRecordBatch(currSp.innerSpillFile,
              currSp.innerSpilledBatches, context, buildSchema, oContext, spillSet);
          // The above ctor call also got the first batch; need to update the
          // outcome.
          rightUpstream = ((SpilledRecordBatch) buildBatch).getInitialOutcome();

          if (currSp.outerSpilledBatches > 0) {
            // Create a PROBE-side "incoming" out of the outer spill file of
            // that partition.
            probeBatch = new SpilledRecordBatch(currSp.outerSpillFile,
                currSp.outerSpilledBatches, context, probeSchema, oContext, spillSet);
            // The above ctor call also got the first batch; need to update
            // the outcome.
            leftUpstream = ((SpilledRecordBatch) probeBatch).getInitialOutcome();
          } else {
            // If there is no outer batch, reuse left (needed for updateIncoming()).
            probeBatch = left;
            leftUpstream = IterOutcome.NONE;
            hashJoinProbe.changeToFinalProbeState();
          }

          spilledState.updateCycle(stats, currSp, spilledStateUpdater);

          // TODO: need to determine if this is still necessary, since
          // prefetchFirstBatchFromBothSides sets this.
          state = BatchState.FIRST;

          prefetchedBuild.setValue(false);
          prefetchedProbe.setValue(false);

          // Start processing the next spilled partition "recursively".
          return innerNext();
        }
      }
    } else {
      // Our build side is empty, we won't have any matches; clear the probe side.
      killAndDrainLeftUpstream();
    }

    // No more output records; clean up and return.
    state = BatchState.DONE;
    cleanup();
    return IterOutcome.NONE;
  } catch (SchemaChangeException e) {
    throw UserException.schemaChangeError(e).build(logger);
  }
}
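The key contract in the snippet above is that the SpilledRecordBatch constructor already reads the first batch from the spill file, so the first outcome must be taken from getInitialOutcome() rather than from a first call to next(). Below is a minimal sketch of that read-back pattern; the helper replaySpilledPartition and its processing loop are illustrative, not part of Drill, and assume only the constructor signature and calls visible in the snippet above:

import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.impl.aggregate.SpilledRecordBatch;
import org.apache.drill.exec.physical.impl.spill.SpillSet;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.RecordBatch.IterOutcome;

// Hypothetical helper, for illustration only: replays one spilled
// partition the same way innerNext() does above.
static void replaySpilledPartition(String spillFile, int spilledBatches,
    FragmentContext context, BatchSchema schema, OperatorContext oContext,
    SpillSet spillSet) {
  SpilledRecordBatch incoming = new SpilledRecordBatch(spillFile,
      spilledBatches, context, schema, oContext, spillSet);

  // The constructor has already loaded the first batch; its outcome is
  // reported through getInitialOutcome(), not through next().
  IterOutcome outcome = incoming.getInitialOutcome();

  while (outcome == IterOutcome.OK || outcome == IterOutcome.OK_NEW_SCHEMA) {
    // ... process the records in incoming.getContainer() here ...
    outcome = incoming.next(); // subsequent batches come from next()
  }
}

This mirrors the loop above, where rightUpstream (and, when outer batches were spilled, leftUpstream) is set from getInitialOutcome() immediately after constructing each SpilledRecordBatch.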