Example 1 with SpilledRecordBatch

Use of org.apache.drill.exec.physical.impl.aggregate.SpilledRecordBatch in project drill by apache.

From the class HashJoinBatch, the method innerNext.
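This method drives the hash join: on the first call it executes the build phase (building the hash table from the build-side batches, possibly spilling partitions to disk), then probes and projects output batches. Once in-memory processing is exhausted, each previously spilled partition is replayed by wrapping its spill files in SpilledRecordBatch instances and re-entering innerNext() recursively.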

@Override
public IterOutcome innerNext() {
    if (wasKilled) {
        // We have received a cancel signal. We need to stop processing.
        cleanup();
        return IterOutcome.NONE;
    }
    prefetchFirstBuildBatch();
    if (rightUpstream.isError()) {
        // We need to terminate.
        return rightUpstream;
    }
    try {
        /*
         * If we are here for the first time, execute the build phase of the
         * hash join and set up the runtime-generated class for the probe side
         */
        if (state == BatchState.FIRST) {
            // Build the hash table, using the build side record batches.
            IterOutcome buildExecuteTermination = executeBuildPhase();
            if (buildExecuteTermination != null) {
                // We need to terminate.
                return buildExecuteTermination;
            }
            buildComplete = true;
            if (isRowKeyJoin) {
                // Discard the first left batch, which was fetched by
                // buildSchema, and get a new one based on the row key join
                leftUpstream = next(left);
            }
            // Update the hash table related stats for the operator
            updateStats();
        }
        // Try to probe and project, or recursively handle a spilled partition
        // (if there are build-side rows, or if this is a left/full outer join)
        if (!buildSideIsEmpty.booleanValue() || joinIsLeftOrFull) {
            prefetchFirstProbeBatch();
            if (leftUpstream.isError() || (leftUpstream == NONE && !joinIsRightOrFull)) {
                // We need to terminate.
                return leftUpstream;
            }
            if (!buildSideIsEmpty.booleanValue() || !probeSideIsEmpty.booleanValue()) {
                if (state == BatchState.FIRST) {
                    // Initialize various settings for the probe side
                    hashJoinProbe.setupHashJoinProbe(probeBatch, this, joinType, semiJoin, leftUpstream, partitions, spilledState.getCycle(), container, spilledInners, buildSideIsEmpty.booleanValue(), numPartitions, rightHVColPosition);
                }
                // Allocate the memory for the vectors in the output container
                batchMemoryManager.allocateVectors(container);
                hashJoinProbe.setTargetOutputCount(batchMemoryManager.getOutputRowCount());
                outputRecords = hashJoinProbe.probeAndProject();
                container.setValueCount(outputRecords);
                batchMemoryManager.updateOutgoingStats(outputRecords);
                RecordBatchStats.logRecordBatchStats(RecordBatchIOType.OUTPUT, this, getRecordBatchStatsContext());
                /*
                 * We are here because of one of the following:
                 * 1. We completed processing all the records and we are done.
                 * 2. We've filled the outgoing batch to the maximum and need
                 *    to return upstream.
                 * In either case, build the output container's schema and
                 * return.
                 */
                if (outputRecords > 0 || state == BatchState.FIRST) {
                    state = BatchState.NOT_FIRST;
                    return IterOutcome.OK;
                }
            }
            // Clean the partitions (in case we need to start processing the
            // spilled partitions), but do not delete the spill files!
            for (HashPartition partn : partitions) {
                partn.cleanup(false);
            }
            if (!buildSideIsEmpty.booleanValue()) {
                // The "while" loop is only used for skipping empty spilled
                // partitions; see the "continue" below
                while (!spilledState.isEmpty()) {
                    // Get the next (previously) spilled partition to handle
                    // as incoming
                    HashJoinSpilledPartition currSp = spilledState.getNextSpilledPartition();
                    // Skip this spilled partition if it has no probe-side
                    // batches and this is not a right/full outer join
                    if (currSp.outerSpilledBatches == 0 && !joinIsRightOrFull) {
                        continue;
                    }
                    // Create a BUILD-side "incoming" out of the inner spill file of
                    // that partition
                    buildBatch = new SpilledRecordBatch(currSp.innerSpillFile, currSp.innerSpilledBatches, context, buildSchema, oContext, spillSet);
                    // The above ctor call also got the first batch; need to update the
                    // outcome
                    rightUpstream = ((SpilledRecordBatch) buildBatch).getInitialOutcome();
                    if (currSp.outerSpilledBatches > 0) {
                        // Create a PROBE-side "incoming" out of the outer spill file of
                        // that partition
                        probeBatch = new SpilledRecordBatch(currSp.outerSpillFile, currSp.outerSpilledBatches, context, probeSchema, oContext, spillSet);
                        // The above ctor call also got the first batch; need to update
                        // the outcome
                        leftUpstream = ((SpilledRecordBatch) probeBatch).getInitialOutcome();
                    } else {
                        // If there are no outer batches then reuse the left
                        // (probe) batch; needed for updateIncoming()
                        probeBatch = left;
                        leftUpstream = IterOutcome.NONE;
                        hashJoinProbe.changeToFinalProbeState();
                    }
                    spilledState.updateCycle(stats, currSp, spilledStateUpdater);
                    // TODO: determine if this is still necessary, since
                    // prefetchFirstBatchFromBothSides sets this
                    state = BatchState.FIRST;
                    prefetchedBuild.setValue(false);
                    prefetchedProbe.setValue(false);
                    // Start processing the next spilled partition "recursively"
                    return innerNext();
                }
            }
        } else {
            // Our build side is empty, we won't have any matches, clear the probe
            // side
            killAndDrainLeftUpstream();
        }
        // No more output records, clean up and return
        state = BatchState.DONE;
        cleanup();
        return IterOutcome.NONE;
    } catch (SchemaChangeException e) {
        throw UserException.schemaChangeError(e).build(logger);
    }
}
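
For reference, here is a minimal sketch of the replay pattern used above, isolated from the join logic: a spilled partition's file is wrapped in a SpilledRecordBatch, whose constructor already reads the first batch, and the batch is then drained one spilled batch at a time. The constructor arguments and the getInitialOutcome() call mirror the usage in innerNext(); the helper name drainSpilledPartition is hypothetical, and the fields context, oContext, and spillSet are assumed to be available as they are in HashJoinBatch.

// A hypothetical helper sketching how a spill file is replayed through
// SpilledRecordBatch; assumes context, oContext, and spillSet fields exist,
// as in HashJoinBatch
private void drainSpilledPartition(String spillFile, int spilledBatches, BatchSchema schema) {
    // The constructor also reads the first spilled batch, so inspect
    // getInitialOutcome() before calling next()
    SpilledRecordBatch replay = new SpilledRecordBatch(spillFile, spilledBatches, context, schema, oContext, spillSet);
    IterOutcome outcome = replay.getInitialOutcome();
    while (outcome == IterOutcome.OK || outcome == IterOutcome.OK_NEW_SCHEMA) {
        // ... consume the current batch from replay's container here ...
        // Read the next spilled batch from the file
        outcome = replay.next();
    }
    // IterOutcome.NONE indicates all spilled batches have been replayed
}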
Also used : SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) SpilledRecordBatch(org.apache.drill.exec.physical.impl.aggregate.SpilledRecordBatch) HashPartition(org.apache.drill.exec.physical.impl.common.HashPartition)

Aggregations

SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) : 1
SpilledRecordBatch (org.apache.drill.exec.physical.impl.aggregate.SpilledRecordBatch) : 1
HashPartition (org.apache.drill.exec.physical.impl.common.HashPartition) : 1