Search in sources :

Example 1 with IterOutcome

use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.

the class HashJoinProbeTemplate method executeProbePhase.

/**
 * Runs the probe phase of the hash join.
 *
 * <p>Loops until {@code outputRecords} reaches {@code TARGET_RECORDS_PER_BATCH}
 * or {@code probeState} becomes {@code DONE} or {@code PROJECT_RIGHT}. Each
 * iteration either fetches the next probe batch (when every record of the
 * current one has been consumed) or handles one probe record: the record is
 * looked up in {@code hashTable} and, on a match, one build-side row is
 * projected next to it per iteration until the chain of matching build rows
 * is exhausted.
 *
 * @throws SchemaChangeException if the probe side returns OK_NEW_SCHEMA with a
 *         schema that is not equal to {@code probeSchema}
 */
public void executeProbePhase() throws SchemaChangeException {
    while (outputRecords < TARGET_RECORDS_PER_BATCH && probeState != ProbeState.DONE && probeState != ProbeState.PROJECT_RIGHT) {
        // All records of the current probe batch are consumed: fetch the next batch.
        if (recordsProcessed == recordsToProcess) {
            // Done processing all records in the previous batch, clean up!
            for (VectorWrapper<?> wrapper : probeBatch) {
                wrapper.getValueVector().clear();
            }
            IterOutcome leftUpstream = outgoingJoinBatch.next(HashJoinHelper.LEFT_INPUT, probeBatch);
            // NOTE(review): there is no default branch, so any outcome not listed
            // below (e.g. OUT_OF_MEMORY) leaves the switch and continues into the
            // probe logic with recordsProcessed == recordsToProcess - confirm.
            switch(leftUpstream) {
                // NOTE(review): NOT_YET is handled exactly like NONE and STOP, i.e.
                // the probe phase ends - confirm that this is intended.
                case NONE:
                case NOT_YET:
                case STOP:
                    recordsProcessed = 0;
                    recordsToProcess = 0;
                    probeState = ProbeState.DONE;
                    // We are done with the probe phase. If it is a RIGHT or a FULL join, get the unmatched indexes from the build side
                    if (joinType == JoinRelType.RIGHT || joinType == JoinRelType.FULL) {
                        probeState = ProbeState.PROJECT_RIGHT;
                    }
                    // Re-evaluate the loop condition, which now terminates the loop.
                    continue;
                case OK_NEW_SCHEMA:
                    // A "new" schema is only accepted when it equals the schema already
                    // known for the probe side; in that case setup is redone and the
                    // hash table is told to update its batches.
                    if (probeBatch.getSchema().equals(probeSchema)) {
                        doSetup(outgoingJoinBatch.getContext(), buildBatch, probeBatch, outgoingJoinBatch);
                        hashTable.updateBatches();
                    } else {
                        throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in probe side.", probeSchema, probeBatch.getSchema());
                    }
                    // No break: falls through to OK so the records of this batch are probed too.
                case OK:
                    recordsToProcess = probeBatch.getRecordCount();
                    recordsProcessed = 0;
                    // If we received an empty batch do nothing
                    if (recordsToProcess == 0) {
                        continue;
                    }
            }
        }
        // -1 means "no matching key"; it stays -1 when there is no hash table.
        int probeIndex = -1;
        // Check if we need to drain the next row in the probe side
        if (getNextRecord) {
            if (hashTable != null) {
                probeIndex = hashTable.containsKey(recordsProcessed, true);
            }
            if (probeIndex != -1) {
                /* The current probe record has a key that matches. Get the index
                 * of the first row in the build side that matches the current key
                 */
                currentCompositeIdx = hjHelper.getStartIndex(probeIndex);
                /* Record in the build side at currentCompositeIdx has a matching record in the probe
                 * side. Set the bit corresponding to this index so if we are doing a FULL or RIGHT
                 * join we keep track of which records we need to project at the end
                 */
                hjHelper.setRecordMatched(currentCompositeIdx);
                projectBuildRecord(currentCompositeIdx, outputRecords);
                projectProbeRecord(recordsProcessed, outputRecords);
                outputRecords++;
                /* Projected single row from the build side with matching key but there
                 * may be more rows with the same key. Check if that's the case
                 */
                currentCompositeIdx = hjHelper.getNextIndex(currentCompositeIdx);
                if (currentCompositeIdx == -1) {
                    /* We only had one row in the build side that matched the current key
                     * from the probe side. Drain the next row in the probe side.
                     */
                    recordsProcessed++;
                } else {
                    /* There is more than one row with the same key on the build side
                     * don't drain more records from the probe side till we have projected
                     * all the rows with this key
                     */
                    getNextRecord = false;
                }
            } else {
                // No match: for a LEFT or FULL join the probe record is still projected
                // (without a build-side row); otherwise it is skipped.
                if (joinType == JoinRelType.LEFT || joinType == JoinRelType.FULL) {
                    projectProbeRecord(recordsProcessed, outputRecords);
                    outputRecords++;
                }
                recordsProcessed++;
            }
        } else {
            // Continue the chain of build rows sharing the current probe record's key:
            // project the build row at currentCompositeIdx with the same probe record.
            hjHelper.setRecordMatched(currentCompositeIdx);
            projectBuildRecord(currentCompositeIdx, outputRecords);
            projectProbeRecord(recordsProcessed, outputRecords);
            outputRecords++;
            currentCompositeIdx = hjHelper.getNextIndex(currentCompositeIdx);
            if (currentCompositeIdx == -1) {
                // We don't have any more rows matching the current key on the build side, move on to the next probe row
                getNextRecord = true;
                recordsProcessed++;
            }
        }
    }
}
Also used : IterOutcome(org.apache.drill.exec.record.RecordBatch.IterOutcome)

Example 2 with IterOutcome

use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.

the class BaseRootExec method next.

/**
 * Advances the given batch and records the received batch in {@code stats}.
 *
 * <p>{@code stats.stopProcessing()} is called before delegating to
 * {@code b.next()} and {@code stats.startProcessing()} is always called
 * afterwards, even if the upstream call throws (presumably so that upstream
 * time is not attributed to this operator - confirm against the stats class).
 *
 * @param b the batch to advance
 * @return the outcome reported by {@code b.next()}, unchanged
 */
public final IterOutcome next(final RecordBatch b) {
    final IterOutcome outcome;
    stats.stopProcessing();
    try {
        outcome = b.next();
    } finally {
        stats.startProcessing();
    }
    switch (outcome) {
        case OK_NEW_SCHEMA:
        case OK:
            // Only data-carrying outcomes are counted; the flag marks a new schema.
            stats.batchReceived(0, b.getRecordCount(), outcome == IterOutcome.OK_NEW_SCHEMA);
            break;
        default:
            // Every other outcome is passed through without touching the stats.
            break;
    }
    return outcome;
}
Also used : IterOutcome(org.apache.drill.exec.record.RecordBatch.IterOutcome)

Example 3 with IterOutcome

use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.

the class PartitionSenderRootExec method innerNext.

/**
 * Pulls one batch from {@code incoming} and hands it to the partitioner.
 *
 * <p>Returns {@code true} only after a batch has been partitioned (outcomes
 * OK and OK_NEW_SCHEMA). Returns {@code false} when {@code ok} is not set,
 * on NONE and STOP, and whenever flushing, partitioner setup or partitioning
 * fails (the failure is reported through {@code context.fail}).
 *
 * @return {@code true} if a batch was partitioned, {@code false} otherwise
 * @throws OutOfMemoryException if the upstream outcome is OUT_OF_MEMORY
 * @throws IllegalStateException if the upstream outcome is NOT_YET or any
 *         value not handled explicitly
 */
@Override
public boolean innerNext() {
    if (!ok) {
        return false;
    }
    IterOutcome out;
    if (!done) {
        out = next(incoming);
    } else {
        // Once done is set, upstream is killed and the outcome is forced to NONE.
        incoming.kill(true);
        out = IterOutcome.NONE;
    }
    logger.debug("Partitioner.next(): got next record batch with status {}", out);
    // While first is still set, an OK is promoted to OK_NEW_SCHEMA so that the
    // OK_NEW_SCHEMA branch below creates the partitioner before any data is partitioned.
    if (first && out == IterOutcome.OK) {
        out = IterOutcome.OK_NEW_SCHEMA;
    }
    switch(out) {
        case NONE:
            try {
                // send any pending batches
                if (partitioner != null) {
                    partitioner.flushOutgoingBatches(true, false);
                } else {
                    // No partitioner exists yet: send an empty batch instead.
                    sendEmptyBatch(true);
                }
            } catch (IOException e) {
                incoming.kill(false);
                logger.error("Error while creating partitioning sender or flushing outgoing batches", e);
                context.fail(e);
            }
            // NONE always ends iteration, whether or not the flush succeeded.
            return false;
        case OUT_OF_MEMORY:
            throw new OutOfMemoryException();
        case STOP:
            if (partitioner != null) {
                partitioner.clear();
            }
            return false;
        case OK_NEW_SCHEMA:
            try {
                // send all existing batches
                if (partitioner != null) {
                    partitioner.flushOutgoingBatches(false, true);
                    partitioner.clear();
                }
                // (Re)create the partitioner after the old one, if any, was flushed and cleared.
                createPartitioner();
                if (first) {
                    // Send an empty batch for fast schema
                    first = false;
                    sendEmptyBatch(false);
                }
            } catch (IOException e) {
                incoming.kill(false);
                logger.error("Error while flushing outgoing batches", e);
                context.fail(e);
                return false;
            } catch (SchemaChangeException e) {
                incoming.kill(false);
                logger.error("Error while setting up partitioner", e);
                context.fail(e);
                return false;
            }
            // No break: falls through to OK so the batch that carried the new
            // schema is partitioned as well.
        case OK:
            try {
                partitioner.partitionBatch(incoming);
            } catch (IOException e) {
                context.fail(e);
                incoming.kill(false);
                return false;
            }
            // Clear the incoming vectors once the batch has been partitioned.
            for (VectorWrapper<?> v : incoming) {
                v.clear();
            }
            return true;
        case NOT_YET:
        default:
            throw new IllegalStateException();
    }
}
Also used : IterOutcome(org.apache.drill.exec.record.RecordBatch.IterOutcome) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) IOException(java.io.IOException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException)

Example 4 with IterOutcome

use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.

the class IteratorValidatorBatchIterator method next.

/**
 * Calls {@code next()} on the wrapped {@code incoming} batch and validates
 * both the call itself and the returned outcome.
 *
 * <p>Checks performed, in order:
 * <ol>
 *   <li>{@code next()} must not be called again after it threw, or after it
 *       returned NONE or STOP;</li>
 *   <li>OK and NONE are only accepted once a schema has been seen
 *       ({@code validationState == HAVE_SCHEMA});</li>
 *   <li>for OK and OK_NEW_SCHEMA, the incoming schema must be non-null and
 *       non-empty, the record count must not exceed {@code MAX_BATCH_SIZE},
 *       and, if {@code VALIDATE_VECTORS} is set, the vectors are validated.</li>
 * </ol>
 *
 * <p>Any {@link RuntimeException} or {@link Error} raised here is stored in
 * {@code exceptionState} before being rethrown, so that a later call can be
 * rejected by check 1.
 *
 * @return the outcome returned by {@code incoming.next()}
 * @throws IllegalStateException if any of the checks above fails
 */
@Override
public IterOutcome next() {
    logger.trace("[#{}; on {}]: next() called.", instNum, batchTypeName);
    final IterOutcome prevBatchState = batchState;
    try {
        // Check whether next() should even have been called in current state.
        if (null != exceptionState) {
            throw new IllegalStateException(String.format("next() [on #%d; %s] called again after it threw %s (after" + " returning %s).  Caller should not have called next() again.", instNum, batchTypeName, exceptionState, batchState));
        }
        // (Note:  This could use validationState.)
        if (batchState == NONE || batchState == STOP) {
            throw new IllegalStateException(String.format("next() [on #%d, %s] called again after it returned %s." + "  Caller should not have called next() again.", instNum, batchTypeName, batchState));
        }
        // Now get result from upstream next().
        batchState = incoming.next();
        logger.trace("[#{}; on {}]: incoming next() return: ({} ->) {}", instNum, batchTypeName, prevBatchState, batchState);
        // Check state transition and update high-level state.
        switch(batchState) {
            case OK_NEW_SCHEMA:
                // OK_NEW_SCHEMA is allowed at any time, except if terminated (checked
                // above).
                // OK_NEW_SCHEMA moves to have-seen-schema state.
                validationState = ValidationState.HAVE_SCHEMA;
                validateBatch();
                break;
            case OK:
                // OK is allowed only once a schema has been seen; the terminated
                // case was already rejected above.
                if (validationState != ValidationState.HAVE_SCHEMA) {
                    throw new IllegalStateException(String.format("next() returned %s without first returning %s [#%d, %s]", batchState, OK_NEW_SCHEMA, instNum, batchTypeName));
                }
                validateBatch();
                // OK doesn't change high-level state.
                break;
            case NONE:
                // NONE is allowed only once a schema has been seen; the
                // already-terminated case was rejected above.
                if (validationState != ValidationState.HAVE_SCHEMA) {
                    throw new IllegalStateException(String.format("next() returned %s without first returning %s [#%d, %s]", batchState, OK_NEW_SCHEMA, instNum, batchTypeName));
                }
                // NONE moves to terminal high-level state.
                validationState = ValidationState.TERMINAL;
                break;
            case STOP:
                // STOP is allowed at any time, except if already terminated (checked
                // above).
                // STOP moves to terminal high-level state.
                validationState = ValidationState.TERMINAL;
                break;
            case NOT_YET:
            case OUT_OF_MEMORY:
                // NOT_YET and OUT_OF_MEMORY don't change high-level state.
                break;
            default:
                throw new AssertionError("Unhandled new " + IterOutcome.class.getSimpleName() + " value " + batchState);
        }
        // Validate schema when available.
        if (batchState == OK || batchState == OK_NEW_SCHEMA) {
            final BatchSchema prevLastNewSchema = lastNewSchema;
            lastSchema = incoming.getSchema();
            if (batchState == OK_NEW_SCHEMA) {
                lastNewSchema = lastSchema;
            }
            // The null check must precede the trace block below, which dereferences
            // lastSchema; otherwise a null schema surfaces as a bare
            // NullPointerException whenever trace logging is enabled.
            if (lastSchema == null) {
                throw new IllegalStateException(String.format("Incoming batch [#%d, %s] has a null schema. This is not allowed.", instNum, batchTypeName));
            }
            if (logger.isTraceEnabled()) {
                logger.trace("[#{}; on {}]: incoming next() return: #records = {}, " + "\n  schema:" + "\n    {}, " + "\n  prev. new ({}):" + "\n    {}", instNum, batchTypeName, incoming.getRecordCount(), lastSchema, lastSchema.equals(prevLastNewSchema) ? "equal" : "not equal", prevLastNewSchema);
            }
            if (lastSchema.getFieldCount() == 0) {
                throw new IllegalStateException(String.format("Incoming batch [#%d, %s] has an empty schema. This is not allowed.", instNum, batchTypeName));
            }
            if (incoming.getRecordCount() > MAX_BATCH_SIZE) {
                throw new IllegalStateException(String.format("Incoming batch [#%d, %s] has size %d, which is beyond the" + " limit of %d", instNum, batchTypeName, incoming.getRecordCount(), MAX_BATCH_SIZE));
            }
            if (VALIDATE_VECTORS) {
                VectorValidator.validate(incoming);
            }
        }
        return batchState;
    } catch (RuntimeException | Error e) {
        // Remember the failure so that a subsequent next() call is rejected.
        exceptionState = e;
        // exceptionState is passed twice on purpose: once for the last {} placeholder
        // and once as the trailing throwable argument.
        logger.trace("[#{}, on {}]: incoming next() exception: ({} ->) {}", instNum, batchTypeName, prevBatchState, exceptionState, exceptionState);
        throw e;
    }
}
Also used : IterOutcome(org.apache.drill.exec.record.RecordBatch.IterOutcome) BatchSchema(org.apache.drill.exec.record.BatchSchema)

Aggregations

IterOutcome (org.apache.drill.exec.record.RecordBatch.IterOutcome): 4, IOException (java.io.IOException): 1, OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 1, SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 1, BatchSchema (org.apache.drill.exec.record.BatchSchema): 1