Example 51 with SchemaChangeException

Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache.

The class TopNBatch, method innerNext().

@Override
public IterOutcome innerNext() {
    recordCount = 0;
    if (state == BatchState.DONE) {
        return IterOutcome.NONE;
    }
    if (schema != null) {
        if (getSelectionVector4().next()) {
            recordCount = sv4.getCount();
            return IterOutcome.OK;
        } else {
            recordCount = 0;
            return IterOutcome.NONE;
        }
    }
    try {
        outer: while (true) {
            Stopwatch watch = Stopwatch.createStarted();
            IterOutcome upstream;
            if (first) {
                upstream = IterOutcome.OK_NEW_SCHEMA;
                first = false;
            } else {
                upstream = next(incoming);
            }
            if (upstream == IterOutcome.OK && schema == null) {
                upstream = IterOutcome.OK_NEW_SCHEMA;
                container.clear();
            }
            logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
            switch(upstream) {
                case NONE:
                    break outer;
                case NOT_YET:
                    throw new UnsupportedOperationException();
                case OUT_OF_MEMORY:
                case STOP:
                    return upstream;
                case OK_NEW_SCHEMA:
                    // Only handle the case where the schema truly changes; artificial schema changes are ignored.
                    if (!incoming.getSchema().equals(schema)) {
                        if (schema != null) {
                            if (!unionTypeEnabled) {
                                throw new UnsupportedOperationException("Sort doesn't currently support sorts with changing schemas.");
                            } else {
                                this.schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                                purgeAndResetPriorityQueue();
                                this.schemaChanged = true;
                            }
                        } else {
                            this.schema = incoming.getSchema();
                        }
                    }
                // fall through.
                case OK:
                    if (incoming.getRecordCount() == 0) {
                        for (VectorWrapper<?> w : incoming) {
                            w.clear();
                        }
                        break;
                    }
                    countSincePurge += incoming.getRecordCount();
                    batchCount++;
                    RecordBatchData batch;
                    if (schemaChanged) {
                        batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
                    } else {
                        batch = new RecordBatchData(incoming, oContext.getAllocator());
                    }
                    boolean success = false;
                    try {
                        batch.canonicalize();
                        if (priorityQueue == null) {
                            assert !schemaChanged;
                            priorityQueue = createNewPriorityQueue(context, config.getOrderings(), new ExpandableHyperContainer(batch.getContainer()), MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
                        }
                        priorityQueue.add(context, batch);
                        if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
                            purge();
                            countSincePurge = 0;
                            batchCount = 0;
                        }
                        success = true;
                    } finally {
                        if (!success) {
                            batch.clear();
                        }
                    }
                    break;
                default:
                    throw new UnsupportedOperationException();
            }
        }
        if (schema == null || priorityQueue == null) {
            // priorityQueue may be null at this point if the first incoming batch is empty
            state = BatchState.DONE;
            return IterOutcome.NONE;
        }
        priorityQueue.generate();
        this.sv4 = priorityQueue.getFinalSv4();
        container.clear();
        for (VectorWrapper<?> w : priorityQueue.getHyperBatch()) {
            container.add(w.getValueVectors());
        }
        container.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
        recordCount = sv4.getCount();
        return IterOutcome.OK_NEW_SCHEMA;
    } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.fail(ex);
        return IterOutcome.STOP;
    }
}
Also used: ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException), RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData), VectorWrapper (org.apache.drill.exec.record.VectorWrapper), Stopwatch (com.google.common.base.Stopwatch), IOException (java.io.IOException)
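
For readers new to Drill's iterator protocol, the outer loop above is an instance of a common pattern: drain the upstream operator one batch at a time, regenerate schema-dependent code on OK_NEW_SCHEMA, and propagate terminal outcomes. Below is a minimal, self-contained sketch of that pattern; the Upstream interface and UpstreamDriver class are illustrative stand-ins, not Drill's actual RecordBatch API.

// Minimal sketch of the IterOutcome-driven loop (stand-in names, not Drill's API).
enum IterOutcome { NONE, OK, OK_NEW_SCHEMA, NOT_YET, OUT_OF_MEMORY, STOP }

interface Upstream {
    IterOutcome next();
}

class UpstreamDriver {
    IterOutcome drain(Upstream incoming) {
        outer:
        while (true) {
            IterOutcome upstream = incoming.next();
            switch (upstream) {
                case NONE:
                    break outer;              // upstream exhausted; emit final results
                case OUT_OF_MEMORY:
                case STOP:
                    return upstream;          // propagate terminal outcomes unchanged
                case OK_NEW_SCHEMA:
                    // (re)generate schema-dependent code here, then fall through
                case OK:
                    // consume the current batch here
                    break;
                default:
                    throw new UnsupportedOperationException("Unexpected outcome: " + upstream);
            }
        }
        return IterOutcome.OK_NEW_SCHEMA;     // first downstream batch carries the schema
    }
}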

Example 52 with SchemaChangeException

Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache.

The class FlattenTemplate, method flattenRecords().

@Override
public final int flattenRecords(final int recordCount, final int firstOutputIndex, final Flattener.Monitor monitor) {
    switch(svMode) {
        case FOUR_BYTE:
            throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
        case TWO_BYTE:
            throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
        case NONE:
            if (innerValueIndex == -1) {
                innerValueIndex = 0;
            }
            final int initialInnerValueIndex = currentInnerValueIndex;
            // restore state to local stack
            int valueIndexLocal = valueIndex;
            int innerValueIndexLocal = innerValueIndex;
            int currentInnerValueIndexLocal = currentInnerValueIndex;
            outer: {
                int outputIndex = firstOutputIndex;
                int recordsThisCall = 0;
                final int valueCount = accessor.getValueCount();
                for (; valueIndexLocal < valueCount; valueIndexLocal++) {
                    final int innerValueCount = accessor.getInnerValueCountAt(valueIndexLocal);
                    for (; innerValueIndexLocal < innerValueCount; innerValueIndexLocal++) {
                        // If we've hit the batch size limit, stop and flush what we've got so far.
                        if (recordsThisCall == outputLimit) {
                            if (bigRecords) {
                                /*
                   * We got to the limit we used before, but did we go over
                   * the bigRecordsBufferSize in the second half of the batch? If
                   * so, we'll need to adjust the batch limits.
                   */
                                adjustBatchLimits(1, monitor, recordsThisCall);
                            }
                            // Flush this batch.
                            break outer;
                        }
                        /*
               * At the moment, the output record includes the input record, so for very
               * large records that we're flattening, we're carrying forward the original
               * record as well as the flattened element. We've seen a case where flattening a 4MB
               * record with a 20,000 element array causing memory usage to explode. To avoid
               * that until we can push down the selected fields to operators like this, we
               * also limit the amount of memory in use at one time.
               *
               * We have to have written at least one record to be able to get a buffer that will
               * have a real allocator, so we have to do this lazily. We won't check the limit
               * for the first two records, but that keeps this simple.
               */
                        if (bigRecords) {
                            /*
                 * If we're halfway through the outputLimit, check on our memory
                 * usage so far.
                 */
                            if (recordsThisCall == outputLimit / 2) {
                                /*
                   * If we've used more than half the space we've used for big records
                   * in the past, we've seen even bigger records than before, so stop and
                   * see if we need to flush here before we go over bigRecordsBufferSize
                   * memory usage, and reduce the outputLimit further before we continue
                   * with the next batch.
                   */
                                if (adjustBatchLimits(2, monitor, recordsThisCall)) {
                                    break outer;
                                }
                            }
                        } else {
                            if (outputAllocator.getAllocatedMemory() > OUTPUT_MEMORY_LIMIT) {
                                /*
                   * We're dealing with big records. Reduce the outputLimit to
                   * the current record count, and take note of how much space the
                   * vectors report using for that. We'll use those numbers as limits
                   * going forward in order to avoid allocating more memory.
                   */
                                bigRecords = true;
                                outputLimit = Math.min(recordsThisCall, outputLimit);
                                if (outputLimit < 1) {
                                    throw new IllegalStateException("flatten outputLimit (" + outputLimit + ") won't make progress");
                                }
                                /*
                   * This will differ from what the allocator reports because of
                   * overhead. But the allocator check is much cheaper to do, so we
                   * only compute this at selected times.
                   */
                                bigRecordsBufferSize = monitor.getBufferSizeFor(recordsThisCall);
                                // Stop and flush.
                                break outer;
                            }
                        }
                        try {
                            doEval(valueIndexLocal, outputIndex);
                        } catch (OversizedAllocationException ex) {
                            // Unable to flatten due to a soft buffer overflow. Split the batch here and resume execution.
                            logger.debug("Reached allocation limit. Splitting the batch at input index: {} - inner index: {} - current completed index: {}", valueIndexLocal, innerValueIndexLocal, currentInnerValueIndexLocal);
                            /*
                 * TODO
                 * We can't further reduce the output limits here because it won't have
                 * any effect. The vectors have already gotten large, and there's currently
                 * no way to reduce their size. Ideally, we could reduce the outputLimit,
                 * and reduce the size of the currently used vectors.
                 */
                            break outer;
                        } catch (SchemaChangeException e) {
                            throw new UnsupportedOperationException(e);
                        }
                        outputIndex++;
                        currentInnerValueIndexLocal++;
                        ++recordsThisCall;
                    }
                    innerValueIndexLocal = 0;
                }
            }
            // save state to heap
            valueIndex = valueIndexLocal;
            innerValueIndex = innerValueIndexLocal;
            currentInnerValueIndex = currentInnerValueIndexLocal;
            // transfer the computed range
            final int delta = currentInnerValueIndexLocal - initialInnerValueIndex;
            for (TransferPair t : transfers) {
                t.splitAndTransfer(initialInnerValueIndex, delta);
            }
            return delta;
        default:
            throw new UnsupportedOperationException();
    }
}
Also used: TransferPair (org.apache.drill.exec.record.TransferPair), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), OversizedAllocationException (org.apache.drill.exec.exception.OversizedAllocationException)
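
The catch clause that rewraps SchemaChangeException in UnsupportedOperationException is not incidental: flattenRecords implements a template method whose interface declares no checked exceptions, so generated code has to tunnel checked failures as unchecked ones. A minimal sketch of the idiom follows, using stand-in names rather than Drill's actual Flattener interface.

// Sketch of the exception-tunneling idiom (illustrative stand-in types).
class CheckedSchemaException extends Exception {
    CheckedSchemaException(String message) { super(message); }
}

interface RecordFlattener {
    int flattenRecords(int recordCount);     // no checked exceptions declared
}

class FlattenSketch implements RecordFlattener {
    @Override
    public int flattenRecords(int recordCount) {
        try {
            return doEval(recordCount);
        } catch (CheckedSchemaException e) {
            // Tunnel the checked exception through the throws-less interface.
            throw new UnsupportedOperationException(e);
        }
    }

    private int doEval(int recordCount) throws CheckedSchemaException {
        return recordCount;                  // generated code would evaluate expressions here
    }
}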

Example 53 with SchemaChangeException

Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache.

The class HashJoinBatch, method setupHashTable().

public void setupHashTable() throws IOException, SchemaChangeException, ClassTransformationException {
    // Setup the hash table configuration object
    int conditionsSize = conditions.size();
    final List<NamedExpression> rightExpr = new ArrayList<>(conditionsSize);
    List<NamedExpression> leftExpr = new ArrayList<>(conditionsSize);
    // Create named expressions from the conditions
    for (int i = 0; i < conditionsSize; i++) {
        rightExpr.add(new NamedExpression(conditions.get(i).getRight(), new FieldReference("build_side_" + i)));
        leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
    }
    // Set the left named expression to be null if the probe batch is empty.
    if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
        leftExpr = null;
    } else {
        if (left.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
            final String errorMsg = new StringBuilder().append("Hash join does not support probe batch with selection vectors. ").append("Probe batch has selection mode = ").append(left.getSchema().getSelectionVectorMode()).toString();
            throw new SchemaChangeException(errorMsg);
        }
    }
    final HashTableConfig htConfig = new HashTableConfig((int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE), HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators);
    // Create the chained hash table
    final ChainedHashTable ht = new ChainedHashTable(htConfig, context, oContext.getAllocator(), this.right, this.left, null);
    hashTable = ht.createAndSetupHashTable(null);
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), HashTableConfig (org.apache.drill.exec.physical.impl.common.HashTableConfig), FieldReference (org.apache.drill.common.expression.FieldReference), NamedExpression (org.apache.drill.common.logical.data.NamedExpression), ArrayList (java.util.ArrayList), ChainedHashTable (org.apache.drill.exec.physical.impl.common.ChainedHashTable)
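
The loop over conditions above builds two positionally aligned key lists, one per join side, with generated field names. A self-contained sketch of that shape, using stand-in types rather than Drill's NamedExpression and FieldReference:

import java.util.ArrayList;
import java.util.List;

// Sketch of the condition-splitting loop: the i-th build key stays aligned
// with the i-th probe key so the hash table can compare them positionally.
class JoinKeySplitter {
    record JoinCondition(String left, String right) {}
    record NamedExpr(String ref, String expr) {}

    final List<NamedExpr> buildKeys = new ArrayList<>();
    final List<NamedExpr> probeKeys = new ArrayList<>();

    JoinKeySplitter(List<JoinCondition> conditions) {
        for (int i = 0; i < conditions.size(); i++) {
            JoinCondition c = conditions.get(i);
            buildKeys.add(new NamedExpr("build_side_" + i, c.right()));
            probeKeys.add(new NamedExpr("probe_side_" + i, c.left()));
        }
    }
}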

Example 54 with SchemaChangeException

Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache.

The class HashJoinBatch, method executeBuildPhase().

public void executeBuildPhase() throws SchemaChangeException, ClassTransformationException, IOException {
    // skip first batch if count is zero, as it may be an empty schema batch
    if (right.getRecordCount() == 0) {
        for (final VectorWrapper<?> w : right) {
            w.clear();
        }
        rightUpstream = next(right);
    }
    boolean moreData = true;
    while (moreData) {
        switch(rightUpstream) {
            case OUT_OF_MEMORY:
            case NONE:
            case NOT_YET:
            case STOP:
                moreData = false;
                continue;
            case OK_NEW_SCHEMA:
                if (rightSchema == null) {
                    rightSchema = right.getSchema();
                    if (rightSchema.getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
                        final String errorMsg = new StringBuilder().append("Hash join does not support build batch with selection vectors. ").append("Build batch has selection mode = ").append(rightSchema.getSelectionVectorMode()).toString();
                        throw new SchemaChangeException(errorMsg);
                    }
                    setupHashTable();
                } else {
                    if (!rightSchema.equals(right.getSchema())) {
                        throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in build side.", rightSchema, right.getSchema());
                    }
                    hashTable.updateBatches();
                }
            // Fall through
            case OK:
                final int currentRecordCount = right.getRecordCount();
                /* For every new build batch, we add some new state to the
                 * helper context.
                 */
                hjHelper.addNewBatch(currentRecordCount);
                // htIndex receives the global index that the hash table assigns to this key
                final IndexPointer htIndex = new IndexPointer();
                // For every record in the build batch, hash the key columns
                for (int i = 0; i < currentRecordCount; i++) {
                    hashTable.put(i, htIndex, 1);
                    /* Use the global index returned by the hash table, to store
                         * the current record index and batch index. This will be used
                         * later when we probe and find a match.
                         */
                    hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i);
                }
                /* Completed hashing all records in this batch. Transfer the batch
                     * to the hyper vector container. Will be used when we want to retrieve
                     * records that have matching keys on the probe side.
                     */
                final RecordBatchData nextBatch = new RecordBatchData(right, oContext.getAllocator());
                boolean success = false;
                try {
                    if (hyperContainer == null) {
                        hyperContainer = new ExpandableHyperContainer(nextBatch.getContainer());
                    } else {
                        hyperContainer.addBatch(nextBatch.getContainer());
                    }
                    // completed processing a batch, increment batch index
                    buildBatchIndex++;
                    success = true;
                } finally {
                    if (!success) {
                        nextBatch.clear();
                    }
                }
                break;
        }
        // Get the next record batch
        rightUpstream = next(HashJoinHelper.RIGHT_INPUT, right);
    }
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData), IndexPointer (org.apache.drill.exec.physical.impl.common.IndexPointer)
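
The success-flag try/finally around nextBatch is the ownership-transfer idiom Drill uses to avoid leaking direct memory: the batch is cleared only if it was never handed to the hyper container. A minimal sketch with hypothetical types:

// Sketch of the success-flag cleanup idiom (types here are stand-ins).
class OwnedBatch {
    void clear() { /* release the batch's buffers */ }
}

class HyperContainerSketch {
    void addBatch(OwnedBatch batch) { /* may throw before taking ownership */ }

    static void accept(HyperContainerSketch container, OwnedBatch batch) {
        boolean success = false;
        try {
            container.addBatch(batch);   // on success the container owns the batch
            success = true;
        } finally {
            if (!success) {
                batch.clear();           // failed before ownership transfer: clean up
            }
        }
    }
}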

Example 55 with SchemaChangeException

Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache.

The class HashJoinBatch, method buildSchema().

@Override
protected void buildSchema() throws SchemaChangeException {
    leftUpstream = next(left);
    rightUpstream = next(right);
    if (leftUpstream == IterOutcome.STOP || rightUpstream == IterOutcome.STOP) {
        state = BatchState.STOP;
        return;
    }
    if (leftUpstream == IterOutcome.OUT_OF_MEMORY || rightUpstream == IterOutcome.OUT_OF_MEMORY) {
        state = BatchState.OUT_OF_MEMORY;
        return;
    }
    // Initialize the hash join helper context
    hjHelper = new HashJoinHelper(context, oContext.getAllocator());
    try {
        rightSchema = right.getSchema();
        final VectorContainer vectors = new VectorContainer(oContext);
        for (final VectorWrapper<?> w : right) {
            vectors.addOrGet(w.getField());
        }
        vectors.buildSchema(SelectionVectorMode.NONE);
        vectors.setRecordCount(0);
        hyperContainer = new ExpandableHyperContainer(vectors);
        hjHelper.addNewBatch(0);
        buildBatchIndex++;
        setupHashTable();
        hashJoinProbe = setupHashJoinProbe();
        // Build the container schema and set the counts
        for (final VectorWrapper<?> w : container) {
            w.getValueVector().allocateNew();
        }
        container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        container.setRecordCount(outputRecords);
    } catch (IOException | ClassTransformationException e) {
        throw new SchemaChangeException(e);
    }
}
Also used: ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException), IOException (java.io.IOException), VectorContainer (org.apache.drill.exec.record.VectorContainer)
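
The catch clause at the end shows a standard exception-translation boundary: lower-level IO and code-generation failures are rewrapped so callers of buildSchema deal with a single checked type. A minimal sketch of the pattern; SchemaSetupException and compileGeneratedCode are illustrative stand-ins, not Drill's actual classes.

import java.io.IOException;

// Sketch of the exception-translation boundary used by buildSchema above.
class SchemaSetupException extends Exception {
    SchemaSetupException(Throwable cause) { super(cause); }
}

class SchemaBuildSketch {
    void buildSchema() throws SchemaSetupException {
        try {
            compileGeneratedCode();
        } catch (IOException | ReflectiveOperationException e) {
            // Keep the original failure as the cause so it stays in the stack trace.
            throw new SchemaSetupException(e);
        }
    }

    private void compileGeneratedCode() throws IOException, ReflectiveOperationException {
        // real code would compile and load the generated hash-table/probe classes
    }
}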

Aggregations

SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 66 usages
IOException (java.io.IOException): 23 usages
MaterializedField (org.apache.drill.exec.record.MaterializedField): 20 usages
ErrorCollector (org.apache.drill.common.expression.ErrorCollector): 18 usages
ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl): 18 usages
LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 18 usages
ValueVector (org.apache.drill.exec.vector.ValueVector): 18 usages
ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException): 16 usages
TransferPair (org.apache.drill.exec.record.TransferPair): 9 usages
HoldingContainer (org.apache.drill.exec.expr.ClassGenerator.HoldingContainer): 8 usages
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 8 usages
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 7 usages
Ordering (org.apache.drill.common.logical.data.Order.Ordering): 7 usages
JConditional (com.sun.codemodel.JConditional): 6 usages
NamedExpression (org.apache.drill.common.logical.data.NamedExpression): 6 usages
ValueVectorWriteExpression (org.apache.drill.exec.expr.ValueVectorWriteExpression): 6 usages
RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader): 6 usages
VectorContainer (org.apache.drill.exec.record.VectorContainer): 6 usages
SchemaPath (org.apache.drill.common.expression.SchemaPath): 5 usages
RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData): 5 usages