
Example 1 with ExpandableHyperContainer

Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.

From the class PriorityQueueTemplate, method add:

@SuppressWarnings("resource")
@Override
public void add(FragmentContext context, RecordBatchData batch) throws SchemaChangeException {
    Stopwatch watch = Stopwatch.createStarted();
    if (hyperBatch == null) {
        hyperBatch = new ExpandableHyperContainer(batch.getContainer());
    } else {
        hyperBatch.addBatch(batch.getContainer());
    }
    // may not need to do this every time
    doSetup(context, hyperBatch, null);
    int count = 0;
    SelectionVector2 sv2 = null;
    if (hasSv2) {
        sv2 = batch.getSv2();
    }
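    // Fill the heap until it holds 'limit' entries, restoring the heap property as we go.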
    for (; queueSize < limit && count < batch.getRecordCount(); count++) {
        heapSv4.set(queueSize, batchCount, hasSv2 ? sv2.getIndex(count) : count);
        queueSize++;
        siftUp();
    }
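    // Heap is full: write each remaining record into the scratch slot at 'limit'
    // and swap it with the root when it sorts ahead.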
    for (; count < batch.getRecordCount(); count++) {
        heapSv4.set(limit, batchCount, hasSv2 ? sv2.getIndex(count) : count);
        if (compare(limit, 0) < 0) {
            swap(limit, 0);
            siftDown();
        }
    }
    batchCount++;
    if (hasSv2) {
        sv2.clear();
    }
    logger.debug("Took {} us to add {} records", watch.elapsed(TimeUnit.MICROSECONDS), count);
}
Also used: ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), Stopwatch (com.google.common.base.Stopwatch), SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2)
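
The create-or-extend pattern above is how each of these examples grows a hyper batch one incoming batch at a time. A minimal sketch of just that pattern, assuming callers hand in already-populated VectorContainer instances (the HyperBatchAccumulator class and its accept method are illustrative, not Drill API):

import org.apache.drill.exec.record.ExpandableHyperContainer;
import org.apache.drill.exec.record.VectorContainer;

public class HyperBatchAccumulator {

    private ExpandableHyperContainer hyperBatch;

    // Lazily create the hyper container from the first batch, then stack
    // later batches onto it; addBatch transfers the batch's vectors in.
    public void accept(VectorContainer next) {
        if (hyperBatch == null) {
            hyperBatch = new ExpandableHyperContainer(next);
        } else {
            hyperBatch.addBatch(next);
        }
    }

    // Release all stacked vectors when done.
    public void close() {
        if (hyperBatch != null) {
            hyperBatch.clear();
        }
    }
}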

Example 2 with ExpandableHyperContainer

Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.

From the class MergingRecordBatch, method createMerger:

//  private boolean isOutgoingFull() {
//    return outgoingPosition == DEFAULT_ALLOC_RECORD_COUNT;
//  }
/**
   * Creates a generated class which implements the copy and compare methods.
   *
   * @return instance of a new merger based on generated code
   * @throws SchemaChangeException
   */
private MergingReceiverGeneratorBase createMerger() throws SchemaChangeException {
    try {
        final CodeGenerator<MergingReceiverGeneratorBase> cg = CodeGenerator.get(MergingReceiverGeneratorBase.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
        cg.plainJavaCapable(true);
        // Uncomment the next line to debug the generated code.
        //      cg.saveCodeForDebugging(true);
        final ClassGenerator<MergingReceiverGeneratorBase> g = cg.getRoot();
        ExpandableHyperContainer batch = null;
        boolean first = true;
        for (final RecordBatchLoader loader : batchLoaders) {
            if (first) {
                batch = new ExpandableHyperContainer(loader);
                first = false;
            } else {
                batch.addBatch(loader);
            }
        }
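        // Note: batch remains null here if batchLoaders is empty.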
        generateComparisons(g, batch);
        g.setMappingSet(COPIER_MAPPING_SET);
        CopyUtil.generateCopies(g, batch, true);
        g.setMappingSet(MAIN_MAPPING);
        final MergingReceiverGeneratorBase merger = context.getImplementationClass(cg);
        merger.doSetup(context, batch, outgoingContainer);
        return merger;
    } catch (ClassTransformationException | IOException e) {
        throw new SchemaChangeException(e);
    }
}
Also used: ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException), RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader), IOException (java.io.IOException)
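
Beyond the hyper container, this example shows Drill's runtime code-generation flow: obtain a CodeGenerator for a template, emit per-schema logic into its root ClassGenerator, then ask the FragmentContext to compile and instantiate the result. A condensed sketch of that flow, using only calls that appear in the example (error handling elided; context is the operator's FragmentContext as above):

final CodeGenerator<MergingReceiverGeneratorBase> cg = CodeGenerator.get(
        MergingReceiverGeneratorBase.TEMPLATE_DEFINITION,
        context.getFunctionRegistry(), context.getOptions());
cg.plainJavaCapable(true);           // generated class may be compiled as plain Java
// cg.saveCodeForDebugging(true);    // optionally persist the generated source
final ClassGenerator<MergingReceiverGeneratorBase> g = cg.getRoot();
// ... emit schema-specific compare and copy methods into g ...
final MergingReceiverGeneratorBase merger = context.getImplementationClass(cg);
merger.doSetup(context, batch, outgoingContainer);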

Example 3 with ExpandableHyperContainer

Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.

From the class TopNBatch, method innerNext:

@Override
public IterOutcome innerNext() {
    recordCount = 0;
    if (state == BatchState.DONE) {
        return IterOutcome.NONE;
    }
    if (schema != null) {
        if (getSelectionVector4().next()) {
            recordCount = sv4.getCount();
            return IterOutcome.OK;
        } else {
            recordCount = 0;
            return IterOutcome.NONE;
        }
    }
    try {
        outer: while (true) {
            Stopwatch watch = Stopwatch.createStarted();
            IterOutcome upstream;
            if (first) {
                upstream = IterOutcome.OK_NEW_SCHEMA;
                first = false;
            } else {
                upstream = next(incoming);
            }
            if (upstream == IterOutcome.OK && schema == null) {
                upstream = IterOutcome.OK_NEW_SCHEMA;
                container.clear();
            }
            logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
            switch(upstream) {
                case NONE:
                    break outer;
                case NOT_YET:
                    throw new UnsupportedOperationException();
                case OUT_OF_MEMORY:
                case STOP:
                    return upstream;
                case OK_NEW_SCHEMA:
                    // only change in the case that the schema truly changes.  Artificial schema changes are ignored.
                    if (!incoming.getSchema().equals(schema)) {
                        if (schema != null) {
                            if (!unionTypeEnabled) {
                                throw new UnsupportedOperationException("Sort doesn't currently support schema changes.");
                            } else {
                                this.schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                                purgeAndResetPriorityQueue();
                                this.schemaChanged = true;
                            }
                        } else {
                            this.schema = incoming.getSchema();
                        }
                    }
                // fall through.
                case OK:
                    if (incoming.getRecordCount() == 0) {
                        for (VectorWrapper<?> w : incoming) {
                            w.clear();
                        }
                        break;
                    }
                    countSincePurge += incoming.getRecordCount();
                    batchCount++;
                    RecordBatchData batch;
                    if (schemaChanged) {
                        batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
                    } else {
                        batch = new RecordBatchData(incoming, oContext.getAllocator());
                    }
                    boolean success = false;
                    try {
                        batch.canonicalize();
                        if (priorityQueue == null) {
                            assert !schemaChanged;
                            priorityQueue = createNewPriorityQueue(context, config.getOrderings(), new ExpandableHyperContainer(batch.getContainer()), MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
                        }
                        priorityQueue.add(context, batch);
                        if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
                            purge();
                            countSincePurge = 0;
                            batchCount = 0;
                        }
                        success = true;
                    } finally {
                        if (!success) {
                            batch.clear();
                        }
                    }
                    break;
                default:
                    throw new UnsupportedOperationException();
            }
        }
        if (schema == null || priorityQueue == null) {
            // priorityQueue may be null at this point if the first incoming batch is empty
            state = BatchState.DONE;
            return IterOutcome.NONE;
        }
        priorityQueue.generate();
        this.sv4 = priorityQueue.getFinalSv4();
        container.clear();
        for (VectorWrapper<?> w : priorityQueue.getHyperBatch()) {
            container.add(w.getValueVectors());
        }
        container.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
        recordCount = sv4.getCount();
        return IterOutcome.OK_NEW_SCHEMA;
    } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.fail(ex);
        return IterOutcome.STOP;
    }
}
Also used: ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException), RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData), VectorWrapper (org.apache.drill.exec.record.VectorWrapper), Stopwatch (com.google.common.base.Stopwatch), IOException (java.io.IOException)
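
Once the priority queue has produced its final SV4, the example above repackages the queue's hyper batch into the operator's outgoing container. A minimal sketch of that step, assuming hyperBatch is the queue's four-byte-mode container and outgoing is the operator's (both parameter names are illustrative):

import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;

static void repackage(VectorContainer hyperBatch, VectorContainer outgoing) {
    outgoing.clear();
    // In a hyper batch each wrapper holds an array of value vectors,
    // one per stacked batch; transfer those arrays wholesale.
    for (VectorWrapper<?> w : hyperBatch) {
        outgoing.add(w.getValueVectors());
    }
    // Records are addressed through the SV4, hence FOUR_BYTE mode.
    outgoing.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
}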

Example 4 with ExpandableHyperContainer

Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.

From the class PriorityQueueTemplate, method resetQueue:

@Override
public void resetQueue(VectorContainer container, SelectionVector4 v4) throws SchemaChangeException {
    assert container.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.FOUR_BYTE;
    BatchSchema schema = container.getSchema();
    VectorContainer newContainer = new VectorContainer();
    for (MaterializedField field : schema) {
        int[] ids = container.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds();
        newContainer.add(container.getValueAccessorById(field.getValueClass(), ids).getValueVectors());
    }
    newContainer.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    // Cleanup before recreating hyperbatch and sv4.
    cleanup();
    hyperBatch = new ExpandableHyperContainer(newContainer);
    batchCount = hyperBatch.iterator().next().getValueVectors().length;
    @SuppressWarnings("resource") final DrillBuf drillBuf = allocator.buffer(4 * (limit + 1));
    heapSv4 = new SelectionVector4(drillBuf, limit, Character.MAX_VALUE);
    // Reset queue size (most likely to be set to limit).
    queueSize = 0;
    for (int i = 0; i < v4.getTotalCount(); i++) {
        heapSv4.set(i, v4.get(i));
        ++queueSize;
    }
    v4.clear();
    doSetup(context, hyperBatch, null);
}
Also used: ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), BatchSchema (org.apache.drill.exec.record.BatchSchema), MaterializedField (org.apache.drill.exec.record.MaterializedField), VectorContainer (org.apache.drill.exec.record.VectorContainer), DrillBuf (io.netty.buffer.DrillBuf), SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)
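
The buffer above is sized at 4 * (limit + 1) because each SelectionVector4 entry is a single 4-byte int packing a batch index and an in-batch record index, and the queue keeps one scratch slot just past the limit for the replace-the-root step shown in Example 1. A minimal sketch of the allocate-and-copy step, assuming an allocator, a limit, and an existing SelectionVector4 source (illustrative names; error handling elided):

// 4 bytes per entry, plus one scratch slot beyond the limit.
final DrillBuf buf = allocator.buffer(4 * (limit + 1));
final SelectionVector4 heapSv4 = new SelectionVector4(buf, limit, Character.MAX_VALUE);
for (int i = 0; i < source.getTotalCount(); i++) {
    heapSv4.set(i, source.get(i));   // copy the packed (batch, record) entries
}
source.clear();                      // release the old selection vector's buffer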

Example 5 with ExpandableHyperContainer

Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.

From the class HashJoinBatch, method executeBuildPhase:

public void executeBuildPhase() throws SchemaChangeException, ClassTransformationException, IOException {
    // skip first batch if count is zero, as it may be an empty schema batch
    if (right.getRecordCount() == 0) {
        for (final VectorWrapper<?> w : right) {
            w.clear();
        }
        rightUpstream = next(right);
    }
    boolean moreData = true;
    while (moreData) {
        switch(rightUpstream) {
            case OUT_OF_MEMORY:
            case NONE:
            case NOT_YET:
            case STOP:
                moreData = false;
                continue;
            case OK_NEW_SCHEMA:
                if (rightSchema == null) {
                    rightSchema = right.getSchema();
                    if (rightSchema.getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
                        final String errorMsg = new StringBuilder().append("Hash join does not support build batch with selection vectors. ").append("Build batch has selection mode = ").append(right.getSchema().getSelectionVectorMode()).toString();
                        throw new SchemaChangeException(errorMsg);
                    }
                    setupHashTable();
                } else {
                    if (!rightSchema.equals(right.getSchema())) {
                        throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in build side.", rightSchema, right.getSchema());
                    }
                    hashTable.updateBatches();
                }
            // Fall through
            case OK:
                final int currentRecordCount = right.getRecordCount();
                /* For every new build batch, we store some state in the helper context
                 * Add new state to the helper context
                 */
                hjHelper.addNewBatch(currentRecordCount);
                // Holder contains the global index where the key is hashed into using the hash table
                final IndexPointer htIndex = new IndexPointer();
                // For every record in the build batch, hash the key columns
                for (int i = 0; i < currentRecordCount; i++) {
                    hashTable.put(i, htIndex, 1);
                    /* Use the global index returned by the hash table to store
                     * the current record index and batch index. This will be used
                     * later when we probe and find a match.
                     */
                    hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i);
                }
                /* Completed hashing all records in this batch. Transfer the batch
                 * to the hyper vector container. Will be used when we want to retrieve
                 * records that have matching keys on the probe side.
                 */
                final RecordBatchData nextBatch = new RecordBatchData(right, oContext.getAllocator());
                boolean success = false;
                try {
                    if (hyperContainer == null) {
                        hyperContainer = new ExpandableHyperContainer(nextBatch.getContainer());
                    } else {
                        hyperContainer.addBatch(nextBatch.getContainer());
                    }
                    // completed processing a batch, increment batch index
                    buildBatchIndex++;
                    success = true;
                } finally {
                    if (!success) {
                        nextBatch.clear();
                    }
                }
                break;
        }
        // Get the next record batch
        rightUpstream = next(HashJoinHelper.RIGHT_INPUT, right);
    }
}
Also used: SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer), RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData), IndexPointer (org.apache.drill.exec.physical.impl.common.IndexPointer)
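
Examples 1, 3, and 5 all guard the hand-off of an incoming batch with the same try/finally pattern: copy the batch into a RecordBatchData, and clear it if anything fails before the hyper container takes ownership. A minimal sketch of that pattern, assuming a populated RecordBatch incoming, an OperatorContext oContext, and a nullable hyperContainer field (names mirror the examples):

final RecordBatchData data = new RecordBatchData(incoming, oContext.getAllocator());
boolean success = false;
try {
    if (hyperContainer == null) {
        hyperContainer = new ExpandableHyperContainer(data.getContainer());
    } else {
        hyperContainer.addBatch(data.getContainer());
    }
    success = true;
} finally {
    // If the transfer did not complete, release the copied vectors
    // so the batch's memory is not leaked.
    if (!success) {
        data.clear();
    }
}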

Aggregations

ExpandableHyperContainer (org.apache.drill.exec.record.ExpandableHyperContainer) 6
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 4
IOException (java.io.IOException) 3
ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException) 3
Stopwatch (com.google.common.base.Stopwatch) 2
RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData) 2
VectorContainer (org.apache.drill.exec.record.VectorContainer) 2
DrillBuf (io.netty.buffer.DrillBuf) 1
IndexPointer (org.apache.drill.exec.physical.impl.common.IndexPointer) 1
BatchSchema (org.apache.drill.exec.record.BatchSchema) 1
MaterializedField (org.apache.drill.exec.record.MaterializedField) 1
RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader) 1
VectorWrapper (org.apache.drill.exec.record.VectorWrapper) 1
SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2) 1
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4) 1