Search in sources :

Example 1 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class PriorityQueueTemplate method init.

/**
 * Records the queue configuration and allocates the heap selection vector.
 *
 * <p>The backing buffer is requested from {@code allocator} with a size of
 * {@code 4 * (limit + 1)} bytes and wrapped in a {@link SelectionVector4}
 * constructed with {@code limit} and {@code Character.MAX_VALUE}.
 *
 * @param limit     stored on this instance and used to size the heap buffer
 * @param context   fragment context stored on this instance
 * @param allocator allocator stored on this instance and used for the heap buffer
 * @param hasSv2    flag stored on this instance
 * @throws SchemaChangeException declared by the interface; nothing in this body throws it directly
 */
@Override
public void init(int limit, FragmentContext context, BufferAllocator allocator, boolean hasSv2) throws SchemaChangeException {
    this.limit = limit;
    this.context = context;
    this.allocator = allocator;
    // One slot more than the limit; presumably 4 bytes per SV4 entry - confirm against SelectionVector4.
    final int heapBytes = (limit + 1) * 4;
    @SuppressWarnings("resource") final DrillBuf heapBuffer = allocator.buffer(heapBytes);
    heapSv4 = new SelectionVector4(heapBuffer, limit, Character.MAX_VALUE);
    this.hasSv2 = hasSv2;
}
Also used : DrillBuf(io.netty.buffer.DrillBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 2 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class PriorityQueueTemplate method generate.

/**
 * Builds {@code finalSv4} by draining the queue.
 *
 * <p>A buffer of {@code 4 * queueSize} bytes is allocated and wrapped in a new
 * {@link SelectionVector4}; the slots are then filled from the highest index
 * down to index 0, each with the next value returned by {@code pop()}.
 *
 * @throws SchemaChangeException declared by the interface; may be propagated from {@code pop()} (not visible here)
 */
@Override
public void generate() throws SchemaChangeException {
    final Stopwatch timer = Stopwatch.createStarted();
    @SuppressWarnings("resource") final DrillBuf outputBuf = allocator.buffer(4 * queueSize);
    finalSv4 = new SelectionVector4(outputBuf, queueSize, 4000);
    // Snapshot the size once before popping: the starting slot must not follow
    // any change pop() might make to queueSize.
    int slot = queueSize;
    while (--slot >= 0) {
        finalSv4.set(slot, pop());
    }
    logger.debug("Took {} us to generate output of {}", timer.elapsed(TimeUnit.MICROSECONDS), finalSv4.getTotalCount());
}
Also used : Stopwatch(com.google.common.base.Stopwatch) DrillBuf(io.netty.buffer.DrillBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 3 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class TopNBatch method purgeAndResetPriorityQueue.

/**
 * Handles a schema change during execution by rebuilding the priority queue.
 *
 * <ol>
 *   <li>Purge the existing batches: copy the records selected by the queue's heap SV4
 *       out of the queue's hyper batch, one SV4 batch at a time, into a sort batch builder.</li>
 *   <li>Build a container from the copied data and coerce it to {@code this.schema}.</li>
 *   <li>Clean up the old priority queue, create a new one, and reset it with the
 *       canonicalized, coerced container.</li>
 * </ol>
 *
 * <p>The builder is cleared and closed in a {@code finally} block regardless of outcome.
 *
 * @throws SchemaChangeException declared; presumably raised by the copier, builder, or queue calls - confirm
 * @throws ClassTransformationException declared; presumably raised while generating the copier or queue - confirm
 * @throws IOException declared; presumably raised while generating the copier or queue - confirm
 */
public void purgeAndResetPriorityQueue() throws SchemaChangeException, ClassTransformationException, IOException {
    final Stopwatch watch = Stopwatch.createStarted();
    // Source side: the queue's current hyper batch plus the SV4 selecting records from it.
    final VectorContainer c = priorityQueue.getHyperBatch();
    // Destination side: a fresh container that receives the copied records.
    final VectorContainer newContainer = new VectorContainer(oContext);
    @SuppressWarnings("resource") final SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
    final SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
    final SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
    copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(), newContainer, newBatch, null);
    @SuppressWarnings("resource") SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    try {
        // Copy one SV4 batch per iteration until selectionVector4.next() reports no more.
        do {
            final int count = selectionVector4.getCount();
            final int copiedRecords = copier.copyRecords(0, count);
            assert copiedRecords == count;
            // Set the value count on every destination vector to the number of records copied.
            for (VectorWrapper<?> v : newContainer) {
                ValueVector.Mutator m = v.getValueVector().getMutator();
                m.setValueCount(count);
            }
            newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            newContainer.setRecordCount(count);
            builder.add(newBatch);
        } while (selectionVector4.next());
        // All selected records have been handed to the builder; clear the old SV4 and hyper batch.
        selectionVector4.clear();
        c.clear();
        final VectorContainer oldSchemaContainer = new VectorContainer(oContext);
        builder.canonicalize();
        builder.build(context, oldSchemaContainer);
        oldSchemaContainer.setRecordCount(builder.getSv4().getCount());
        // Coerce the rebuilt data to the schema held in this.schema.
        final VectorContainer newSchemaContainer = SchemaUtil.coerceContainer(oldSchemaContainer, this.schema, oContext);
        // Canonicalize new container since we canonicalize incoming batches before adding to queue.
        final VectorContainer canonicalizedContainer = VectorContainer.canonicalize(newSchemaContainer);
        canonicalizedContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
        // Clean up the old queue before replacing it with a newly created one.
        priorityQueue.cleanup();
        priorityQueue = createNewPriorityQueue(context, config.getOrderings(), canonicalizedContainer, MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
        priorityQueue.resetQueue(canonicalizedContainer, builder.getSv4().createNewWrapperCurrent());
    } finally {
        // Always clear and close the builder, on both the success and failure paths.
        builder.clear();
        builder.close();
    }
    logger.debug("Took {} us to purge and recreate queue for new schema", watch.elapsed(TimeUnit.MICROSECONDS));
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Stopwatch(com.google.common.base.Stopwatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 4 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class MSortTemplate method setup.

/**
 * Prepares this sorter: wraps the incoming selection vector, records where each
 * batch's entries start, and allocates the auxiliary selection vector.
 *
 * @param context         fragment context stored on this instance and passed to {@code doSetup}
 * @param allocator       allocator used for the auxiliary SV4 buffer
 * @param vector4         incoming selection vector; must not be null. A new wrapper is taken
 *                        from it and the passed-in instance is then cleared
 * @param hyperBatch      container passed to {@code doSetup} as the data to read
 * @param outputBatchSize requested batch size; capped at {@code Character.MAX_VALUE} and at
 *                        the total record count
 * @throws SchemaChangeException declared; may be propagated from {@code doSetup} (not visible here)
 * @throws UnsupportedOperationException if the batch index taken from the upper 16 bits of an
 *                        entry neither equals the previous batch index nor exceeds it by one
 */
@Override
public void setup(final FragmentContext context, final BufferAllocator allocator, final SelectionVector4 vector4, final VectorContainer hyperBatch, int outputBatchSize) throws SchemaChangeException {
    // we pass in the local hyperBatch since that is where we'll be reading data.
    Preconditions.checkNotNull(vector4);
    this.vector4 = vector4.createNewWrapperCurrent();
    this.context = context;
    vector4.clear();
    doSetup(context, hyperBatch, null);

    // Record the SV4 offset at which each batch begins. This walks every
    // entry to be sorted (potentially millions), so it is not cheap.
    runStarts.add(0);
    int currentBatch = 0;
    final int recordTotal = this.vector4.getTotalCount();
    for (int pos = 0; pos < recordTotal; pos++) {
        // The batch index is read from the upper 16 bits of the entry.
        final int batchOfEntry = this.vector4.get(pos) >>> 16;
        if (batchOfEntry == currentBatch) {
            continue;
        }
        if (batchOfEntry != currentBatch + 1) {
            throw new UnsupportedOperationException(String.format("Missing batch. batch: %d newBatch: %d", currentBatch, batchOfEntry));
        }
        runStarts.add(pos);
        currentBatch = batchOfEntry;
    }

    // Temporary SV4 that holds the merged results.
    @SuppressWarnings("resource") final DrillBuf auxBuffer = allocator.buffer(4 * recordTotal);
    final int cappedBatchSize = Math.min(outputBatchSize, Character.MAX_VALUE);
    desiredRecordBatchCount = Math.min(cappedBatchSize, recordTotal);
    aux = new SelectionVector4(auxBuffer, recordTotal, desiredRecordBatchCount);
}
Also used : DrillBuf(io.netty.buffer.DrillBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 5 with SelectionVector4

use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.

the class MSortTemplate method sort.

/**
 * Merges the runs recorded in {@code runStarts} pairwise, pass after pass, until at
 * most one run start remains.
 *
 * <p>Each pass polls run starts two at a time and hands them to {@code merge}; whatever
 * is left beyond the last merged pair is handed to {@code copyRun}. At the end of a pass
 * {@code vector4} and {@code aux} are exchanged via fresh wrappers. The method returns
 * early, without finishing, when {@code context.shouldContinue()} is false.
 *
 * @param container not read by this method
 */
@Override
public void sort(final VectorContainer container) {
    while (runStarts.size() > 1) {
        // check if we're cancelled/failed frequently
        if (!context.shouldContinue()) {
            return;
        }
        int outIndex = 0;
        final Queue<Integer> newRunStarts = Queues.newLinkedBlockingQueue();
        newRunStarts.add(outIndex);
        final int size = runStarts.size();
        for (int i = 0; i < size / 2; i++) {
            final int left = runStarts.poll();
            final int right = runStarts.poll();
            // The right-hand run ends where the next run starts, or at the
            // total count when no further run start is queued.
            Integer end = runStarts.peek();
            if (end == null) {
                end = vector4.getTotalCount();
            }
            outIndex = merge(left, right, end, outIndex);
            if (outIndex < vector4.getTotalCount()) {
                newRunStarts.add(outIndex);
            }
        }
        // With an odd number of runs the last one has no partner; pass the
        // remaining range to copyRun.
        if (outIndex < vector4.getTotalCount()) {
            copyRun(outIndex, vector4.getTotalCount());
        }
        // Exchange vector4 and aux through new wrappers, clearing each old wrapper.
        final SelectionVector4 tmp = aux.createNewWrapperCurrent(desiredRecordBatchCount);
        aux.clear();
        aux = vector4.createNewWrapperCurrent(desiredRecordBatchCount);
        vector4.clear();
        vector4 = tmp.createNewWrapperCurrent(desiredRecordBatchCount);
        tmp.clear();
        runStarts = newRunStarts;
    }
    aux.clear();
}
Also used : Stopwatch(com.google.common.base.Stopwatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Aggregations

SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 19, DrillBuf (io.netty.buffer.DrillBuf): 8, VectorContainer (org.apache.drill.exec.record.VectorContainer): 6, ValueVector (org.apache.drill.exec.vector.ValueVector): 6, Stopwatch (com.google.common.base.Stopwatch): 4, SortRecordBatchBuilder (org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder): 4, BatchSchema (org.apache.drill.exec.record.BatchSchema): 3, MaterializedField (org.apache.drill.exec.record.MaterializedField): 3, CachedVectorContainer (org.apache.drill.exec.cache.CachedVectorContainer): 2, SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 2, Sorter (org.apache.drill.exec.physical.impl.sort.Sorter): 2, WritableBatch (org.apache.drill.exec.record.WritableBatch): 2, ConfigException (com.typesafe.config.ConfigException): 1, ByteBuf (io.netty.buffer.ByteBuf): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, TreeMap (java.util.TreeMap): 1, FieldReference (org.apache.drill.common.expression.FieldReference): 1, SchemaPath (org.apache.drill.common.expression.SchemaPath): 1, Ordering (org.apache.drill.common.logical.data.Order.Ordering): 1