Search in sources :

Example 1 with SortRecordBatchBuilder

use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.

the class TopNBatch method purgeAndResetPriorityQueue.

/**
 * Handles a schema change during execution by rebuilding the priority queue for the new schema.
 * <ol>
 *   <li>Purge existing batches: the records addressed by the current queue's heap SV4 are copied
 *       out of its hyper batch, one SV4 batch at a time, and collected in a
 *       SortRecordBatchBuilder; the old SV4 and hyper batch are then cleared.</li>
 *   <li>Promote the newly created container for the new schema: the collected batches are built
 *       into one container, which is coerced to {@code this.schema} and canonicalized.</li>
 *   <li>Recreate the priority queue and reset it with the coerced, canonicalized container.</li>
 * </ol>
 * The builder is cleared and closed whether or not the rebuild succeeds. The elapsed time is
 * logged at debug level only when the rebuild completes without an exception.
 *
 * @throws SchemaChangeException declared by this method; NOTE(review): presumably raised by the
 *         copier setup or the builder when schemas are incompatible - confirm
 * @throws ClassTransformationException declared by this method; NOTE(review): presumably raised
 *         while generating the new priority queue - confirm
 * @throws IOException declared by this method; NOTE(review): origin not visible here - confirm
 */
public void purgeAndResetPriorityQueue() throws SchemaChangeException, ClassTransformationException, IOException {
    final Stopwatch watch = Stopwatch.createStarted();
    // Source side of the purge: the current queue's hyper batch and the SV4 over its heap.
    final VectorContainer c = priorityQueue.getHyperBatch();
    // Destination side: a fresh container that receives the copied records.
    final VectorContainer newContainer = new VectorContainer(oContext);
    @SuppressWarnings("resource") final SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
    final SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
    final SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
    // The copier is generated unconditionally here and stored in `copier`, which is not a local
    // of this method (NOTE(review): presumably an instance field - confirm).
    copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(), newContainer, newBatch, null);
    @SuppressWarnings("resource") SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    try {
        // Copy one SV4 batch per iteration into newContainer and hand it to the builder; the
        // loop runs at least once and continues while the SV4 reports another batch.
        do {
            final int count = selectionVector4.getCount();
            final int copiedRecords = copier.copyRecords(0, count);
            assert copiedRecords == count;
            for (VectorWrapper<?> v : newContainer) {
                ValueVector.Mutator m = v.getValueVector().getMutator();
                m.setValueCount(count);
            }
            newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            newContainer.setRecordCount(count);
            builder.add(newBatch);
        } while (selectionVector4.next());
        // Everything has been copied: clear the old SV4 and the old hyper batch.
        selectionVector4.clear();
        c.clear();
        // Build the purged batches into a single container that still has the old schema.
        final VectorContainer oldSchemaContainer = new VectorContainer(oContext);
        builder.canonicalize();
        builder.build(context, oldSchemaContainer);
        oldSchemaContainer.setRecordCount(builder.getSv4().getCount());
        // Coerce the old-schema data to the schema this operator now uses.
        final VectorContainer newSchemaContainer = SchemaUtil.coerceContainer(oldSchemaContainer, this.schema, oContext);
        // Canonicalize new container since we canonicalize incoming batches before adding to queue.
        final VectorContainer canonicalizedContainer = VectorContainer.canonicalize(newSchemaContainer);
        canonicalizedContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
        // Clean up the old queue, then create its replacement and reset it with the coerced data.
        priorityQueue.cleanup();
        priorityQueue = createNewPriorityQueue(context, config.getOrderings(), canonicalizedContainer, MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
        priorityQueue.resetQueue(canonicalizedContainer, builder.getSv4().createNewWrapperCurrent());
    } finally {
        // Runs on success and on failure. NOTE(review): if clear() throws, close() is skipped.
        builder.clear();
        builder.close();
    }
    logger.debug("Took {} us to purge and recreate queue for new schema", watch.elapsed(TimeUnit.MICROSECONDS));
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Stopwatch(com.google.common.base.Stopwatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 2 with SortRecordBatchBuilder

use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.

the class MergeSort method merge.

/**
 * Merges every buffered in-memory batch into one logical, sorted result held in the given
 * destination container and addressed through an SV4.
 *
 * @param batchGroups the complete set of in-memory batches; emptied by this call once the
 *        batches have been handed to the builder
 * @param batch the record batch (operator) for the sort operator
 * @param destContainer the vector container for the sort operator
 * @return the sv4 for this operator, or {@code null} when {@code context.shouldContinue()}
 *         reports {@code false} after the sort
 */
public SelectionVector4 merge(LinkedList<BatchGroup.InputBatch> batchGroups, VectorAccessible batch, VectorContainer destContainer) {
    // Hand the buffered batches over to a collection that MSorter can use. From this point the
    // builder owns the batches and will release them if an error occurs.
    builder = new SortRecordBatchBuilder(oAllocator);
    for (BatchGroup.InputBatch inputBatch : batchGroups) {
        final RecordBatchData batchData = new RecordBatchData(inputBatch.getContainer(), oAllocator);
        batchData.setSv2(inputBatch.getSv2());
        builder.add(batchData);
    }
    batchGroups.clear();

    try {
        builder.build(context, destContainer);
        sv4 = builder.getSv4();
        mSorter = opCg.createNewMSorter(batch);
        mSorter.setup(context, oAllocator, sv4, destContainer, sv4.getCount());
    } catch (SchemaChangeException schemaChange) {
        throw UserException.unsupportedError(schemaChange)
            .message("Unexpected schema change - likely code error.")
            .build(logger);
    }

    // Memory-leak testing hook: inject an exception once mSorter has finished setup.
    ExternalSortBatch.injector.injectUnchecked(context.getExecutionControls(), ExternalSortBatch.INTERRUPTION_AFTER_SETUP);
    mSorter.sort(destContainer);

    if (context.shouldContinue()) {
        // Memory-leak testing hook: inject an exception once mSorter has finished sorting.
        ExternalSortBatch.injector.injectUnchecked(context.getExecutionControls(), ExternalSortBatch.INTERRUPTION_AFTER_SORT);
        sv4 = mSorter.getSV4();
        destContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
        return sv4;
    }

    // The sort may have exited prematurely because shouldContinue() returned false.
    return null;
}
Also used : SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) RecordBatchData(org.apache.drill.exec.physical.impl.sort.RecordBatchData) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder)

Example 3 with SortRecordBatchBuilder

use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.

the class OrderedPartitionRecordBatch method saveSamples.

/**
 * Samples the incoming stream, sorts the samples and stores a thinned-out copy of them in the
 * distributed multimap under {@code mapKey}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Add the current incoming batch, then keep pulling batches until at least
 *       {@code recordsToSample} records are collected or upstream reports NONE, NOT_YET or STOP
 *       (in which case {@code upstreamNone} is set).</li>
 *   <li>Build the collected batches into one container and sort it by the configured
 *       orderings.</li>
 *   <li>Copy every Nth record, N = recordsSampled / (samplingFactor * partitions), into
 *       {@code containerToCache}; when the copier reports failure the vectors are zeroed and the
 *       copy is retried with a doubled allocation size.</li>
 *   <li>Wrap the result and put it into {@code mmap}; remember the batches held by the builder
 *       in {@code sampledIncomingBatches}.</li>
 * </ol>
 * The builder, the writable batch, the cache container and the cached wrapper are cleared in
 * the finally block whether or not sampling succeeds.
 *
 * <p>NOTE(review): the first {@code builder.add(incoming)} happens before any call to
 * {@code next(incoming)}, so this presumably expects {@code incoming} to already hold a batch -
 * confirm against the caller.
 *
 * @return always {@code true}
 * @throws SchemaChangeException declared by this method; NOTE(review): presumably from building
 *         or sorting the samples - confirm
 * @throws ClassTransformationException declared by this method; NOTE(review): presumably from
 *         generating the sorter or copier - confirm
 * @throws IOException declared by this method; NOTE(review): origin not visible here - confirm
 */
@SuppressWarnings("resource")
private boolean saveSamples() throws SchemaChangeException, ClassTransformationException, IOException {
    recordsSampled = 0;
    // Start collecting batches until recordsToSample records have been collected
    SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    WritableBatch batch = null;
    CachedVectorContainer sampleToSave = null;
    VectorContainer containerToCache = new VectorContainer();
    try {
        builder.add(incoming);
        recordsSampled += incoming.getRecordCount();
        outer: while (recordsSampled < recordsToSample) {
            final IterOutcome upstream = next(incoming);
            switch (upstream) {
                case NONE:
                case NOT_YET:
                case STOP:
                    // No further batch is available right now: stop sampling with what we have.
                    upstreamNone = true;
                    break outer;
                default:
                    // Every other outcome carries a batch to sample.
                    break;
            }
            // NONE never reaches this point (handled by the switch above), so no further
            // end-of-stream check is needed after adding the batch.
            builder.add(incoming);
            recordsSampled += incoming.getRecordCount();
        }
        VectorContainer sortedSamples = new VectorContainer();
        builder.build(context, sortedSamples);
        // Sort the records according the orderings given in the configuration
        Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
        SelectionVector4 sv4 = builder.getSv4();
        sorter.setup(context, sv4, sortedSamples);
        sorter.sort(sv4, sortedSamples);
        // Project every Nth record to a new vector container, where
        // N = recordsSampled / (samplingFactor * partitions). The expressions from the Orderings
        // are used to populate each column; there is one column for each Ordering in
        // popConfig.orderings.
        List<ValueVector> localAllocationVectors = Lists.newArrayList();
        SampleCopier copier = getCopier(sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
        int allocationSize = 50;
        // Retry the copy with a doubled allocation size until the copier reports success.
        while (true) {
            for (ValueVector vv : localAllocationVectors) {
                AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
            }
            if (copier.copyRecords(recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
                break;
            } else {
                containerToCache.zeroVectors();
                allocationSize *= 2;
            }
        }
        for (VectorWrapper<?> vw : containerToCache) {
            vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        containerToCache.setRecordCount(copier.getOutputRecords());
        // Get a distributed multimap handle from the distributed cache, and put the vectors from the new vector container
        // into a serializable wrapper object, and then add to distributed map
        batch = WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
        sampleToSave = new CachedVectorContainer(batch, context.getAllocator());
        mmap.put(mapKey, sampleToSave);
        // Keep the sampled batches: they are taken from the builder before it is cleared below.
        this.sampledIncomingBatches = builder.getHeldRecordBatches();
    } finally {
        builder.clear();
        builder.close();
        if (batch != null) {
            batch.clear();
        }
        containerToCache.clear();
        if (sampleToSave != null) {
            sampleToSave.clear();
        }
    }
    return true;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 4 with SortRecordBatchBuilder

use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.

the class OrderedPartitionRecordBatch method buildTable.

/**
 * Builds a candidate partition table from all samples stored in the distributed map and offers
 * it to {@code tableMap}.
 *
 * <p>All sample batches stored under {@code mapKey} are merged and sorted; every Nth record
 * (N = total sampled records / partitions) is then copied into a candidate table with
 * {@code partitions - 1} entries. The candidate is offered with {@code putIfAbsent}, so only the
 * first candidate to get pushed will be used. All containers, the builder and the cached wrapper
 * are cleared in the finally block.
 *
 * @throws SchemaChangeException declared by this method; NOTE(review): presumably from building
 *         or sorting the samples - confirm
 * @throws ClassTransformationException declared by this method; NOTE(review): presumably from
 *         generating the sorter or copier - confirm
 * @throws IOException declared by this method; NOTE(review): origin not visible here - confirm
 */
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
    @SuppressWarnings("resource") SortRecordBatchBuilder sampleBuilder = new SortRecordBatchBuilder(context.getAllocator());
    final VectorContainer mergedSamples = new VectorContainer();
    final VectorContainer candidateTable = new VectorContainer();
    CachedVectorContainer cachedTable = null;
    try {
        // Pull every sample batch stored under this key out of the distributed map.
        for (CachedVectorContainer cachedSample : mmap.get(mapKey)) {
            sampleBuilder.add(cachedSample.get());
        }
        sampleBuilder.build(context, mergedSamples);

        // One ordering per configured ordering, keeping its direction but addressing the
        // sample columns by the simple paths f0, f1, ...
        final List<Ordering> sampleOrderings = Lists.newArrayList();
        int fieldIndex = 0;
        for (Ordering configured : popConfig.getOrderings()) {
            final SchemaPath fieldPath = SchemaPath.getSimplePath("f" + fieldIndex);
            fieldIndex++;
            sampleOrderings.add(new Ordering(configured.getDirection(), new FieldReference(fieldPath)));
        }

        // Sort the merged incoming samples.
        @SuppressWarnings("resource") final SelectionVector4 sampleSv4 = sampleBuilder.getSv4();
        final Sorter sampleSorter = SortBatch.createNewSorter(context, sampleOrderings, mergedSamples);
        sampleSorter.setup(context, sampleSv4, mergedSamples);
        sampleSorter.sort(sampleSv4, mergedSamples);

        // Copy every Nth sample into the candidate table, N = totalSampledRecords / partitions.
        final List<ValueVector> outputVectors = Lists.newArrayList();
        final SampleCopier tableCopier = getCopier(sampleSv4, mergedSamples, candidateTable, sampleOrderings, outputVectors);
        int allocationSize = 50;
        boolean copied = false;
        while (!copied) {
            for (ValueVector vector : outputVectors) {
                AllocationHelper.allocate(vector, samplingFactor * partitions, allocationSize);
            }
            final int stride = sampleBuilder.getSv4().getTotalCount() / partitions;
            copied = tableCopier.copyRecords(stride, stride, partitions - 1);
            if (copied) {
                assert tableCopier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", tableCopier.getOutputRecords(), partitions);
                for (VectorWrapper<?> wrapper : candidateTable) {
                    wrapper.getValueVector().getMutator().setValueCount(tableCopier.getOutputRecords());
                }
            } else {
                // The copy did not succeed: reset the vectors and retry with twice the allocation size.
                candidateTable.zeroVectors();
                allocationSize *= 2;
            }
        }
        candidateTable.setRecordCount(tableCopier.getOutputRecords());

        // Offer the candidate to the distributed map; an already-present table is left in place.
        @SuppressWarnings("resource") WritableBatch tableBatch = WritableBatch.getBatchNoHVWrap(candidateTable.getRecordCount(), candidateTable, false);
        cachedTable = new CachedVectorContainer(tableBatch, context.getDrillbitContext().getAllocator());
        tableMap.putIfAbsent(mapKey + "final", cachedTable, 1, TimeUnit.MINUTES);
    } finally {
        candidateTable.clear();
        mergedSamples.clear();
        sampleBuilder.clear();
        sampleBuilder.close();
        if (cachedTable != null) {
            cachedTable.clear();
        }
    }
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 5 with SortRecordBatchBuilder

use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.

the class TopNBatch method purge.

/**
 * Rebuilds the data behind the priority queue from the records its heap SV4 addresses.
 *
 * <p>The records are copied out of the queue's hyper batch, one SV4 batch at a time, into fresh
 * batches collected by a SortRecordBatchBuilder. The old SV4 and hyper batch are cleared, the
 * collected batches are built into a new container, and the queue is reset with that container
 * and a new wrapper of the builder's SV4. The builder is closed in the finally block, and the
 * elapsed time is logged at debug level when no exception occurred.
 *
 * @throws SchemaChangeException declared by this method; NOTE(review): presumably raised by the
 *         copier setup or the builder - confirm
 */
private void purge() throws SchemaChangeException {
    Stopwatch watch = Stopwatch.createStarted();
    // Source side of the purge: the current queue's hyper batch and the SV4 over its heap.
    VectorContainer c = priorityQueue.getHyperBatch();
    // Destination side: a fresh container that receives the copied records.
    VectorContainer newContainer = new VectorContainer(oContext);
    @SuppressWarnings("resource") SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
    SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
    SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
    if (copier == null) {
        // First use: generate the copier (newContainer is passed to the generator).
        copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(), newContainer, newBatch, null);
    } else {
        // Reuse the existing copier: add one new vector per field of the source batch to
        // newContainer, then point the copier at the new source/destination pair.
        for (VectorWrapper<?> i : batch) {
            @SuppressWarnings("resource") ValueVector v = TypeHelper.getNewVector(i.getField(), oContext.getAllocator());
            newContainer.add(v);
        }
        copier.setupRemover(context, batch, newBatch);
    }
    @SuppressWarnings("resource") SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    try {
        // Copy one SV4 batch per iteration into newContainer and hand it to the builder; the
        // loop runs at least once and continues while the SV4 reports another batch.
        do {
            int count = selectionVector4.getCount();
            int copiedRecords = copier.copyRecords(0, count);
            assert copiedRecords == count;
            for (VectorWrapper<?> v : newContainer) {
                ValueVector.Mutator m = v.getValueVector().getMutator();
                m.setValueCount(count);
            }
            newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            newContainer.setRecordCount(count);
            builder.add(newBatch);
        } while (selectionVector4.next());
        // Everything has been copied: clear the old SV4 and the old hyper batch.
        selectionVector4.clear();
        c.clear();
        // Build the purged batches into the container that now backs the queue.
        VectorContainer newQueue = new VectorContainer();
        builder.canonicalize();
        builder.build(context, newQueue);
        priorityQueue.resetQueue(newQueue, builder.getSv4().createNewWrapperCurrent());
        // The queue received its own wrapper above, so the builder's SV4 is cleared here.
        builder.getSv4().clear();
        // NOTE(review): selectionVector4 was already cleared before the build; this second
        // clear() looks redundant - confirm before removing.
        selectionVector4.clear();
    } finally {
        // Only close() is invoked here; builder.clear() is not called in this method.
        // NOTE(review): presumably because newQueue still uses the built data - confirm.
        DrillAutoCloseables.closeNoChecked(builder);
    }
    logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS));
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Stopwatch(com.google.common.base.Stopwatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Aggregations

- SortRecordBatchBuilder (org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder): 6
- VectorContainer (org.apache.drill.exec.record.VectorContainer): 5
- SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 4
- ValueVector (org.apache.drill.exec.vector.ValueVector): 4
- Stopwatch (com.google.common.base.Stopwatch): 3
- CachedVectorContainer (org.apache.drill.exec.cache.CachedVectorContainer): 2
- SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 2
- RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData): 2
- Sorter (org.apache.drill.exec.physical.impl.sort.Sorter): 2
- WritableBatch (org.apache.drill.exec.record.WritableBatch): 2
- IOException (java.io.IOException): 1
- FieldReference (org.apache.drill.common.expression.FieldReference): 1
- SchemaPath (org.apache.drill.common.expression.SchemaPath): 1
- Ordering (org.apache.drill.common.logical.data.Order.Ordering): 1
- ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException): 1
- OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 1
- VectorWrapper (org.apache.drill.exec.record.VectorWrapper): 1
- SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2): 1