Search in sources :

Example 16 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

the class OrderedPartitionRecordBatch method buildTable.

/**
 * Builds a candidate partition table from the samples stored under {@code mapKey} and offers it to
 * {@code tableMap} under the key {@code mapKey + "final"} via {@code putIfAbsent}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Every cached sample container found under {@code mapKey} is added to a
 *       {@link SortRecordBatchBuilder} and built into a single samples container.</li>
 *   <li>The operator's orderings are re-targeted at fields named {@code f0}, {@code f1}, ... and
 *       the samples are sorted through the builder's selection vector.</li>
 *   <li>Every Nth sample (N = total sampled records / partitions) is copied into the candidate
 *       table; when the copy reports failure the vectors are zeroed, the allocation size is
 *       doubled and the copy is attempted again.</li>
 *   <li>The candidate table is wrapped and offered to the distributed map. Only the first
 *       candidate to get pushed will be used.</li>
 * </ol>
 *
 * <p>All containers created here, the builder and the wrapper (if one was created) are released
 * in the {@code finally} block regardless of outcome.
 *
 * @throws SchemaChangeException if raised by any of the calls made while building the table
 * @throws ClassTransformationException if raised by any of the calls made while building the table
 * @throws IOException if raised by any of the calls made while building the table
 */
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
    @SuppressWarnings("resource") SortRecordBatchBuilder sampleBuilder = new SortRecordBatchBuilder(context.getAllocator());
    final VectorContainer sampledRows = new VectorContainer();
    final VectorContainer partitionTable = new VectorContainer();
    CachedVectorContainer cachedTable = null;
    try {
        // Pull every sample out of the distributed map and merge them into one container.
        for (CachedVectorContainer cachedSample : mmap.get(mapKey)) {
            sampleBuilder.add(cachedSample.get());
        }
        sampleBuilder.build(context, sampledRows);

        // Re-express the configured orderings against the positional field names f0, f1, ...
        List<Ordering> sampleOrderings = Lists.newArrayList();
        int fieldIndex = 0;
        for (Ordering configured : popConfig.getOrderings()) {
            SchemaPath positionalPath = SchemaPath.getSimplePath("f" + fieldIndex);
            fieldIndex++;
            sampleOrderings.add(new Ordering(configured.getDirection(), new FieldReference(positionalPath)));
        }

        // Sort the incoming samples through the builder's selection vector.
        @SuppressWarnings("resource") SelectionVector4 sampleSv4 = sampleBuilder.getSv4();
        Sorter sampleSorter = SortBatch.createNewSorter(context, sampleOrderings, sampledRows);
        sampleSorter.setup(context, sampleSv4, sampledRows);
        sampleSorter.sort(sampleSv4, sampledRows);

        // Copy every Nth record from the samples into the candidate partition table,
        // where N = totalSampledRecords / partitions.
        List<ValueVector> vectorsToAllocate = Lists.newArrayList();
        final SampleCopier copier = getCopier(sampleSv4, sampledRows, partitionTable, sampleOrderings, vectorsToAllocate);

        int allocationSize = 50;
        boolean copied = false;
        while (!copied) {
            for (ValueVector target : vectorsToAllocate) {
                AllocationHelper.allocate(target, samplingFactor * partitions, allocationSize);
            }
            int skipRecords = sampleBuilder.getSv4().getTotalCount() / partitions;
            copied = copier.copyRecords(skipRecords, skipRecords, partitions - 1);
            if (!copied) {
                // The copy did not succeed: discard what was written and retry with twice the allocation size.
                partitionTable.zeroVectors();
                allocationSize *= 2;
                continue;
            }
            assert copier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
            for (VectorWrapper<?> column : partitionTable) {
                column.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
            }
        }
        partitionTable.setRecordCount(copier.getOutputRecords());

        // Attempt to push the candidate to the distributed map; putIfAbsent keeps any entry already there.
        @SuppressWarnings("resource") WritableBatch candidateBatch = WritableBatch.getBatchNoHVWrap(partitionTable.getRecordCount(), partitionTable, false);
        cachedTable = new CachedVectorContainer(candidateBatch, context.getDrillbitContext().getAllocator());
        tableMap.putIfAbsent(mapKey + "final", cachedTable, 1, TimeUnit.MINUTES);
    } finally {
        partitionTable.clear();
        sampledRows.clear();
        sampleBuilder.clear();
        sampleBuilder.close();
        if (cachedTable != null) {
            cachedTable.clear();
        }
    }
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 17 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

the class FrameSupportTemplate method setup.

/**
 * Stores the supplied collaborators on this instance and (re)creates the internal container.
 *
 * <p>A fresh {@link VectorContainer} bound to {@code oContext} is assigned to {@code internal} and
 * {@code allocateInternal()} is invoked right after; the output count is then reset to zero.
 *
 * @param batches window data batches to keep a reference to
 * @param container container to keep a reference to
 * @param oContext operator context handed to the new internal container
 * @param requireFullPartition flag stored as-is on this instance
 * @param popConfig window operator configuration stored as-is on this instance
 * @throws SchemaChangeException declared by the overridden method
 */
@Override
public void setup(
        final List<WindowDataBatch> batches,
        final VectorContainer container,
        final OperatorContext oContext,
        final boolean requireFullPartition,
        final WindowPOP popConfig) throws SchemaChangeException {
    this.batches = batches;
    this.container = container;

    // The internal container is set up only once both references above are in place.
    this.internal = new VectorContainer(oContext);
    allocateInternal();

    this.outputCount = 0;
    this.requireFullPartition = requireFullPartition;
    this.popConfig = popConfig;
}
Also used : VectorContainer(org.apache.drill.exec.record.VectorContainer)

Example 18 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

the class SortRecordBatchBuilder method getHeldRecordBatches.

/**
 * Returns the containers of all batches currently held by this builder and forgets them.
 *
 * <p>Each container has its record count set from the {@code RecordBatchData} that owns it before
 * being added to the result. After the containers are collected, {@code batches} is cleared, so a
 * second call returns nothing that was returned by the first.
 *
 * @return the held containers, grouped in the iteration order of {@code batches.keySet()}
 */
public List<VectorContainer> getHeldRecordBatches() {
    final List<VectorContainer> heldContainers = Lists.newArrayList();
    for (BatchSchema schema : batches.keySet()) {
        for (RecordBatchData heldData : batches.get(schema)) {
            final VectorContainer heldContainer = heldData.getContainer();
            heldContainer.setRecordCount(heldData.getRecordCount());
            heldContainers.add(heldContainer);
        }
    }
    batches.clear();
    return heldContainers;
}
Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Example 19 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

the class TopNBatch method purge.

/**
 * Copies the records selected by the priority queue's heap selection vector into fresh batches,
 * rebuilds a new queue container from those batches and resets the priority queue with it.
 *
 * <p>The copier is generated on first use; on later calls new vectors matching the source fields
 * are added to the target container and the existing copier is set up again. The elapsed time is
 * logged at debug level in microseconds.
 *
 * @throws SchemaChangeException if raised by the copier setup, the builder or the queue reset
 */
private void purge() throws SchemaChangeException {
    Stopwatch timer = Stopwatch.createStarted();
    VectorContainer oldQueueData = priorityQueue.getHyperBatch();
    VectorContainer copyTarget = new VectorContainer(oContext);
    @SuppressWarnings("resource") SelectionVector4 queueSv4 = priorityQueue.getHeapSv4();
    SimpleRecordBatch sourceBatch = new SimpleRecordBatch(oldQueueData, queueSv4, context);
    SimpleRecordBatch targetBatch = new SimpleRecordBatch(copyTarget, null, context);

    if (copier != null) {
        // Reuse the existing copier: give the target one new vector per source field, then re-bind it.
        for (VectorWrapper<?> sourceColumn : sourceBatch) {
            @SuppressWarnings("resource") ValueVector freshVector = TypeHelper.getNewVector(sourceColumn.getField(), oContext.getAllocator());
            copyTarget.add(freshVector);
        }
        copier.setupRemover(context, sourceBatch, targetBatch);
    } else {
        copier = RemovingRecordBatch.getGenerated4Copier(sourceBatch, context, oContext.getAllocator(), copyTarget, targetBatch, null);
    }

    @SuppressWarnings("resource") SortRecordBatchBuilder rebuiltBatches = new SortRecordBatchBuilder(oContext.getAllocator());
    try {
        // Copy one selection-vector window at a time until next() reports there are no more.
        do {
            int selected = queueSv4.getCount();
            int copiedRecords = copier.copyRecords(0, selected);
            assert copiedRecords == selected;
            for (VectorWrapper<?> targetColumn : copyTarget) {
                targetColumn.getValueVector().getMutator().setValueCount(selected);
            }
            copyTarget.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            copyTarget.setRecordCount(selected);
            rebuiltBatches.add(targetBatch);
        } while (queueSv4.next());

        // The old selection vector and queue data are released before the new queue is built.
        queueSv4.clear();
        oldQueueData.clear();

        VectorContainer replacementQueue = new VectorContainer();
        rebuiltBatches.canonicalize();
        rebuiltBatches.build(context, replacementQueue);
        priorityQueue.resetQueue(replacementQueue, rebuiltBatches.getSv4().createNewWrapperCurrent());
        rebuiltBatches.getSv4().clear();
        queueSv4.clear();
    } finally {
        DrillAutoCloseables.closeNoChecked(rebuiltBatches);
    }
    logger.debug("Took {} us to purge", timer.elapsed(TimeUnit.MICROSECONDS));
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Stopwatch(com.google.common.base.Stopwatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 20 with VectorContainer

use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

the class PriorityQueueTemplate method resetQueue.

/**
 * Replaces the queue's hyper batch and heap selection vector with the contents of the given
 * container and selection vector.
 *
 * <p>The value vectors of every field in {@code container}'s schema are re-added to a new
 * container, which then backs a new {@link ExpandableHyperContainer}. A new heap selection vector
 * is allocated and filled with the entries of {@code v4}; {@code queueSize} ends up equal to the
 * number of entries copied. {@code v4} is cleared before {@code doSetup} is invoked.
 *
 * @param container source container; asserted to be in {@code FOUR_BYTE} selection vector mode
 * @param v4 selection vector whose entries are copied into the new heap selection vector
 * @throws SchemaChangeException declared by the overridden method
 */
@Override
public void resetQueue(VectorContainer container, SelectionVector4 v4) throws SchemaChangeException {
    assert container.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.FOUR_BYTE;
    final BatchSchema incomingSchema = container.getSchema();

    // Re-collect the vectors of each field into a new container.
    final VectorContainer regrouped = new VectorContainer();
    for (MaterializedField field : incomingSchema) {
        final SchemaPath fieldPath = SchemaPath.getSimplePath(field.getPath());
        final int[] fieldIds = container.getValueVectorId(fieldPath).getFieldIds();
        regrouped.add(container.getValueAccessorById(field.getValueClass(), fieldIds).getValueVectors());
    }
    regrouped.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);

    // Cleanup before recreating hyperbatch and sv4.
    cleanup();
    hyperBatch = new ExpandableHyperContainer(regrouped);
    batchCount = hyperBatch.iterator().next().getValueVectors().length;
    @SuppressWarnings("resource") final DrillBuf sv4Buffer = allocator.buffer(4 * (limit + 1));
    heapSv4 = new SelectionVector4(sv4Buffer, limit, Character.MAX_VALUE);

    // Reset queue size (most likely to be set to limit), then count each entry as it is copied.
    queueSize = 0;
    int index = 0;
    while (index < v4.getTotalCount()) {
        heapSv4.set(index, v4.get(index));
        queueSize++;
        index++;
    }
    v4.clear();
    doSetup(context, hyperBatch, null);
}
Also used : ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) BatchSchema(org.apache.drill.exec.record.BatchSchema) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorContainer(org.apache.drill.exec.record.VectorContainer) DrillBuf(io.netty.buffer.DrillBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Aggregations

- VectorContainer (org.apache.drill.exec.record.VectorContainer): 27
- ValueVector (org.apache.drill.exec.vector.ValueVector): 11
- MaterializedField (org.apache.drill.exec.record.MaterializedField): 8
- SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 6
- SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 6
- Stopwatch (com.google.common.base.Stopwatch): 5
- SortRecordBatchBuilder (org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder): 5
- IOException (java.io.IOException): 4
- SchemaPath (org.apache.drill.common.expression.SchemaPath): 4
- BatchSchema (org.apache.drill.exec.record.BatchSchema): 4
- CachedVectorContainer (org.apache.drill.exec.cache.CachedVectorContainer): 3
- VectorAccessibleSerializable (org.apache.drill.exec.cache.VectorAccessibleSerializable): 3
- VectorWrapper (org.apache.drill.exec.record.VectorWrapper): 3
- WritableBatch (org.apache.drill.exec.record.WritableBatch): 3
- DrillBuf (io.netty.buffer.DrillBuf): 2
- LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 2
- MajorType (org.apache.drill.common.types.TypeProtos.MajorType): 2
- ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException): 2
- OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 2
- RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData): 2