Search in sources :

Example 1 with CachedVectorContainer

use of org.apache.drill.exec.cache.CachedVectorContainer in project drill by apache.

the class OrderedPartitionRecordBatch method getPartitionVectors.

/**
 * Called when the first batch arrives. Incoming batches are collected until a threshold is met; the records in
 * those batches are then sorted and sampled, and the sampled records are stored in the distributed cache. Once a
 * sufficient fraction of the fragments have shared their samples, each fragment grabs all the samples, sorts all
 * the records, builds a partition table, and attempts to push that table to the distributed cache. Whichever
 * table gets pushed first becomes the table used by all fragments for partitioning.
 *
 * @return {@code true} if successful, {@code false} if sampling failed, a wait timed out, or an exception was
 *         reported to the fragment context.
 */
private boolean getPartitionVectors() {
    try {
        if (!saveSamples()) {
            return false;
        }

        final long sampleCount = minorFragmentSampleCount.incrementAndGet();
        logger.debug("Incremented mfsc, got {}", sampleCount);
        final long fragmentsBeforeProceed = (long) Math.ceil(sendingMajorFragmentWidth * completionFactor);
        final String finalTableKey = mapKey + "final";

        if (sampleCount == fragmentsBeforeProceed) {
            // This fragment is the one that crossed the barrier: build the table right away.
            buildTable();
        } else {
            // Fragments that arrive before the barrier pause once before they start polling.
            if (sampleCount < fragmentsBeforeProceed && !waitUntilTimeOut(10)) {
                return false;
            }

            // Poll for a published table, at most 100 times, waiting between unsuccessful lookups.
            boolean tablePublished = false;
            int attempt = 0;
            while (attempt < 100) {
                if (tableMap.get(finalTableKey) != null) {
                    tablePublished = true;
                    break;
                }
                if (!waitUntilTimeOut(10)) {
                    return false;
                }
                attempt++;
            }

            // Nobody published a table in time, so build (and try to publish) our own candidate.
            if (!tablePublished) {
                buildTable();
            }
        }

        // Both paths read the table back from the map, so the copy stored under the key is the one used.
        final CachedVectorContainer finalTable = tableMap.get(finalTableKey);
        Preconditions.checkState(finalTable != null);

        // Expose the table's vectors to the rest of this operator.
        for (VectorWrapper<?> wrapper : finalTable.get()) {
            partitionVectors.add(wrapper.getValueVector());
        }
        return true;
    } catch (final ClassTransformationException | IOException | SchemaChangeException ex) {
        kill(false);
        context.fail(ex);
        return false;
    // TODO InterruptedException
    }
}
Also used : SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) ClassTransformationException(org.apache.drill.exec.exception.ClassTransformationException) IOException(java.io.IOException) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer)

Example 2 with CachedVectorContainer

use of org.apache.drill.exec.cache.CachedVectorContainer in project drill by apache.

the class OrderedPartitionRecordBatch method saveSamples.

/**
 * Gathers incoming batches until {@code recordsToSample} records have been collected (or upstream reports
 * {@code NONE}, {@code NOT_YET} or {@code STOP}), sorts them by the configured orderings, copies every Nth
 * record into a fresh container and puts that sample into the distributed multimap under {@code mapKey}.
 *
 * <p>The batches held by the builder are stored in {@code sampledIncomingBatches} before the builder is
 * cleared and closed.
 *
 * @return always {@code true} when the method completes without throwing
 * @throws SchemaChangeException declared; propagated from the calls made in this method
 * @throws ClassTransformationException declared; propagated from the calls made in this method
 * @throws IOException declared; propagated from the calls made in this method
 */
@SuppressWarnings("resource")
private boolean saveSamples() throws SchemaChangeException, ClassTransformationException, IOException {
    recordsSampled = 0;
    // Start collecting batches until recordsToSample records have been collected
    SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    WritableBatch batch = null;
    CachedVectorContainer sampleToSave = null;
    VectorContainer containerToCache = new VectorContainer();
    try {
        builder.add(incoming);
        recordsSampled += incoming.getRecordCount();
        while (recordsSampled < recordsToSample) {
            final IterOutcome upstream = next(incoming);
            // Any of these outcomes ends sampling; the batch is not added in that case.
            if (upstream == IterOutcome.NONE || upstream == IterOutcome.NOT_YET || upstream == IterOutcome.STOP) {
                upstreamNone = true;
                break;
            }
            builder.add(incoming);
            recordsSampled += incoming.getRecordCount();
        }

        VectorContainer sortedSamples = new VectorContainer();
        builder.build(context, sortedSamples);

        // Sort the records according to the orderings given in the configuration
        Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
        SelectionVector4 sv4 = builder.getSv4();
        sorter.setup(context, sv4, sortedSamples);
        sorter.sort(sv4, sortedSamples);

        // Project every Nth record to a new vector container, where N = recordsSampled/(samplingFactor * partitions).
        // Uses the expressions from the Orderings to populate each column. There is one column for each Ordering in
        // popConfig.orderings.
        List<ValueVector> localAllocationVectors = Lists.newArrayList();
        SampleCopier copier = getCopier(sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);

        // Retry the copy with a doubled allocation size until it succeeds.
        for (int allocationSize = 50; ; allocationSize *= 2) {
            for (ValueVector vv : localAllocationVectors) {
                AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
            }
            if (copier.copyRecords(recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
                break;
            }
            containerToCache.zeroVectors();
        }

        for (VectorWrapper<?> vw : containerToCache) {
            vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        containerToCache.setRecordCount(copier.getOutputRecords());

        // Get a distributed multimap handle from the distributed cache, and put the vectors from the new vector container
        // into a serializable wrapper object, and then add to distributed map
        batch = WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
        sampleToSave = new CachedVectorContainer(batch, context.getAllocator());
        mmap.put(mapKey, sampleToSave);
        this.sampledIncomingBatches = builder.getHeldRecordBatches();
    } finally {
        // Cleanup order is kept exactly as it was: builder first, then the batch, container and cached wrapper.
        builder.clear();
        builder.close();
        if (batch != null) {
            batch.clear();
        }
        containerToCache.clear();
        if (sampleToSave != null) {
            sampleToSave.clear();
        }
    }
    return true;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 3 with CachedVectorContainer

use of org.apache.drill.exec.cache.CachedVectorContainer in project drill by apache.

the class OrderedPartitionRecordBatch method buildTable.

/**
 * Builds a candidate partition table from all samples stored in the distributed multimap under {@code mapKey}
 * and tries to publish it under {@code mapKey + "final"}. The table is stored with {@code putIfAbsent}, so an
 * already-present entry is left in place.
 *
 * @throws SchemaChangeException declared; propagated from the calls made in this method
 * @throws ClassTransformationException declared; propagated from the calls made in this method
 * @throws IOException declared; propagated from the calls made in this method
 */
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
    @SuppressWarnings("resource") SortRecordBatchBuilder sampleBuilder = new SortRecordBatchBuilder(context.getAllocator());
    final VectorContainer mergedSamples = new VectorContainer();
    final VectorContainer partitionTable = new VectorContainer();
    CachedVectorContainer cachedTable = null;
    try {
        // Pull every sample shared so far out of the distributed map.
        for (CachedVectorContainer sample : mmap.get(mapKey)) {
            sampleBuilder.add(sample.get());
        }
        sampleBuilder.build(context, mergedSamples);

        // Re-target each configured ordering at the sample columns, which are addressed as f0, f1, ...
        List<Ordering> sampleOrderings = Lists.newArrayList();
        int columnIndex = 0;
        for (Ordering configured : popConfig.getOrderings()) {
            SchemaPath columnPath = SchemaPath.getSimplePath("f" + columnIndex);
            columnIndex++;
            sampleOrderings.add(new Ordering(configured.getDirection(), new FieldReference(columnPath)));
        }

        // Sort the merged samples.
        @SuppressWarnings("resource") SelectionVector4 sampleSv4 = sampleBuilder.getSv4();
        Sorter sorter = SortBatch.createNewSorter(context, sampleOrderings, mergedSamples);
        sorter.setup(context, sampleSv4, mergedSamples);
        sorter.sort(sampleSv4, mergedSamples);

        // Copy every Nth record from the samples into the candidate table, where N = totalSampledRecords/partitions.
        // The candidate is then offered to the distributed map; only the first one pushed will be used.
        List<ValueVector> localAllocationVectors = Lists.newArrayList();
        SampleCopier copier = getCopier(sampleSv4, mergedSamples, partitionTable, sampleOrderings, localAllocationVectors);

        // Retry the copy with a doubled allocation size until it succeeds.
        for (int allocationSize = 50; ; allocationSize *= 2) {
            for (ValueVector vector : localAllocationVectors) {
                AllocationHelper.allocate(vector, samplingFactor * partitions, allocationSize);
            }
            int skipRecords = sampleBuilder.getSv4().getTotalCount() / partitions;
            if (!copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
                partitionTable.zeroVectors();
                continue;
            }
            assert copier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
            for (VectorWrapper<?> wrapper : partitionTable) {
                wrapper.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
            }
            break;
        }
        partitionTable.setRecordCount(copier.getOutputRecords());

        @SuppressWarnings("resource") WritableBatch tableBatch = WritableBatch.getBatchNoHVWrap(partitionTable.getRecordCount(), partitionTable, false);
        cachedTable = new CachedVectorContainer(tableBatch, context.getDrillbitContext().getAllocator());
        tableMap.putIfAbsent(mapKey + "final", cachedTable, 1, TimeUnit.MINUTES);
    } finally {
        partitionTable.clear();
        mergedSamples.clear();
        sampleBuilder.clear();
        sampleBuilder.close();
        if (cachedTable != null) {
            cachedTable.clear();
        }
    }
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Aggregations

CachedVectorContainer (org.apache.drill.exec.cache.CachedVectorContainer)3 SortRecordBatchBuilder (org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder)2 Sorter (org.apache.drill.exec.physical.impl.sort.Sorter)2 VectorContainer (org.apache.drill.exec.record.VectorContainer)2 WritableBatch (org.apache.drill.exec.record.WritableBatch)2 SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)2 ValueVector (org.apache.drill.exec.vector.ValueVector)2 IOException (java.io.IOException)1 FieldReference (org.apache.drill.common.expression.FieldReference)1 SchemaPath (org.apache.drill.common.expression.SchemaPath)1 Ordering (org.apache.drill.common.logical.data.Order.Ordering)1 ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException)1 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)1