Search in sources :

Example 1 with WritableBatch

use of org.apache.drill.exec.record.WritableBatch in project drill by apache.

the class BatchGroup method addBatch.

/**
 * Serializes the given container to this group's output stream and then calls
 * {@code zeroVectors()} on the container.
 *
 * <p>The output stream is created from {@code fs} and {@code path} the first time it is needed.
 * Each call increments {@code spilledBatches} by one.
 *
 * @param newContainer the container whose records are written out
 * @throws IOException if creating the stream or writing the batch fails
 */
public void addBatch(VectorContainer newContainer) throws IOException {
    assert fs != null;
    assert path != null;
    // Open the output stream lazily, on the first batch only.
    if (null == outputStream) {
        outputStream = fs.create(path);
    }
    final int rowCount = newContainer.getRecordCount();
    final WritableBatch writable = WritableBatch.getBatchNoHVWrap(rowCount, newContainer, false);
    final VectorAccessibleSerializable serializer = new VectorAccessibleSerializable(writable, allocator);
    final Stopwatch timer = Stopwatch.createStarted();
    serializer.writeToStream(outputStream);
    newContainer.zeroVectors();
    // The measured interval covers both the write and the zeroVectors() call.
    final long elapsedMicros = timer.elapsed(TimeUnit.MICROSECONDS);
    logger.debug("Took {} us to spill {} records", elapsedMicros, rowCount);
    spilledBatches++;
}
Also used : VectorAccessibleSerializable(org.apache.drill.exec.cache.VectorAccessibleSerializable) Stopwatch(com.google.common.base.Stopwatch) WritableBatch(org.apache.drill.exec.record.WritableBatch)

Example 2 with WritableBatch

use of org.apache.drill.exec.record.WritableBatch in project drill by apache.

the class TraceRecordBatch method doWork.

/**
 * Invoked for every record batch: writes the incoming batch (and its two-byte selection
 * vector, when the incoming schema uses one) to the {@code fos} stream, then reconstructs
 * the output container from the written batch.
 *
 * @return always {@link IterOutcome#OK}
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
@Override
protected IterOutcome doWork() {
    final boolean hasSv2 = SelectionVectorMode.TWO_BYTE == incoming.getSchema().getSelectionVectorMode();
    sv = hasSv2 ? incoming.getSelectionVector2() : null;

    final WritableBatch writable = WritableBatch.getBatchNoHVWrap(incoming.getRecordCount(), incoming, hasSv2);
    final VectorAccessibleSerializable serializer =
        new VectorAccessibleSerializable(writable, sv, oContext.getAllocator());
    try {
        serializer.writeToStreamAndRetain(fos);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    writable.reconstructContainer(localAllocator, container);
    // After the write, take the selection vector back from the serializer.
    if (hasSv2) {
        sv = serializer.getSv2();
    }
    return IterOutcome.OK;
}
Also used : VectorAccessibleSerializable(org.apache.drill.exec.cache.VectorAccessibleSerializable) WritableBatch(org.apache.drill.exec.record.WritableBatch) IOException(java.io.IOException)

Example 3 with WritableBatch

use of org.apache.drill.exec.record.WritableBatch in project drill by apache.

the class VectorRecordMaterializer method convertNext.

/**
 * Packages the current record batch as a {@link QueryWritableBatch}.
 *
 * <p>The batch's writable form is transferred to {@code allocator}; the resulting header
 * carries the query id, the batch's record count and the transferred batch's definition.
 *
 * @return a new {@link QueryWritableBatch} built from the header and the transferred buffers
 */
public QueryWritableBatch convertNext() {
    final WritableBatch transferred = batch.getWritableBatch().transfer(allocator);
    final QueryData header = QueryData.newBuilder()
        .setQueryId(queryId)
        .setRowCount(batch.getRecordCount())
        .setDef(transferred.getDef())
        .build();
    return new QueryWritableBatch(header, transferred.getBuffers());
}
Also used : QueryData(org.apache.drill.exec.proto.UserBitShared.QueryData) WritableBatch(org.apache.drill.exec.record.WritableBatch)

Example 4 with WritableBatch

use of org.apache.drill.exec.record.WritableBatch in project drill by apache.

the class OrderedPartitionRecordBatch method saveSamples.

/**
 * Collects incoming batches until at least {@code recordsToSample} records have been gathered
 * (or upstream stops producing), sorts them, copies every Nth record into a new container and
 * puts that container into {@code mmap} under {@code mapKey}.
 *
 * <p>Side effects visible in this method: resets and updates {@code recordsSampled}, may set
 * {@code upstreamNone}, and stores the builder's held batches in {@code sampledIncomingBatches}.
 * The builder, the writable batch, the cache container and the cached wrapper are all cleared in
 * the {@code finally} block, whether or not sampling succeeded.
 *
 * @return always {@code true}
 * @throws SchemaChangeException declared by the method; the throwing call is not visible here
 * @throws ClassTransformationException declared by the method; the throwing call is not visible here
 * @throws IOException declared by the method; the throwing call is not visible here
 */
@SuppressWarnings("resource")
private boolean saveSamples() throws SchemaChangeException, ClassTransformationException, IOException {
    recordsSampled = 0;
    IterOutcome upstream;
    // Start collecting batches until recordsToSample records have been collected
    SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    WritableBatch batch = null;
    CachedVectorContainer sampleToSave = null;
    VectorContainer containerToCache = new VectorContainer();
    try {
        // The batch currently held by 'incoming' is added before any further next() call.
        builder.add(incoming);
        recordsSampled += incoming.getRecordCount();
        outer: while (recordsSampled < recordsToSample) {
            upstream = next(incoming);
            switch(upstream) {
                case NONE:
                case NOT_YET:
                case STOP:
                    // Any of these outcomes ends sampling early and is recorded in upstreamNone.
                    upstreamNone = true;
                    break outer;
                default:
            }
            builder.add(incoming);
            recordsSampled += incoming.getRecordCount();
            // NOTE(review): NONE already leaves the loop via 'break outer' in the switch above,
            // so this check looks unreachable — confirm before removing.
            if (upstream == IterOutcome.NONE) {
                break;
            }
        }
        VectorContainer sortedSamples = new VectorContainer();
        builder.build(context, sortedSamples);
        // Sort the records according the orderings given in the configuration
        Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
        SelectionVector4 sv4 = builder.getSv4();
        sorter.setup(context, sv4, sortedSamples);
        sorter.sort(sv4, sortedSamples);
        // Project every Nth record to a new vector container, where N = recordsSampled/(samplingFactor * partitions).
        // Uses the expressions from the Orderings to populate each column. There is one column for each Ordering in
        // popConfig.orderings.
        List<ValueVector> localAllocationVectors = Lists.newArrayList();
        SampleCopier copier = getCopier(sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
        int allocationSize = 50;
        // Retry the copy with a doubled allocationSize each time copyRecords reports failure.
        while (true) {
            for (ValueVector vv : localAllocationVectors) {
                AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
            }
            if (copier.copyRecords(recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
                break;
            } else {
                // Zero the vectors from the failed attempt before allocating again.
                containerToCache.zeroVectors();
                allocationSize *= 2;
            }
        }
        // Set the value count on every output vector to the number of records actually copied.
        for (VectorWrapper<?> vw : containerToCache) {
            vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        containerToCache.setRecordCount(copier.getOutputRecords());
        // Get a distributed multimap handle from the distributed cache, and put the vectors from the new vector container
        // into a serializable wrapper object, and then add to distributed map
        batch = WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
        sampleToSave = new CachedVectorContainer(batch, context.getAllocator());
        mmap.put(mapKey, sampleToSave);
        // Keep the batches held by the builder in a field for use outside this method.
        this.sampledIncomingBatches = builder.getHeldRecordBatches();
    } finally {
        // Clear everything created here on both the success and the failure path.
        builder.clear();
        builder.close();
        if (batch != null) {
            batch.clear();
        }
        containerToCache.clear();
        if (sampleToSave != null) {
            sampleToSave.clear();
        }
    }
    return true;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) VectorContainer(org.apache.drill.exec.record.VectorContainer) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 5 with WritableBatch

use of org.apache.drill.exec.record.WritableBatch in project drill by apache.

the class OrderedPartitionRecordBatch method buildTable.

/**
 * Builds a candidate partition table from all samples stored in {@code mmap} under
 * {@code mapKey} and offers it to {@code tableMap} under the key {@code mapKey + "final"}.
 *
 * <p>The samples are combined into one container and sorted using orderings rebuilt against
 * columns named {@code f0}, {@code f1}, ...; then {@code partitions - 1} records, spaced
 * {@code totalCount / partitions} apart, are copied into the candidate table. The table is
 * published with {@code putIfAbsent}. All containers created here are cleared in the
 * {@code finally} block.
 *
 * @throws SchemaChangeException declared by the method; the throwing call is not visible here
 * @throws ClassTransformationException declared by the method; the throwing call is not visible here
 * @throws IOException declared by the method; the throwing call is not visible here
 */
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
    // Get all samples from distributed map
    @SuppressWarnings("resource") SortRecordBatchBuilder containerBuilder = new SortRecordBatchBuilder(context.getAllocator());
    final VectorContainer allSamplesContainer = new VectorContainer();
    final VectorContainer candidatePartitionTable = new VectorContainer();
    CachedVectorContainer wrap = null;
    try {
        for (CachedVectorContainer w : mmap.get(mapKey)) {
            containerBuilder.add(w.get());
        }
        containerBuilder.build(context, allSamplesContainer);
        // Rebuild each configured ordering against a simple path "f<i>", keeping its direction.
        // NOTE(review): presumably the sample containers name their columns f0, f1, ... — confirm against getCopier.
        List<Ordering> orderDefs = Lists.newArrayList();
        int i = 0;
        for (Ordering od : popConfig.getOrderings()) {
            SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
            orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
        }
        // sort the data incoming samples.
        @SuppressWarnings("resource") SelectionVector4 newSv4 = containerBuilder.getSv4();
        Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
        sorter.setup(context, newSv4, allSamplesContainer);
        sorter.sort(newSv4, allSamplesContainer);
        // Copy every Nth record from the samples into a candidate partition table, where N = totalSampledRecords/partitions
        // Attempt to push this to the distributed map. Only the first candidate to get pushed will be used.
        SampleCopier copier = null;
        List<ValueVector> localAllocationVectors = Lists.newArrayList();
        copier = getCopier(newSv4, allSamplesContainer, candidatePartitionTable, orderDefs, localAllocationVectors);
        int allocationSize = 50;
        // Retry the copy with a doubled allocationSize each time copyRecords reports failure.
        while (true) {
            for (ValueVector vv : localAllocationVectors) {
                AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
            }
            // Both the step between copied records and the first record's offset are skipRecords.
            int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
            if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
                assert copier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
                for (VectorWrapper<?> vw : candidatePartitionTable) {
                    vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
                }
                break;
            } else {
                // Zero the vectors from the failed attempt before allocating again.
                candidatePartitionTable.zeroVectors();
                allocationSize *= 2;
            }
        }
        candidatePartitionTable.setRecordCount(copier.getOutputRecords());
        // NOTE(review): unlike saveSamples, 'batch' is never cleared here; only 'wrap' is — confirm this is intended.
        @SuppressWarnings("resource") WritableBatch batch = WritableBatch.getBatchNoHVWrap(candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
        wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
        // Offer the table under "<mapKey>final"; the trailing arguments are 1 and TimeUnit.MINUTES.
        tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
    } finally {
        // Clear everything created here on both the success and the failure path.
        candidatePartitionTable.clear();
        allSamplesContainer.clear();
        containerBuilder.clear();
        containerBuilder.close();
        if (wrap != null) {
            wrap.clear();
        }
    }
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) SortRecordBatchBuilder(org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder) VectorContainer(org.apache.drill.exec.record.VectorContainer) CachedVectorContainer(org.apache.drill.exec.cache.CachedVectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) Ordering(org.apache.drill.common.logical.data.Order.Ordering) Sorter(org.apache.drill.exec.physical.impl.sort.Sorter) WritableBatch(org.apache.drill.exec.record.WritableBatch) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Aggregations

WritableBatch (org.apache.drill.exec.record.WritableBatch)8 ValueVector (org.apache.drill.exec.vector.ValueVector)4 VectorContainer (org.apache.drill.exec.record.VectorContainer)3 SchemaPath (org.apache.drill.common.expression.SchemaPath)2 ExecTest (org.apache.drill.exec.ExecTest)2 CachedVectorContainer (org.apache.drill.exec.cache.CachedVectorContainer)2 VectorAccessibleSerializable (org.apache.drill.exec.cache.VectorAccessibleSerializable)2 SortRecordBatchBuilder (org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder)2 Sorter (org.apache.drill.exec.physical.impl.sort.Sorter)2 SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)2 IntVector (org.apache.drill.exec.vector.IntVector)2 Test (org.junit.Test)2 Stopwatch (com.google.common.base.Stopwatch)1 ByteBuf (io.netty.buffer.ByteBuf)1 DrillBuf (io.netty.buffer.DrillBuf)1 File (java.io.File)1 IOException (java.io.IOException)1 DrillConfig (org.apache.drill.common.config.DrillConfig)1 FieldReference (org.apache.drill.common.expression.FieldReference)1 Ordering (org.apache.drill.common.logical.data.Order.Ordering)1