Example 6 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

From the class TopNBatch, the method buildSchema:

@Override
public void buildSchema() throws SchemaChangeException {
    VectorContainer c = new VectorContainer(oContext);
    IterOutcome outcome = next(incoming);
    switch (outcome) {
        case OK:
        case OK_NEW_SCHEMA:
            // Mirror the incoming schema into a temporary container,
            // transferring any complex (container) vectors as we go.
            for (VectorWrapper<?> w : incoming) {
                @SuppressWarnings("resource")
                ValueVector v = c.addOrGet(w.getField());
                if (v instanceof AbstractContainerVector) {
                    w.getValueVector().makeTransferPair(v);
                    v.clear();
                }
            }
            // Canonicalize orders the vectors consistently, then copy the
            // canonical schema into this operator's output container.
            c = VectorContainer.canonicalize(c);
            for (VectorWrapper<?> w : c) {
                @SuppressWarnings("resource")
                ValueVector v = container.addOrGet(w.getField());
                if (v instanceof AbstractContainerVector) {
                    w.getValueVector().makeTransferPair(v);
                    v.clear();
                }
                v.allocateNew();
            }
            container.buildSchema(SelectionVectorMode.NONE);
            container.setRecordCount(0);
            return;
        case STOP:
            state = BatchState.STOP;
            return;
        case OUT_OF_MEMORY:
            state = BatchState.OUT_OF_MEMORY;
            return;
        case NONE:
            state = BatchState.DONE;
            // intentional fall-through: NONE also just returns
        default:
            return;
    }
}
Also used: ValueVector (org.apache.drill.exec.vector.ValueVector), AbstractContainerVector (org.apache.drill.exec.vector.complex.AbstractContainerVector), VectorContainer (org.apache.drill.exec.record.VectorContainer)
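
The sequence above (addOrGet per field, then buildSchema and setRecordCount(0)) is the usual way to materialize an empty output container that mirrors an upstream schema. A minimal standalone sketch of the same pattern, assuming a Drill version whose VectorContainer accepts a BufferAllocator directly (otherwise pass an OperatorContext as above); the field name and type here are illustrative, not from TopNBatch:

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.RootAllocator;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.vector.ValueVector;

public class EmptySchemaSketch {
    public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator(1 << 20)) {
            // Assumes a VectorContainer(BufferAllocator) constructor is available.
            VectorContainer container = new VectorContainer(allocator);
            // addOrGet creates a vector for the field if none exists yet.
            MaterializedField field = MaterializedField.create("id", Types.required(MinorType.INT));
            ValueVector v = container.addOrGet(field);
            v.allocateNew();
            // Freeze the schema (no selection vector) and mark the batch empty.
            container.buildSchema(SelectionVectorMode.NONE);
            container.setRecordCount(0);
            container.clear();
        }
    }
}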

Example 7 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

From the class ExternalSortBatch, the method processBatch:

/**
   * Process the converted incoming batch by adding it to the in-memory store
   * of data, or spilling data to disk when necessary.
   */
@SuppressWarnings("resource")
private void processBatch() {
    if (incoming.getRecordCount() == 0) {
        return;
    }
    // Determine actual sizes of the incoming batch before taking
    // ownership. Allows us to figure out if we need to spill first,
    // to avoid overflowing memory simply due to ownership transfer.
    RecordBatchSizer sizer = analyzeIncomingBatch();
    if (isSpillNeeded(sizer.actualSize())) {
        spillFromMemory();
    }
    // Sanity check. We should now be below the buffer memory maximum.
    long startMem = allocator.getAllocatedMemory();
    if (startMem > bufferMemoryPool) {
        logger.error("ERROR: Failed to spill above buffer limit. Buffer pool = {}, memory = {}", bufferMemoryPool, startMem);
    }
    // Convert the incoming batch to the agreed-upon schema.
    // No converted batch means we got an empty input batch.
    // Converting the batch transfers memory ownership to our
    // allocator. This gives a round-about way to learn the batch
    // size: check the before and after memory levels, then use
    // the difference as the batch size, in bytes.
    VectorContainer convertedBatch = convertBatch();
    if (convertedBatch == null) {
        return;
    }
    SelectionVector2 sv2;
    try {
        sv2 = makeSelectionVector();
    } catch (Exception e) {
        convertedBatch.clear();
        throw e;
    }
    // Compute batch size, including allocation of an sv2.
    long endMem = allocator.getAllocatedMemory();
    long batchSize = endMem - startMem;
    int count = sv2.getCount();
    inputRecordCount += count;
    inputBatchCount++;
    totalInputBytes += sizer.actualSize();
    if (minimumBufferSpace == 0) {
        minimumBufferSpace = endMem;
    } else {
        minimumBufferSpace = Math.min(minimumBufferSpace, endMem);
    }
    stats.setLongStat(Metric.MIN_BUFFER, minimumBufferSpace);
    // Update the size based on the actual record count, not
    // the effective count as given by the selection vector
    // (which may exclude some records due to filtering).
    updateMemoryEstimates(batchSize, sizer);
    // Sort the incoming batch using either the original selection vector,
    // or a new one created here.
    SingleBatchSorter sorter = opCodeGen.getSorter(convertedBatch);
    try {
        sorter.setup(context, sv2, convertedBatch);
        sorter.sort(sv2);
    } catch (SchemaChangeException e) {
        convertedBatch.clear();
        throw UserException.unsupportedError(e).message("Unexpected schema change.").build(logger);
    }
    RecordBatchData rbd = new RecordBatchData(convertedBatch, allocator);
    try {
        rbd.setSv2(sv2);
        bufferedBatches.add(new BatchGroup.InputBatch(rbd.getContainer(), rbd.getSv2(), oContext, sizer.netSize()));
        if (peakNumBatches < bufferedBatches.size()) {
            peakNumBatches = bufferedBatches.size();
            stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
        }
    } catch (Throwable t) {
        rbd.clear();
        throw t;
    }
}
Also used: RecordBatchSizer (org.apache.drill.exec.physical.impl.spill.RecordBatchSizer), SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException), RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData), SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2), SingleBatchSorter (org.apache.drill.exec.physical.impl.xsort.SingleBatchSorter), InputBatch (org.apache.drill.exec.physical.impl.xsort.managed.BatchGroup.InputBatch), UserException (org.apache.drill.common.exceptions.UserException), OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException), IOException (java.io.IOException), VectorContainer (org.apache.drill.exec.record.VectorContainer)
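
The sizing trick in processBatch() is worth calling out: rather than walking vector buffers, it reads allocator.getAllocatedMemory() before and after ownership transfer and takes the difference as the batch size in bytes. A minimal sketch of that technique in isolation; the caller-supplied transfer step is a stand-in for convertBatch(), not Drill's API:

import org.apache.drill.exec.memory.BufferAllocator;

public class MemoryDeltaSizer {
    // Infers a batch's size from allocator accounting: whatever memory the
    // transfer step moved into `allocator` is, by definition, the batch's
    // footprint there. Mirrors the startMem/endMem bookkeeping above.
    public static long takeAndMeasure(BufferAllocator allocator, Runnable transferStep) {
        long startMem = allocator.getAllocatedMemory();
        transferStep.run(); // e.g. transfer vectors from the upstream batch (caller-supplied)
        return allocator.getAllocatedMemory() - startMem;
    }
}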

Example 8 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

From the class ExternalSortBatch, the method mergeAndSpill:

public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException {
    logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
    logger.debug("mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory());
    VectorContainer outputContainer = new VectorContainer();
    List<BatchGroup> batchGroupList = Lists.newArrayList();
    int batchCount = batchGroups.size();
    for (int i = 0; i < batchCount / 2; i++) {
        if (batchGroups.size() == 0) {
            break;
        }
        @SuppressWarnings("resource") BatchGroup batch = batchGroups.pollLast();
        assert batch != null : "Encountered a null batch during merge and spill operation";
        batchGroupList.add(batch);
    }
    if (batchGroupList.size() == 0) {
        return null;
    }
    int estimatedRecordSize = 0;
    for (VectorWrapper<?> w : batchGroupList.get(0)) {
        try {
            estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
        } catch (UnsupportedOperationException e) {
            estimatedRecordSize += 50;
        }
    }
    int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
    VectorContainer hyperBatch = constructHyperBatch(batchGroupList);
    createCopier(hyperBatch, batchGroupList, outputContainer, true);
    int count = copier.next(targetRecordCount);
    assert count > 0;
    logger.debug("mergeAndSpill: estimated record size = {}, target record count = {}", estimatedRecordSize, targetRecordCount);
    // One output container is kept in memory, so we want to hold on to it;
    // transferClone lets us keep ownership.
    VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext);
    c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    c1.setRecordCount(count);
    String spillDir = dirs.next();
    Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName));
    currSpillDirs.add(currSpillPath);
    String outputFile = Joiner.on("/").join(currSpillPath, spillCount++);
    try {
        fs.deleteOnExit(currSpillPath);
    } catch (IOException e) {
        // since this is meant to be used while a batch is spilling, we don't propagate the exception
        logger.warn("Unable to mark spill directory " + currSpillPath + " for deleting on exit", e);
    }
    stats.setLongStat(Metric.SPILL_COUNT, spillCount);
    BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext);
    try (AutoCloseable a = AutoCloseables.all(batchGroupList)) {
        logger.info("Merging and spilling to {}", outputFile);
        while ((count = copier.next(targetRecordCount)) > 0) {
            outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            outputContainer.setRecordCount(count);
            // note that addBatch also clears the outputContainer
            newGroup.addBatch(outputContainer);
        }
        injector.injectChecked(context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class);
        newGroup.closeOutputStream();
    } catch (Throwable e) {
        // we only need to cleanup newGroup if spill failed
        try {
            AutoCloseables.close(e, newGroup);
        } catch (Throwable t) {
        /* close() may hit the same IO issue; just ignore */
        }
        throw UserException.resourceError(e).message("External Sort encountered an error while spilling to disk").addContext(e.getMessage()).build(logger);
    } finally {
        hyperBatch.clear();
    }
    logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory());
    logger.info("Completed spilling to {}", outputFile);
    return newGroup;
}
Also used: Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), IOException (java.io.IOException), VectorContainer (org.apache.drill.exec.record.VectorContainer)
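
The copier's target batch size comes from a static per-record estimate: sum TypeHelper.getSize() over the schema's field types, fall back to a flat 50 bytes where a type has no fixed width, then divide the memory budget by the total. The same estimate as a standalone sketch; the budget constant is an assumption for illustration, not Drill's actual value:

import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.MaterializedField;

public class SpillBatchSizer {
    private static final int COPIER_BATCH_MEM_LIMIT = 256 * 1024; // assumed budget, in bytes

    public static int targetRecordCount(BatchSchema schema) {
        int estimatedRecordSize = 0;
        for (MaterializedField field : schema) {
            try {
                estimatedRecordSize += TypeHelper.getSize(field.getType());
            } catch (UnsupportedOperationException e) {
                estimatedRecordSize += 50; // variable-width type: rough guess, as above
            }
        }
        // Never ask the copier for zero records, however wide the rows are.
        return Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
    }
}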

Example 9 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

From the class ExternalSortBatch, the method constructHyperBatch:

private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
    VectorContainer cont = new VectorContainer();
    for (MaterializedField field : schema) {
        ValueVector[] vectors = new ValueVector[batchGroupList.size()];
        int i = 0;
        for (BatchGroup group : batchGroupList) {
            vectors[i++] = group.getValueAccessorById(
                    field.getValueClass(),
                    group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds())
                .getValueVector();
        }
        cont.add(vectors);
    }
    cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    return cont;
}
Also used: ValueVector (org.apache.drill.exec.vector.ValueVector), MaterializedField (org.apache.drill.exec.record.MaterializedField), VectorContainer (org.apache.drill.exec.record.VectorContainer)
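
constructHyperBatch() stacks one ValueVector per buffered batch into an array per column and declares SelectionVectorMode.FOUR_BYTE, because rows in a hyper-batch are addressed through a SelectionVector4 whose entries pack the batch index in the upper two bytes and the record index in the lower two. A sketch of that addressing, assuming an INT column and an SV4 already built elsewhere (by a sorter or batch builder):

import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.exec.vector.IntVector;

public class HyperBatchRead {
    // Reads the i-th selected value from a hyper-batch column: each SV4
    // entry packs (batchIndex << 16) | recordIndex, which is what the
    // FOUR_BYTE selection vector mode declares. Assumes an INT column.
    public static int readAt(IntVector[] column, SelectionVector4 sv4, int i) {
        int compound = sv4.get(i);
        int batchIndex = compound >>> 16;    // which stacked batch
        int recordIndex = compound & 0xFFFF; // which row within that batch
        return column[batchIndex].getAccessor().get(recordIndex);
    }
}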

Example 10 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

From the class NoFrameSupportTemplate, the method setup:

@Override
public void setup(final List<WindowDataBatch> batches, final VectorContainer container, final OperatorContext oContext, final boolean requireFullPartition, final WindowPOP popConfig) throws SchemaChangeException {
    this.container = container;
    this.batches = batches;
    internal = new VectorContainer(oContext);
    allocateInternal();
    lagCopiedToInternal = false;
    outputCount = 0;
    partition = null;
    this.requireFullPartition = requireFullPartition;
}
Also used: VectorContainer (org.apache.drill.exec.record.VectorContainer)
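
Note the two containers in play here: container is the operator's output, owned by the caller and merely referenced by setup(), while internal is a scratch container allocated against the operator's own context; allocateInternal() (not shown) presumably sizes its vectors before any rows are copied in.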

Aggregations

VectorContainer (org.apache.drill.exec.record.VectorContainer): 27
ValueVector (org.apache.drill.exec.vector.ValueVector): 11
MaterializedField (org.apache.drill.exec.record.MaterializedField): 8
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 6
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 6
Stopwatch (com.google.common.base.Stopwatch): 5
SortRecordBatchBuilder (org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder): 5
IOException (java.io.IOException): 4
SchemaPath (org.apache.drill.common.expression.SchemaPath): 4
BatchSchema (org.apache.drill.exec.record.BatchSchema): 4
CachedVectorContainer (org.apache.drill.exec.cache.CachedVectorContainer): 3
VectorAccessibleSerializable (org.apache.drill.exec.cache.VectorAccessibleSerializable): 3
VectorWrapper (org.apache.drill.exec.record.VectorWrapper): 3
WritableBatch (org.apache.drill.exec.record.WritableBatch): 3
DrillBuf (io.netty.buffer.DrillBuf): 2
LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 2
MajorType (org.apache.drill.common.types.TypeProtos.MajorType): 2
ClassTransformationException (org.apache.drill.exec.exception.ClassTransformationException): 2
OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 2
RecordBatchData (org.apache.drill.exec.physical.impl.sort.RecordBatchData): 2