Search in sources:

Example 1 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

The class DirectRowSet, method toContainer.

private static VectorContainer toContainer(VectorAccessible va, BufferAllocator allocator) {
    VectorContainer container = VectorContainer.getTransferClone(va, allocator);
    container.buildSchema(SelectionVectorMode.NONE);
    container.setRecordCount(va.getRecordCount());
    return container;
}
Also used : VectorContainer(org.apache.drill.exec.record.VectorContainer)
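
A minimal usage sketch of the pattern above, assuming a hypothetical caller with an upstream batch and an operator allocator; clear() is the standard VectorContainer call for releasing its buffers.

// Hypothetical caller: take ownership of an upstream batch, use it, release it.
VectorContainer owned = toContainer(upstreamBatch, operatorAllocator);
try {
    process(owned); // hypothetical consumer of the cloned batch
} finally {
    owned.clear(); // release the buffers moved over by getTransferClone
}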

Example 2 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

The class ExternalSortBatch, method processBatch.

/**
   * Process the converted incoming batch by adding it to the in-memory store
   * of data, or spilling data to disk when necessary.
   */
@SuppressWarnings("resource")
private void processBatch() {
    if (incoming.getRecordCount() == 0) {
        return;
    }
    // Determine actual sizes of the incoming batch before taking
    // ownership. Allows us to figure out if we need to spill first,
    // to avoid overflowing memory simply due to ownership transfer.
    RecordBatchSizer sizer = analyzeIncomingBatch();
    if (isSpillNeeded(sizer.actualSize())) {
        spillFromMemory();
    }
    // Sanity check. We should now be below the buffer memory maximum.
    long startMem = allocator.getAllocatedMemory();
    if (startMem > bufferMemoryPool) {
        logger.error("ERROR: Failed to spill above buffer limit. Buffer pool = {}, memory = {}", bufferMemoryPool, startMem);
    }
    // Convert the incoming batch to the agreed-upon schema.
    // No converted batch means we got an empty input batch.
    // Converting the batch transfers memory ownership to our
    // allocator. This gives a round-about way to learn the batch
    // size: check the before and after memory levels, then use
    // the difference as the batch size, in bytes.
    VectorContainer convertedBatch = convertBatch();
    if (convertedBatch == null) {
        return;
    }
    SelectionVector2 sv2;
    try {
        sv2 = makeSelectionVector();
    } catch (Exception e) {
        convertedBatch.clear();
        throw e;
    }
    // Compute batch size, including allocation of an sv2.
    long endMem = allocator.getAllocatedMemory();
    long batchSize = endMem - startMem;
    int count = sv2.getCount();
    inputRecordCount += count;
    inputBatchCount++;
    totalInputBytes += sizer.actualSize();
    if (minimumBufferSpace == 0) {
        minimumBufferSpace = endMem;
    } else {
        minimumBufferSpace = Math.min(minimumBufferSpace, endMem);
    }
    stats.setLongStat(Metric.MIN_BUFFER, minimumBufferSpace);
    // Update the size based on the actual record count, not
    // the effective count as given by the selection vector
    // (which may exclude some records due to filtering.)
    updateMemoryEstimates(batchSize, sizer);
    // Sort the incoming batch using either the original selection vector,
    // or a new one created here.
    SingleBatchSorter sorter = opCodeGen.getSorter(convertedBatch);
    try {
        sorter.setup(context, sv2, convertedBatch);
    } catch (SchemaChangeException e) {
        convertedBatch.clear();
        throw UserException.unsupportedError(e).message("Unexpected schema change.").build(logger);
    }
    try {
        sorter.sort(sv2);
    } catch (SchemaChangeException e) {
        convertedBatch.clear();
        throw UserException.unsupportedError(e).message("Unexpected schema change.").build(logger);
    }
    RecordBatchData rbd = new RecordBatchData(convertedBatch, allocator);
    try {
        rbd.setSv2(sv2);
        bufferedBatches.add(new BatchGroup.InputBatch(rbd.getContainer(), rbd.getSv2(), oContext, sizer.netSize()));
        if (peakNumBatches < bufferedBatches.size()) {
            peakNumBatches = bufferedBatches.size();
            stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
        }
    } catch (Throwable t) {
        rbd.clear();
        throw t;
    }
}
Also used : RecordBatchSizer(org.apache.drill.exec.physical.impl.spill.RecordBatchSizer) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) RecordBatchData(org.apache.drill.exec.physical.impl.sort.RecordBatchData) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) SingleBatchSorter(org.apache.drill.exec.physical.impl.xsort.SingleBatchSorter) InputBatch(org.apache.drill.exec.physical.impl.xsort.managed.BatchGroup.InputBatch) UserException(org.apache.drill.common.exceptions.UserException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) IOException(java.io.IOException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) VectorContainer(org.apache.drill.exec.record.VectorContainer)
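
The before-and-after allocator reads are the heart of the batch-size measurement described in the comments above. A condensed sketch of that trick, assuming the same allocator, convertBatch(), and makeSelectionVector() as in the method:

// Ownership transfer moves the batch's buffers onto this allocator, so the
// difference in allocated bytes before and after is the batch size.
long before = allocator.getAllocatedMemory();
VectorContainer batch = convertBatch(); // transfers buffer ownership here
SelectionVector2 sv2 = makeSelectionVector(); // sv2 allocation is counted too
long batchSizeBytes = allocator.getAllocatedMemory() - before;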

Example 3 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

The class ExternalSortBatch, method mergeAndSpill.

public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException {
    logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
    logger.debug("mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory());
    VectorContainer outputContainer = new VectorContainer();
    List<BatchGroup> batchGroupList = Lists.newArrayList();
    int batchCount = batchGroups.size();
    for (int i = 0; i < batchCount / 2; i++) {
        if (batchGroups.size() == 0) {
            break;
        }
        @SuppressWarnings("resource") BatchGroup batch = batchGroups.pollLast();
        assert batch != null : "Encountered a null batch during merge and spill operation";
        batchGroupList.add(batch);
    }
    if (batchGroupList.size() == 0) {
        return null;
    }
    int estimatedRecordSize = 0;
    for (VectorWrapper<?> w : batchGroupList.get(0)) {
        try {
            estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
        } catch (UnsupportedOperationException e) {
            estimatedRecordSize += 50;
        }
    }
    int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
    VectorContainer hyperBatch = constructHyperBatch(batchGroupList);
    createCopier(hyperBatch, batchGroupList, outputContainer, true);
    int count = copier.next(targetRecordCount);
    assert count > 0;
    logger.debug("mergeAndSpill: estimated record size = {}, target record count = {}", estimatedRecordSize, targetRecordCount);
    // 1 output container is kept in memory, so we want to hold on to it and transferClone
    // allows keeping ownership
    VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext);
    c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    c1.setRecordCount(count);
    String spillDir = dirs.next();
    Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName));
    currSpillDirs.add(currSpillPath);
    String outputFile = Joiner.on("/").join(currSpillPath, spillCount++);
    try {
        fs.deleteOnExit(currSpillPath);
    } catch (IOException e) {
        // since this is meant to be used while a batch is spilling, we don't propagate the exception
        logger.warn("Unable to mark spill directory " + currSpillPath + " for deletion on exit", e);
    }
    stats.setLongStat(Metric.SPILL_COUNT, spillCount);
    BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext);
    try (AutoCloseable a = AutoCloseables.all(batchGroupList)) {
        logger.info("Merging and spilling to {}", outputFile);
        while ((count = copier.next(targetRecordCount)) > 0) {
            outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            outputContainer.setRecordCount(count);
            // note that addBatch also clears the outputContainer
            newGroup.addBatch(outputContainer);
        }
        injector.injectChecked(context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class);
        newGroup.closeOutputStream();
    } catch (Throwable e) {
        // we only need to cleanup newGroup if spill failed
        try {
            AutoCloseables.close(e, newGroup);
        } catch (Throwable t) {
        /* close() may hit the same IO issue; just ignore */
        }
        throw UserException.resourceError(e).message("External Sort encountered an error while spilling to disk").addContext(e.getMessage()).build(logger);
    } finally {
        hyperBatch.clear();
    }
    logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory());
    logger.info("Completed spilling to {}", outputFile);
    return newGroup;
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) VectorContainer(org.apache.drill.exec.record.VectorContainer)
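
The per-record size estimate drives how many rows each merged output batch holds. A self-contained sketch of that sizing rule, with hypothetical parameter names standing in for COPIER_BATCH_MEM_LIMIT and the loop's estimate:

// Divide a byte budget by the estimated record width, but never return
// zero, even when a single record exceeds the budget.
static int targetRecordCount(int copierMemLimitBytes, int estimatedRecordSize) {
    return Math.max(1, copierMemLimitBytes / estimatedRecordSize);
}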

Example 4 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.

The class ExternalSortBatch, method constructHyperBatch.

private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
    VectorContainer cont = new VectorContainer();
    for (MaterializedField field : schema) {
        ValueVector[] vectors = new ValueVector[batchGroupList.size()];
        int i = 0;
        for (BatchGroup group : batchGroupList) {
            vectors[i++] = group.getValueAccessorById(field.getValueClass(), group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds()).getValueVector();
        }
        cont.add(vectors);
    }
    cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    return cont;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorContainer(org.apache.drill.exec.record.VectorContainer)
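
FOUR_BYTE mode signals that downstream operators address rows through a SelectionVector4, whose entries pack a batch index and an in-batch record index. A hedged sketch of that decoding, with hypothetical sv4 and rowIndex variables:

// Each SV4 entry carries the batch index in its upper 16 bits and the
// record index within that batch in its lower 16 bits.
int composite = sv4.get(rowIndex);
int batchIndex = composite >>> 16; // which stacked batch in the hyper batch
int recordIndex = composite & 0xFFFF; // which row within that batch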

Example 5 with VectorContainer

Use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.

The class DumpCat, method showSingleBatch.

private void showSingleBatch(VectorAccessibleSerializable vcSerializable, boolean showHeader) {
    final VectorContainer vectorContainer = vcSerializable.get();
    /* show the header of the batch */
    if (showHeader) {
        System.out.println(getBatchMetaInfo(vcSerializable).toString());
        System.out.println("Schema Information");
        for (final VectorWrapper<?> w : vectorContainer) {
            final MaterializedField field = w.getValueVector().getField();
            System.out.println(String.format("name : %s, minor_type : %s, data_mode : %s", field.getName(), field.getType().getMinorType().toString(), field.isNullable() ? "nullable" : "non-nullable"));
        }
    }
    /* show the contents in the batch */
    VectorUtil.showVectorAccessibleContent(vectorContainer);
}
Also used : VectorWrapper(org.apache.drill.exec.record.VectorWrapper) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorContainer(org.apache.drill.exec.record.VectorContainer)
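
A hedged sketch of roughly what a content dump like VectorUtil.showVectorAccessibleContent boils down to, using the generic ValueVector accessor API:

// Walk each column and print row values; getObject(i) boxes the value at row i.
for (final VectorWrapper<?> w : vectorContainer) {
    final ValueVector vv = w.getValueVector();
    final String name = vv.getField().getName();
    for (int i = 0; i < vectorContainer.getRecordCount(); i++) {
        System.out.println(name + "[" + i + "] = " + vv.getAccessor().getObject(i));
    }
}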

Aggregations

VectorContainer (org.apache.drill.exec.record.VectorContainer) 176
Test (org.junit.Test) 73
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder) 60
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata) 60
SubOperatorTest (org.apache.drill.test.SubOperatorTest) 58
ValueVector (org.apache.drill.exec.vector.ValueVector) 44
RowSet (org.apache.drill.exec.physical.rowSet.RowSet) 39
SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) 31
BatchSchema (org.apache.drill.exec.record.BatchSchema) 27
ArrayList (java.util.ArrayList) 23
MaterializedField (org.apache.drill.exec.record.MaterializedField) 23
ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader) 18
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 17
RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison) 16
UserException (org.apache.drill.common.exceptions.UserException) 15
RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader) 15
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4) 15
OperatorTest (org.apache.drill.categories.OperatorTest) 14
MockRecordBatch (org.apache.drill.exec.physical.impl.MockRecordBatch) 14
DirectRowSet (org.apache.drill.exec.physical.rowSet.DirectRowSet) 14