Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
The class DirectRowSet, method toContainer.
private static VectorContainer toContainer(VectorAccessible va, BufferAllocator allocator) {
  VectorContainer container = VectorContainer.getTransferClone(va, allocator);
  container.buildSchema(SelectionVectorMode.NONE);
  container.setRecordCount(va.getRecordCount());
  return container;
}
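The transfer-clone idiom above is easy to misuse: once getTransferClone runs, the new container owns the buffers, so it must be cleared if a later step fails. Below is a minimal defensive sketch using only the VectorContainer API shown above; the adoptBatch name and the caller-supplied arguments are hypothetical, not part of DirectRowSet.

import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;

public class TransferCloneExample {
  // Hypothetical helper: wrap an incoming batch in a container owned by
  // our allocator, releasing the transferred buffers if setup fails.
  static VectorContainer adoptBatch(VectorAccessible results, BufferAllocator allocator) {
    VectorContainer container = VectorContainer.getTransferClone(results, allocator);
    try {
      container.buildSchema(SelectionVectorMode.NONE);
      container.setRecordCount(results.getRecordCount());
      return container;
    } catch (RuntimeException e) {
      container.clear(); // we now own the buffers, so we must free them
      throw e;
    }
  }
}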
Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
The class ExternalSortBatch, method processBatch.
/**
 * Process the converted incoming batch by adding it to the in-memory store
 * of data, or spilling data to disk when necessary.
 */
@SuppressWarnings("resource")
private void processBatch() {
  if (incoming.getRecordCount() == 0) {
    return;
  }

  // Determine the actual size of the incoming batch before taking
  // ownership. This lets us decide whether we need to spill first,
  // to avoid overflowing memory simply due to the ownership transfer.
  RecordBatchSizer sizer = analyzeIncomingBatch();
  if (isSpillNeeded(sizer.actualSize())) {
    spillFromMemory();
  }

  // Sanity check. We should now be below the buffer memory maximum.
  long startMem = allocator.getAllocatedMemory();
  if (startMem > bufferMemoryPool) {
    logger.error("ERROR: Spill failed to bring memory below the buffer limit. Buffer pool = {}, memory = {}", bufferMemoryPool, startMem);
  }

  // Convert the incoming batch to the agreed-upon schema.
  // No converted batch means we got an empty input batch.
  // Converting the batch transfers memory ownership to our
  // allocator. This gives a roundabout way to learn the batch
  // size: check the before and after memory levels, then use
  // the difference as the batch size, in bytes.
  VectorContainer convertedBatch = convertBatch();
  if (convertedBatch == null) {
    return;
  }

  SelectionVector2 sv2;
  try {
    sv2 = makeSelectionVector();
  } catch (Exception e) {
    convertedBatch.clear();
    throw e;
  }

  // Compute the batch size, including the allocation of the sv2.
  long endMem = allocator.getAllocatedMemory();
  long batchSize = endMem - startMem;
  int count = sv2.getCount();
  inputRecordCount += count;
  inputBatchCount++;
  totalInputBytes += sizer.actualSize();
  if (minimumBufferSpace == 0) {
    minimumBufferSpace = endMem;
  } else {
    minimumBufferSpace = Math.min(minimumBufferSpace, endMem);
  }
  stats.setLongStat(Metric.MIN_BUFFER, minimumBufferSpace);

  // Update the size based on the actual record count, not
  // the effective count as given by the selection vector
  // (which may exclude some records due to filtering).
  updateMemoryEstimates(batchSize, sizer);

  // Sort the incoming batch using either the original selection vector
  // or a new one created here.
  SingleBatchSorter sorter = opCodeGen.getSorter(convertedBatch);
  try {
    sorter.setup(context, sv2, convertedBatch);
    sorter.sort(sv2);
  } catch (SchemaChangeException e) {
    convertedBatch.clear();
    throw UserException.unsupportedError(e).message("Unexpected schema change.").build(logger);
  }

  RecordBatchData rbd = new RecordBatchData(convertedBatch, allocator);
  try {
    rbd.setSv2(sv2);
    bufferedBatches.add(new BatchGroup.InputBatch(rbd.getContainer(), rbd.getSv2(), oContext, sizer.netSize()));
    if (peakNumBatches < bufferedBatches.size()) {
      peakNumBatches = bufferedBatches.size();
      stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
    }
  } catch (Throwable t) {
    rbd.clear();
    throw t;
  }
}
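Note how processBatch never asks the converted batch for its size directly: it differences the allocator's counter around the conversion, as the comment above explains. A minimal sketch of that measurement pattern in plain Java, with a hypothetical Allocator interface standing in for Drill's BufferAllocator:

// Hypothetical stand-in for Drill's BufferAllocator; only the
// allocated-memory accessor is needed for this pattern.
interface Allocator {
  long getAllocatedMemory();
}

class BatchSizeByDelta {
  // Returns the number of bytes that work() left allocated, computed
  // by differencing the allocator's counter before and after.
  static long measure(Allocator allocator, Runnable work) {
    long before = allocator.getAllocatedMemory();
    work.run(); // e.g. convert the batch and create the sv2
    long after = allocator.getAllocatedMemory();
    return after - before; // batch size in bytes, including the sv2
  }
}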
Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
The class ExternalSortBatch, method mergeAndSpill.
public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException {
  logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
  logger.debug("mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory());
  VectorContainer outputContainer = new VectorContainer();
  List<BatchGroup> batchGroupList = Lists.newArrayList();
  int batchCount = batchGroups.size();
  for (int i = 0; i < batchCount / 2; i++) {
    if (batchGroups.size() == 0) {
      break;
    }
    @SuppressWarnings("resource")
    BatchGroup batch = batchGroups.pollLast();
    assert batch != null : "Encountered a null batch during merge and spill operation";
    batchGroupList.add(batch);
  }
  if (batchGroupList.size() == 0) {
    return null;
  }

  // Estimate the size of a single record from the field types. Fields
  // whose size cannot be determined get a default estimate of 50 bytes.
  int estimatedRecordSize = 0;
  for (VectorWrapper<?> w : batchGroupList.get(0)) {
    try {
      estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
    } catch (UnsupportedOperationException e) {
      estimatedRecordSize += 50;
    }
  }
  int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
  VectorContainer hyperBatch = constructHyperBatch(batchGroupList);
  createCopier(hyperBatch, batchGroupList, outputContainer, true);
  int count = copier.next(targetRecordCount);
  assert count > 0;
  logger.debug("mergeAndSpill: estimated record size = {}, target record count = {}", estimatedRecordSize, targetRecordCount);

  // One output container is kept in memory, so we want to hold on to it;
  // transferClone lets us retain ownership.
  VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext);
  c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  c1.setRecordCount(count);

  String spillDir = dirs.next();
  Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName));
  currSpillDirs.add(currSpillPath);
  String outputFile = Joiner.on("/").join(currSpillPath, spillCount++);
  try {
    fs.deleteOnExit(currSpillPath);
  } catch (IOException e) {
    // Since this happens during a batch's spill, we don't propagate the exception.
    logger.warn("Unable to mark spill directory " + currSpillPath + " for deletion on exit", e);
  }
  stats.setLongStat(Metric.SPILL_COUNT, spillCount);
  BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext);
  try (AutoCloseable a = AutoCloseables.all(batchGroupList)) {
    logger.info("Merging and spilling to {}", outputFile);
    while ((count = copier.next(targetRecordCount)) > 0) {
      outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      outputContainer.setRecordCount(count);
      // Note that addBatch also clears the outputContainer.
      newGroup.addBatch(outputContainer);
    }
    injector.injectChecked(context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class);
    newGroup.closeOutputStream();
  } catch (Throwable e) {
    // We only need to clean up newGroup if the spill failed.
    try {
      AutoCloseables.close(e, newGroup);
    } catch (Throwable t) {
      // close() may hit the same IO issue; just ignore it.
    }
    throw UserException.resourceError(e).message("External Sort encountered an error while spilling to disk").addContext(e.getMessage()).build(logger);
  } finally {
    hyperBatch.clear();
  }
  logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory());
  logger.info("Completed spilling to {}", outputFile);
  return newGroup;
}
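The per-batch sizing above is plain integer arithmetic: divide the copier's memory budget by the estimated record size, and never go below one record. A small self-contained sketch; the 256 KiB budget and 120-byte record size are illustrative, and the real COPIER_BATCH_MEM_LIMIT is a class constant not shown in this snippet.

class SpillBatchSizing {
  // Mirrors the sizing logic above: cap each copied batch at a fixed
  // memory budget divided by the estimated per-record size, but always
  // copy at least one record.
  static int targetRecordCount(int copierBatchMemLimit, int estimatedRecordSize) {
    return Math.max(1, copierBatchMemLimit / estimatedRecordSize);
  }

  public static void main(String[] args) {
    // Illustrative numbers: a 256 KiB budget at ~120 bytes per record
    // yields batches of 2184 records.
    System.out.println(targetRecordCount(256 * 1024, 120)); // 2184
  }
}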
Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
The class ExternalSortBatch, method constructHyperBatch.
private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
  VectorContainer cont = new VectorContainer();
  for (MaterializedField field : schema) {
    ValueVector[] vectors = new ValueVector[batchGroupList.size()];
    int i = 0;
    for (BatchGroup group : batchGroupList) {
      vectors[i++] = group.getValueAccessorById(field.getValueClass(), group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds()).getValueVector();
    }
    cont.add(vectors);
  }
  cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
  return cont;
}
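Building the container in FOUR_BYTE mode matters because a hyper-batch is addressed through a four-byte selection vector: in Drill's SelectionVector4, the upper 16 bits of each entry select the batch within the hyper-batch and the lower 16 bits select the record within that batch. A small sketch of that decoding; the helper names are hypothetical, and the bit layout is an assumption based on how SelectionVector4 entries are conventionally described.

class Sv4Decode {
  // Assumed encoding: upper 16 bits pick the batch, lower 16 bits
  // pick the record within that batch.
  static int batchIndex(int sv4Entry) {
    return sv4Entry >>> 16;
  }

  static int recordIndex(int sv4Entry) {
    return sv4Entry & 0xFFFF;
  }

  public static void main(String[] args) {
    int entry = (3 << 16) | 42; // batch 3, record 42
    System.out.println(batchIndex(entry) + " / " + recordIndex(entry)); // 3 / 42
  }
}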
Use of org.apache.drill.exec.record.VectorContainer in project drill by axbaretto.
The class DumpCat, method showSingleBatch.
private void showSingleBatch(VectorAccessibleSerializable vcSerializable, boolean showHeader) {
  final VectorContainer vectorContainer = vcSerializable.get();

  /* show the header of the batch */
  if (showHeader) {
    System.out.println(getBatchMetaInfo(vcSerializable).toString());
    System.out.println("Schema Information");
    for (final VectorWrapper<?> w : vectorContainer) {
      final MaterializedField field = w.getValueVector().getField();
      System.out.println(String.format("name : %s, minor_type : %s, data_mode : %s", field.getName(), field.getType().getMinorType().toString(), field.isNullable() ? "nullable" : "non-nullable"));
    }
  }

  /* show the contents in the batch */
  VectorUtil.showVectorAccessibleContent(vectorContainer);
}
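For example, given a nullable INT column named employee_id (an illustrative name, not from the source), the schema loop above would print: name : employee_id, minor_type : INT, data_mode : nullable.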