Use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.
The class TopNBatch, method purgeAndResetPriorityQueue.
/**
 * Handles a schema change during execution:
 * 1. Purge the existing batches.
 * 2. Promote the newly created container for the new schema.
 * 3. Recreate the priority queue and reset it with the coerced container.
 * @throws SchemaChangeException
 */
public void purgeAndResetPriorityQueue() throws SchemaChangeException, ClassTransformationException, IOException {
  final Stopwatch watch = Stopwatch.createStarted();
  final VectorContainer c = priorityQueue.getHyperBatch();
  final VectorContainer newContainer = new VectorContainer(oContext);
  @SuppressWarnings("resource")
  final SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
  final SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
  final SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
  copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(), newContainer, newBatch, null);
  @SuppressWarnings("resource")
  SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
  try {
    do {
      final int count = selectionVector4.getCount();
      final int copiedRecords = copier.copyRecords(0, count);
      assert copiedRecords == count;
      for (VectorWrapper<?> v : newContainer) {
        ValueVector.Mutator m = v.getValueVector().getMutator();
        m.setValueCount(count);
      }
      newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      newContainer.setRecordCount(count);
      builder.add(newBatch);
    } while (selectionVector4.next());
    selectionVector4.clear();
    c.clear();
    final VectorContainer oldSchemaContainer = new VectorContainer(oContext);
    builder.canonicalize();
    builder.build(context, oldSchemaContainer);
    oldSchemaContainer.setRecordCount(builder.getSv4().getCount());
    final VectorContainer newSchemaContainer = SchemaUtil.coerceContainer(oldSchemaContainer, this.schema, oContext);
    // Canonicalize new container since we canonicalize incoming batches before adding to queue.
    final VectorContainer canonicalizedContainer = VectorContainer.canonicalize(newSchemaContainer);
    canonicalizedContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
    priorityQueue.cleanup();
    priorityQueue = createNewPriorityQueue(context, config.getOrderings(), canonicalizedContainer, MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
    priorityQueue.resetQueue(canonicalizedContainer, builder.getSv4().createNewWrapperCurrent());
  } finally {
    builder.clear();
    builder.close();
  }
  logger.debug("Took {} us to purge and recreate queue for new schema", watch.elapsed(TimeUnit.MICROSECONDS));
}
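The heap SV4 above indexes records across the stacked batches of the hyper-batch. As a point of reference, a four-byte selection vector entry packs a batch index into the upper two bytes and a record offset into the lower two; the decode below is an illustrative sketch, not a method on Drill's SelectionVector4:

// Illustrative only: how a single SV4 entry addresses a record inside a
// hyper-batch (batch index in the high 16 bits, row offset in the low 16).
static void decodeSv4Entry(int sv4Entry) {
  int batchIndex = sv4Entry >>> 16;     // which stacked batch to read from
  int recordIndex = sv4Entry & 0xFFFF;  // row offset within that batch
  System.out.printf("batch=%d, record=%d%n", batchIndex, recordIndex);
}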
Use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.
The class MergeSort, method merge.
/**
 * Merge the set of in-memory batches to produce a single logical output in the given
 * destination container, indexed by an SV4.
 *
 * @param batchGroups the complete set of in-memory batches
 * @param batch the record batch (operator) for the sort operator
 * @param destContainer the vector container for the sort operator
 * @return the sv4 for this operator
 */
public SelectionVector4 merge(LinkedList<BatchGroup.InputBatch> batchGroups, VectorAccessible batch, VectorContainer destContainer) {
  // Add the buffered batches to a collection that MSorter can use.
  // The builder takes ownership of the batches and will release them if
  // an error occurs.
  builder = new SortRecordBatchBuilder(oAllocator);
  for (BatchGroup.InputBatch group : batchGroups) {
    RecordBatchData rbd = new RecordBatchData(group.getContainer(), oAllocator);
    rbd.setSv2(group.getSv2());
    builder.add(rbd);
  }
  batchGroups.clear();
  try {
    builder.build(context, destContainer);
    sv4 = builder.getSv4();
    mSorter = opCg.createNewMSorter(batch);
    mSorter.setup(context, oAllocator, sv4, destContainer, sv4.getCount());
  } catch (SchemaChangeException e) {
    throw UserException.unsupportedError(e).message("Unexpected schema change - likely code error.").build(logger);
  }
  // For testing memory leaks, inject an exception after mSorter finishes setup.
  ExternalSortBatch.injector.injectUnchecked(context.getExecutionControls(), ExternalSortBatch.INTERRUPTION_AFTER_SETUP);
  mSorter.sort(destContainer);
  // The sort may have exited prematurely because shouldContinue() returned false.
  if (!context.shouldContinue()) {
    return null;
  }
  // For testing memory leaks, inject an exception after mSorter finishes sorting.
  ExternalSortBatch.injector.injectUnchecked(context.getExecutionControls(), ExternalSortBatch.INTERRUPTION_AFTER_SORT);
  sv4 = mSorter.getSV4();
  destContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
  return sv4;
}
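For orientation, here is a condensed, hypothetical sketch of the SortRecordBatchBuilder lifecycle that merge() follows; allocator, context, and bufferedBatches stand in for state held by the enclosing operator, and the clear-and-close pattern in the finally block mirrors the methods shown on this page:

// Hypothetical helper, not from the Drill source: shows the builder's
// add -> build -> getSv4 -> clear/close lifecycle in isolation.
static void builderLifecycleSketch(BufferAllocator allocator, FragmentContext context,
    List<VectorAccessible> bufferedBatches) throws SchemaChangeException {
  SortRecordBatchBuilder builder = new SortRecordBatchBuilder(allocator);
  try {
    for (VectorAccessible buffered : bufferedBatches) {
      builder.add(buffered);                   // the builder takes ownership of the buffers
    }
    VectorContainer merged = new VectorContainer();
    builder.build(context, merged);            // populates the container and its SV4
    SelectionVector4 sv4 = builder.getSv4();   // one indirection vector over all batches
    // ... read `merged` through `sv4` here, before the builder is cleared ...
  } finally {
    builder.clear();                           // releases the held batches on every path
    builder.close();
  }
}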
Use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.
The class OrderedPartitionRecordBatch, method saveSamples.
@SuppressWarnings("resource")
private boolean saveSamples() throws SchemaChangeException, ClassTransformationException, IOException {
  recordsSampled = 0;
  IterOutcome upstream;
  // Start collecting batches until recordsToSample records have been collected.
  SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
  WritableBatch batch = null;
  CachedVectorContainer sampleToSave = null;
  VectorContainer containerToCache = new VectorContainer();
  try {
    builder.add(incoming);
    recordsSampled += incoming.getRecordCount();
    outer: while (recordsSampled < recordsToSample) {
      upstream = next(incoming);
      switch (upstream) {
        case NONE:
        case NOT_YET:
        case STOP:
          upstreamNone = true;
          break outer;
        default:
          break;
      }
      builder.add(incoming);
      recordsSampled += incoming.getRecordCount();
      if (upstream == IterOutcome.NONE) {
        break;
      }
    }
    VectorContainer sortedSamples = new VectorContainer();
    builder.build(context, sortedSamples);
    // Sort the records according to the orderings given in the configuration.
    Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
    SelectionVector4 sv4 = builder.getSv4();
    sorter.setup(context, sv4, sortedSamples);
    sorter.sort(sv4, sortedSamples);
    // Project every Nth record to a new vector container, where N = recordsSampled / (samplingFactor * partitions).
    // Uses the expressions from the Orderings to populate each column; there is one column for each Ordering in
    // popConfig.orderings.
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier = getCopier(sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      if (copier.copyRecords(recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
        break;
      } else {
        containerToCache.zeroVectors();
        allocationSize *= 2;
      }
    }
    for (VectorWrapper<?> vw : containerToCache) {
      vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
    }
    containerToCache.setRecordCount(copier.getOutputRecords());
    // Get a distributed multimap handle from the distributed cache, put the vectors from the new vector container
    // into a serializable wrapper object, and add that to the distributed map.
    batch = WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
    sampleToSave = new CachedVectorContainer(batch, context.getAllocator());
    mmap.put(mapKey, sampleToSave);
    this.sampledIncomingBatches = builder.getHeldRecordBatches();
  } finally {
    builder.clear();
    builder.close();
    if (batch != null) {
      batch.clear();
    }
    containerToCache.clear();
    if (sampleToSave != null) {
      sampleToSave.clear();
    }
  }
  return true;
}
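The copier loop above keeps every Nth record, with N = recordsSampled / (samplingFactor * partitions), and doubles the vector allocation whenever a copy pass runs out of room. A worked example of the arithmetic, using hypothetical numbers rather than values from the source:

// Hypothetical numbers: 4,000 records sampled, samplingFactor 10, 4 partitions.
int recordsSampled = 4000;
int samplingFactor = 10;
int partitions = 4;
int targetSamples = samplingFactor * partitions;  // 40 rows to keep
int step = recordsSampled / targetSamples;        // keep every 100th row
// copier.copyRecords(step, 0, targetSamples) walks the sorted SV4 from
// offset 0 in strides of 100 and emits 40 evenly spaced sample rows.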
Use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.
The class OrderedPartitionRecordBatch, method buildTable.
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
  // Get all samples from the distributed map.
  @SuppressWarnings("resource")
  SortRecordBatchBuilder containerBuilder = new SortRecordBatchBuilder(context.getAllocator());
  final VectorContainer allSamplesContainer = new VectorContainer();
  final VectorContainer candidatePartitionTable = new VectorContainer();
  CachedVectorContainer wrap = null;
  try {
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    containerBuilder.build(context, allSamplesContainer);
    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }
    // Sort the incoming sample data.
    @SuppressWarnings("resource")
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);
    // Copy every Nth record from the samples into a candidate partition table, where N = totalSampledRecords / partitions.
    // Attempt to push this to the distributed map. Only the first candidate to get pushed will be used.
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier = getCopier(newSv4, allSamplesContainer, candidatePartitionTable, orderDefs, localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1
            : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    @SuppressWarnings("resource")
    WritableBatch batch = WritableBatch.getBatchNoHVWrap(candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
  } finally {
    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    containerBuilder.close();
    if (wrap != null) {
      wrap.clear();
    }
  }
}
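buildTable() applies the same stride idea to pick partition boundaries: with P partitions it strides totalCount / P records and emits P - 1 boundary rows. A worked example with hypothetical numbers:

// Hypothetical numbers: 1,200 pooled sample records, 4 partitions.
int totalCount = 1200;
int partitions = 4;
int skipRecords = totalCount / partitions;  // stride of 300
// copier.copyRecords(skipRecords, skipRecords, partitions - 1) starts at row
// 300 and strides by 300, emitting rows 300, 600, and 900 as the three
// boundary values that split the sorted data into four ranges.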
Use of org.apache.drill.exec.physical.impl.sort.SortRecordBatchBuilder in project drill by apache.
The class TopNBatch, method purge.
private void purge() throws SchemaChangeException {
  Stopwatch watch = Stopwatch.createStarted();
  VectorContainer c = priorityQueue.getHyperBatch();
  VectorContainer newContainer = new VectorContainer(oContext);
  @SuppressWarnings("resource")
  SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
  SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
  SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
  if (copier == null) {
    copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(), newContainer, newBatch, null);
  } else {
    for (VectorWrapper<?> i : batch) {
      @SuppressWarnings("resource")
      ValueVector v = TypeHelper.getNewVector(i.getField(), oContext.getAllocator());
      newContainer.add(v);
    }
    copier.setupRemover(context, batch, newBatch);
  }
  @SuppressWarnings("resource")
  SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
  try {
    do {
      int count = selectionVector4.getCount();
      int copiedRecords = copier.copyRecords(0, count);
      assert copiedRecords == count;
      for (VectorWrapper<?> v : newContainer) {
        ValueVector.Mutator m = v.getValueVector().getMutator();
        m.setValueCount(count);
      }
      newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      newContainer.setRecordCount(count);
      builder.add(newBatch);
    } while (selectionVector4.next());
    selectionVector4.clear();
    c.clear();
    VectorContainer newQueue = new VectorContainer();
    builder.canonicalize();
    builder.build(context, newQueue);
    priorityQueue.resetQueue(newQueue, builder.getSv4().createNewWrapperCurrent());
    builder.getSv4().clear();
    selectionVector4.clear();
  } finally {
    DrillAutoCloseables.closeNoChecked(builder);
  }
  logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS));
}
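Both purge variants drain the heap SV4 in windows: getCount() reports the records visible in the current window, next() advances to the following window, and clear() releases the buffer once every window has been copied. A minimal sketch of that drain loop, where processWindow() is a placeholder for the copy-and-add work above:

// `processWindow` is hypothetical; in TopNBatch it copies the current window
// out of the hyper-batch and adds the result to the SortRecordBatchBuilder.
do {
  int count = sv4.getCount();   // records exposed by the current window
  processWindow(sv4, count);
} while (sv4.next());           // false once every window has been visited
sv4.clear();                    // release the selection vector's buffer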