Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
Class OrderedPartitionRecordBatch, method buildTable:
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
  // Get all samples from the distributed map.
  @SuppressWarnings("resource")
  SortRecordBatchBuilder containerBuilder = new SortRecordBatchBuilder(context.getAllocator());
  final VectorContainer allSamplesContainer = new VectorContainer();
  final VectorContainer candidatePartitionTable = new VectorContainer();
  CachedVectorContainer wrap = null;
  try {
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    containerBuilder.build(context, allSamplesContainer);
    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }

    // Sort the incoming samples.
    @SuppressWarnings("resource")
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);

    // Copy every Nth record from the samples into a candidate partition table,
    // where N = totalSampledRecords / partitions. Attempt to push this to the
    // distributed map; only the first candidate to get pushed will be used.
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier =
        getCopier(newSv4, allSamplesContainer, candidatePartitionTable, orderDefs, localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1
            : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        // The copy did not fit; free the vectors, double the allocation, and retry.
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    @SuppressWarnings("resource")
    WritableBatch batch =
        WritableBatch.getBatchNoHVWrap(candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
  } finally {
    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    containerBuilder.close();
    if (wrap != null) {
      wrap.clear();
    }
  }
}
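The while-loop above retries the copy with a doubling allocation size until every candidate boundary record fits. A minimal, self-contained sketch of that grow-and-retry pattern, using a plain byte array in place of Drill's value vectors (all names here are illustrative, not Drill APIs):

public class GrowAndRetry {
  // Pretend copy: succeeds only when the destination can hold all the data.
  static boolean copyRecords(byte[] dest, byte[] src) {
    if (dest.length < src.length) {
      return false; // overflow: caller must grow and retry
    }
    System.arraycopy(src, 0, dest, 0, src.length);
    return true;
  }

  public static void main(String[] args) {
    byte[] data = new byte[400];
    int allocationSize = 50; // start small, as buildTable() does
    while (true) {
      byte[] dest = new byte[allocationSize];
      if (copyRecords(dest, data)) {
        break; // everything fit
      }
      allocationSize *= 2; // double the budget and try again
    }
    System.out.println("final allocation: " + allocationSize); // prints 400
  }
}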
Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
Class FrameSupportTemplate, method setup:
@Override
public void setup(final List<WindowDataBatch> batches, final VectorContainer container,
    final OperatorContext oContext, final boolean requireFullPartition,
    final WindowPOP popConfig) throws SchemaChangeException {
  this.container = container;
  this.batches = batches;
  internal = new VectorContainer(oContext);
  allocateInternal();
  outputCount = 0;
  this.requireFullPartition = requireFullPartition;
  this.popConfig = popConfig;
}
Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
Class SortRecordBatchBuilder, method getHeldRecordBatches:
public List<VectorContainer> getHeldRecordBatches() {
  ArrayList<VectorContainer> containerList = Lists.newArrayList();
  for (BatchSchema bs : batches.keySet()) {
    for (RecordBatchData bd : batches.get(bs)) {
      VectorContainer c = bd.getContainer();
      c.setRecordCount(bd.getRecordCount());
      containerList.add(c);
    }
  }
  batches.clear();
  return containerList;
}
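Note that getHeldRecordBatches() clears the builder's internal batch map, so ownership of the returned containers transfers to the caller, who must eventually release them. A self-contained sketch of that ownership-transfer pattern (the Resource class and takeHeld method are stand-ins, not Drill types):

import java.util.ArrayList;
import java.util.List;

public class OwnershipTransfer {
  static class Resource {
    boolean released;
    void release() { released = true; }
  }

  private final List<Resource> held = new ArrayList<>();

  List<Resource> takeHeld() {
    List<Resource> out = new ArrayList<>(held);
    held.clear(); // producer forgets its resources: the caller now owns them
    return out;
  }

  public static void main(String[] args) {
    OwnershipTransfer producer = new OwnershipTransfer();
    producer.held.add(new Resource());
    List<Resource> mine = producer.takeHeld();
    try {
      // ... use the resources ...
    } finally {
      mine.forEach(Resource::release); // caller releases what it took
    }
  }
}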
Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
Class TopNBatch, method purge:
private void purge() throws SchemaChangeException {
  Stopwatch watch = Stopwatch.createStarted();
  VectorContainer c = priorityQueue.getHyperBatch();
  VectorContainer newContainer = new VectorContainer(oContext);
  @SuppressWarnings("resource")
  SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
  SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
  SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
  if (copier == null) {
    copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(),
        newContainer, newBatch, null);
  } else {
    for (VectorWrapper<?> i : batch) {
      @SuppressWarnings("resource")
      ValueVector v = TypeHelper.getNewVector(i.getField(), oContext.getAllocator());
      newContainer.add(v);
    }
    copier.setupRemover(context, batch, newBatch);
  }
  @SuppressWarnings("resource")
  SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
  try {
    // Copy the records referenced by each chunk of the selection vector into
    // the new container and register the result with the builder.
    do {
      int count = selectionVector4.getCount();
      int copiedRecords = copier.copyRecords(0, count);
      assert copiedRecords == count;
      for (VectorWrapper<?> v : newContainer) {
        ValueVector.Mutator m = v.getValueVector().getMutator();
        m.setValueCount(count);
      }
      newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      newContainer.setRecordCount(count);
      builder.add(newBatch);
    } while (selectionVector4.next());
    selectionVector4.clear();
    c.clear();
    // Rebuild the priority queue over the compacted data.
    VectorContainer newQueue = new VectorContainer();
    builder.canonicalize();
    builder.build(context, newQueue);
    priorityQueue.resetQueue(newQueue, builder.getSv4().createNewWrapperCurrent());
    builder.getSv4().clear();
    selectionVector4.clear();
  } finally {
    DrillAutoCloseables.closeNoChecked(builder);
  }
  logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS));
}
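purge() compacts the heap: it copies only the rows the selection vector still references into fresh vectors, then rebuilds the queue over the compacted data so the old hyper batch can be freed. A self-contained sketch of that selection-driven compaction, with plain int arrays standing in for value vectors and the SV4:

public class SelectionCompaction {
  public static void main(String[] args) {
    int[] data = {40, 10, 30, 20, 50};
    int[] selection = {1, 3, 2};            // live rows, like an SV4
    int[] compacted = new int[selection.length];
    for (int i = 0; i < selection.length; i++) {
      compacted[i] = data[selection[i]];    // resolve the indirection away
    }
    // After compaction, the new "selection vector" is simply 0..n-1.
    System.out.println(java.util.Arrays.toString(compacted)); // [10, 20, 30]
  }
}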
Use of org.apache.drill.exec.record.VectorContainer in project drill by apache.
Class PriorityQueueTemplate, method resetQueue:
@Override
public void resetQueue(VectorContainer container, SelectionVector4 v4) throws SchemaChangeException {
  assert container.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.FOUR_BYTE;
  BatchSchema schema = container.getSchema();
  VectorContainer newContainer = new VectorContainer();
  for (MaterializedField field : schema) {
    int[] ids = container.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds();
    newContainer.add(container.getValueAccessorById(field.getValueClass(), ids).getValueVectors());
  }
  newContainer.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
  // Cleanup before recreating hyperbatch and sv4.
  cleanup();
  hyperBatch = new ExpandableHyperContainer(newContainer);
  batchCount = hyperBatch.iterator().next().getValueVectors().length;
  @SuppressWarnings("resource")
  final DrillBuf drillBuf = allocator.buffer(4 * (limit + 1));
  heapSv4 = new SelectionVector4(drillBuf, limit, Character.MAX_VALUE);
  // Reset queue size (most likely to be set to limit).
  queueSize = 0;
  for (int i = 0; i < v4.getTotalCount(); i++) {
    heapSv4.set(i, v4.get(i));
    ++queueSize;
  }
  v4.clear();
  doSetup(context, hyperBatch, null);
}
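resetQueue() sizes the new heap SV4 at limit + 1 entries and copies every index from the incoming vector, incrementing queueSize as it goes. A minimal sketch of that copy, with int arrays standing in for the SV4 buffers (illustrative only, not Drill APIs):

public class ResetQueueSketch {
  public static void main(String[] args) {
    int limit = 4;
    int[] incoming = {7, 3, 9};      // indices carried over from the old queue
    int[] heap = new int[limit + 1]; // room for limit + 1 entries
    int queueSize = 0;
    for (int i = 0; i < incoming.length; i++) {
      heap[i] = incoming[i];
      queueSize++;                   // grows toward limit
    }
    System.out.println("queueSize = " + queueSize); // prints 3
  }
}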