Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.
The class PriorityQueueTemplate, method add.
@SuppressWarnings("resource")
@Override
public void add(FragmentContext context, RecordBatchData batch) throws SchemaChangeException {
  Stopwatch watch = Stopwatch.createStarted();
  if (hyperBatch == null) {
    hyperBatch = new ExpandableHyperContainer(batch.getContainer());
  } else {
    hyperBatch.addBatch(batch.getContainer());
  }
  // may not need to do this every time
  doSetup(context, hyperBatch, null);
  int count = 0;
  SelectionVector2 sv2 = null;
  if (hasSv2) {
    sv2 = batch.getSv2();
  }
  // Fill the heap until it holds 'limit' entries.
  for (; queueSize < limit && count < batch.getRecordCount(); count++) {
    heapSv4.set(queueSize, batchCount, hasSv2 ? sv2.getIndex(count) : count);
    queueSize++;
    siftUp();
  }
  // Heap is full: stage each remaining record in the spare slot at 'limit';
  // if it sorts ahead of the current root, swap it in and restore heap order.
  for (; count < batch.getRecordCount(); count++) {
    heapSv4.set(limit, batchCount, hasSv2 ? sv2.getIndex(count) : count);
    if (compare(limit, 0) < 0) {
      swap(limit, 0);
      siftDown();
    }
  }
  batchCount++;
  if (hasSv2) {
    sv2.clear();
  }
  logger.debug("Took {} us to add {} records", watch.elapsed(TimeUnit.MICROSECONDS), count);
}
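The method above maintains a bounded heap over (batch, record) coordinates instead of copied rows: the first loop fills the heap up to the limit, and the second stages each remaining record in a spare slot, swapping it with the root only when it sorts ahead of the worst row currently kept. The standalone sketch below reproduces that control flow with a plain long[] heap and a Comparator; BoundedTopN, its packing scheme, and the comparator are illustrative stand-ins for Drill's SelectionVector4 and generated compare(), not Drill API.

import java.util.Comparator;

/** Minimal sketch of the fill-then-replace bounded-heap pattern used by add(). */
public class BoundedTopN {
  private final int limit;
  private final long[] heap; // packed (batch, record) coordinates; illustrative encoding
  private final Comparator<Long> comparator; // stand-in for the generated compare()
  private int size;

  public BoundedTopN(int limit, Comparator<Long> comparator) {
    this.limit = limit;
    this.heap = new long[limit + 1]; // one spare staging slot, like heapSv4's limit + 1 entries
    this.comparator = comparator;
  }

  /** Mirrors add(): phase 1 fills the heap, phase 2 replaces the root when beaten. */
  public void add(int batchIndex, int recordCount) {
    int record = 0;
    for (; size < limit && record < recordCount; record++) {
      heap[size] = pack(batchIndex, record);
      siftUp(size++);
    }
    for (; record < recordCount; record++) {
      heap[limit] = pack(batchIndex, record); // stage in the spare slot
      if (comparator.compare(heap[limit], heap[0]) < 0) {
        swap(limit, 0); // new row beats the worst kept row: replace the root
        siftDown(0);
      }
    }
  }

  private static long pack(int batch, int record) {
    return ((long) batch << 32) | record;
  }

  private void siftUp(int i) {
    // Max-heap: the worst kept row bubbles to the root.
    while (i > 0 && comparator.compare(heap[i], heap[(i - 1) / 2]) > 0) {
      swap(i, (i - 1) / 2);
      i = (i - 1) / 2;
    }
  }

  private void siftDown(int i) {
    while (true) {
      int left = 2 * i + 1, right = left + 1, largest = i;
      if (left < size && comparator.compare(heap[left], heap[largest]) > 0) {
        largest = left;
      }
      if (right < size && comparator.compare(heap[right], heap[largest]) > 0) {
        largest = right;
      }
      if (largest == i) {
        return;
      }
      swap(i, largest);
      i = largest;
    }
  }

  private void swap(int a, int b) {
    long tmp = heap[a];
    heap[a] = heap[b];
    heap[b] = tmp;
  }
}

Drill's version keeps the packed coordinates in an off-heap buffer and delegates the comparison to generated code, but the fill-then-replace structure is the same.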
Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.
The class MergingRecordBatch, method createMerger.
// private boolean isOutgoingFull() {
//   return outgoingPosition == DEFAULT_ALLOC_RECORD_COUNT;
// }

/**
 * Creates a generated class which implements the copy and compare methods.
 *
 * @return instance of a new merger based on generated code
 * @throws SchemaChangeException
 */
private MergingReceiverGeneratorBase createMerger() throws SchemaChangeException {
  try {
    final CodeGenerator<MergingReceiverGeneratorBase> cg = CodeGenerator.get(MergingReceiverGeneratorBase.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    cg.plainJavaCapable(true);
    // Uncomment this line to debug the generated code.
    // cg.saveCodeForDebugging(true);
    final ClassGenerator<MergingReceiverGeneratorBase> g = cg.getRoot();
    ExpandableHyperContainer batch = null;
    boolean first = true;
    for (final RecordBatchLoader loader : batchLoaders) {
      if (first) {
        batch = new ExpandableHyperContainer(loader);
        first = false;
      } else {
        batch.addBatch(loader);
      }
    }
    generateComparisons(g, batch);
    g.setMappingSet(COPIER_MAPPING_SET);
    CopyUtil.generateCopies(g, batch, true);
    g.setMappingSet(MAIN_MAPPING);
    final MergingReceiverGeneratorBase merger = context.getImplementationClass(cg);
    merger.doSetup(context, batch, outgoingContainer);
    return merger;
  } catch (ClassTransformationException | IOException e) {
    throw new SchemaChangeException(e);
  }
}
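createMerger folds every incoming RecordBatchLoader into one ExpandableHyperContainer so the generated comparator and copier can address any row in any batch through a single object. Conceptually, a hyper container keeps one stack of value vectors per column, gaining one vector per added batch; the sketch below models that with plain maps and strings (HyperContainerSketch and its column and vector names are illustrative, not Drill classes).

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/** Conceptual model of a hyper container: per column, one vector per added batch. */
class HyperContainerSketch {
  // column name -> per-batch vectors, modeled here as plain labels
  private final Map<String, List<String>> columns = new LinkedHashMap<>();

  /** The first batch defines the columns; every later batch appends one vector per column. */
  void addBatch(Map<String, String> batch) {
    batch.forEach((column, vector) ->
        columns.computeIfAbsent(column, k -> new ArrayList<>()).add(vector));
  }

  /** A (batchIndex, recordIndex) pair addresses one row anywhere in the hyper batch. */
  String vectorFor(String column, int batchIndex) {
    return columns.get(column).get(batchIndex);
  }

  public static void main(String[] args) {
    HyperContainerSketch hyper = new HyperContainerSketch();
    hyper.addBatch(Map.of("key", "vector-A0", "name", "vector-A1"));
    hyper.addBatch(Map.of("key", "vector-B0", "name", "vector-B1"));
    System.out.println(hyper.vectorFor("key", 1)); // prints vector-B0
  }
}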
Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.
The class TopNBatch, method innerNext.
@Override
public IterOutcome innerNext() {
  recordCount = 0;
  if (state == BatchState.DONE) {
    return IterOutcome.NONE;
  }
  if (schema != null) {
    if (getSelectionVector4().next()) {
      recordCount = sv4.getCount();
      return IterOutcome.OK;
    } else {
      recordCount = 0;
      return IterOutcome.NONE;
    }
  }
  try {
    outer: while (true) {
      Stopwatch watch = Stopwatch.createStarted();
      IterOutcome upstream;
      if (first) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
        first = false;
      } else {
        upstream = next(incoming);
      }
      if (upstream == IterOutcome.OK && schema == null) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
        container.clear();
      }
      logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
      switch (upstream) {
        case NONE:
          break outer;
        case NOT_YET:
          throw new UnsupportedOperationException();
        case OUT_OF_MEMORY:
        case STOP:
          return upstream;
        case OK_NEW_SCHEMA:
          // Only change in the case that the schema truly changes. Artificial schema changes are ignored.
          if (!incoming.getSchema().equals(schema)) {
            if (schema != null) {
              if (!unionTypeEnabled) {
                throw new UnsupportedOperationException("Sort doesn't currently support sorts with changing schemas.");
              } else {
                this.schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                purgeAndResetPriorityQueue();
                this.schemaChanged = true;
              }
            } else {
              this.schema = incoming.getSchema();
            }
          }
          // fall through.
        case OK:
          if (incoming.getRecordCount() == 0) {
            for (VectorWrapper<?> w : incoming) {
              w.clear();
            }
            break;
          }
          countSincePurge += incoming.getRecordCount();
          batchCount++;
          RecordBatchData batch;
          if (schemaChanged) {
            batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
          } else {
            batch = new RecordBatchData(incoming, oContext.getAllocator());
          }
          boolean success = false;
          try {
            batch.canonicalize();
            if (priorityQueue == null) {
              assert !schemaChanged;
              priorityQueue = createNewPriorityQueue(context, config.getOrderings(), new ExpandableHyperContainer(batch.getContainer()), MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
            }
            priorityQueue.add(context, batch);
            if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
              purge();
              countSincePurge = 0;
              batchCount = 0;
            }
            success = true;
          } finally {
            if (!success) {
              batch.clear();
            }
          }
          break;
        default:
          throw new UnsupportedOperationException();
      }
    }
    if (schema == null || priorityQueue == null) {
      // priorityQueue may be null at this point if the first incoming batch is empty
      state = BatchState.DONE;
      return IterOutcome.NONE;
    }
    priorityQueue.generate();
    this.sv4 = priorityQueue.getFinalSv4();
    container.clear();
    for (VectorWrapper<?> w : priorityQueue.getHyperBatch()) {
      container.add(w.getValueVectors());
    }
    container.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    recordCount = sv4.getCount();
    return IterOutcome.OK_NEW_SCHEMA;
  } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
    kill(false);
    logger.error("Failure during query", ex);
    context.fail(ex);
    return IterOutcome.STOP;
  }
}
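Note the success-flag idiom around priorityQueue.add: the batch's buffers are released only if something throws before ownership transfers to the queue's hyper container; on success the container is responsible for them, so clearing on the normal path would double-free. try-with-resources cannot express "release only on failure", hence the explicit flag. A minimal sketch of the idiom, with Resource as a hypothetical stand-in for RecordBatchData:

/** Hypothetical stand-in for RecordBatchData: owns direct buffers until transferred. */
interface Resource {
  void clear(); // release the underlying buffers
}

final class OwnershipTransfer {
  /** Run 'transfer'; if it throws before ownership moves, release the buffers ourselves. */
  static void consume(Resource batch, Runnable transfer) {
    boolean success = false;
    try {
      transfer.run(); // e.g. priorityQueue.add(...) or hyperContainer.addBatch(...)
      success = true;
    } finally {
      // Failure path only: on success the receiving container owns the
      // buffers, and clearing them here would release memory held elsewhere.
      if (!success) {
        batch.clear();
      }
    }
  }
}

The same idiom reappears in HashJoinBatch.executeBuildPhase below.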
Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.
The class PriorityQueueTemplate, method resetQueue.
@Override
public void resetQueue(VectorContainer container, SelectionVector4 v4) throws SchemaChangeException {
  assert container.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.FOUR_BYTE;
  BatchSchema schema = container.getSchema();
  VectorContainer newContainer = new VectorContainer();
  for (MaterializedField field : schema) {
    int[] ids = container.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds();
    newContainer.add(container.getValueAccessorById(field.getValueClass(), ids).getValueVectors());
  }
  newContainer.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
  // Cleanup before recreating the hyper batch and sv4.
  cleanup();
  hyperBatch = new ExpandableHyperContainer(newContainer);
  batchCount = hyperBatch.iterator().next().getValueVectors().length;
  @SuppressWarnings("resource")
  final DrillBuf drillBuf = allocator.buffer(4 * (limit + 1));
  heapSv4 = new SelectionVector4(drillBuf, limit, Character.MAX_VALUE);
  // Reset queue size (most likely to be set to limit).
  queueSize = 0;
  for (int i = 0; i < v4.getTotalCount(); i++) {
    heapSv4.set(i, v4.get(i));
    ++queueSize;
  }
  v4.clear();
  doSetup(context, hyperBatch, null);
}
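resetQueue sizes the new buffer at 4 * (limit + 1) bytes because each SelectionVector4 entry is a single 4-byte value addressing one row of the hyper batch, and the extra entry is the staging slot used by add() above. Assuming a packing of batch index in the high 16 bits and record index in the low 16 bits, which is consistent with the Character.MAX_VALUE record limit passed to the constructor here, the encoding would look like:

/** Sketch of one 4-byte selection entry: batch index in the high 16 bits, record
 *  index in the low 16 bits. The split is an assumption, but it is consistent with
 *  the Character.MAX_VALUE (65535) record limit passed to SelectionVector4 above. */
final class Sv4Entry {
  static int pack(int batchIndex, int recordIndex) {
    return (batchIndex << 16) | (recordIndex & 0xFFFF);
  }

  static int batchIndex(int entry) {
    return entry >>> 16;
  }

  static int recordIndex(int entry) {
    return entry & 0xFFFF;
  }

  public static void main(String[] args) {
    int entry = pack(3, 42);
    // One int addresses row 42 of batch 3 in the hyper batch.
    System.out.println(batchIndex(entry) + ":" + recordIndex(entry)); // prints 3:42
  }
}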
Use of org.apache.drill.exec.record.ExpandableHyperContainer in project drill by apache.
The class HashJoinBatch, method executeBuildPhase.
public void executeBuildPhase() throws SchemaChangeException, ClassTransformationException, IOException {
  // Skip the first batch if its count is zero, as it may be an empty schema batch.
  if (right.getRecordCount() == 0) {
    for (final VectorWrapper<?> w : right) {
      w.clear();
    }
    rightUpstream = next(right);
  }
  boolean moreData = true;
  while (moreData) {
    switch (rightUpstream) {
      case OUT_OF_MEMORY:
      case NONE:
      case NOT_YET:
      case STOP:
        moreData = false;
        continue;
      case OK_NEW_SCHEMA:
        if (rightSchema == null) {
          rightSchema = right.getSchema();
          if (rightSchema.getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
            final String errorMsg = new StringBuilder().append("Hash join does not support build batch with selection vectors. ").append("Build batch has selection mode = ").append(rightSchema.getSelectionVectorMode()).toString();
            throw new SchemaChangeException(errorMsg);
          }
          setupHashTable();
        } else {
          if (!rightSchema.equals(right.getSchema())) {
            throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in build side.", rightSchema, right.getSchema());
          }
          hashTable.updateBatches();
        }
        // Fall through
      case OK:
        final int currentRecordCount = right.getRecordCount();
        /* For every new build batch, we store some state in the helper context.
         * Add new state to the helper context.
         */
        hjHelper.addNewBatch(currentRecordCount);
        // Holder contains the global index where the key is hashed into using the hash table.
        final IndexPointer htIndex = new IndexPointer();
        // For every record in the build batch, hash the key columns.
        for (int i = 0; i < currentRecordCount; i++) {
          hashTable.put(i, htIndex, 1);
          /* Use the global index returned by the hash table to store
           * the current record index and batch index. This will be used
           * later when we probe and find a match.
           */
          hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i);
        }
        /* Completed hashing all records in this batch. Transfer the batch
         * to the hyper vector container. It will be used when we want to retrieve
         * records that have matching keys on the probe side.
         */
        final RecordBatchData nextBatch = new RecordBatchData(right, oContext.getAllocator());
        boolean success = false;
        try {
          if (hyperContainer == null) {
            hyperContainer = new ExpandableHyperContainer(nextBatch.getContainer());
          } else {
            hyperContainer.addBatch(nextBatch.getContainer());
          }
          // Completed processing a batch; increment the batch index.
          buildBatchIndex++;
          success = true;
        } finally {
          if (!success) {
            nextBatch.clear();
          }
        }
        break;
    }
    // Get the next record batch.
    rightUpstream = next(HashJoinHelper.RIGHT_INPUT, right);
  }
}
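Stripped of the vector plumbing, the build phase is: for each build row, compute a hash-table slot, then remember which (batchIndex, recordIndex) produced it so the probe phase can fetch matching rows from the hyper container. The sketch below shows that bookkeeping with a plain HashMap standing in for Drill's generated hash table and HashJoinHelper; none of these names are Drill API.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/** Sketch of hash-join build-side bookkeeping; not Drill's implementation. */
class BuildSideSketch {
  /** One build row's address in the hyper container. */
  record RowAddress(int batchIndex, int recordIndex) {}

  // key -> every build row that carried it (a join must keep duplicates)
  private final Map<Object, List<RowAddress>> table = new HashMap<>();
  private int buildBatchIndex = 0;

  /** Hash every row of one build batch, then advance the batch index. */
  void addBatch(List<Object> keys) {
    for (int i = 0; i < keys.size(); i++) {
      table.computeIfAbsent(keys.get(i), k -> new ArrayList<>())
          .add(new RowAddress(buildBatchIndex, i));
    }
    buildBatchIndex++;
  }

  /** Probe side: fetch the addresses of all build rows matching this key. */
  List<RowAddress> probe(Object key) {
    return table.getOrDefault(key, List.of());
  }
}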