Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache: the class TopNBatch, method innerNext().
@Override
public IterOutcome innerNext() {
  recordCount = 0;
  if (state == BatchState.DONE) {
    return IterOutcome.NONE;
  }
  if (schema != null) {
    if (getSelectionVector4().next()) {
      recordCount = sv4.getCount();
      return IterOutcome.OK;
    } else {
      recordCount = 0;
      return IterOutcome.NONE;
    }
  }
  try {
    outer: while (true) {
      Stopwatch watch = Stopwatch.createStarted();
      IterOutcome upstream;
      if (first) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
        first = false;
      } else {
        upstream = next(incoming);
      }
      if (upstream == IterOutcome.OK && schema == null) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
        container.clear();
      }
      logger.debug("Took {} us to get next", watch.elapsed(TimeUnit.MICROSECONDS));
      switch (upstream) {
        case NONE:
          break outer;
        case NOT_YET:
          throw new UnsupportedOperationException();
        case OUT_OF_MEMORY:
        case STOP:
          return upstream;
        case OK_NEW_SCHEMA:
          // Only change in the case that the schema truly changes. Artificial schema changes are ignored.
          if (!incoming.getSchema().equals(schema)) {
            if (schema != null) {
              if (!unionTypeEnabled) {
                throw new UnsupportedOperationException("Sort doesn't currently support sorts with changing schemas.");
              } else {
                this.schema = SchemaUtil.mergeSchemas(this.schema, incoming.getSchema());
                purgeAndResetPriorityQueue();
                this.schemaChanged = true;
              }
            } else {
              this.schema = incoming.getSchema();
            }
          }
          // fall through.
        case OK:
          if (incoming.getRecordCount() == 0) {
            for (VectorWrapper<?> w : incoming) {
              w.clear();
            }
            break;
          }
          countSincePurge += incoming.getRecordCount();
          batchCount++;
          RecordBatchData batch;
          if (schemaChanged) {
            batch = new RecordBatchData(SchemaUtil.coerceContainer(incoming, this.schema, oContext), oContext.getAllocator());
          } else {
            batch = new RecordBatchData(incoming, oContext.getAllocator());
          }
          boolean success = false;
          try {
            batch.canonicalize();
            if (priorityQueue == null) {
              assert !schemaChanged;
              priorityQueue = createNewPriorityQueue(context, config.getOrderings(), new ExpandableHyperContainer(batch.getContainer()), MAIN_MAPPING, LEFT_MAPPING, RIGHT_MAPPING);
            }
            priorityQueue.add(context, batch);
            if (countSincePurge > config.getLimit() && batchCount > batchPurgeThreshold) {
              purge();
              countSincePurge = 0;
              batchCount = 0;
            }
            success = true;
          } finally {
            if (!success) {
              batch.clear();
            }
          }
          break;
        default:
          throw new UnsupportedOperationException();
      }
    }
    if (schema == null || priorityQueue == null) {
      // priorityQueue may be null at this point if the first incoming batch is empty
      state = BatchState.DONE;
      return IterOutcome.NONE;
    }
    priorityQueue.generate();
    this.sv4 = priorityQueue.getFinalSv4();
    container.clear();
    for (VectorWrapper<?> w : priorityQueue.getHyperBatch()) {
      container.add(w.getValueVectors());
    }
    container.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    recordCount = sv4.getCount();
    return IterOutcome.OK_NEW_SCHEMA;
  } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
    kill(false);
    logger.error("Failure during query", ex);
    context.fail(ex);
    return IterOutcome.STOP;
  }
}
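The loop above accumulates incoming batches in a priority queue and only purges it back down to the configured limit once enough rows and batches have arrived (countSincePurge > limit and batchCount > batchPurgeThreshold). A minimal, self-contained sketch of that accumulate-then-purge pattern using plain JDK collections (the class and member names below are illustrative, not Drill APIs):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

// Illustrative sketch only: keeps the N smallest values and purges the backing
// heap only occasionally, mirroring TopNBatch's countSincePurge / batchPurgeThreshold
// bookkeeping. Plain JDK collections, not Drill's generated PriorityQueue.
class TopNSketch {
  private final int limit;
  private final int purgeThreshold;
  // max-heap, so the largest retained value is cheap to evict
  private final PriorityQueue<Integer> heap = new PriorityQueue<>(Comparator.reverseOrder());
  private int sincePurge;

  TopNSketch(int limit, int purgeThreshold) {
    this.limit = limit;
    this.purgeThreshold = purgeThreshold;
  }

  void addBatch(List<Integer> batch) {
    heap.addAll(batch);
    sincePurge += batch.size();
    // Purge only after enough rows have accumulated, to amortize the cost.
    if (sincePurge > limit + purgeThreshold) {
      purge();
      sincePurge = 0;
    }
  }

  private void purge() {
    while (heap.size() > limit) {
      heap.poll(); // drop the current largest value
    }
  }

  List<Integer> finish() {
    purge();
    List<Integer> result = new ArrayList<>(heap);
    result.sort(null);
    return result;
  }
}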
Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache: the class FlattenTemplate, method flattenRecords().
@Override
public final int flattenRecords(final int recordCount, final int firstOutputIndex, final Flattener.Monitor monitor) {
  switch (svMode) {
    case FOUR_BYTE:
      throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
    case TWO_BYTE:
      throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
    case NONE:
      if (innerValueIndex == -1) {
        innerValueIndex = 0;
      }
      final int initialInnerValueIndex = currentInnerValueIndex;
      // restore state to local stack
      int valueIndexLocal = valueIndex;
      int innerValueIndexLocal = innerValueIndex;
      int currentInnerValueIndexLocal = currentInnerValueIndex;
      outer: {
        int outputIndex = firstOutputIndex;
        int recordsThisCall = 0;
        final int valueCount = accessor.getValueCount();
        for (; valueIndexLocal < valueCount; valueIndexLocal++) {
          final int innerValueCount = accessor.getInnerValueCountAt(valueIndexLocal);
          for (; innerValueIndexLocal < innerValueCount; innerValueIndexLocal++) {
            // If we've hit the batch size limit, stop and flush what we've got so far.
            if (recordsThisCall == outputLimit) {
              if (bigRecords) {
                /*
                 * We got to the limit we used before, but did we go over
                 * the bigRecordsBufferSize in the second half of the batch? If
                 * so, we'll need to adjust the batch limits.
                 */
                adjustBatchLimits(1, monitor, recordsThisCall);
              }
              // Flush this batch.
              break outer;
            }
            /*
             * At the moment, the output record includes the input record, so for very
             * large records that we're flattening, we're carrying forward the original
             * record as well as the flattened element. We've seen a case where flattening a 4MB
             * record with a 20,000 element array caused memory usage to explode. To avoid
             * that until we can push down the selected fields to operators like this, we
             * also limit the amount of memory in use at one time.
             *
             * We have to have written at least one record to be able to get a buffer that will
             * have a real allocator, so we have to do this lazily. We won't check the limit
             * for the first two records, but that keeps this simple.
             */
            if (bigRecords) {
              /*
               * If we're halfway through the outputLimit, check on our memory
               * usage so far.
               */
              if (recordsThisCall == outputLimit / 2) {
                /*
                 * If we've used more than half the space we've used for big records
                 * in the past, we've seen even bigger records than before, so stop and
                 * see if we need to flush here before we go over bigRecordsBufferSize
                 * memory usage, and reduce the outputLimit further before we continue
                 * with the next batch.
                 */
                if (adjustBatchLimits(2, monitor, recordsThisCall)) {
                  break outer;
                }
              }
            } else {
              if (outputAllocator.getAllocatedMemory() > OUTPUT_MEMORY_LIMIT) {
                /*
                 * We're dealing with big records. Reduce the outputLimit to
                 * the current record count, and take note of how much space the
                 * vectors report using for that. We'll use those numbers as limits
                 * going forward in order to avoid allocating more memory.
                 */
                bigRecords = true;
                outputLimit = Math.min(recordsThisCall, outputLimit);
                if (outputLimit < 1) {
                  throw new IllegalStateException("flatten outputLimit (" + outputLimit + ") won't make progress");
                }
                /*
                 * This will differ from what the allocator reports because of
                 * overhead. But the allocator check is much cheaper to do, so we
                 * only compute this at selected times.
                 */
                bigRecordsBufferSize = monitor.getBufferSizeFor(recordsThisCall);
                // Stop and flush.
                break outer;
              }
            }
            try {
              doEval(valueIndexLocal, outputIndex);
            } catch (OversizedAllocationException ex) {
              // Unable to flatten due to a soft buffer overflow: split the batch here and resume execution.
              logger.debug("Reached allocation limit. Splitting the batch at input index: {} - inner index: {} - current completed index: {}", valueIndexLocal, innerValueIndexLocal, currentInnerValueIndexLocal);
              /*
               * TODO
               * We can't further reduce the output limits here because it won't have
               * any effect. The vectors have already gotten large, and there's currently
               * no way to reduce their size. Ideally, we could reduce the outputLimit,
               * and reduce the size of the currently used vectors.
               */
              break outer;
            } catch (SchemaChangeException e) {
              throw new UnsupportedOperationException(e);
            }
            outputIndex++;
            currentInnerValueIndexLocal++;
            ++recordsThisCall;
          }
          innerValueIndexLocal = 0;
        }
      }
      // save state to heap
      valueIndex = valueIndexLocal;
      innerValueIndex = innerValueIndexLocal;
      currentInnerValueIndex = currentInnerValueIndexLocal;
      // transfer the computed range
      final int delta = currentInnerValueIndexLocal - initialInnerValueIndex;
      for (TransferPair t : transfers) {
        t.splitAndTransfer(initialInnerValueIndex, delta);
      }
      return delta;
    default:
      throw new UnsupportedOperationException();
  }
}
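flattenRecords saves its position (valueIndex, innerValueIndex) back to fields so the next call can resume exactly where the last flush stopped. A minimal sketch of that resumable, output-limited flattening pattern, using plain lists in place of Drill's repeated value vectors (all names below are illustrative and the memory/allocator checks are omitted):

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only: flattens a list-of-lists in bounded chunks and
// remembers where it stopped, like flattenRecords' saved valueIndex /
// innerValueIndex state.
class FlattenSketch {
  private final List<List<String>> input;
  private final int outputLimit;
  private int valueIndex;      // which outer record we are on
  private int innerValueIndex; // which element inside that record

  FlattenSketch(List<List<String>> input, int outputLimit) {
    this.input = input;
    this.outputLimit = outputLimit;
  }

  /** Returns up to outputLimit flattened values; an empty result means we are done. */
  List<String> nextChunk() {
    List<String> out = new ArrayList<>();
    for (; valueIndex < input.size(); valueIndex++) {
      List<String> inner = input.get(valueIndex);
      for (; innerValueIndex < inner.size(); innerValueIndex++) {
        if (out.size() == outputLimit) {
          return out; // stop and "flush"; the fields keep our position for the next call
        }
        out.add(inner.get(innerValueIndex));
      }
      innerValueIndex = 0; // finished this record, start the next one at element 0
    }
    return out;
  }
}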
Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache: the class HashJoinBatch, method setupHashTable().
public void setupHashTable() throws IOException, SchemaChangeException, ClassTransformationException {
  // Setup the hash table configuration object
  int conditionsSize = conditions.size();
  final List<NamedExpression> rightExpr = new ArrayList<>(conditionsSize);
  List<NamedExpression> leftExpr = new ArrayList<>(conditionsSize);
  // Create named expressions from the conditions
  for (int i = 0; i < conditionsSize; i++) {
    rightExpr.add(new NamedExpression(conditions.get(i).getRight(), new FieldReference("build_side_" + i)));
    leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
  }
  // Set the left named expression to be null if the probe batch is empty.
  if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
    leftExpr = null;
  } else {
    if (left.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
      final String errorMsg = new StringBuilder()
          .append("Hash join does not support probe batch with selection vectors. ")
          .append("Probe batch has selection mode = ")
          .append(left.getSchema().getSelectionVectorMode())
          .toString();
      throw new SchemaChangeException(errorMsg);
    }
  }
  final HashTableConfig htConfig = new HashTableConfig((int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE), HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators);
  // Create the chained hash table
  final ChainedHashTable ht = new ChainedHashTable(htConfig, context, oContext.getAllocator(), this.right, this.left, null);
  hashTable = ht.createAndSetupHashTable(null);
}
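setupHashTable turns each join condition into a pair of positional key expressions (build_side_i and probe_side_i), so the i-th build key and the i-th probe key are hashed and compared against each other. A stripped-down sketch of that pairing, using strings in place of Drill's NamedExpression and FieldReference (all names below are illustrative):

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only: pairs build-side and probe-side key expressions by
// position, the same build_side_i / probe_side_i indexing setupHashTable uses.
class KeyPairingSketch {
  static void pairKeys(List<String[]> conditions) { // each condition is { leftExpr, rightExpr }
    List<String> probeKeys = new ArrayList<>(conditions.size());
    List<String> buildKeys = new ArrayList<>(conditions.size());
    for (int i = 0; i < conditions.size(); i++) {
      buildKeys.add(conditions.get(i)[1] + " AS build_side_" + i);
      probeKeys.add(conditions.get(i)[0] + " AS probe_side_" + i);
    }
    // Keys at the same index must hash the same way on both sides
    // for the join to find matches.
    System.out.println("build: " + buildKeys);
    System.out.println("probe: " + probeKeys);
  }
}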
Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache: the class HashJoinBatch, method executeBuildPhase().
public void executeBuildPhase() throws SchemaChangeException, ClassTransformationException, IOException {
  // skip first batch if count is zero, as it may be an empty schema batch
  if (right.getRecordCount() == 0) {
    for (final VectorWrapper<?> w : right) {
      w.clear();
    }
    rightUpstream = next(right);
  }
  boolean moreData = true;
  while (moreData) {
    switch (rightUpstream) {
      case OUT_OF_MEMORY:
      case NONE:
      case NOT_YET:
      case STOP:
        moreData = false;
        continue;
      case OK_NEW_SCHEMA:
        if (rightSchema == null) {
          rightSchema = right.getSchema();
          if (rightSchema.getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
            final String errorMsg = new StringBuilder()
                .append("Hash join does not support build batch with selection vectors. ")
                .append("Build batch has selection mode = ")
                .append(rightSchema.getSelectionVectorMode())
                .toString();
            throw new SchemaChangeException(errorMsg);
          }
          setupHashTable();
        } else {
          if (!rightSchema.equals(right.getSchema())) {
            throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in build side.", rightSchema, right.getSchema());
          }
          hashTable.updateBatches();
        }
        // Fall through
      case OK:
        final int currentRecordCount = right.getRecordCount();
        /* For every new build batch, we store some state in the helper context.
         * Add new state to the helper context.
         */
        hjHelper.addNewBatch(currentRecordCount);
        // Holder contains the global index where the key is hashed into using the hash table
        final IndexPointer htIndex = new IndexPointer();
        // For every record in the build batch, hash the key columns
        for (int i = 0; i < currentRecordCount; i++) {
          hashTable.put(i, htIndex, 1);
          /* Use the global index returned by the hash table, to store
           * the current record index and batch index. This will be used
           * later when we probe and find a match.
           */
          hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i);
        }
        /* Completed hashing all records in this batch. Transfer the batch
         * to the hyper vector container. Will be used when we want to retrieve
         * records that have matching keys on the probe side.
         */
        final RecordBatchData nextBatch = new RecordBatchData(right, oContext.getAllocator());
        boolean success = false;
        try {
          if (hyperContainer == null) {
            hyperContainer = new ExpandableHyperContainer(nextBatch.getContainer());
          } else {
            hyperContainer.addBatch(nextBatch.getContainer());
          }
          // completed processing a batch, increment batch index
          buildBatchIndex++;
          success = true;
        } finally {
          if (!success) {
            nextBatch.clear();
          }
        }
        break;
    }
    // Get the next record batch
    rightUpstream = next(HashJoinHelper.RIGHT_INPUT, right);
  }
}
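The build phase hashes every build-side row and records where it lives (batch index, record index) under the slot returned by the hash table, which is what hjHelper.setCurrentIndex(htIndex.value, buildBatchIndex, i) stores for later probing. A minimal sketch of that bookkeeping with a HashMap standing in for Drill's ChainedHashTable (names below are illustrative):

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative sketch only: remembers the (batchIndex, recordIndex) position of
// every build-side key so a later probe pass can find all matching rows.
class BuildPhaseSketch {
  // key -> list of [batchIndex, recordIndex] positions
  private final Map<String, List<int[]>> table = new HashMap<>();

  void addBuildBatch(int batchIndex, List<String> keys) {
    for (int recordIndex = 0; recordIndex < keys.size(); recordIndex++) {
      table.computeIfAbsent(keys.get(recordIndex), k -> new ArrayList<>())
          .add(new int[] { batchIndex, recordIndex });
    }
  }

  /** Probe lookup: every build-side position whose key matches. */
  List<int[]> probe(String key) {
    return table.getOrDefault(key, Collections.emptyList());
  }
}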
Use of org.apache.drill.exec.exception.SchemaChangeException in project drill by apache: the class HashJoinBatch, method buildSchema().
@Override
protected void buildSchema() throws SchemaChangeException {
  leftUpstream = next(left);
  rightUpstream = next(right);
  if (leftUpstream == IterOutcome.STOP || rightUpstream == IterOutcome.STOP) {
    state = BatchState.STOP;
    return;
  }
  if (leftUpstream == IterOutcome.OUT_OF_MEMORY || rightUpstream == IterOutcome.OUT_OF_MEMORY) {
    state = BatchState.OUT_OF_MEMORY;
    return;
  }
  // Initialize the hash join helper context
  hjHelper = new HashJoinHelper(context, oContext.getAllocator());
  try {
    rightSchema = right.getSchema();
    final VectorContainer vectors = new VectorContainer(oContext);
    for (final VectorWrapper<?> w : right) {
      vectors.addOrGet(w.getField());
    }
    vectors.buildSchema(SelectionVectorMode.NONE);
    vectors.setRecordCount(0);
    hyperContainer = new ExpandableHyperContainer(vectors);
    hjHelper.addNewBatch(0);
    buildBatchIndex++;
    setupHashTable();
    hashJoinProbe = setupHashJoinProbe();
    // Build the container schema and set the counts
    for (final VectorWrapper<?> w : container) {
      w.getValueVector().allocateNew();
    }
    container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    container.setRecordCount(outputRecords);
  } catch (IOException | ClassTransformationException e) {
    throw new SchemaChangeException(e);
  }
}
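buildSchema funnels every setup failure into a single SchemaChangeException, so callers only have to handle one schema-related failure type. A minimal sketch of that wrap-and-rethrow pattern, assuming drill-java-exec is on the classpath; doSetup() is a hypothetical stand-in for setupHashTable() and setupHashJoinProbe():

import java.io.IOException;
import org.apache.drill.exec.exception.SchemaChangeException;

// Illustrative sketch only: wrap lower-level setup failures in SchemaChangeException,
// as buildSchema does for IOException and ClassTransformationException.
class SchemaSetupSketch {
  void buildSchemaSketch() throws SchemaChangeException {
    try {
      doSetup(); // hypothetical: code generation / hash table construction
    } catch (IOException e) {
      throw new SchemaChangeException(e);
    }
  }

  private void doSetup() throws IOException {
    // setup work that may fail goes here
  }
}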