Use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.
The class HashJoinProbeTemplate, method executeProbePhase.
public void executeProbePhase() throws SchemaChangeException {
  while (outputRecords < TARGET_RECORDS_PER_BATCH && probeState != ProbeState.DONE && probeState != ProbeState.PROJECT_RIGHT) {
    // Check if we have processed all records in this batch; if so, we need to invoke next() on the upstream.
    if (recordsProcessed == recordsToProcess) {
      // Done processing all records in the previous batch, clean up!
      for (VectorWrapper<?> wrapper : probeBatch) {
        wrapper.getValueVector().clear();
      }
      IterOutcome leftUpstream = outgoingJoinBatch.next(HashJoinHelper.LEFT_INPUT, probeBatch);
      switch (leftUpstream) {
        case NONE:
        case NOT_YET:
        case STOP:
          recordsProcessed = 0;
          recordsToProcess = 0;
          probeState = ProbeState.DONE;
          // We are done with the probe phase. If it's a RIGHT or a FULL join, get the unmatched indexes from the build side.
          if (joinType == JoinRelType.RIGHT || joinType == JoinRelType.FULL) {
            probeState = ProbeState.PROJECT_RIGHT;
          }
          continue;
        case OK_NEW_SCHEMA:
          if (probeBatch.getSchema().equals(probeSchema)) {
            doSetup(outgoingJoinBatch.getContext(), buildBatch, probeBatch, outgoingJoinBatch);
            hashTable.updateBatches();
          } else {
            throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in probe side.", probeSchema, probeBatch.getSchema());
          }
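          // Fall through: the new-schema batch also carries records, so process it as OK.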
        case OK:
          recordsToProcess = probeBatch.getRecordCount();
          recordsProcessed = 0;
          // If we received an empty batch, do nothing.
          if (recordsToProcess == 0) {
            continue;
          }
      }
    }
    int probeIndex = -1;
    // Check if we need to drain the next row in the probe side.
    if (getNextRecord) {
      if (hashTable != null) {
        probeIndex = hashTable.containsKey(recordsProcessed, true);
      }
      if (probeIndex != -1) {
        /* The current probe record has a key that matches. Get the index
         * of the first row in the build side that matches the current key.
         */
        currentCompositeIdx = hjHelper.getStartIndex(probeIndex);
        /* The record in the build side at currentCompositeIdx has a matching record in the
         * probe side. Set the bit corresponding to this index so that, if we are doing a FULL
         * or RIGHT join, we keep track of which records we need to project at the end.
         */
        hjHelper.setRecordMatched(currentCompositeIdx);
        projectBuildRecord(currentCompositeIdx, outputRecords);
        projectProbeRecord(recordsProcessed, outputRecords);
        outputRecords++;
        /* We projected a single row from the build side with a matching key, but there
         * may be more rows with the same key. Check if that's the case.
         */
        currentCompositeIdx = hjHelper.getNextIndex(currentCompositeIdx);
        if (currentCompositeIdx == -1) {
          /* We only had one row in the build side that matched the current key
           * from the probe side. Drain the next row in the probe side.
           */
          recordsProcessed++;
        } else {
          /* There is more than one row with the same key on the build side;
           * don't drain more records from the probe side until we have projected
           * all the rows with this key.
           */
          getNextRecord = false;
        }
      } else {
        // If we have a left outer join, project the keys.
        if (joinType == JoinRelType.LEFT || joinType == JoinRelType.FULL) {
          projectProbeRecord(recordsProcessed, outputRecords);
          outputRecords++;
        }
        recordsProcessed++;
      }
    } else {
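      // Still projecting build-side rows that matched the previous probe-side key.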
      hjHelper.setRecordMatched(currentCompositeIdx);
      projectBuildRecord(currentCompositeIdx, outputRecords);
      projectProbeRecord(recordsProcessed, outputRecords);
      outputRecords++;
      currentCompositeIdx = hjHelper.getNextIndex(currentCompositeIdx);
      if (currentCompositeIdx == -1) {
        // We don't have any more rows matching the current key on the build side; move on to the next probe row.
        getNextRecord = true;
        recordsProcessed++;
      }
    }
  }
}
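For orientation, the probe loop above follows the general IterOutcome contract: call next() until NONE or STOP arrives, and treat OK_NEW_SCHEMA as OK plus schema setup. A minimal, self-contained sketch of that contract (the consumer class and its method are hypothetical; only RecordBatch and IterOutcome are Drill types):

import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.RecordBatch.IterOutcome;

// Hypothetical consumer showing the bare IterOutcome protocol the probe loop follows.
public class IterOutcomeSketch {
  public void drain(RecordBatch upstream) {
    while (true) {
      IterOutcome out = upstream.next();
      switch (out) {
        case OK_NEW_SCHEMA:
          // (Re)bind generated code and transfers to the new schema here,
          // then fall through: the batch also carries records.
        case OK:
          // Process upstream.getRecordCount() records of the current batch.
          break;
        case NONE:  // upstream exhausted normally
        case STOP:  // upstream failed; clean up and quit
          return;
        default:    // NOT_YET, OUT_OF_MEMORY: handling is operator-specific
          return;
      }
    }
  }
}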
Use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.
The class BaseRootExec, method next.
public final IterOutcome next(final RecordBatch b) {
  stats.stopProcessing();
  IterOutcome next;
  try {
    next = b.next();
  } finally {
    stats.startProcessing();
  }
  switch (next) {
    case OK_NEW_SCHEMA:
      stats.batchReceived(0, b.getRecordCount(), true);
      break;
    case OK:
      stats.batchReceived(0, b.getRecordCount(), false);
      break;
  }
  return next;
}
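The point of the stop/start pair is to pause this operator's processing clock while the upstream runs, so time is attributed to the operator that actually did the work. A minimal sketch of that bracketing idiom in isolation (the helper method is hypothetical; only the stopProcessing()/startProcessing()/next() calls mirror the code above):

// Hypothetical helper isolating the stats-bracketing idiom used above.
static IterOutcome callUpstream(RecordBatch upstream, OperatorStats stats) {
  stats.stopProcessing();      // attribute the next slice of time to the upstream
  try {
    return upstream.next();    // may in turn drive further upstream operators
  } finally {
    stats.startProcessing();   // resume this operator's clock even if next() throws
  }
}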
Use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.
The class PartitionSenderRootExec, method innerNext.
@Override
public boolean innerNext() {
  if (!ok) {
    return false;
  }
  IterOutcome out;
  if (!done) {
    out = next(incoming);
  } else {
    incoming.kill(true);
    out = IterOutcome.NONE;
  }
  logger.debug("Partitioner.next(): got next record batch with status {}", out);
  if (first && out == IterOutcome.OK) {
    out = IterOutcome.OK_NEW_SCHEMA;
  }
  switch (out) {
    case NONE:
      try {
        // Send any pending batches.
        if (partitioner != null) {
          partitioner.flushOutgoingBatches(true, false);
        } else {
          sendEmptyBatch(true);
        }
      } catch (IOException e) {
        incoming.kill(false);
        logger.error("Error while creating partitioning sender or flushing outgoing batches", e);
        context.fail(e);
      }
      return false;
    case OUT_OF_MEMORY:
      throw new OutOfMemoryException();
    case STOP:
      if (partitioner != null) {
        partitioner.clear();
      }
      return false;
    case OK_NEW_SCHEMA:
      try {
        // Send all existing batches.
        if (partitioner != null) {
          partitioner.flushOutgoingBatches(false, true);
          partitioner.clear();
        }
        createPartitioner();
        if (first) {
          // Send an empty batch for fast schema.
          first = false;
          sendEmptyBatch(false);
        }
      } catch (IOException e) {
        incoming.kill(false);
        logger.error("Error while flushing outgoing batches", e);
        context.fail(e);
        return false;
      } catch (SchemaChangeException e) {
        incoming.kill(false);
        logger.error("Error while setting up partitioner", e);
        context.fail(e);
        return false;
      }
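      // Fall through: the new-schema batch itself still needs to be partitioned.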
    case OK:
      try {
        partitioner.partitionBatch(incoming);
      } catch (IOException e) {
        context.fail(e);
        incoming.kill(false);
        return false;
      }
      for (VectorWrapper<?> v : incoming) {
        v.clear();
      }
      return true;
    case NOT_YET:
    default:
      throw new IllegalStateException();
  }
}
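One detail worth noting: the check near the top promotes the very first OK to OK_NEW_SCHEMA so that a partitioner is created before any rows are partitioned. A standalone sketch of just that promotion rule (the helper name is hypothetical):

// Hypothetical helper isolating the first-batch promotion rule used above.
static IterOutcome promoteFirstBatch(boolean first, IterOutcome out) {
  // The first data batch must be treated as a schema event so one-time setup runs.
  return (first && out == IterOutcome.OK) ? IterOutcome.OK_NEW_SCHEMA : out;
}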
Use of org.apache.drill.exec.record.RecordBatch.IterOutcome in project drill by apache.
The class IteratorValidatorBatchIterator, method next.
@Override
public IterOutcome next() {
  logger.trace("[#{}; on {}]: next() called.", instNum, batchTypeName);
  final IterOutcome prevBatchState = batchState;
  try {
    // Check whether next() should even have been called in the current state.
    if (null != exceptionState) {
      throw new IllegalStateException(String.format("next() [on #%d; %s] called again after it threw %s (after returning %s). Caller should not have called next() again.", instNum, batchTypeName, exceptionState, batchState));
    }
    // (Note: This could use validationState.)
    if (batchState == NONE || batchState == STOP) {
      throw new IllegalStateException(String.format("next() [on #%d, %s] called again after it returned %s. Caller should not have called next() again.", instNum, batchTypeName, batchState));
    }
    // Now get the result from the upstream next().
    batchState = incoming.next();
    logger.trace("[#{}; on {}]: incoming next() return: ({} ->) {}", instNum, batchTypeName, prevBatchState, batchState);
    // Check the state transition and update the high-level state.
    switch (batchState) {
      case OK_NEW_SCHEMA:
        // OK_NEW_SCHEMA is allowed at any time, except if terminated (checked above).
        // OK_NEW_SCHEMA moves to the have-seen-schema state.
        validationState = ValidationState.HAVE_SCHEMA;
        validateBatch();
        break;
      case OK:
        // OK is allowed only after a schema has been seen, except if terminated (checked above).
        if (validationState != ValidationState.HAVE_SCHEMA) {
          throw new IllegalStateException(String.format("next() returned %s without first returning %s [#%d, %s]", batchState, OK_NEW_SCHEMA, instNum, batchTypeName));
        }
        validateBatch();
        // OK doesn't change the high-level state.
        break;
      case NONE:
        // NONE is allowed only after a schema has been seen, except if already terminated (checked above).
        if (validationState != ValidationState.HAVE_SCHEMA) {
          throw new IllegalStateException(String.format("next() returned %s without first returning %s [#%d, %s]", batchState, OK_NEW_SCHEMA, instNum, batchTypeName));
        }
        // NONE moves to the terminal high-level state.
        validationState = ValidationState.TERMINAL;
        break;
      case STOP:
        // STOP is allowed at any time, except if already terminated (checked above).
        // STOP moves to the terminal high-level state.
        validationState = ValidationState.TERMINAL;
        break;
      case NOT_YET:
      case OUT_OF_MEMORY:
        // NOT_YET and OUT_OF_MEMORY don't change the high-level state.
        break;
      default:
        throw new AssertionError("Unhandled new " + IterOutcome.class.getSimpleName() + " value " + batchState);
    }
    // Validate the schema when it is available.
    if (batchState == OK || batchState == OK_NEW_SCHEMA) {
      final BatchSchema prevLastSchema = lastSchema;
      final BatchSchema prevLastNewSchema = lastNewSchema;
      lastSchema = incoming.getSchema();
      if (batchState == OK_NEW_SCHEMA) {
        lastNewSchema = lastSchema;
      }
      if (logger.isTraceEnabled()) {
        logger.trace("[#{}; on {}]: incoming next() return: #records = {}, \n schema:\n {}, \n prev. new ({}):\n {}", instNum, batchTypeName, incoming.getRecordCount(), lastSchema, lastSchema.equals(prevLastNewSchema) ? "equal" : "not equal", prevLastNewSchema);
      }
      if (lastSchema == null) {
        throw new IllegalStateException(String.format("Incoming batch [#%d, %s] has a null schema. This is not allowed.", instNum, batchTypeName));
      }
      if (lastSchema.getFieldCount() == 0) {
        throw new IllegalStateException(String.format("Incoming batch [#%d, %s] has an empty schema. This is not allowed.", instNum, batchTypeName));
      }
      if (incoming.getRecordCount() > MAX_BATCH_SIZE) {
        throw new IllegalStateException(String.format("Incoming batch [#%d, %s] has size %d, which is beyond the limit of %d", instNum, batchTypeName, incoming.getRecordCount(), MAX_BATCH_SIZE));
      }
      if (VALIDATE_VECTORS) {
        VectorValidator.validate(incoming);
      }
    }
    return batchState;
  } catch (RuntimeException | Error e) {
    exceptionState = e;
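    // exceptionState appears twice below: once to fill the trailing {} placeholder, and once
    // as the final argument, which SLF4J treats as a Throwable and logs with its stack trace.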
logger.trace("[#{}, on {}]: incoming next() exception: ({} ->) {}", instNum, batchTypeName, prevBatchState, exceptionState, exceptionState);
throw e;
}
}
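Taken together, the validator enforces the IterOutcome protocol at runtime. A hypothetical wiring sketch (in Drill this wrapper is normally inserted by the framework when iterator validation is enabled, so the direct construction below is illustrative only and assumes a constructor taking the upstream batch):

// Illustrative only: wrap an upstream batch so every next() transition is checked.
RecordBatch validated = new IteratorValidatorBatchIterator(upstream);  // assumed constructor
IterOutcome out = validated.next();  // throws IllegalStateException on an illegal transition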