// Example usage of org.apache.drill.exec.record.SchemaBuilder in the Apache Drill project:
// the MergingRecordBatch#innerNext() method.
/**
 * Merge step of the merging receiver: lazily initializes one {@link RecordBatchLoader}
 * per non-empty sender stream, then repeatedly pops the smallest record (per the
 * generated {@code merger} comparator) from a priority queue of per-stream cursors and
 * copies it into the outgoing container, until either the outgoing batch fills up or
 * all incoming streams are exhausted.
 *
 * @return {@code OK_NEW_SCHEMA} on the first produced batch, {@code OK} on subsequent
 *         batches, {@code NONE} when there is no more data, or {@code STOP} on failure
 *         (after reporting the error via {@code context.fail(...)})
 */
@Override
public IterOutcome innerNext() {
// No senders at all: nothing will ever arrive.
if (fragProviders.length == 0) {
return IterOutcome.NONE;
}
boolean schemaChanged = false;
// The previous call stopped because the outgoing vectors ran out of space;
// re-allocate them and resume copying from where we left off (pqueue state is intact).
if (prevBatchWasFull) {
logger.debug("Outgoing vectors were full on last iteration");
allocateOutgoing();
outgoingPosition = 0;
prevBatchWasFull = false;
}
// All incoming streams were drained on an earlier call; nothing left to emit.
if (!hasMoreIncoming) {
logger.debug("next() was called after all values have been processed");
outgoingPosition = 0;
return IterOutcome.NONE;
}
// lazy initialization
if (!hasRun) {
// first iteration is always a schema change
schemaChanged = true;
// set up each (non-empty) incoming record batch
final List<RawFragmentBatch> rawBatches = Lists.newArrayList();
int p = 0;
for (@SuppressWarnings("unused") final RawFragmentBatchProvider provider : fragProviders) {
RawFragmentBatch rawBatch;
// check if there is a batch in temp holder before calling getNext(), as it may have been used when building schema
if (tempBatchHolder[p] != null) {
rawBatch = tempBatchHolder[p];
tempBatchHolder[p] = null;
} else {
try {
rawBatch = getNext(p);
} catch (final IOException e) {
context.fail(e);
return IterOutcome.STOP;
}
}
// A null batch with shouldContinue() == false means the query was cancelled/failed;
// release everything gathered so far before bailing out.
if (rawBatch == null && !context.shouldContinue()) {
clearBatches(rawBatches);
return IterOutcome.STOP;
}
assert rawBatch != null : "rawBatch is null although context.shouldContinue() == true";
if (rawBatch.getHeader().getDef().getRecordCount() != 0) {
rawBatches.add(rawBatch);
} else {
// save an empty batch to use for schema purposes. ignore batch if it contains no fields, and thus no schema
if (emptyBatch == null && rawBatch.getHeader().getDef().getFieldCount() != 0) {
emptyBatch = rawBatch;
}
// Skip over zero-record batches until we find one with data (or the stream ends).
try {
while ((rawBatch = getNext(p)) != null && rawBatch.getHeader().getDef().getRecordCount() == 0) {
// Do nothing
}
if (rawBatch == null && !context.shouldContinue()) {
clearBatches(rawBatches);
return IterOutcome.STOP;
}
} catch (final IOException e) {
context.fail(e);
clearBatches(rawBatches);
return IterOutcome.STOP;
}
if (rawBatch != null) {
rawBatches.add(rawBatch);
} else {
// Stream produced only empty batches; fall back to the saved schema-only batch
// so this sender still contributes a loader slot.
rawBatches.add(emptyBatch);
}
}
p++;
}
// allocate the incoming record batch loaders
senderCount = rawBatches.size();
incomingBatches = new RawFragmentBatch[senderCount];
batchOffsets = new int[senderCount];
batchLoaders = new RecordBatchLoader[senderCount];
for (int i = 0; i < senderCount; ++i) {
incomingBatches[i] = rawBatches.get(i);
batchLoaders[i] = new RecordBatchLoader(oContext.getAllocator());
}
// after this point all batches have moved to incomingBatches
rawBatches.clear();
int i = 0;
for (final RawFragmentBatch batch : incomingBatches) {
// initialize the incoming batchLoaders
final UserBitShared.RecordBatchDef rbd = batch.getHeader().getDef();
try {
batchLoaders[i].load(rbd, batch.getBody());
// TODO: Clean: DRILL-2933: That load(...) no longer throws
// SchemaChangeException, so check/clean catch clause below.
} catch (final SchemaChangeException e) {
logger.error("MergingReceiver failed to load record batch from remote host. {}", e);
context.fail(e);
return IterOutcome.STOP;
}
// The loader now owns the data; release the raw batch's buffers.
batch.release();
++batchOffsets[i];
++i;
}
// Canonicalize each incoming batch, so that vectors are alphabetically sorted based on SchemaPath.
for (final RecordBatchLoader loader : batchLoaders) {
loader.canonicalize();
}
// Ensure all the incoming batches have the identical schema.
if (!isSameSchemaAmongBatches(batchLoaders)) {
context.fail(new SchemaChangeException("Incoming batches for merging receiver have different schemas!"));
return IterOutcome.STOP;
}
// create the outgoing schema and vector container, and allocate the initial batch
final SchemaBuilder bldr = BatchSchema.newBuilder().setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
for (final VectorWrapper<?> v : batchLoaders[0]) {
// add field to the output schema
bldr.addField(v.getField());
// allocate a new value vector
outgoingContainer.addOrGet(v.getField());
}
allocateOutgoing();
outgoingContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
// generate code for merge operations (copy and compare)
try {
merger = createMerger();
} catch (final SchemaChangeException e) {
logger.error("Failed to generate code for MergingReceiver. {}", e);
context.fail(e);
return IterOutcome.STOP;
}
// allocate the priority queue with the generated comparator
// NOTE(review): the composite index packs batchId into the high bits and valueIndex
// into the low 16 bits — this presumes valueIndex < 65536 and that the generated
// doEval(...) decodes the same layout; confirm against the code generator.
this.pqueue = new PriorityQueue<>(fragProviders.length, new Comparator<Node>() {
@Override
public int compare(final Node node1, final Node node2) {
final int leftIndex = (node1.batchId << 16) + node1.valueIndex;
final int rightIndex = (node2.batchId << 16) + node2.valueIndex;
try {
return merger.doEval(leftIndex, rightIndex);
} catch (SchemaChangeException e) {
throw new UnsupportedOperationException(e);
}
}
});
// populate the priority queue with initial values
for (int b = 0; b < senderCount; ++b) {
// Loaders seeded from the schema-only emptyBatch have zero records; pull from the
// stream until a loader holds data or the stream ends (loader set to null when ended).
while (batchLoaders[b] != null && batchLoaders[b].getRecordCount() == 0) {
try {
final RawFragmentBatch batch = getNext(b);
incomingBatches[b] = batch;
if (batch != null) {
batchLoaders[b].load(batch.getHeader().getDef(), batch.getBody());
} else {
batchLoaders[b].clear();
batchLoaders[b] = null;
if (!context.shouldContinue()) {
return IterOutcome.STOP;
}
}
} catch (IOException | SchemaChangeException e) {
context.fail(e);
return IterOutcome.STOP;
}
}
// Seed the queue with a cursor at record 0 of each stream that has data.
if (batchLoaders[b] != null) {
pqueue.add(new Node(b, 0));
}
}
hasRun = true;
// finished lazy initialization
}
// Main merge loop: repeatedly take the smallest record across all streams.
while (!pqueue.isEmpty()) {
// pop next value from pq and copy to outgoing batch
final Node node = pqueue.peek();
// NOTE(review): a false return appears to mean the outgoing batch is full after
// this copy (the record is still consumed below) — verify against
// copyRecordToOutgoingBatch, which is not visible here.
if (!copyRecordToOutgoingBatch(node)) {
logger.debug("Outgoing vectors space is full; breaking");
prevBatchWasFull = true;
}
pqueue.poll();
if (node.valueIndex == batchLoaders[node.batchId].getRecordCount() - 1) {
// reached the end of an incoming record batch
RawFragmentBatch nextBatch;
try {
nextBatch = getNext(node.batchId);
// Skip zero-record batches; stop on first batch with data or end of stream.
while (nextBatch != null && nextBatch.getHeader().getDef().getRecordCount() == 0) {
nextBatch = getNext(node.batchId);
}
// On a cleanly finished stream, every record received must have been emitted.
assert nextBatch != null || inputCounts[node.batchId] == outputCounts[node.batchId] : String.format("Stream %d input count: %d output count %d", node.batchId, inputCounts[node.batchId], outputCounts[node.batchId]);
if (nextBatch == null && !context.shouldContinue()) {
return IterOutcome.STOP;
}
} catch (final IOException e) {
context.fail(e);
return IterOutcome.STOP;
}
incomingBatches[node.batchId] = nextBatch;
if (nextBatch == null) {
// batch is empty
boolean allBatchesEmpty = true;
for (final RawFragmentBatch batch : incomingBatches) {
// see if all batches are empty so we can return OK_* or NONE
if (batch != null) {
allBatchesEmpty = false;
break;
}
}
if (allBatchesEmpty) {
hasMoreIncoming = false;
break;
}
// ignored in subsequent iterations.
if (prevBatchWasFull) {
break;
} else {
continue;
}
}
final UserBitShared.RecordBatchDef rbd = incomingBatches[node.batchId].getHeader().getDef();
try {
batchLoaders[node.batchId].load(rbd, incomingBatches[node.batchId].getBody());
// TODO: Clean: DRILL-2933: That load(...) no longer throws
// SchemaChangeException, so check/clean catch clause below.
} catch (final SchemaChangeException ex) {
context.fail(ex);
return IterOutcome.STOP;
}
// Loader owns the data now; release the raw buffers and restart this stream's cursor.
incomingBatches[node.batchId].release();
batchOffsets[node.batchId] = 0;
// add front value from batch[x] to priority queue
if (batchLoaders[node.batchId].getRecordCount() != 0) {
pqueue.add(new Node(node.batchId, 0));
}
} else {
// More records remain in this batch: advance the stream's cursor by one.
pqueue.add(new Node(node.batchId, node.valueIndex + 1));
}
// Outgoing batch filled up mid-merge: emit it now; state resumes on the next call.
if (prevBatchWasFull) {
break;
}
}
// set the value counts in the outgoing vectors
for (final VectorWrapper<?> vw : outgoingContainer) {
vw.getValueVector().getMutator().setValueCount(outgoingPosition);
}
if (pqueue.isEmpty()) {
state = BatchState.DONE;
}
if (schemaChanged) {
return IterOutcome.OK_NEW_SCHEMA;
} else {
return IterOutcome.OK;
}
}
// (End of example — source: code-example aggregation page.)