Example use of org.apache.drill.exec.record.RawFragmentBatch in the Apache Drill project.
From class BaseRawBatchBuffer, method getNext.
/**
 * Fetches the next batch from {@code bufferQueue}.
 *
 * <p>A non-blocking {@code poll()} is tried first; if that yields nothing and the buffer is either
 * not terminated or the queue is not empty, the call blocks in {@code take()}.
 *
 * @return the next batch, or {@code null} when no batch is available (the buffer is terminated and
 *         the queue is empty) or when the wait was interrupted and the fragment should not continue
 * @throws IOException declared by the overridden method
 * @throws DrillRuntimeException if the wait is interrupted while {@code context.shouldContinue()}
 *         still reports {@code true}
 * @throws IllegalStateException if {@code null} is about to be returned while batches remain queued
 *         or the buffer is not terminated
 */
@Override
public RawFragmentBatch getNext() throws IOException {
  // Leave the out-of-memory state once the backlog has dropped below 10 queued batches.
  if (outOfMemory.get() && bufferQueue.size() < 10) {
    outOfMemory.set(false);
  }

  RawFragmentBatch batch;
  try {
    batch = bufferQueue.poll();
    // Nothing immediately available: block unless the buffer is terminated and already drained.
    if (batch == null && !(isTerminated() && bufferQueue.isEmpty())) {
      batch = bufferQueue.take();
    }
  } catch (final InterruptedException interrupted) {
    // An interrupt is only expected when the fragment was canceled or failed.
    if (context.shouldContinue()) {
      throw new DrillRuntimeException("Interrupted but context.shouldContinue() is true", interrupted);
    }
    kill(context);
    // Re-assert the interrupt status so callers further up the stack can observe and react to it.
    Thread.currentThread().interrupt();
    return null;
  }

  if (context.isOverMemoryLimit()) {
    outOfMemory.set(true);
  }

  if (batch == null) {
    // Sanity checks: null may only be handed out once everything is drained and terminated.
    if (!bufferQueue.isEmpty()) {
      throw new IllegalStateException("Returning null when there are batches left in queue");
    }
    if (!isTerminated()) {
      throw new IllegalStateException("Returning null when not finished");
    }
  } else {
    upkeep(batch);
    if (batch.getHeader().getIsLastBatch()) {
      logger.debug("Got last batch from {}:{}", batch.getHeader().getSendingMajorFragmentId(), batch.getHeader().getSendingMinorFragmentId());
      // The last sender to finish triggers the end-of-streams notification.
      if (decrementStreamCounter() == 0) {
        logger.debug("Streams finished");
        allStreamsFinished();
      }
    }
  }

  assertAckSent(batch);
  return batch;
}
Example use of org.apache.drill.exec.record.RawFragmentBatch in the Apache Drill project.
From class MergingRecordBatch, method buildSchema.
/**
 * Builds the outgoing schema from the first provider batch that carries at least one field.
 *
 * <p>Providers are probed in index order. Batches without fields are skipped; the first batch with
 * fields is parked in {@code tempBatchHolder} at its provider index and its fields are
 * added to {@code outgoingContainer}. If every provider has been probed without finding one, the
 * state becomes {@code DONE} and the method returns without building a schema. If a provider
 * returns {@code null}, the state becomes {@code STOP} or {@code DONE} depending on
 * {@code context.shouldContinue()} and the schema is built from whatever the container holds.
 *
 * @throws SchemaChangeException declared by the overridden method
 * @throws DrillRuntimeException wrapping any {@link IOException} raised while fetching a batch
 */
@Override
public void buildSchema() throws SchemaChangeException {
  tempBatchHolder = new RawFragmentBatch[fragProviders.length];
  try {
    for (int providerIndex = 0; ; providerIndex++) {
      // Ran out of providers without seeing a batch that has fields.
      if (providerIndex >= fragProviders.length) {
        state = BatchState.DONE;
        return;
      }

      final RawFragmentBatch candidate = getNext(providerIndex);
      if (candidate == null) {
        state = context.shouldContinue() ? BatchState.DONE : BatchState.STOP;
        break;
      }

      if (candidate.getHeader().getDef().getFieldCount() != 0) {
        // Keep the batch so that innerNext() can consume it instead of fetching again.
        tempBatchHolder[providerIndex] = candidate;
        for (final SerializedField field : candidate.getHeader().getDef().getFieldList()) {
          @SuppressWarnings("resource")
          final ValueVector vector = outgoingContainer.addOrGet(MaterializedField.create(field));
          vector.allocateNew();
        }
        break;
      }
      // No fields in this batch: move on to the next provider.
    }
  } catch (final IOException e) {
    throw new DrillRuntimeException(e);
  }
  outgoingContainer = VectorContainer.canonicalize(outgoingContainer);
  outgoingContainer.buildSchema(SelectionVectorMode.NONE);
}
Example use of org.apache.drill.exec.record.RawFragmentBatch in the Apache Drill project.
From class MergingRecordBatch, method innerNext.
/**
 * Produces the next merged outgoing batch from the incoming sender streams.
 *
 * <p>On the first call the method performs lazy initialization: it collects one starting batch per
 * provider (reusing any batch parked in {@code tempBatchHolder}), loads them into
 * {@code batchLoaders}, verifies that all loaders share one schema, builds the outgoing container,
 * generates the merger and seeds the priority queue. Every call then drains the priority queue into
 * the outgoing vectors until the queue is empty, the outgoing batch is full, or all incoming
 * streams are exhausted.
 *
 * @return {@code NONE} when there are no providers or all values were already processed;
 *         {@code STOP} when fetching/loading a batch or generating the merger fails, or when a
 *         {@code null} batch is seen while {@code context.shouldContinue()} is {@code false};
 *         {@code OK_NEW_SCHEMA} for the call that performed the lazy initialization;
 *         {@code OK} otherwise
 */
@Override
public IterOutcome innerNext() {
  if (fragProviders.length == 0) {
    return IterOutcome.NONE;
  }
  boolean schemaChanged = false;
  if (prevBatchWasFull) {
    logger.debug("Outgoing vectors were full on last iteration");
    allocateOutgoing();
    outgoingPosition = 0;
    prevBatchWasFull = false;
  }
  if (!hasMoreIncoming) {
    logger.debug("next() was called after all values have been processed");
    outgoingPosition = 0;
    return IterOutcome.NONE;
  }
  // lazy initialization
  if (!hasRun) {
    // first iteration is always a schema change
    schemaChanged = true;
    // set up each (non-empty) incoming record batch
    final List<RawFragmentBatch> rawBatches = Lists.newArrayList();
    int p = 0;
    for (@SuppressWarnings("unused") final RawFragmentBatchProvider provider : fragProviders) {
      RawFragmentBatch rawBatch;
      // check if there is a batch in temp holder before calling getNext(), as it may have been used when building schema
      if (tempBatchHolder[p] != null) {
        rawBatch = tempBatchHolder[p];
        tempBatchHolder[p] = null;
      } else {
        try {
          rawBatch = getNext(p);
        } catch (final IOException e) {
          context.fail(e);
          // Batches already collected from earlier providers must not be abandoned; every other
          // early exit of this loop clears them as well.
          clearBatches(rawBatches);
          return IterOutcome.STOP;
        }
      }
      if (rawBatch == null && !context.shouldContinue()) {
        clearBatches(rawBatches);
        return IterOutcome.STOP;
      }
      assert rawBatch != null : "rawBatch is null although context.shouldContinue() == true";
      if (rawBatch.getHeader().getDef().getRecordCount() != 0) {
        rawBatches.add(rawBatch);
      } else {
        // save an empty batch to use for schema purposes. ignore batch if it contains no fields, and thus no schema
        if (emptyBatch == null && rawBatch.getHeader().getDef().getFieldCount() != 0) {
          emptyBatch = rawBatch;
        }
        try {
          // skip over any further zero-record batches from this provider
          while ((rawBatch = getNext(p)) != null && rawBatch.getHeader().getDef().getRecordCount() == 0) {
            // Do nothing
          }
          if (rawBatch == null && !context.shouldContinue()) {
            clearBatches(rawBatches);
            return IterOutcome.STOP;
          }
        } catch (final IOException e) {
          context.fail(e);
          clearBatches(rawBatches);
          return IterOutcome.STOP;
        }
        if (rawBatch != null) {
          rawBatches.add(rawBatch);
        } else {
          // NOTE(review): emptyBatch may still be null here if no provider has delivered a
          // zero-record batch that carries fields; the loading loop below would then dereference null.
          rawBatches.add(emptyBatch);
        }
      }
      p++;
    }
    // allocate the incoming record batch loaders
    senderCount = rawBatches.size();
    incomingBatches = new RawFragmentBatch[senderCount];
    batchOffsets = new int[senderCount];
    batchLoaders = new RecordBatchLoader[senderCount];
    for (int i = 0; i < senderCount; ++i) {
      incomingBatches[i] = rawBatches.get(i);
      batchLoaders[i] = new RecordBatchLoader(oContext.getAllocator());
    }
    // after this point all batches have moved to incomingBatches
    rawBatches.clear();
    int i = 0;
    for (final RawFragmentBatch batch : incomingBatches) {
      // initialize the incoming batchLoaders
      final UserBitShared.RecordBatchDef rbd = batch.getHeader().getDef();
      try {
        batchLoaders[i].load(rbd, batch.getBody());
        // TODO: Clean: DRILL-2933: That load(...) no longer throws
        // SchemaChangeException, so check/clean catch clause below.
      } catch (final SchemaChangeException e) {
        // The exception is passed as the trailing argument so the logger records it with its
        // stack trace; no placeholder is needed for it.
        logger.error("MergingReceiver failed to load record batch from remote host.", e);
        context.fail(e);
        return IterOutcome.STOP;
      }
      batch.release();
      ++batchOffsets[i];
      ++i;
    }
    // Canonicalize each incoming batch, so that vectors are alphabetically sorted based on SchemaPath.
    for (final RecordBatchLoader loader : batchLoaders) {
      loader.canonicalize();
    }
    // Ensure all the incoming batches have the identical schema.
    if (!isSameSchemaAmongBatches(batchLoaders)) {
      context.fail(new SchemaChangeException("Incoming batches for merging receiver have different schemas!"));
      return IterOutcome.STOP;
    }
    // create the outgoing schema and vector container, and allocate the initial batch
    final SchemaBuilder bldr = BatchSchema.newBuilder().setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
    for (final VectorWrapper<?> v : batchLoaders[0]) {
      // add field to the output schema
      bldr.addField(v.getField());
      // allocate a new value vector
      outgoingContainer.addOrGet(v.getField());
    }
    allocateOutgoing();
    outgoingContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    // generate code for merge operations (copy and compare)
    try {
      merger = createMerger();
    } catch (final SchemaChangeException e) {
      logger.error("Failed to generate code for MergingReceiver.", e);
      context.fail(e);
      return IterOutcome.STOP;
    }
    // allocate the priority queue with the generated comparator
    this.pqueue = new PriorityQueue<>(fragProviders.length, new Comparator<Node>() {
      @Override
      public int compare(final Node node1, final Node node2) {
        // pack (batchId, valueIndex) into one int: batch id in the high 16 bits, row index below
        final int leftIndex = (node1.batchId << 16) + node1.valueIndex;
        final int rightIndex = (node2.batchId << 16) + node2.valueIndex;
        try {
          return merger.doEval(leftIndex, rightIndex);
        } catch (SchemaChangeException e) {
          throw new UnsupportedOperationException(e);
        }
      }
    });
    // populate the priority queue with initial values
    for (int b = 0; b < senderCount; ++b) {
      // keep fetching until this sender yields a loader with records, or runs dry
      while (batchLoaders[b] != null && batchLoaders[b].getRecordCount() == 0) {
        try {
          final RawFragmentBatch batch = getNext(b);
          incomingBatches[b] = batch;
          if (batch != null) {
            batchLoaders[b].load(batch.getHeader().getDef(), batch.getBody());
          } else {
            batchLoaders[b].clear();
            batchLoaders[b] = null;
            if (!context.shouldContinue()) {
              return IterOutcome.STOP;
            }
          }
        } catch (IOException | SchemaChangeException e) {
          context.fail(e);
          return IterOutcome.STOP;
        }
      }
      if (batchLoaders[b] != null) {
        pqueue.add(new Node(b, 0));
      }
    }
    hasRun = true;
    // finished lazy initialization
  }
  while (!pqueue.isEmpty()) {
    // pop next value from pq and copy to outgoing batch
    final Node node = pqueue.peek();
    if (!copyRecordToOutgoingBatch(node)) {
      logger.debug("Outgoing vectors space is full; breaking");
      prevBatchWasFull = true;
    }
    pqueue.poll();
    if (node.valueIndex == batchLoaders[node.batchId].getRecordCount() - 1) {
      // reached the end of an incoming record batch
      RawFragmentBatch nextBatch;
      try {
        nextBatch = getNext(node.batchId);
        while (nextBatch != null && nextBatch.getHeader().getDef().getRecordCount() == 0) {
          nextBatch = getNext(node.batchId);
        }
        assert nextBatch != null || inputCounts[node.batchId] == outputCounts[node.batchId] : String.format("Stream %d input count: %d output count %d", node.batchId, inputCounts[node.batchId], outputCounts[node.batchId]);
        if (nextBatch == null && !context.shouldContinue()) {
          return IterOutcome.STOP;
        }
      } catch (final IOException e) {
        context.fail(e);
        return IterOutcome.STOP;
      }
      incomingBatches[node.batchId] = nextBatch;
      if (nextBatch == null) {
        // batch is empty
        boolean allBatchesEmpty = true;
        for (final RawFragmentBatch batch : incomingBatches) {
          // see if all batches are empty so we can return OK_* or NONE
          if (batch != null) {
            allBatchesEmpty = false;
            break;
          }
        }
        if (allBatchesEmpty) {
          hasMoreIncoming = false;
          break;
        }
        // This sender is exhausted: its node was polled above and nothing is re-queued for it,
        // so it is ignored in subsequent iterations.
        if (prevBatchWasFull) {
          break;
        } else {
          continue;
        }
      }
      final UserBitShared.RecordBatchDef rbd = incomingBatches[node.batchId].getHeader().getDef();
      try {
        batchLoaders[node.batchId].load(rbd, incomingBatches[node.batchId].getBody());
        // TODO: Clean: DRILL-2933: That load(...) no longer throws
        // SchemaChangeException, so check/clean catch clause below.
      } catch (final SchemaChangeException ex) {
        context.fail(ex);
        return IterOutcome.STOP;
      }
      incomingBatches[node.batchId].release();
      batchOffsets[node.batchId] = 0;
      // add front value from batch[x] to priority queue
      if (batchLoaders[node.batchId].getRecordCount() != 0) {
        pqueue.add(new Node(node.batchId, 0));
      }
    } else {
      // more rows remain in the current batch of this sender: queue the following row
      pqueue.add(new Node(node.batchId, node.valueIndex + 1));
    }
    if (prevBatchWasFull) {
      break;
    }
  }
  // set the value counts in the outgoing vectors
  for (final VectorWrapper<?> vw : outgoingContainer) {
    vw.getValueVector().getMutator().setValueCount(outgoingPosition);
  }
  if (pqueue.isEmpty()) {
    state = BatchState.DONE;
  }
  if (schemaChanged) {
    return IterOutcome.OK_NEW_SCHEMA;
  } else {
    return IterOutcome.OK;
  }
}
Example use of org.apache.drill.exec.record.RawFragmentBatch in the Apache Drill project.
From class IncomingDataBatch, method newRawFragmentBatch.
/**
 * Builds a {@link RawFragmentBatch} from this incoming data batch, handing ownership of the body
 * (when there is one) over to the supplied allocator. The AckSender is incremented as well, so it
 * expects one more return message.
 *
 * @param allocator
 *          Allocator that takes ownership of the data underlying this batch.
 * @return The freshly created RawFragmentBatch; its buffer is {@code null} when this batch has no body
 */
public RawFragmentBatch newRawFragmentBatch(final BufferAllocator allocator) {
  final DrillBuf transferredBuffer;
  if (body != null) {
    transferredBuffer = body.transferOwnership(allocator).buffer;
  } else {
    // Nothing to transfer for a body-less batch.
    transferredBuffer = null;
  }
  sender.increment();
  return new RawFragmentBatch(header, transferredBuffer, sender);
}
Example use of org.apache.drill.exec.record.RawFragmentBatch in the Apache Drill project.
From class TestBitBitKerberos, method setupFragmentContextAndManager.
/**
 * Installs the mocked {@code FragmentContext} and {@code FragmentManager} used by this test.
 *
 * <p>The mocked manager acknowledges and releases every incoming batch immediately, and pauses for
 * three seconds on every tenth batch it handles.
 */
private static void setupFragmentContextAndManager() {
  final FragmentContext mockedContext = new MockUp<FragmentContext>() {
    // NOTE(review): unlike handle(...) below, this method carries no @Mock annotation — confirm intended.
    @SuppressWarnings("unused")
    BufferAllocator getAllocator() {
      return c1.getAllocator();
    }
  }.getMockInstance();

  manager = new MockUp<FragmentManager>() {
    // Number of batches passed to handle(...) so far.
    int handledBatches = 0;

    @Mock
    boolean handle(IncomingDataBatch batch) throws FragmentSetupException, IOException {
      handledBatches++;
      if (handledBatches % 10 == 0) {
        System.out.println("sleeping.");
        try {
          Thread.sleep(3000);
        } catch (InterruptedException ignored) {
          // Deliberately ignored: an interrupted pause simply ends early.
        }
      }
      final RawFragmentBatch rawBatch = batch.newRawFragmentBatch(c1.getAllocator());
      rawBatch.sendOk();
      rawBatch.release();
      return true;
    }

    @SuppressWarnings("unused")
    public FragmentContext getFragmentContext() {
      return mockedContext;
    }
  }.getMockInstance();
}
Aggregations