Use of org.apache.drill.exec.record.TransferPair in project drill by apache.
From class LimitRecordBatch, method setupNewSchema:
@Override
protected boolean setupNewSchema() throws SchemaChangeException {
  container.zeroVectors();
  transfers.clear();

  // Build a transfer pair from each incoming vector into the matching vector
  // of the output container.
  for (final VectorWrapper<?> v : incoming) {
    final TransferPair pair = v.getValueVector().makeTransferPair(container.addOrGet(v.getField(), callBack));
    transfers.add(pair);
  }

  final BatchSchema.SelectionVectorMode svMode = incoming.getSchema().getSelectionVectorMode();
  switch (svMode) {
    case NONE:
      break;
    case TWO_BYTE:
      this.incomingSv = incoming.getSelectionVector2();
      break;
    default:
      throw new UnsupportedOperationException();
  }

  if (container.isSchemaChanged()) {
    container.buildSchema(BatchSchema.SelectionVectorMode.TWO_BYTE);
    return true;
  }
  return false;
}
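For reference, the pattern the loop above sets up can be reduced to the sketch below: makeTransferPair binds a source vector to a compatible target vector, and a later transfer() hands the underlying buffers to the target without copying. This is a minimal sketch under assumptions, not code from the Drill sources above; the field name and the pre-existing BufferAllocator named allocator are hypothetical.

// Minimal sketch (assumed setup): an existing org.apache.drill.exec.memory.BufferAllocator
// named "allocator" is taken as given.
MaterializedField field = MaterializedField.create("n", Types.required(TypeProtos.MinorType.INT));
IntVector source = new IntVector(field, allocator);
IntVector target = new IntVector(field, allocator);

source.allocateNew(3);
for (int i = 0; i < 3; i++) {
  source.getMutator().setSafe(i, i * 10);
}
source.getMutator().setValueCount(3);

// transfer() moves the source's buffers to the target; the source is left empty afterwards.
TransferPair pair = source.makeTransferPair(target);
pair.transfer();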
From class FlattenTemplate, method flattenRecords:
@Override
public final int flattenRecords(final int recordCount, final int firstOutputIndex, final Flattener.Monitor monitor) {
  switch (svMode) {
    case FOUR_BYTE:
      throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
    case TWO_BYTE:
      throw new UnsupportedOperationException("Flatten does not support selection vector inputs.");
    case NONE:
      if (innerValueIndex == -1) {
        innerValueIndex = 0;
      }
      final int initialInnerValueIndex = currentInnerValueIndex;
      // restore state to local stack
      int valueIndexLocal = valueIndex;
      int innerValueIndexLocal = innerValueIndex;
      int currentInnerValueIndexLocal = currentInnerValueIndex;
      outer: {
        int outputIndex = firstOutputIndex;
        int recordsThisCall = 0;
        final int valueCount = accessor.getValueCount();
        for (; valueIndexLocal < valueCount; valueIndexLocal++) {
          final int innerValueCount = accessor.getInnerValueCountAt(valueIndexLocal);
          for (; innerValueIndexLocal < innerValueCount; innerValueIndexLocal++) {
            // If we've hit the batch size limit, stop and flush what we've got so far.
            if (recordsThisCall == outputLimit) {
              if (bigRecords) {
                /*
                 * We got to the limit we used before, but did we go over
                 * the bigRecordsBufferSize in the second half of the batch? If
                 * so, we'll need to adjust the batch limits.
                 */
                adjustBatchLimits(1, monitor, recordsThisCall);
              }
              // Flush this batch.
              break outer;
            }
            /*
             * At the moment, the output record includes the input record, so for very
             * large records that we're flattening, we're carrying forward the original
             * record as well as the flattened element. We've seen a case where flattening
             * a 4MB record with a 20,000 element array caused memory usage to explode. To
             * avoid that until we can push down the selected fields to operators like this,
             * we also limit the amount of memory in use at one time.
             *
             * We have to have written at least one record to be able to get a buffer that
             * will have a real allocator, so we have to do this lazily. We won't check the
             * limit for the first two records, but that keeps this simple.
             */
            if (bigRecords) {
              /*
               * If we're halfway through the outputLimit, check on our memory
               * usage so far.
               */
              if (recordsThisCall == outputLimit / 2) {
                /*
                 * If we've used more than half the space we've used for big records
                 * in the past, we've seen even bigger records than before, so stop and
                 * see if we need to flush here before we go over bigRecordsBufferSize
                 * memory usage, and reduce the outputLimit further before we continue
                 * with the next batch.
                 */
                if (adjustBatchLimits(2, monitor, recordsThisCall)) {
                  break outer;
                }
              }
            } else {
              if (outputAllocator.getAllocatedMemory() > OUTPUT_MEMORY_LIMIT) {
                /*
                 * We're dealing with big records. Reduce the outputLimit to
                 * the current record count, and take note of how much space the
                 * vectors report using for that. We'll use those numbers as limits
                 * going forward in order to avoid allocating more memory.
                 */
                bigRecords = true;
                outputLimit = Math.min(recordsThisCall, outputLimit);
                if (outputLimit < 1) {
                  throw new IllegalStateException("flatten outputLimit (" + outputLimit + ") won't make progress");
                }
                /*
                 * This will differ from what the allocator reports because of
                 * overhead. But the allocator check is much cheaper to do, so we
                 * only compute this at selected times.
                 */
                bigRecordsBufferSize = monitor.getBufferSizeFor(recordsThisCall);
                // Stop and flush.
                break outer;
              }
            }
            try {
              doEval(valueIndexLocal, outputIndex);
            } catch (OversizedAllocationException ex) {
              // Unable to flatten due to a soft buffer overflow. Split the batch here and resume execution.
              logger.debug("Reached allocation limit. Splitting the batch at input index: {} - inner index: {} - current completed index: {}",
                  valueIndexLocal, innerValueIndexLocal, currentInnerValueIndexLocal);
              /*
               * TODO
               * We can't further reduce the output limits here because it won't have
               * any effect. The vectors have already gotten large, and there's currently
               * no way to reduce their size. Ideally, we could reduce the outputLimit,
               * and reduce the size of the currently used vectors.
               */
              break outer;
            } catch (SchemaChangeException e) {
              throw new UnsupportedOperationException(e);
            }
            outputIndex++;
            currentInnerValueIndexLocal++;
            ++recordsThisCall;
          }
          innerValueIndexLocal = 0;
        }
      }
      // save state to heap
      valueIndex = valueIndexLocal;
      innerValueIndex = innerValueIndexLocal;
      currentInnerValueIndex = currentInnerValueIndexLocal;
      // transfer the computed range
      final int delta = currentInnerValueIndexLocal - initialInnerValueIndex;
      for (TransferPair t : transfers) {
        t.splitAndTransfer(initialInnerValueIndex, delta);
      }
      return delta;
    default:
      throw new UnsupportedOperationException();
  }
}
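The final loop in the NONE case uses the other half of the TransferPair API: splitAndTransfer(start, length) transfers (or copies, depending on the vector type) only the range of rows produced during this call, starting at initialInnerValueIndex. The sketch below shows that call in isolation; it assumes the same hypothetical source and target vectors as the earlier sketch, and the start/length values are placeholders for initialInnerValueIndex and delta.

// splitAndTransfer moves only the range [start, start + length) of the source
// into the target, which is how flattenRecords ships just the rows it produced.
TransferPair pair = source.makeTransferPair(target);
int start = 2;   // stands in for initialInnerValueIndex
int length = 5;  // stands in for delta
pair.splitAndTransfer(start, length);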
From class BatchGroup, method getBatch:
private VectorContainer getBatch() throws IOException {
  assert fs != null;
  assert path != null;
  if (inputStream == null) {
    inputStream = fs.open(path);
  }
  VectorAccessibleSerializable vas = new VectorAccessibleSerializable(allocator);
  Stopwatch watch = Stopwatch.createStarted();
  vas.readFromStream(inputStream);
  VectorContainer c = vas.get();
  if (schema != null) {
    c = SchemaUtil.coerceContainer(c, schema, context);
  }
  logger.trace("Took {} us to read {} records", watch.elapsed(TimeUnit.MICROSECONDS), c.getRecordCount());
  spilledBatches--;
  currentContainer.zeroVectors();

  // Move the deserialized vectors' buffers into the existing output container.
  Iterator<VectorWrapper<?>> wrapperIterator = c.iterator();
  for (VectorWrapper<?> w : currentContainer) {
    TransferPair pair = wrapperIterator.next().getValueVector().makeTransferPair(w.getValueVector());
    pair.transfer();
  }
  currentContainer.setRecordCount(c.getRecordCount());
  c.zeroVectors();
  return c;
}
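A note on the transfer loop above, as this example reads: once pair.transfer() has run for every wrapper, the deserialized container c no longer owns the data, so the closing c.zeroVectors() only clears the now-empty vectors. The record count is not carried along by the buffer transfers, which is why it is copied explicitly with currentContainer.setRecordCount(c.getRecordCount()).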