Use of org.apache.drill.exec.record.WritableBatch in project drill by apache.
The class BatchGroup, method addBatch.
public void addBatch(VectorContainer newContainer) throws IOException {
  assert fs != null;
  assert path != null;
  // Lazily create the spill file on first use.
  if (outputStream == null) {
    outputStream = fs.create(path);
  }
  int recordCount = newContainer.getRecordCount();
  // Wrap the container's buffers without a hyper-vector wrapper or SV2.
  WritableBatch batch = WritableBatch.getBatchNoHVWrap(recordCount, newContainer, false);
  VectorAccessibleSerializable outputBatch = new VectorAccessibleSerializable(batch, allocator);
  Stopwatch watch = Stopwatch.createStarted();
  outputBatch.writeToStream(outputStream);
  newContainer.zeroVectors();
  logger.debug("Took {} us to spill {} records", watch.elapsed(TimeUnit.MICROSECONDS), recordCount);
  spilledBatches++;
}
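Reading a spilled batch back is the mirror of the write above. A minimal sketch, assuming the same fs, path, and allocator fields and that VectorAccessibleSerializable exposes readFromStream and get() as it does in this era of Drill (readBatch itself is a hypothetical helper):
private VectorContainer readBatch() throws IOException {
  // Open the file the batches were spilled to.
  InputStream input = fs.open(path);
  // Deserialize one batch; the allocator owns the restored buffers.
  VectorAccessibleSerializable vas = new VectorAccessibleSerializable(allocator);
  vas.readFromStream(input);
  return vas.get();
}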
Use of org.apache.drill.exec.record.WritableBatch in project drill by apache.
The class TraceRecordBatch, method doWork.
/**
 * Invoked for every record batch; dumps the buffers associated with all the
 * value vectors in this record batch to a log file.
 */
@Override
protected IterOutcome doWork() {
  boolean incomingHasSv2 = incoming.getSchema().getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE;
  if (incomingHasSv2) {
    sv = incoming.getSelectionVector2();
  } else {
    sv = null;
  }
  WritableBatch batch = WritableBatch.getBatchNoHVWrap(incoming.getRecordCount(), incoming, incomingHasSv2);
  VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, sv, oContext.getAllocator());
  try {
    // Retain the buffers so the batch survives being written to the trace file.
    wrap.writeToStreamAndRetain(fos);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  batch.reconstructContainer(localAllocator, container);
  if (incomingHasSv2) {
    sv = wrap.getSv2();
  }
  return IterOutcome.OK;
}
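The writeToStreamAndRetain call is the detail worth noting: unlike writeToStream in the BatchGroup example, it keeps the underlying buffers alive so the batch can be reconstructed and handed downstream after being logged. A hedged sketch of that contract (the class and both write methods are real Drill API; dumpAndForward is a hypothetical helper):
void dumpAndForward(WritableBatch batch, SelectionVector2 sv2, OutputStream out,
    BufferAllocator allocator) throws IOException {
  VectorAccessibleSerializable wrap = new VectorAccessibleSerializable(batch, sv2, allocator);
  // writeToStream(out) releases the buffers once written; writeToStreamAndRetain(out)
  // keeps them so the caller can still rebuild a container, as doWork() does above.
  wrap.writeToStreamAndRetain(out);
}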
Use of org.apache.drill.exec.record.WritableBatch in project drill by apache.
The class VectorRecordMaterializer, method convertNext.
public QueryWritableBatch convertNext() {
  // batch.getWritableBatch().getDef().getRecordCount()
  WritableBatch w = batch.getWritableBatch().transfer(allocator);
  QueryData header = QueryData.newBuilder()
      .setQueryId(queryId)
      .setRowCount(batch.getRecordCount())
      .setDef(w.getDef())
      .build();
  // Note: this local shadows the 'batch' field used above.
  QueryWritableBatch batch = new QueryWritableBatch(header, w.getBuffers());
  return batch;
}
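The returned QueryWritableBatch pairs the protobuf QueryData header with the raw value-vector buffers, which is the form Drill's RPC layer ships to clients. A minimal sketch of a consumer, assuming getHeader() and getBuffers() accessors on QueryWritableBatch (describe itself is a hypothetical helper):
void describe(QueryWritableBatch result) {
  QueryData header = result.getHeader();
  long bytes = 0;
  for (ByteBuf buf : result.getBuffers()) {
    bytes += buf.readableBytes();
  }
  logger.debug("query {}: {} rows in {} bytes",
      header.getQueryId(), header.getRowCount(), bytes);
}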
Use of org.apache.drill.exec.record.WritableBatch in project drill by apache.
The class OrderedPartitionRecordBatch, method saveSamples.
@SuppressWarnings("resource")
private boolean saveSamples() throws SchemaChangeException, ClassTransformationException, IOException {
  recordsSampled = 0;
  IterOutcome upstream;
  // Collect batches until recordsToSample records have been gathered.
  SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
  WritableBatch batch = null;
  CachedVectorContainer sampleToSave = null;
  VectorContainer containerToCache = new VectorContainer();
  try {
    builder.add(incoming);
    recordsSampled += incoming.getRecordCount();
    outer:
    while (recordsSampled < recordsToSample) {
      upstream = next(incoming);
      switch (upstream) {
        case NONE:
        case NOT_YET:
        case STOP:
          upstreamNone = true;
          break outer;
        default:
      }
      builder.add(incoming);
      recordsSampled += incoming.getRecordCount();
      if (upstream == IterOutcome.NONE) {
        break;
      }
    }
    VectorContainer sortedSamples = new VectorContainer();
    builder.build(context, sortedSamples);
    // Sort the records according to the orderings given in the configuration.
    Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
    SelectionVector4 sv4 = builder.getSv4();
    sorter.setup(context, sv4, sortedSamples);
    sorter.sort(sv4, sortedSamples);
    // Project every Nth record into a new vector container, where
    // N = recordsSampled / (samplingFactor * partitions). Uses the expressions
    // from the Orderings to populate each column; there is one column for each
    // Ordering in popConfig.orderings.
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier = getCopier(sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      if (copier.copyRecords(recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
        break;
      } else {
        // Not enough space: zero the vectors and retry with a larger allocation.
        containerToCache.zeroVectors();
        allocationSize *= 2;
      }
    }
    for (VectorWrapper<?> vw : containerToCache) {
      vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
    }
    containerToCache.setRecordCount(copier.getOutputRecords());
    // Put the vectors from the new container into a serializable wrapper and
    // add it to the distributed multimap obtained from the distributed cache.
    batch = WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
    sampleToSave = new CachedVectorContainer(batch, context.getAllocator());
    mmap.put(mapKey, sampleToSave);
    this.sampledIncomingBatches = builder.getHeldRecordBatches();
  } finally {
    builder.clear();
    builder.close();
    if (batch != null) {
      batch.clear();
    }
    containerToCache.clear();
    if (sampleToSave != null) {
      sampleToSave.clear();
    }
  }
  return true;
}
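The noteworthy pattern here is the grow-and-retry allocation loop: vectors are allocated with a guessed per-value width, and if the copier runs out of room the container is zeroed and the width is doubled. A condensed restatement of that loop under hypothetical names (vectors, expectedRecords, skip, offset, and container stand in for the locals above):
int allocationSize = 50;
while (true) {
  for (ValueVector vv : vectors) {
    // Room for expectedRecords values at roughly allocationSize bytes each.
    AllocationHelper.allocate(vv, expectedRecords, allocationSize);
  }
  if (copier.copyRecords(skip, offset, expectedRecords)) {
    break;                    // everything fit
  }
  container.zeroVectors();    // release the too-small buffers
  allocationSize *= 2;        // retry with double the width guess
}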
Use of org.apache.drill.exec.record.WritableBatch in project drill by apache.
The class OrderedPartitionRecordBatch, method buildTable.
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
  // Get all samples from the distributed map.
  @SuppressWarnings("resource")
  SortRecordBatchBuilder containerBuilder = new SortRecordBatchBuilder(context.getAllocator());
  final VectorContainer allSamplesContainer = new VectorContainer();
  final VectorContainer candidatePartitionTable = new VectorContainer();
  CachedVectorContainer wrap = null;
  try {
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    containerBuilder.build(context, allSamplesContainer);
    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }
    // Sort the incoming sample data.
    @SuppressWarnings("resource")
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);
    // Copy every Nth record from the samples into a candidate partition table,
    // where N = totalSampledRecords / partitions. Attempt to push this to the
    // distributed map; only the first candidate to be pushed will be used.
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier = getCopier(newSv4, allSamplesContainer, candidatePartitionTable, orderDefs, localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        // Not enough space: zero the vectors and retry with a larger allocation.
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    @SuppressWarnings("resource")
    WritableBatch batch = WritableBatch.getBatchNoHVWrap(candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
  } finally {
    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    containerBuilder.close();
    if (wrap != null) {
      wrap.clear();
    }
  }
}
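Fragments then race on the "final" key: putIfAbsent guarantees only the first candidate table is kept, so every fragment partitions against the same winner. A minimal sketch of the reader side, assuming the tableMap and mapKey fields above and eliding the real operator's blocking and retry logic:
// Fetch whichever candidate won the putIfAbsent race.
CachedVectorContainer winner = tableMap.get(mapKey + "final");
if (winner != null) {
  VectorAccessible partitionTable = winner.get(); // deserializes back into vectors
  // ... derive partition boundaries from partitionTable ...
}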