Use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.
The class HashJoinHelper, method setCurrentIndex.
public void setCurrentIndex(int keyIndex, int batchIndex, int recordIndex) throws SchemaChangeException {
  /* Set the current record batch index and the index
   * within the batch at the specified keyIndex. The keyIndex
   * denotes the global index where the key for this record is
   * stored in the hash table.
   */
  int batchIdx = keyIndex / HashTable.BATCH_SIZE;
  int offsetIdx = keyIndex % HashTable.BATCH_SIZE;

  if (keyIndex >= (HashTable.BATCH_SIZE * startIndices.size())) {
    // allocate a new batch
    addStartIndexBatch();
  }

  SelectionVector4 startIndex = startIndices.get(batchIdx);
  int linkIndex;

  // If the head of the list is empty, insert the current index at this position.
  if ((linkIndex = startIndex.get(offsetIdx)) == INDEX_EMPTY) {
    startIndex.set(offsetIdx, batchIndex, recordIndex);
  } else {
    /* The head of this list is not empty; if the first link
     * is empty, insert there.
     */
    batchIdx = linkIndex >>> SHIFT_SIZE;
    offsetIdx = linkIndex & Character.MAX_VALUE;

    SelectionVector4 link = buildInfoList.get(batchIdx).getLinks();
    int firstLink = link.get(offsetIdx);

    if (firstLink == INDEX_EMPTY) {
      link.set(offsetIdx, batchIndex, recordIndex);
    } else {
      /* Insert the current value as the first link and
       * make the current first link its next.
       */
      int firstLinkBatchIdx = firstLink >>> SHIFT_SIZE;
      int firstLinkOffsetIdx = firstLink & Character.MAX_VALUE;

      SelectionVector4 nextLink = buildInfoList.get(batchIndex).getLinks();
      nextLink.set(recordIndex, firstLinkBatchIdx, firstLinkOffsetIdx);

      link.set(offsetIdx, batchIndex, recordIndex);
    }
  }
}
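For context, the linked-list chaining above relies on SelectionVector4's packed 32-bit entries: the upper 16 bits hold a batch index (hence the unsigned shift by SHIFT_SIZE) and the lower 16 bits hold a record offset within that batch (hence the mask with Character.MAX_VALUE, i.e. 0xFFFF). The following minimal sketch illustrates just that encoding; the class and method names are hypothetical, and the constant value is assumed from the shifts and masks used above.

public class PackedIndexSketch {
  // Assumed from the code above: 16-bit batch index, 16-bit record offset.
  static final int SHIFT_SIZE = 16;

  static int pack(int batchIndex, int recordIndex) {
    return (batchIndex << SHIFT_SIZE) | (recordIndex & Character.MAX_VALUE);
  }

  static int batchOf(int packed) {
    return packed >>> SHIFT_SIZE;
  }

  static int recordOf(int packed) {
    return packed & Character.MAX_VALUE;
  }

  public static void main(String[] args) {
    int packed = pack(3, 1234);
    System.out.println(batchOf(packed));  // 3
    System.out.println(recordOf(packed)); // 1234
  }
}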
Use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.
The class MSortTemplate, method setup.
@Override
public void setup(final FragmentContext context, final BufferAllocator allocator, final SelectionVector4 vector4,
                  final VectorContainer hyperBatch, int outputBatchSize) throws SchemaChangeException {
  // We pass in the local hyperBatch since that is where we'll be reading data.
  Preconditions.checkNotNull(vector4);
  this.vector4 = vector4.createNewWrapperCurrent();
  this.context = context;
  vector4.clear();
  doSetup(context, hyperBatch, null);

  // Populate the queue with the offset in the SV4 of each
  // batch. Note that this is expensive as it requires a scan
  // of all items to be sorted: potentially millions.
  runStarts.add(0);
  int batch = 0;
  final int totalCount = this.vector4.getTotalCount();
  for (int i = 0; i < totalCount; i++) {
    final int newBatch = this.vector4.get(i) >>> 16;
    if (newBatch == batch) {
      continue;
    } else if (newBatch == batch + 1) {
      runStarts.add(i);
      batch = newBatch;
    } else {
      throw new UnsupportedOperationException(
          String.format("Missing batch. batch: %d newBatch: %d", batch, newBatch));
    }
  }

  // Create a temporary SV4 to hold the merged results.
  @SuppressWarnings("resource")
  final DrillBuf drillBuf = allocator.buffer(4 * totalCount);
  desiredRecordBatchCount = Math.min(outputBatchSize, Character.MAX_VALUE);
  desiredRecordBatchCount = Math.min(desiredRecordBatchCount, totalCount);
  aux = new SelectionVector4(drillBuf, totalCount, desiredRecordBatchCount);
}
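Each incoming batch arrives pre-sorted and forms one run, so setup() can find run boundaries simply by scanning for the point where the batch number in the upper 16 bits of each SV4 entry increments. A simplified, hypothetical sketch of that scan over a plain int array (standing in for the DrillBuf-backed SelectionVector4):

import java.util.ArrayList;
import java.util.List;

public class RunStartScanSketch {
  static List<Integer> findRunStarts(int[] sv4) {
    List<Integer> runStarts = new ArrayList<>();
    runStarts.add(0);
    int batch = 0;
    for (int i = 0; i < sv4.length; i++) {
      int newBatch = sv4[i] >>> 16; // batch index lives in the upper 16 bits
      if (newBatch == batch + 1) {  // a new batch, i.e. a new sorted run, begins here
        runStarts.add(i);
        batch = newBatch;
      } else if (newBatch != batch) {
        throw new IllegalStateException("Missing batch: " + batch + " -> " + newBatch);
      }
    }
    return runStarts;
  }

  public static void main(String[] args) {
    // Two batches of three records each, packed as (batch << 16) | record.
    int[] sv4 = { 0, 1, 2, (1 << 16), (1 << 16) | 1, (1 << 16) | 2 };
    System.out.println(findRunStarts(sv4)); // [0, 3]
  }
}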
Use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.
The class MSortTemplate, method sort.
@Override
public void sort(final VectorContainer container) {
  final Stopwatch watch = Stopwatch.createStarted();
  while (runStarts.size() > 1) {
    // Check frequently whether we've been cancelled or have failed.
    if (!context.shouldContinue()) {
      return;
    }
    int outIndex = 0;
    final Queue<Integer> newRunStarts = Queues.newLinkedBlockingQueue();
    newRunStarts.add(outIndex);
    final int size = runStarts.size();
    for (int i = 0; i < size / 2; i++) {
      final int left = runStarts.poll();
      final int right = runStarts.poll();
      Integer end = runStarts.peek();
      if (end == null) {
        end = vector4.getTotalCount();
      }
      outIndex = merge(left, right, end, outIndex);
      if (outIndex < vector4.getTotalCount()) {
        newRunStarts.add(outIndex);
      }
    }
    if (outIndex < vector4.getTotalCount()) {
      copyRun(outIndex, vector4.getTotalCount());
    }
    final SelectionVector4 tmp = aux.createNewWrapperCurrent(desiredRecordBatchCount);
    aux.clear();
    aux = vector4.createNewWrapperCurrent(desiredRecordBatchCount);
    vector4.clear();
    vector4 = tmp.createNewWrapperCurrent(desiredRecordBatchCount);
    tmp.clear();
    runStarts = newRunStarts;
  }
  aux.clear();
}
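Each pass of the loop above merges adjacent pairs of runs into the auxiliary SV4, halves the number of runs, and then swaps the roles of vector4 and aux, which is a classic bottom-up merge sort. The merge(left, right, end, outIndex) it calls is code-generated per sort key; the sketch below shows the same two-run merge over plain int arrays with natural ordering, and all names in it are hypothetical.

import java.util.Arrays;

public class TwoRunMergeSketch {
  // Merge in[left..right) and in[right..end) into out starting at outIndex;
  // returns the index just past the last element written.
  static int merge(int[] in, int[] out, int left, int right, int end, int outIndex) {
    int l = left;
    int r = right;
    while (l < right && r < end) {
      out[outIndex++] = (in[l] <= in[r]) ? in[l++] : in[r++]; // stable: ties take the left run
    }
    while (l < right) { out[outIndex++] = in[l++]; } // drain the left run
    while (r < end)   { out[outIndex++] = in[r++]; } // drain the right run
    return outIndex;
  }

  public static void main(String[] args) {
    int[] in = { 1, 4, 7, 2, 3, 9 }; // two sorted runs: [1, 4, 7] and [2, 3, 9]
    int[] out = new int[in.length];
    merge(in, out, 0, 3, 6, 0);
    System.out.println(Arrays.toString(out)); // [1, 2, 3, 4, 7, 9]
  }
}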
Use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.
The class TestPartitionSender, method testPartitionSenderCostToThreads.
/**
 * Main test to go over different scenarios.
 * @throws Exception
 */
@Test
public void testPartitionSenderCostToThreads() throws Exception {
  final VectorContainer container = new VectorContainer();
  container.buildSchema(SelectionVectorMode.FOUR_BYTE);
  final SelectionVector4 sv = Mockito.mock(SelectionVector4.class, "SelectionVector4");
  Mockito.when(sv.getCount()).thenReturn(100);
  Mockito.when(sv.getTotalCount()).thenReturn(100);
  for (int i = 0; i < 100; i++) {
    Mockito.when(sv.get(i)).thenReturn(i);
  }
  final TopNBatch.SimpleRecordBatch incoming = new TopNBatch.SimpleRecordBatch(container, sv, null);

  updateTestCluster(DRILLBITS_COUNT, null);
  test("ALTER SESSION SET `planner.slice_target`=1");
  String plan = getPlanInString("EXPLAIN PLAN FOR " + groupByQuery, JSON_FORMAT);
  System.out.println("Plan: " + plan);

  final DrillbitContext drillbitContext = getDrillbitContext();
  final PhysicalPlanReader planReader = drillbitContext.getPlanReader();
  final PhysicalPlan physicalPlan = planReader.readPhysicalPlan(plan);
  final Fragment rootFragment = PopUnitTestBase.getRootFragmentFromPlanString(planReader, plan);
  final PlanningSet planningSet = new PlanningSet();
  final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(config);

  // Create a planningSet to get the assignment of major fragment ids to fragments.
  PARALLELIZER.initFragmentWrappers(rootFragment, planningSet);

  final List<PhysicalOperator> operators = physicalPlan.getSortedOperators(false);

  // Get the HashToRandomExchange physical operator.
  HashToRandomExchange hashToRandomExchange = null;
  for (PhysicalOperator operator : operators) {
    if (operator instanceof HashToRandomExchange) {
      hashToRandomExchange = (HashToRandomExchange) operator;
      break;
    }
  }

  final OptionList options = new OptionList();

  // Try multiple scenarios with different sets of options.
  options.add(OptionValue.createLong(OptionType.SESSION, "planner.slice_target", 1));
  testThreadsHelper(hashToRandomExchange, drillbitContext, options, incoming, registry, planReader,
      planningSet, rootFragment, 1);

  options.clear();
  options.add(OptionValue.createLong(OptionType.SESSION, "planner.slice_target", 1));
  options.add(OptionValue.createLong(OptionType.SESSION, "planner.partitioner_sender_max_threads", 10));
  hashToRandomExchange.setCost(1000);
  testThreadsHelper(hashToRandomExchange, drillbitContext, options, incoming, registry, planReader,
      planningSet, rootFragment, 10);

  options.clear();
  options.add(OptionValue.createLong(OptionType.SESSION, "planner.slice_target", 1000));
  options.add(OptionValue.createLong(OptionType.SESSION, "planner.partitioner_sender_threads_factor", 2));
  hashToRandomExchange.setCost(14000);
  testThreadsHelper(hashToRandomExchange, drillbitContext, options, incoming, registry, planReader,
      planningSet, rootFragment, 2);
}
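A side note on the mock above: stubbing sv.get(i) in a loop registers one hundred individual stubs. If the project is on Mockito 2 or later, the same identity mapping can be expressed with a single answer. This is a hedged alternative, not what the test actually does:

SelectionVector4 sv = Mockito.mock(SelectionVector4.class, "SelectionVector4");
Mockito.when(sv.getCount()).thenReturn(100);
Mockito.when(sv.getTotalCount()).thenReturn(100);
// One stub for all indices: answer with the call's own argument.
Mockito.when(sv.get(Mockito.anyInt()))
    .thenAnswer(invocation -> invocation.getArgument(0));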
Use of org.apache.drill.exec.record.selection.SelectionVector4 in project drill by apache.
The class OrderedPartitionRecordBatch, method saveSamples.
@SuppressWarnings("resource")
private boolean saveSamples() throws SchemaChangeException, ClassTransformationException, IOException {
  recordsSampled = 0;
  IterOutcome upstream;

  // Start collecting batches until recordsToSample records have been collected.
  SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
  WritableBatch batch = null;
  CachedVectorContainer sampleToSave = null;
  VectorContainer containerToCache = new VectorContainer();
  try {
    builder.add(incoming);
    recordsSampled += incoming.getRecordCount();

    outer:
    while (recordsSampled < recordsToSample) {
      upstream = next(incoming);
      switch (upstream) {
        case NONE:
        case NOT_YET:
        case STOP:
          upstreamNone = true;
          break outer;
        default:
          // fall through
      }
      builder.add(incoming);
      recordsSampled += incoming.getRecordCount();
      if (upstream == IterOutcome.NONE) {
        break;
      }
    }

    VectorContainer sortedSamples = new VectorContainer();
    builder.build(context, sortedSamples);

    // Sort the records according to the orderings given in the configuration.
    Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
    SelectionVector4 sv4 = builder.getSv4();
    sorter.setup(context, sv4, sortedSamples);
    sorter.sort(sv4, sortedSamples);

    // Project every Nth record into a new vector container, where
    // N = recordsSampled / (samplingFactor * partitions). Uses the expressions
    // from the Orderings to populate each column; there is one column for each
    // Ordering in popConfig.orderings.
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier = getCopier(sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      if (copier.copyRecords(recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
        break;
      } else {
        containerToCache.zeroVectors();
        allocationSize *= 2;
      }
    }
    for (VectorWrapper<?> vw : containerToCache) {
      vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
    }
    containerToCache.setRecordCount(copier.getOutputRecords());

    // Get a distributed multimap handle from the distributed cache, put the
    // vectors from the new vector container into a serializable wrapper object,
    // and then add it to the distributed map.
    batch = WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
    sampleToSave = new CachedVectorContainer(batch, context.getAllocator());
    mmap.put(mapKey, sampleToSave);
    this.sampledIncomingBatches = builder.getHeldRecordBatches();
  } finally {
    builder.clear();
    builder.close();
    if (batch != null) {
      batch.clear();
    }
    containerToCache.clear();
    if (sampleToSave != null) {
      sampleToSave.clear();
    }
  }
  return true;
}
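The heart of saveSamples() is the projection step: after sorting, it keeps every Nth record, where N = recordsSampled / (samplingFactor * partitions), so that roughly samplingFactor * partitions evenly spaced samples survive. A simplified, hypothetical sketch of that selection over a plain array (the doubling retry in the original concerns value-vector memory allocation and is omitted here):

import java.util.Arrays;

public class EveryNthSampleSketch {
  static int[] sample(int[] records, int samplingFactor, int partitions) {
    int wanted = samplingFactor * partitions;     // number of samples to keep
    int n = Math.max(1, records.length / wanted); // step between samples
    int[] out = new int[Math.min(wanted, records.length)];
    int count = 0;
    for (int i = 0; i < records.length && count < out.length; i += n) {
      out[count++] = records[i];
    }
    return Arrays.copyOf(out, count);
  }

  public static void main(String[] args) {
    int[] records = new int[1000];
    for (int i = 0; i < records.length; i++) {
      records[i] = i;
    }
    int[] samples = sample(records, 10, 4); // keep ~40 samples, every 25th record
    System.out.println(samples.length + " samples, first = " + samples[0]
        + ", last = " + samples[samples.length - 1]);
  }
}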