Use of org.apache.drill.exec.vector.IntVector in project drill by apache.
The class TestFixedWidthWriter, method testWrite.
/**
* Basic test to write a contiguous set of values, enough to cause
* the vector to double in size twice, then read back the values.
*/
@Test
public void testWrite() {
  try (IntVector vector = allocVector(1000)) {
    TestIndex index = new TestIndex();
    IntColumnWriter writer = makeWriter(vector, index);
    writer.startWrite();

    // Write integers.
    // Write enough that the vector is resized.
    long origAddr = vector.getBuffer().addr();
    for (int i = 0; i < 3000; i++) {
      index.index = i;
      writer.setInt(i * 10);
    }
    writer.endWrite();

    // Should have been reallocated.
    assertNotEquals(origAddr, vector.getBuffer().addr());
    for (int i = 0; i < 3000; i++) {
      assertEquals(i * 10, vector.getAccessor().get(i));
    }
  }
}
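The test relies on helpers defined elsewhere in TestFixedWidthWriter: allocVector, makeWriter, and the TestIndex writer index. A minimal sketch of what the allocation helper might look like, assuming a test BufferAllocator field named allocator; the column name "x" is an illustrative assumption rather than the test's exact code.

// Hypothetical sketch of the allocVector helper used above: declare a required INT
// column and allocate an IntVector with an initial capacity of `size` values.
// The column name "x" and the `allocator` field are assumptions for illustration.
private IntVector allocVector(int size) {
  MaterializedField field =
      MaterializedField.create("x", Types.required(TypeProtos.MinorType.INT));
  IntVector vector = new IntVector(field, allocator);
  vector.allocateNew(size);
  return vector;
}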
Use of org.apache.drill.exec.vector.IntVector in project drill by apache.
The class LateralJoinBatch, method crossJoinAndOutputRecords.
/**
 * Main entry point for producing the output records. This method populates
 * the output batch after cross joining the record of a given left batch at
 * the current left index with all the corresponding rows in right batches
 * produced by Unnest for that left batch. For each call to this function, the
 * number of records copied into the output batch is limited by the maximum
 * number of rows the output batch can hold or by the number of rows in the
 * right incoming batch.
 */
private void crossJoinAndOutputRecords() {
  final int rightRecordCount = right.getRecordCount();

  // If there is no record in right batch just return current index in output batch
  if (rightRecordCount <= 0) {
    return;
  }

  // Check if right batch is empty since we have to handle left join case
  Preconditions.checkState(rightJoinIndex != -1, "Right batch record count is >0 but index is -1");

  int currentOutIndex = outputIndex;
  // Number of rows that can be copied in output batch
  int maxAvailableRowSlot = maxOutputRowCount - currentOutIndex;

  if (logger.isDebugEnabled()) {
    logger.debug("Producing output for leftIndex: {}, rightIndex: {}, rightRecordCount: {}, outputIndex: {} and " +
      "availableSlotInOutput: {}", leftJoinIndex, rightJoinIndex, rightRecordCount, outputIndex, maxAvailableRowSlot);
    logger.debug("Output Batch stats before copying new data: {}", new RecordBatchSizer(this));
  }

  // Assuming that first vector in right batch is for implicitColumn.
  // get a mapping of number of rows for each rowId present in current right side batch
  // final Map<Integer, Integer> indexToFreq = getRowIdToRowCountMapping();
  final IntVector rowIdVector = (IntVector) implicitVector;
  final int leftRecordCount = left.getRecordCount();
  // Loop until either the output batch is full or we reach the rightBatch end, whichever comes first
  while (maxAvailableRowSlot > 0 && rightJoinIndex < rightRecordCount) {
    // Get rowId from current right row
    int currentRowId = rowIdVector.getAccessor().get(rightJoinIndex);
    int leftRowId = leftJoinIndex + 1;
    int numRowsCopied = 0;

    if (currentRowId > leftRecordCount || leftJoinIndex > leftRecordCount) {
      // Not using Preconditions.checkState here to avoid the boxing cost of the arguments.
      throw new IllegalStateException(String.format("Either RowId in right batch is greater than total records in " +
        "left batch or all rows in left batch is processed but there are still rows in right batch. " +
        "Details[RightRowId: %s, LeftRecordCount: %s, LeftJoinIndex: %s, RightJoinIndex: %s]", currentRowId, leftRecordCount, leftJoinIndex, rightJoinIndex));
    }

    if (logger.isTraceEnabled()) {
      // Inside the if condition to eliminate parameter boxing cost
      logger.trace("leftRowId and currentRowId are: {}, {}", leftRowId, currentRowId);
    }
    // If leftRowId matches the rowId in the current right row, emit both left and right rows. Increase outputIndex, rightJoinIndex
    // and numRowsCopied. Also set matchedRecordFound to true to indicate when to increase leftJoinIndex.
    if (leftRowId == currentRowId) {
      // there is a match
      matchedRecordFound = true;
      numRowsCopied = 1;
      // numRowsCopied = Math.min(indexToFreq.get(currentRowId), maxAvailableRowSlot);
      emitRight(rightJoinIndex, outputIndex, numRowsCopied);
      emitLeft(leftJoinIndex, outputIndex, numRowsCopied);
      outputIndex += numRowsCopied;
      rightJoinIndex += numRowsCopied;
    } else if (leftRowId < currentRowId) {
      // If a matching right row for leftRowId was found in a previous iteration, then move to the next left row
      // and reset the matchedRecordFound flag
      if (matchedRecordFound) {
        matchedRecordFound = false;
        ++leftJoinIndex;
        continue;
      } else {
        // If no matching right row was found, then for a LEFT join emit the left row alone in the output
        // and increase the indexes properly to reflect that
        if (JoinRelType.LEFT == popConfig.getJoinType()) {
          numRowsCopied = 1;
          emitLeft(leftJoinIndex, outputIndex, numRowsCopied);
          ++outputIndex;
        }
        ++leftJoinIndex;
      }
    } else {
      Preconditions.checkState(leftRowId <= currentRowId, "Unexpected case where rowId " +
        "%s in right batch of lateral is smaller than rowId %s in left batch being processed", currentRowId, leftRowId);
    }
    // Update the max available rows slot in output batch
    maxAvailableRowSlot -= numRowsCopied;
  }
}
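The commented-out getRowIdToRowCountMapping() call above hints at an alternative strategy in which the number of right rows per left rowId is precomputed. A rough sketch of how such a mapping could be built from the implicit rowId IntVector, assuming the same implicitVector and right fields; this is illustrative, not necessarily the actual helper in LateralJoinBatch.

// Hypothetical sketch: build a rowId -> row count map by scanning the implicit
// rowId column (an IntVector) of the current right batch.
private Map<Integer, Integer> getRowIdToRowCountMapping() {
  final Map<Integer, Integer> indexToFreq = new HashMap<>();
  final IntVector.Accessor rowIdAccessor = ((IntVector) implicitVector).getAccessor();
  for (int i = 0; i < right.getRecordCount(); i++) {
    indexToFreq.merge(rowIdAccessor.get(i), 1, Integer::sum);
  }
  return indexToFreq;
}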
Use of org.apache.drill.exec.vector.IntVector in project drill by apache.
The class HashPartition, method allocateNewCurrentBatchAndHV.
/**
* Allocate a new current Vector Container and current HV vector
*/
public void allocateNewCurrentBatchAndHV() {
  // skip when the inner is whole in memory
  if (outerBatchAllocNotNeeded) {
    return;
  }
  currentBatch = allocateNewVectorContainer(processingOuter ? probeBatch : buildBatch);
  currHVVector = new IntVector(MaterializedField.create(HASH_VALUE_COLUMN_NAME, HVtype), allocator);
  currHVVector.allocateNew(recordsPerBatch);
}
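currHVVector is a plain IntVector with one slot per record of the batch; it is later filled with the hash value computed for each copied row. A minimal sketch of how such a column is typically populated through the IntVector mutator; the helper names and the pos/hashCode/recordCount parameters are assumptions for illustration, not HashPartition's exact code.

// Hypothetical usage sketch: write one hash code per record slot, then seal the
// vector by recording how many values it holds for this batch.
void writeHashValue(int pos, int hashCode) {
  currHVVector.getMutator().set(pos, hashCode);
}

void finishHashValueBatch(int recordCount) {
  currHVVector.getMutator().setValueCount(recordCount);
}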
Use of org.apache.drill.exec.vector.IntVector in project drill by apache.
The class HashPartition, method buildContainersHashTableAndHelper.
/**
* Creates the hash table and join helper for this partition.
* This method should only be called after all the build side records
* have been consumed.
*/
public void buildContainersHashTableAndHelper() throws SchemaChangeException {
  // no building for spilled partitions
  if (isSpilled) {
    return;
  }
  containers = new ArrayList<>();
  hashTable.updateInitialCapacity((int) getNumInMemoryRecords());
  for (int curr = 0; curr < partitionBatchesCount; curr++) {
    VectorContainer nextBatch = tmpBatchesList.get(curr);
    final int currentRecordCount = nextBatch.getRecordCount();

    // For every incoming build batch, we create a matching helper batch
    if (!semiJoin) {
      hjHelper.addNewBatch(currentRecordCount);
    }

    // Holder contains the global index where the key is hashed into using the hash table
    final IndexPointer htIndex = new IndexPointer();

    assert nextBatch != null;
    assert probeBatch != null;

    hashTable.updateIncoming(nextBatch, probeBatch);

    IntVector HV_vector = (IntVector) nextBatch.getLast();
    for (int recInd = 0; recInd < currentRecordCount; recInd++) {
      int hashCode = HV_vector.getAccessor().get(recInd);
      try {
        hashTable.put(recInd, htIndex, hashCode, BATCH_SIZE);
      } catch (RetryAfterSpillException RE) {
        throw new OutOfMemoryException("HT put");
      }
      /* Use the global index returned by the hash table, to store
       * the current record index and batch index. This will be used
       * later when we probe and find a match.
       */
      if (!semiJoin) {
        hjHelper.setCurrentIndex(htIndex.value, curr /* buildBatchIndex */, recInd);
      }
    }
    containers.add(nextBatch);
  }
  // the inner is whole in memory, no need for an outer batch
  outerBatchAllocNotNeeded = true;
}
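The hash table's initial capacity above is taken from getNumInMemoryRecords(), which is not shown in this snippet. A plausible sketch, assuming it simply sums the record counts of the in-memory build batches kept in tmpBatchesList; HashPartition's actual bookkeeping may differ (for example, it may maintain a running counter instead).

// Hypothetical sketch: total build-side records currently held in memory for this
// partition, computed by summing the record count of each temporary batch.
public long getNumInMemoryRecords() {
  long numRecords = 0;
  for (VectorContainer batch : tmpBatchesList) {
    numRecords += batch.getRecordCount();
  }
  return numRecords;
}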