Search in sources :

Example 66 with IntVector

Use of org.apache.drill.exec.vector.IntVector in the Apache Drill project.

Class TestFixedWidthWriter, method testWrite.

/**
 * Writes a contiguous run of integers through a column writer and then
 * reads them back directly from the vector. The vector is allocated for
 * 1000 values while 3000 are written, so its buffer is expected to be
 * reallocated along the way.
 */
@Test
public void testWrite() {
    final int initialCapacity = 1000;
    final int valueCount = 3000;
    try (IntVector intVector = allocVector(initialCapacity)) {
        TestIndex rowIndex = new TestIndex();
        IntColumnWriter columnWriter = makeWriter(intVector, rowIndex);
        columnWriter.startWrite();

        // Remember where the buffer lives before any growth happens.
        long addrBeforeWrite = intVector.getBuffer().addr();

        // Write more values than were allocated so the vector must resize.
        int row = 0;
        while (row < valueCount) {
            rowIndex.index = row;
            columnWriter.setInt(row * 10);
            row++;
        }
        columnWriter.endWrite();

        // A changed buffer address shows that a reallocation took place.
        long addrAfterWrite = intVector.getBuffer().addr();
        assertNotEquals(addrBeforeWrite, addrAfterWrite);

        // Every value must read back exactly as it was written.
        for (int pos = 0; pos < valueCount; pos++) {
            int expected = pos * 10;
            assertEquals(expected, intVector.getAccessor().get(pos));
        }
    }
}
Also used : IntVector(org.apache.drill.exec.vector.IntVector) IntColumnWriter(org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 67 with IntVector

Use of org.apache.drill.exec.vector.IntVector in the Apache Drill project.

Class LateralJoinBatch, method crossJoinAndOutputRecords.

/**
 * Main entry point for producing the output records. This method populates
 * the output batch after cross join of the record in a given left batch at
 * left index and all the corresponding rows in right batches produced by
 * Unnest for current left batch. For each call to this function, the number
 * of records copied into the output batch is limited to the free slots left
 * in the output batch (maxOutputRowCount - outputIndex) or the number of
 * rows remaining in the right incoming batch, whichever runs out first.
 *
 * <p>The method advances the outputIndex, leftJoinIndex and rightJoinIndex
 * fields and toggles matchedRecordFound as it goes.
 *
 * @throws IllegalStateException if the right rowId or the left index is
 *         beyond the left batch's record count, or if a precondition check
 *         on the right index / rowId ordering fails
 */
private void crossJoinAndOutputRecords() {
    final int rightRecordCount = right.getRecordCount();
    // If there is no record in the right batch there is nothing to join; return without touching the output batch
    if (rightRecordCount <= 0) {
        return;
    }
    // A non-empty right batch must come with a valid (non -1) right index
    Preconditions.checkState(rightJoinIndex != -1, "Right batch record count is >0 but index is -1");
    int currentOutIndex = outputIndex;
    // Number of rows that can be copied in output batch
    int maxAvailableRowSlot = maxOutputRowCount - currentOutIndex;
    if (logger.isDebugEnabled()) {
        logger.debug("Producing output for leftIndex: {}, rightIndex: {}, rightRecordCount: {}, outputIndex: {} and " + "availableSlotInOutput: {}", leftJoinIndex, rightJoinIndex, rightRecordCount, outputIndex, maxAvailableRowSlot);
        logger.debug("Output Batch stats before copying new data: {}", new RecordBatchSizer(this));
    }
    // Assuming that first vector in right batch is for implicitColumn.
    // The implicit column carries the rowId of each right row.
    // (Disabled alternative) final Map<Integer, Integer> indexToFreq = getRowIdToRowCountMapping();
    final IntVector rowIdVector = (IntVector) implicitVector;
    final int leftRecordCount = left.getRecordCount();
    // Keep producing rows until either the output batch has no free slot left
    // or every row of the right batch has been consumed
    while (maxAvailableRowSlot > 0 && rightJoinIndex < rightRecordCount) {
        // Get rowId from current right row
        int currentRowId = rowIdVector.getAccessor().get(rightJoinIndex);
        // The rowId compared against the right side is the left index plus one
        int leftRowId = leftJoinIndex + 1;
        int numRowsCopied = 0;
        if (currentRowId > leftRecordCount || leftJoinIndex > leftRecordCount) {
            // Fail fast: the right rowId or the left index lies beyond the left batch's record count
            throw new IllegalStateException(String.format("Either RowId in right batch is greater than total records in " + "left batch or all rows in left batch is processed but there are still rows in right batch. " + "Details[RightRowId: %s, LeftRecordCount: %s, LeftJoinIndex: %s, RightJoinIndex: %s]", currentRowId, leftRecordCount, leftJoinIndex, rightJoinIndex));
        }
        if (logger.isTraceEnabled()) {
            // Inside the if condition to eliminate parameter boxing cost
            logger.trace("leftRowId and currentRowId are: {}, {}", leftRowId, currentRowId);
        }
        // On a rowId match, emit one joined row and advance outputIndex and rightJoinIndex
        // by numRowsCopied. Also set matchedRecordFound so that leftJoinIndex is advanced
        // once the right side moves past this rowId.
        if (leftRowId == currentRowId) {
            // there is a match
            matchedRecordFound = true;
            numRowsCopied = 1;
            // (Disabled alternative) numRowsCopied = Math.min(indexToFreq.get(currentRowId), maxAvailableRowSlot);
            emitRight(rightJoinIndex, outputIndex, numRowsCopied);
            emitLeft(leftJoinIndex, outputIndex, numRowsCopied);
            outputIndex += numRowsCopied;
            rightJoinIndex += numRowsCopied;
        } else if (leftRowId < currentRowId) {
            // The right side has moved past the current left row. If that left row already
            // produced output, just advance to the next left row
            // and reset the matchedRecordFound flag
            if (matchedRecordFound) {
                matchedRecordFound = false;
                ++leftJoinIndex;
                continue;
            } else {
                // No right row matched this left row: for a LEFT join emit the left side only
                // and increase the indexes properly to reflect that
                if (JoinRelType.LEFT == popConfig.getJoinType()) {
                    numRowsCopied = 1;
                    emitLeft(leftJoinIndex, outputIndex, numRowsCopied);
                    ++outputIndex;
                }
                ++leftJoinIndex;
            }
        } else {
            // Reached only when leftRowId > currentRowId, so this check always fails
            // and reports the unexpected ordering of rowIds
            Preconditions.checkState(leftRowId <= currentRowId, "Unexpected case where rowId " + "%s in right batch of lateral is smaller than rowId %s in left batch being processed", currentRowId, leftRowId);
        }
        // Update the max available rows slot in output batch
        maxAvailableRowSlot -= numRowsCopied;
    }
}
Also used : RecordBatchSizer(org.apache.drill.exec.record.RecordBatchSizer) IntVector(org.apache.drill.exec.vector.IntVector)

Example 68 with IntVector

Use of org.apache.drill.exec.vector.IntVector in the Apache Drill project.

Class HashPartition, method allocateNewCurrentBatchAndHV.

/**
 * Allocates a fresh current vector container together with a fresh
 * hash-value (HV) vector sized for {@code recordsPerBatch} entries.
 * Does nothing when {@code outerBatchAllocNotNeeded} is set.
 */
public void allocateNewCurrentBatchAndHV() {
    // Nothing to allocate when the inner side is wholly in memory.
    if (!outerBatchAllocNotNeeded) {
        // The source batch depends on whether the outer side is being processed.
        currentBatch = allocateNewVectorContainer(processingOuter ? probeBatch : buildBatch);
        final MaterializedField hvField = MaterializedField.create(HASH_VALUE_COLUMN_NAME, HVtype);
        currHVVector = new IntVector(hvField, allocator);
        currHVVector.allocateNew(recordsPerBatch);
    }
}
Also used : IntVector(org.apache.drill.exec.vector.IntVector)

Example 69 with IntVector

Use of org.apache.drill.exec.vector.IntVector in the Apache Drill project.

Class HashPartition, method buildContainersHashTableAndHelper.

/**
 * Creates the hash table and join helper for this partition.
 * This method should only be called after all the build side records
 * have been consumed.
 *
 * <p>Does nothing for a spilled partition. Otherwise every batch in
 * {@code tmpBatchesList} is inserted into the hash table row by row, using
 * the hash codes read from the batch's last vector, and the batch is then
 * added to {@code containers}. Unless this is a semi join, the join helper
 * also records the (batch index, record index) pair for each hash table
 * index. On completion {@code outerBatchAllocNotNeeded} is set.
 *
 * @throws SchemaChangeException declared by the signature; presumably raised
 *         by the hash table calls (verify against HashTable)
 * @throws OutOfMemoryException if a hash table insertion signals a
 *         {@link RetryAfterSpillException}; the original exception is kept
 *         as the cause
 */
public void buildContainersHashTableAndHelper() throws SchemaChangeException {
    // no building for spilled partitions
    if (isSpilled) {
        return;
    }
    containers = new ArrayList<>();
    hashTable.updateInitialCapacity((int) getNumInMemoryRecords());
    for (int curr = 0; curr < partitionBatchesCount; curr++) {
        VectorContainer nextBatch = tmpBatchesList.get(curr);
        // Check before the first dereference so the assertions are meaningful
        assert nextBatch != null;
        assert probeBatch != null;
        final int currentRecordCount = nextBatch.getRecordCount();
        // For every incoming build batch, we create a matching helper batch
        if (!semiJoin) {
            hjHelper.addNewBatch(currentRecordCount);
        }
        // Holder contains the global index where the key is hashed into using the hash table
        final IndexPointer htIndex = new IndexPointer();
        hashTable.updateIncoming(nextBatch, probeBatch);
        // The last vector of the batch holds the hash value of each record
        final IntVector hvVector = (IntVector) nextBatch.getLast();
        for (int recInd = 0; recInd < currentRecordCount; recInd++) {
            int hashCode = hvVector.getAccessor().get(recInd);
            try {
                hashTable.put(recInd, htIndex, hashCode, BATCH_SIZE);
            } catch (RetryAfterSpillException e) {
                // Keep the original exception as the cause for diagnosis
                throw new OutOfMemoryException("HT put", e);
            }
            /* Use the global index returned by the hash table, to store
             * the current record index and batch index. This will be used
             * later when we probe and find a match.
             */
            if (!semiJoin) {
                // curr is the build batch index, recInd the record index within it
                hjHelper.setCurrentIndex(htIndex.value, curr, recInd);
            }
        }
        containers.add(nextBatch);
    }
    // the inner is whole in memory, no need for an outer batch
    outerBatchAllocNotNeeded = true;
}
Also used : IntVector(org.apache.drill.exec.vector.IntVector) RetryAfterSpillException(org.apache.drill.common.exceptions.RetryAfterSpillException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Aggregations

IntVector (org.apache.drill.exec.vector.IntVector)69 Test (org.junit.Test)56 BigIntVector (org.apache.drill.exec.vector.BigIntVector)26 SchemaPath (org.apache.drill.common.expression.SchemaPath)23 ExecTest (org.apache.drill.exec.ExecTest)22 SubOperatorTest (org.apache.drill.test.SubOperatorTest)21 FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry)18 PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan)18 FragmentRoot (org.apache.drill.exec.physical.base.FragmentRoot)18 SimpleRootExec (org.apache.drill.exec.physical.impl.SimpleRootExec)18 PhysicalPlanReader (org.apache.drill.exec.planner.PhysicalPlanReader)18 OperatorTest (org.apache.drill.categories.OperatorTest)14 IntColumnWriter (org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter)14 DrillbitContext (org.apache.drill.exec.server.DrillbitContext)13 FragmentContextImpl (org.apache.drill.exec.ops.FragmentContextImpl)12 UserClientConnection (org.apache.drill.exec.rpc.UserClientConnection)12 BigIntHolder (org.apache.drill.exec.expr.holders.BigIntHolder)6 IntHolder (org.apache.drill.exec.expr.holders.IntHolder)6 FragmentContext (org.apache.drill.exec.ops.FragmentContext)6 MaterializedField (org.apache.drill.exec.record.MaterializedField)6