
Example 1 with ByteSegmentRef

Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in the Apache Hive project.

From class VectorMapJoinGenerateResultOperator, method generateHashMapResultSingleValue.

// ------------------------------------------------------------------------------------------------
/*
 * Common code for generating join results from hash maps, used by Inner and Outer joins.
 */
/**
 * Generate join results for a single small table value match.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map results for the matching key.
 * @param allMatchs
 *          The all-match selection array that contains (physical) batch indices.
 * @param allMatchesIndex
 *          Index into allMatchs of the matching key we are generating results for.
 * @param duplicateCount
 *          Number of equal key rows.
 * @param numSel
 *          The running count of big table rows selected for forwarding.
 * @return
 *          The new count of selected rows.
 */
protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex, int duplicateCount, int numSel) throws HiveException, IOException {
    // Read single value.
    ByteSegmentRef byteSegmentRef = hashMapResult.first();
    for (int i = 0; i < duplicateCount; i++) {
        int batchIndex = allMatchs[allMatchesIndex + i];
        if (bigTableVectorCopyOuterKeys != null) {
            // Copy the outer join keys within the same big table row, by reference.
            bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex);
        }
        if (smallTableVectorDeserializeRow != null) {
            doSmallTableDeserializeRow(batch, batchIndex, byteSegmentRef, hashMapResult);
        }
        // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table");
        // Use the big table row as output.
        batch.selected[numSel++] = batchIndex;
    }
    return numSel;
}
Also used : ByteSegmentRef(org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef)
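
For orientation, here is a minimal sketch of the iteration pattern the hash map result supports (hypothetical method name, same imports as the snippet above; it assumes only the first()/next() and ByteSegmentRef accessor calls that appear in these examples). Each ByteSegmentRef is just a window (bytes, offset, length) into the hash table's shared write buffers.

// Minimal sketch, assuming only the calls used in the examples on this page.
void visitAllValues(VectorMapJoinHashMapResult hashMapResult) {
    ByteSegmentRef ref = hashMapResult.first();
    while (ref != null) {
        // Shared, immutable backing buffer plus the window that belongs to this value.
        byte[] bytes = ref.getBytes();
        int offset = (int) ref.getOffset();
        int length = ref.getLength();
        // A caller would hand (bytes, offset, length) to a deserializer here.
        ref = hashMapResult.next();
    }
}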

Example 2 with ByteSegmentRef

Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in the Apache Hive project.

From class VectorMapJoinGenerateResultOperator, method generateHashMapResultLargeMultiValue.

/**
 * Generate results for a large N x M cross product using the repeating vectorized row
 * batch optimization.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map results for the matching key.
 * @param allMatchs
 *          The all match selected array that contains (physical) batch indices.
 * @param allMatchesIndex
 *          The index of the match key.
 * @param duplicateCount
 *          Number of equal key rows.
 */
private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex, int duplicateCount) throws HiveException, IOException {
    // Kick out previous overflow batch results.
    if (overflowBatch.size > 0) {
        forwardOverflow();
    }
    ByteSegmentRef byteSegmentRef = hashMapResult.first();
    while (byteSegmentRef != null) {
        // Fill up as much of the overflow batch as possible with small table values.
        while (byteSegmentRef != null) {
            if (smallTableVectorDeserializeRow != null) {
                doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult);
            }
            overflowBatch.size++;
            if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) {
                break;
            }
            byteSegmentRef = hashMapResult.next();
        }
        for (int i = 0; i < duplicateCount; i++) {
            int batchIndex = allMatchs[allMatchesIndex + i];
            if (bigTableRetainedVectorCopy != null) {
                // The one big table row's values repeat.
                bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0);
                for (int column : bigTableRetainedMapping.getOutputColumns()) {
                    overflowBatch.cols[column].isRepeating = true;
                }
            }
            // Crucial here that we don't reset the overflow batch, or we will lose the small table
            // values we put in above.
            forwardOverflowNoReset();
            // Hand reset the big table columns.
            for (int column : bigTableRetainedMapping.getOutputColumns()) {
                ColumnVector colVector = overflowBatch.cols[column];
                colVector.reset();
            }
        }
        byteSegmentRef = hashMapResult.next();
        if (byteSegmentRef == null) {
            break;
        }
        // Get ready for another round of small table values.
        overflowBatch.reset();
    }
    // Clear away any residue from our optimizations.
    overflowBatch.reset();
}
Also used : ByteSegmentRef(org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)
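
The repeated-row trick above hinges on ColumnVector.isRepeating: when the flag is set, element 0 of the vector stands for every row in the batch, which is why the single big table row only has to be copied into position 0 once per overflow batch. Below is a small standalone sketch of that flag (hypothetical values and class name; it assumes the public Hive fields used in the example above behave as shown there).

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RepeatingColumnSketch {
    public static void main(String[] args) {
        // One-column batch; the caller supplies the column vector.
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        LongColumnVector col = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        batch.cols[0] = col;
        // The single big table value; with isRepeating set, row 0 stands for all rows.
        col.vector[0] = 42L;
        col.isRepeating = true;
        batch.size = VectorizedRowBatch.DEFAULT_SIZE;
        System.out.println("repeating value = " + col.vector[0] + ", rows = " + batch.size);
        // Resetting clears isRepeating before the column is reused, which is what the
        // hand reset in the example above does for each big table row.
        col.reset();
    }
}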

Example 3 with ByteSegmentRef

Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in the Apache Hive project.

From class VectorMapJoinGenerateResultOperator, method doSmallTableDeserializeRow.

protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex, ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult) throws HiveException {
    byte[] bytes = byteSegmentRef.getBytes();
    int offset = (int) byteSegmentRef.getOffset();
    int length = byteSegmentRef.getLength();
    smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
    try {
        // Our hash tables are immutable, so STRING, CHAR/VARCHAR, etc. can safely be deserialized by reference.
        smallTableVectorDeserializeRow.deserializeByRef(batch, batchIndex);
    } catch (Exception e) {
        throw new HiveException("\nHashMapResult detail: " + hashMapResult.getDetailedHashMapResultPositionString() + "\nDeserializeRead detail: " + smallTableVectorDeserializeRow.getDetailedReadPositionString(), e);
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
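
Because deserializeByRef keeps pointing into the hash table's buffers, the deserialized values stay valid only as long as the hash table does. If a caller instead needed an independent copy of the referenced segment, a straightforward hypothetical helper using the same three accessors would be:

// Hypothetical helper: materialize the (offset, length) window as an independent byte array.
byte[] copySegment(ByteSegmentRef byteSegmentRef) {
    byte[] copy = new byte[byteSegmentRef.getLength()];
    System.arraycopy(byteSegmentRef.getBytes(), (int) byteSegmentRef.getOffset(),
        copy, 0, byteSegmentRef.getLength());
    return copy;
}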

Example 4 with ByteSegmentRef

Use of org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef in the Apache Hive project.

From class VectorMapJoinGenerateResultOperator, method generateHashMapResultMultiValue.

/**
 * Generate results for an N x M cross product.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map results for the matching key.
 * @param allMatchs
 *          The all match selected array that contains (physical) batch indices.
 * @param allMatchesIndex
 *          The index of the match key.
 * @param duplicateCount
 *          Number of equal key rows.
 */
protected void generateHashMapResultMultiValue(VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex, int duplicateCount) throws HiveException, IOException {
    if (useOverflowRepeatedThreshold && hashMapResult.isCappedCountAvailable() && hashMapResult.cappedCount() > overflowRepeatedThreshold) {
        // Large cross product: use the repeating vectorized row batch optimization
        // in the overflow batch.
        generateHashMapResultLargeMultiValue(batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount);
        return;
    }
    for (int i = 0; i < duplicateCount; i++) {
        int batchIndex = allMatchs[allMatchesIndex + i];
        ByteSegmentRef byteSegmentRef = hashMapResult.first();
        while (byteSegmentRef != null) {
            // Note this includes any outer join keys that need to go into the small table "area".
            if (bigTableRetainedVectorCopy != null) {
                bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, overflowBatch, overflowBatch.size);
            }
            if (smallTableVectorDeserializeRow != null) {
                doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult);
            }
            // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow");
            overflowBatch.size++;
            if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) {
                forwardOverflow();
            }
            byteSegmentRef = hashMapResult.next();
        }
    }
}
Also used : ByteSegmentRef(org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef)
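
As a back-of-the-envelope illustration of what both multi-value paths produce, the sketch below (plain arrays standing in for the hash map result; all names hypothetical) expands the same N x M cross product: every one of the N matching big table rows is paired with every one of the M small table values, so the overflowRepeatedThreshold check above is simply asking whether M is large enough to justify the repeating-batch variant.

public class CrossProductCountSketch {
    public static void main(String[] args) {
        int[] bigTableMatches = { 7, 12, 31 };                 // N = 3 matching batch indices
        String[] smallTableValues = { "a", "b", "c", "d" };    // M = 4 values for the same key

        int emitted = 0;
        for (int batchIndex : bigTableMatches) {
            for (String value : smallTableValues) {
                // A real operator copies the big table columns and deserializes the
                // small table value into an output batch here.
                emitted++;
            }
        }
        System.out.println("joined rows for this key = " + emitted);  // N x M = 12
    }
}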

Aggregations

ByteSegmentRef (org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef): 3
IOException (java.io.IOException): 1
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 1
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 1