Search in sources :

Example 1 with VectorPartitionDesc

use of org.apache.hadoop.hive.ql.plan.VectorPartitionDesc in project hive by apache.

In the class VectorMapOperator, the method setupPartitionContextVars:

/**
 * Prepares the per-partition reading state for the next partition file.
 *
 * <p>The partition context is looked up in {@code fileToPartitionContextMap} by
 * {@code nominalPath}. If no context is registered, {@code currentVectorPartContext} is left
 * {@code null} and nothing else is touched.
 *
 * <p>Three read types are handled:
 * <ol>
 *   <li>{@code VECTORIZED_INPUT_FILE_FORMAT}: every "current" reading variable is cleared,
 *       because the reader itself is responsible for filling the batch.</li>
 *   <li>{@code VECTOR_DESERIALIZE}: the deserialize-read and vector-deserialize-row objects are
 *       taken from the partition context; the row-deserialize variables are cleared.</li>
 *   <li>{@code ROW_DESERIALIZE}: the partition deserializer, raw row object inspector and
 *       vector-assign objects are taken from the partition context; the vector-deserialize
 *       variables are cleared.</li>
 * </ol>
 *
 * <p>For the two deserialize types the pending deserializer batch is flushed first; if the
 * flush reports that the operator tree is done, the method returns without further changes.
 *
 * @param nominalPath key used to find the partition context in {@code fileToPartitionContextMap}
 * @throws HiveException declared by this method; which callee raises it is not visible here
 */
private void setupPartitionContextVars(String nominalPath) throws HiveException {
    currentVectorPartContext = fileToPartitionContextMap.get(nominalPath);
    if (currentVectorPartContext == null) {
        return;
    }

    final PartitionDesc partitionDesc = currentVectorPartContext.getPartDesc();
    currentReadType = partitionDesc.getVectorPartitionDesc().getVectorMapOperatorReadType();

    if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
        // The Vectorized Input File Format reader sets partition column values and resets and
        // fills the batch on its own, so none of the row-level reading variables are needed.
        currentVectorAssign = null;
        currentPartRawRowObjectInspector = null;
        currentPartDeserializer = null;
        currentVectorDeserializeRow = null;
        currentDeserializeRead = null;
        currentDataColumnCount = 0;
        return;
    }

    // From here on the Input File Format reader hands us "regular" single rows that must be
    // {vector|row} deserialized into the batch.
    Preconditions.checkState(currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE || currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);

    // Rows from the previous partition must leave the batch before the repeating partition
    // column values are changed below.
    final boolean operatorTreeStillRunning = flushDeserializerBatch();
    if (!operatorTreeStillRunning) {
        // Operator tree is now done.
        return;
    }

    // Number of data columns this particular file will actually supply.
    currentDataColumnCount = currentVectorPartContext.getReaderDataColumnCount();

    // Any data columns beyond what the file supplies are defaulted to a repeating NULL, once
    // for the whole file. The loop is a no-op when the file supplies every column.
    for (int columnIndex = currentDataColumnCount; columnIndex < dataColumnCount; columnIndex++) {
        final ColumnVector missingColumn = deserializerBatch.cols[columnIndex];
        if (missingColumn == null) {
            continue;
        }
        missingColumn.isNull[0] = true;
        missingColumn.noNulls = false;
        missingColumn.isRepeating = true;
    }

    // Partition columns are set once per partition and are marked repeating.
    if (batchContext.getPartitionColumnCount() > 0) {
        VectorizedRowBatchCtx.getPartitionValues(batchContext, partDescOf(partitionDesc), partitionValues);
        batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues);
    }

    if (hasRowIdentifier) {
        // No ACID in code path -- set ROW__ID to NULL.
        setRowIdentiferToNull(deserializerBatch);
    }

    // Finally, set the variables for the active deserialization style and clear the others.
    if (currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE) {
        final VectorDeserializePartitionContext vectorContext = (VectorDeserializePartitionContext) currentVectorPartContext;
        currentDeserializeRead = vectorContext.getDeserializeRead();
        currentVectorDeserializeRow = vectorContext.getVectorDeserializeRow();
        currentPartDeserializer = null;
        currentPartRawRowObjectInspector = null;
        currentVectorAssign = null;
    } else if (currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE) {
        final RowDeserializePartitionContext rowContext = (RowDeserializePartitionContext) currentVectorPartContext;
        currentDeserializeRead = null;
        currentVectorDeserializeRow = null;
        currentPartDeserializer = rowContext.getPartDeserializer();
        currentPartRawRowObjectInspector = rowContext.getPartRawRowObjectInspector();
        currentVectorAssign = rowContext.getVectorAssign();
    } else {
        throw new RuntimeException("Unexpected VectorMapOperator read type " + currentReadType.name());
    }
}

/** Identity helper that keeps the partition-values call on one readable line. */
private static PartitionDesc partDescOf(PartitionDesc partitionDesc) {
    return partitionDesc;
}
Also used : VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc)

Example 2 with VectorPartitionDesc

use of org.apache.hadoop.hive.ql.plan.VectorPartitionDesc in project hive by apache.

In the class VectorMapOperator, the method createAndInitPartitionContext:

/**
 * Builds and initializes the partition context that matches the partition's vectorized read
 * type.
 *
 * <p>For the {@code VECTOR_DESERIALIZE} and {@code ROW_DESERIALIZE} read types, a precondition
 * first verifies that schema evolution is enabled for the configuration (taking into account
 * whether the table properties mark the table as transactional). The context is then created
 * for the read type and {@code init(hconf)} is called on it.
 *
 * @param partDesc partition descriptor whose vector partition descriptor selects the read type
 * @param hconf configuration passed to the schema-evolution check and to the context's
 *     {@code init}
 * @return the initialized context, or {@code null} when {@code partDesc} carries no vector
 *     partition descriptor
 * @throws RuntimeException if the read type is not one of the three supported kinds
 * @throws SerDeException declared by this method; the raising callee is not visible here
 * @throws Exception declared by this method; the raising callee is not visible here
 */
public VectorPartitionContext createAndInitPartitionContext(PartitionDesc partDesc, Configuration hconf) throws SerDeException, Exception {
    final VectorPartitionDesc vectorDesc = partDesc.getVectorPartitionDesc();
    if (vectorDesc == null) {
        return null;
    }

    final VectorMapOperatorReadType readType = vectorDesc.getVectorMapOperatorReadType();

    final boolean needsDeserialization = readType == VectorMapOperatorReadType.VECTOR_DESERIALIZE
        || readType == VectorMapOperatorReadType.ROW_DESERIALIZE;
    if (needsDeserialization) {
        // Verify hive.exec.schema.evolution is true or we have an ACID table so we are producing
        // the table schema from ORC.  The Vectorizer class assures this.
        final boolean transactionalTable = AcidUtils.isTablePropertyTransactional(partDesc.getTableDesc().getProperties());
        Preconditions.checkState(Utilities.isSchemaEvolutionEnabled(hconf, transactionalTable));
    }

    final VectorPartitionContext context;
    if (readType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
        context = new VectorizedInputFileFormatPartitionContext(partDesc);
    } else if (readType == VectorMapOperatorReadType.VECTOR_DESERIALIZE) {
        context = new VectorDeserializePartitionContext(partDesc);
    } else if (readType == VectorMapOperatorReadType.ROW_DESERIALIZE) {
        context = new RowDeserializePartitionContext(partDesc);
    } else {
        throw new RuntimeException("Unexpected vector MapOperator read type " + readType.name());
    }

    context.init(hconf);
    return context;
}
Also used : VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) VectorMapOperatorReadType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType)

Aggregations

VectorPartitionDesc (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc): 2, PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 1, VectorMapOperatorReadType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType): 1