
Example 1 with VectorMapOperatorReadType

Use of org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType in project hive by apache.
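
For orientation, VectorMapOperatorReadType is the enum nested in VectorPartitionDesc that tells the vectorized map-side pipeline how a partition's rows are produced. A minimal stand-in sketch of the three values exercised below (an illustration only, not the Hive source, which may define additional constants):

// Stand-in illustration; the real enum is
// org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType.
public enum ReadTypeSketch {
    // The input format hands back VectorizedRowBatch objects directly.
    VECTORIZED_INPUT_FILE_FORMAT,
    // Fields are deserialized straight into column vectors.
    VECTOR_DESERIALIZE,
    // Rows are deserialized as ordinary objects, then assigned into batches.
    ROW_DESERIALIZE
}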

From the class VectorMapOperator, the method createAndInitPartitionContext:

public VectorPartitionContext createAndInitPartitionContext(PartitionDesc partDesc, Configuration hconf) throws SerDeException, Exception {
    VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
    VectorPartitionContext vectorPartitionContext;
    VectorMapOperatorReadType vectorMapOperatorReadType = vectorPartDesc.getVectorMapOperatorReadType();
    if (vectorMapOperatorReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE || vectorMapOperatorReadType == VectorMapOperatorReadType.ROW_DESERIALIZE) {
        // Verify hive.exec.schema.evolution is true or we have an ACID table so we are producing
        // the table schema from ORC.  The Vectorizer class assures this.
        boolean isAcid = AcidUtils.isTablePropertyTransactional(partDesc.getTableDesc().getProperties());
        Preconditions.checkState(Utilities.isSchemaEvolutionEnabled(hconf, isAcid));
    }
    switch(vectorMapOperatorReadType) {
        case VECTORIZED_INPUT_FILE_FORMAT:
            vectorPartitionContext = new VectorizedInputFileFormatPartitionContext(partDesc);
            break;
        case VECTOR_DESERIALIZE:
            vectorPartitionContext = new VectorDeserializePartitionContext(partDesc);
            break;
        case ROW_DESERIALIZE:
            vectorPartitionContext = new RowDeserializePartitionContext(partDesc);
            break;
        default:
            throw new RuntimeException("Unexpected vector MapOperator read type " + vectorMapOperatorReadType.name());
    }
    vectorPartitionContext.init(hconf);
    return vectorPartitionContext;
}
Also used: VectorPartitionDesc (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc), VectorMapOperatorReadType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType)
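
A minimal, self-contained sketch of the dispatch pattern above, with placeholder types instead of Hive's PartitionDesc, Configuration, and VectorPartitionContext (every name here is hypothetical, for illustration only):

// Hypothetical illustration of the read-type dispatch in createAndInitPartitionContext;
// none of these types are Hive classes.
final class PartitionContextFactorySketch {

    enum ReadType { VECTORIZED_INPUT_FILE_FORMAT, VECTOR_DESERIALIZE, ROW_DESERIALIZE }

    interface PartitionContext { void init(); }

    static PartitionContext create(ReadType readType, boolean schemaEvolutionEnabled) {
        if (readType == ReadType.VECTOR_DESERIALIZE || readType == ReadType.ROW_DESERIALIZE) {
            // Mirrors the Preconditions.checkState call: deserialize-based reads rely on
            // schema evolution (or an ACID table) so the reader produces the table schema.
            if (!schemaEvolutionEnabled) {
                throw new IllegalStateException("Schema evolution required for " + readType);
            }
        }
        final PartitionContext context;
        switch (readType) {
            case VECTORIZED_INPUT_FILE_FORMAT:
                context = () -> { /* pass batches through from the input format */ };
                break;
            case VECTOR_DESERIALIZE:
                context = () -> { /* deserialize fields directly into column vectors */ };
                break;
            case ROW_DESERIALIZE:
                context = () -> { /* deserialize rows, then assign them into batches */ };
                break;
            default:
                throw new RuntimeException("Unexpected vector MapOperator read type " + readType.name());
        }
        // Initialize once before handing the context back, as the Hive method does with init(hconf).
        context.init();
        return context;
    }
}

The real method passes PartitionDesc and Configuration through to each context's constructor and init; the sketch keeps only the precondition check, the switch, and the single init call.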

Example 2 with VectorMapOperatorReadType

Use of org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType in project hive by apache.

From the class HiveInputFormat, the method wrapForLlap:

public static InputFormat<WritableComparable, Writable> wrapForLlap(InputFormat<WritableComparable, Writable> inputFormat, Configuration conf, PartitionDesc part) throws HiveException {
    if (!HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon())) {
        // LLAP not enabled, no-op.
        return inputFormat;
    }
    String ifName = inputFormat.getClass().getCanonicalName();
    boolean isSupported = inputFormat instanceof LlapWrappableInputFormatInterface;
    boolean isVectorized = Utilities.getUseVectorizedInputFileFormat(conf);
    if (!isVectorized) {
        // Pretend it's vectorized if the non-vector wrapper is enabled.
        isVectorized = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED) && (Utilities.getPlanPath(conf) != null);
    }
    boolean isSerdeBased = false;
    if (isVectorized && !isSupported && HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENCODE_ENABLED)) {
        // See if we can use re-encoding to read the format through the LLAP IO elevator.
        String formatList = HiveConf.getVar(conf, ConfVars.LLAP_IO_ENCODE_FORMATS);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Checking " + ifName + " against " + formatList);
        }
        String[] formats = StringUtils.getStrings(formatList);
        if (formats != null) {
            for (String format : formats) {
                // TODO: should we check isAssignableFrom?
                if (ifName.equals(format)) {
                    if (LOG.isInfoEnabled()) {
                        LOG.info("Using SerDe-based LLAP reader for " + ifName);
                    }
                    isSupported = isSerdeBased = true;
                    break;
                }
            }
        }
    }
    if (!isSupported || !isVectorized) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Not using llap for " + ifName + ": supported = " + isSupported + ", vectorized = " + isVectorized);
        }
        return inputFormat;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Wrapping " + ifName);
    }
    @SuppressWarnings("unchecked") LlapIo<VectorizedRowBatch> llapIo = LlapProxy.getIo();
    if (llapIo == null) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Not using LLAP IO because it is not initialized");
        }
        return inputFormat;
    }
    Deserializer serde = null;
    if (isSerdeBased) {
        if (part == null) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Not using LLAP IO because there's no partition spec for SerDe-based IF");
            }
            return inputFormat;
        }
        VectorPartitionDesc vpart = part.getVectorPartitionDesc();
        if (vpart != null) {
            VectorMapOperatorReadType old = vpart.getVectorMapOperatorReadType();
            if (old != VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
                if (LOG.isInfoEnabled()) {
                    LOG.info("Resetting VectorMapOperatorReadType from " + old + " for partition " + part.getTableName() + " " + part.getPartSpec());
                }
                vpart.setVectorMapOperatorReadType(VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT);
            }
        }
        try {
            serde = part.getDeserializer(conf);
        } catch (Exception e) {
            throw new HiveException("Error creating SerDe for LLAP IO", e);
        }
    }
    InputFormat<?, ?> wrappedIf = llapIo.getInputFormat(inputFormat, serde);
    if (wrappedIf == null) {
        // We cannot wrap; the cause is logged inside.
        return inputFormat;
    }
    return castInputFormat(wrappedIf);
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), VectorPartitionDesc (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc), IOException (java.io.IOException), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), Deserializer (org.apache.hadoop.hive.serde2.Deserializer), VectorMapOperatorReadType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType)
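
A minimal sketch of the wrap-or-fall-through pattern that wrapForLlap follows: each failed guard returns the original object unchanged, so callers never branch on whether LLAP was actually used. All types and field names below are stand-ins, not Hive configuration keys or APIs:

// Hypothetical illustration of the guard-clause wrapping pattern in wrapForLlap;
// Reader, Settings, and the flags are stand-ins, not Hive APIs.
final class LlapWrapSketch {

    interface Reader { }

    interface WrappableReader extends Reader { }

    static final class WrappedReader implements Reader {
        private final Reader delegate;
        WrappedReader(Reader delegate) { this.delegate = delegate; }
    }

    static final class Settings {
        boolean ioEnabled;      // stand-in for LLAP_IO_ENABLED
        boolean vectorized;     // stand-in for the vectorized-input-format check
        java.util.Set<String> encodableFormats = java.util.Collections.emptySet();  // stand-in for LLAP_IO_ENCODE_FORMATS
    }

    static Reader wrapIfPossible(Reader reader, Settings settings) {
        if (!settings.ioEnabled) {
            // LLAP not enabled: hand back the original reader, the same no-op return as above.
            return reader;
        }
        boolean supported = reader instanceof WrappableReader;
        if (!supported && settings.vectorized
                && settings.encodableFormats.contains(reader.getClass().getCanonicalName())) {
            // Re-encoding path: the format is on the allow-list, so treat it as supported.
            supported = true;
        }
        if (!supported || !settings.vectorized) {
            return reader;
        }
        // Every guard passed: wrap and return the decorated reader.
        return new WrappedReader(reader);
    }
}

The real method adds further fall-through exits on top of this (LlapProxy.getIo() returning null, a missing partition spec for the SerDe-based path, and llapIo.getInputFormat returning null), and for the SerDe-based path it also resets the partition's VectorMapOperatorReadType to VECTORIZED_INPUT_FILE_FORMAT before wrapping.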

Aggregations

VectorPartitionDesc (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc): 2 usages
VectorMapOperatorReadType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType): 2 usages
IOException (java.io.IOException): 1 usage
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 1 usage
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1 usage
Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 1 usage