
Example 81 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class Driver, method acquireLocks:

/**
 * Acquire read and write locks needed by the statement. The list of objects to be locked are
 * obtained from the inputs and outputs populated by the compiler.  Locking strategy depends on
 * HiveTxnManager and HiveLockManager configured
 *
 * This method also records the list of valid transactions.  This must be done after any
 * transactions have been opened.
 * @throws CommandProcessorResponse
 */
private void acquireLocks() throws CommandProcessorResponse {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    if (!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) {
        /* Non-ACID txn managers don't support txns but forward lock requests to lock managers.
         * The ACID txn manager requires all locks to be associated with a txn, so if we end up
         * here without an open txn it's because we are processing something like "use <database>",
         * which by definition needs no locks. */
        return;
    }
    try {
        String userFromUGI = getUserFromUGI();
        if (userFromUGI == null) {
            throw createProcessorResponse(10);
        }
        // Set the table write id in all of the acid file sinks
        if (haveAcidWrite()) {
            List<FileSinkDesc> acidSinks = new ArrayList<>(plan.getAcidSinks());
            // sorting makes tests easier to write since file names and ROW__IDs depend on statementId
            // so this makes (file name -> data) mapping stable
            acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) -> fsd1.getDirName().compareTo(fsd2.getDirName()));
            for (FileSinkDesc desc : acidSinks) {
                TableDesc tableInfo = desc.getTableInfo();
                long writeId = queryTxnMgr.getTableWriteId(Utilities.getDatabaseName(tableInfo.getTableName()), Utilities.getTableName(tableInfo.getTableName()));
                desc.setTableWriteId(writeId);
                // it's possible to have > 1 FileSink writing to the same table/partition
                // e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
                desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement());
            }
        }
        /* It's imperative that {@code acquireLocks()} is called for all commands so that
         * HiveTxnManager can transition its state machine correctly. */
        queryTxnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
        if (queryTxnMgr.recordSnapshot(plan)) {
            recordValidTxns(queryTxnMgr);
        }
        if (plan.hasAcidResourcesInQuery()) {
            recordValidWriteIds(queryTxnMgr);
        }
    } catch (Exception e) {
        errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
        SQLState = ErrorMsg.findSQLState(e.getMessage());
        downstreamError = e;
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw createProcessorResponse(10);
    } finally {
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    }
}
Also used : FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException)
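
The loop above asks the transaction manager for one table write id per sink and a monotonically increasing statement id, after sorting the sinks by output directory so the assignment is deterministic. Below is a minimal standalone sketch of that sort-then-assign pattern; SinkStub, the "db.table" splitting, and the hard-coded write id are stand-ins for FileSinkDesc, Utilities.getDatabaseName/getTableName, and HiveTxnManager, not the real Hive API.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

// Minimal sketch (not the Hive API): assign write ids and statement ids to sinks
// in a stable order, mirroring the sort-then-assign pattern in acquireLocks().
public class AcidSinkSketch {

    // Hypothetical stand-in for org.apache.hadoop.hive.ql.plan.FileSinkDesc.
    static final class SinkStub {
        final String dirName;
        final String qualifiedTable; // e.g. "sales.orders"
        long writeId;
        int statementId;
        SinkStub(String dirName, String qualifiedTable) {
            this.dirName = dirName;
            this.qualifiedTable = qualifiedTable;
        }
    }

    public static void main(String[] args) {
        List<SinkStub> sinks = new ArrayList<>();
        sinks.add(new SinkStub("/tmp/b", "sales.orders"));
        sinks.add(new SinkStub("/tmp/a", "sales.orders"));

        // Stable ordering by directory name, as in the acidSinks.sort(...) call above.
        sinks.sort(Comparator.comparing(s -> s.dirName));

        int nextStmtId = 0; // stand-in for queryTxnMgr.getStmtIdAndIncrement()
        for (SinkStub s : sinks) {
            String[] parts = s.qualifiedTable.split("\\.", 2);
            String db = parts.length == 2 ? parts[0] : "default";
            String table = parts.length == 2 ? parts[1] : parts[0];
            s.writeId = 42L; // would come from queryTxnMgr.getTableWriteId(db, table)
            s.statementId = nextStmtId++;
            System.out.println(db + "." + table + " dir=" + s.dirName
                    + " writeId=" + s.writeId + " stmtId=" + s.statementId);
        }
    }
}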

Example 82 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class Driver, method getSchema:

/**
 * Get a Schema with fields represented with native Hive types
 */
private static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
    Schema schema = null;
    // Prefer the result schema from the plan; otherwise fall back to the fetch task's
    // table desc; failing that, give up.
    if (sem == null) {
    // can't get any info without a plan
    } else if (sem.getResultSchema() != null) {
        List<FieldSchema> lst = sem.getResultSchema();
        schema = new Schema(lst, null);
    } else if (sem.getFetchTask() != null) {
        FetchTask ft = sem.getFetchTask();
        TableDesc td = ft.getTblDesc();
        // Partitioned tables don't have a TableDesc set on the FetchTask; instead they carry
        // a list of PartitionDesc objects. Use the first partition's table desc and its
        // deserializer.
        if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
            if (ft.getWork().getPartDesc().size() > 0) {
                td = ft.getWork().getPartDesc().get(0).getTableDesc();
            }
        }
        if (td == null) {
            LOG.info("No returning schema.");
        } else {
            String tableName = "result";
            List<FieldSchema> lst = null;
            try {
                lst = HiveMetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(conf));
            } catch (Exception e) {
                LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e));
            }
            if (lst != null) {
                schema = new Schema(lst, null);
            }
        }
    }
    if (schema == null) {
        schema = new Schema();
    }
    LOG.info("Returning Hive schema: " + schema);
    return schema;
}
Also used : Schema(org.apache.hadoop.hive.metastore.api.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) List(java.util.List) LinkedList(java.util.LinkedList) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
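
When no plan is available at all, getSchema() above ends up with an empty Schema; when a result schema exists, it simply wraps the FieldSchema list. As a minimal sketch of that latter case (using only the metastore API classes already listed above, with illustrative field names), the same kind of Schema can be built by hand:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;

// Sketch: construct the same kind of Schema object that Driver.getSchema() returns,
// directly from a hand-written field list. The field names/types are illustrative.
public class SchemaSketch {
    public static void main(String[] args) {
        List<FieldSchema> fields = Arrays.asList(
                new FieldSchema("id", "bigint", null),
                new FieldSchema("name", "string", null));
        // Same constructor used above: the field list plus an optional properties map.
        Schema schema = new Schema(fields, null);
        System.out.println("Returning Hive schema: " + schema);
    }
}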

Example 83 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class SemanticAnalyzer, method genScriptPlan:

@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) throws SemanticException {
    // If there is no "AS" clause, the output schema will be "key,value"
    ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
    int inputSerDeNum = 1, inputRecordWriterNum = 2;
    int outputSerDeNum = 4, outputRecordReaderNum = 5;
    int outputColsNum = 6;
    boolean outputColNames = false, outputColSchemas = false;
    int execPos = 3;
    boolean defaultOutputCols = false;
    // Go over all the children
    if (trfm.getChildCount() > outputColsNum) {
        ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
        if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
            outputColNames = true;
        } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
            outputColSchemas = true;
        }
    }
    // If column type is not specified, use a string
    if (!outputColNames && !outputColSchemas) {
        String intName = getColumnInternalName(0);
        ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
        colInfo.setAlias("key");
        outputCols.add(colInfo);
        intName = getColumnInternalName(1);
        colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
        colInfo.setAlias("value");
        outputCols.add(colInfo);
        defaultOutputCols = true;
    } else {
        ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
        int ccount = collist.getChildCount();
        Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
        if (outputColNames) {
            for (int i = 0; i < ccount; ++i) {
                String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)).getText()).toLowerCase();
                failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
                String intName = getColumnInternalName(i);
                ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
                colInfo.setAlias(colAlias);
                outputCols.add(colInfo);
            }
        } else {
            for (int i = 0; i < ccount; ++i) {
                ASTNode child = (ASTNode) collist.getChild(i);
                assert child.getType() == HiveParser.TOK_TABCOL;
                String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)).getText()).toLowerCase();
                failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
                String intName = getColumnInternalName(i);
                ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child.getChild(1))), null, false);
                colInfo.setAlias(colAlias);
                outputCols.add(colInfo);
            }
        }
    }
    RowResolver out_rwsch = new RowResolver();
    StringBuilder columns = new StringBuilder();
    StringBuilder columnTypes = new StringBuilder();
    for (int i = 0; i < outputCols.size(); ++i) {
        if (i != 0) {
            columns.append(",");
            columnTypes.append(",");
        }
        columns.append(outputCols.get(i).getInternalName());
        columnTypes.append(outputCols.get(i).getType().getTypeName());
        out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), outputCols.get(i));
    }
    StringBuilder inpColumns = new StringBuilder();
    StringBuilder inpColumnTypes = new StringBuilder();
    ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver().getColumnInfos();
    for (int i = 0; i < inputSchema.size(); ++i) {
        if (i != 0) {
            inpColumns.append(",");
            inpColumnTypes.append(",");
        }
        inpColumns.append(inputSchema.get(i).getInternalName());
        inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
    }
    TableDesc outInfo;
    TableDesc errInfo;
    TableDesc inInfo;
    String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
    Class<? extends Deserializer> serde;
    try {
        serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName, true, Utilities.getSessionSpecifiedClassLoader());
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e);
    }
    int fieldSeparator = Utilities.tabCode;
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
        fieldSeparator = Utilities.ctrlaCode;
    }
    // Input and Output Serdes
    if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
        inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false);
    } else {
        inInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes.toString(), false, true);
    }
    if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
        outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false);
    // This is for backward compatibility. If the user did not specify the
    // output column list, we assume that there are 2 columns: key and value.
    // However, if the script outputs col1, col2, col3 separated by TAB, the
    // requirement is: key is col1 and value is (col2 TAB col3).
    } else {
        outInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), columns.toString(), columnTypes.toString(), defaultOutputCols);
    }
    // Error stream always uses the default serde with a single column
    errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
    // Output record readers
    Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm.getChild(outputRecordReaderNum));
    Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm.getChild(inputRecordWriterNum));
    Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc(fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    // disable backtracking
    output.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    // Add a URI entity for the transform script. The script is assumed to be local unless it is downloadable.
    if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
        String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
        getInputs().add(new ReadEntity(new Path(scriptCmd), ResourceDownloader.isFileUri(scriptCmd)));
    }
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) Path(org.apache.hadoop.fs.Path) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) InsertTableDesc(org.apache.hadoop.hive.ql.plan.InsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
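
A large part of genScriptPlan() is spent turning the output ColumnInfo list into the comma-separated "columns" and "columns.types" strings that go into the script's TableDesc. The sketch below shows just that string assembly for the default key,value case; the internal names _col0, _col1 are an assumption about what getColumnInternalName() produces.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.StringJoiner;

// Sketch: assemble the comma-separated "columns" / "columns.types" strings that
// genScriptPlan() feeds into PlanUtils.getTableDesc(). Names and types are illustrative.
public class ScriptSchemaStringsSketch {
    public static void main(String[] args) {
        // Default TRANSFORM output schema when no AS clause is given: key,value as strings.
        Map<String, String> cols = new LinkedHashMap<>();
        cols.put("_col0", "string"); // alias "key"
        cols.put("_col1", "string"); // alias "value"

        StringJoiner columns = new StringJoiner(",");
        StringJoiner columnTypes = new StringJoiner(",");
        cols.forEach((name, type) -> {
            columns.add(name);
            columnTypes.add(type);
        });

        System.out.println("columns=" + columns);            // _col0,_col1
        System.out.println("columns.types=" + columnTypes);  // string,string
    }
}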

Example 84 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class Vectorizer, method canSpecializeReduceSink:

private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkDesc vectorDesc) throws HiveException {
    VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
    // Various restrictions.
    // Set this if we encounter a condition we were not expecting.
    boolean isUnexpectedCondition = false;
    boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    int limit = desc.getTopN();
    float memUsage = desc.getTopNMemoryUsage();
    boolean hasPTFTopN = (limit >= 0 && memUsage > 0 && desc.isPTFReduceSink());
    boolean hasDistinctColumns = (desc.getDistinctColumnIndices().size() > 0);
    TableDesc keyTableDesc = desc.getKeySerializeInfo();
    Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
    boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
    TableDesc valueTableDesc = desc.getValueSerializeInfo();
    Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
    boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
    // We are doing work here we'd normally do in VectorGroupByCommonOperator's constructor.
    // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
    List<ExprNodeDesc> keysDescs = desc.getKeyCols();
    final boolean isEmptyKey = (keysDescs.size() == 0);
    if (!isEmptyKey) {
        VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
        final int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
        final TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
        final Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
        final VectorExpression[] reduceSinkKeyExpressions;
        // Since a key expression can be a calculation and the key will go into a scratch column,
        // we need the mapping and type information.
        ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
        for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
            VectorExpression ve = allKeyExpressions[i];
            reduceSinkKeyColumnMap[i] = ve.getOutputColumnNum();
            reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
            reduceSinkKeyColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
            if (!IdentityExpression.isColumnOnly(ve)) {
                groupByKeyExpressionsList.add(ve);
            }
        }
        if (groupByKeyExpressionsList.size() == 0) {
            reduceSinkKeyExpressions = null;
        } else {
            reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
        }
        vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
        vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
        vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
        vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
    }
    ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
    final boolean isEmptyValue = (valueDescs.size() == 0);
    if (!isEmptyValue) {
        VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
        final int[] reduceSinkValueColumnMap = new int[allValueExpressions.length];
        final TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[allValueExpressions.length];
        final Type[] reduceSinkValueColumnVectorTypes = new Type[allValueExpressions.length];
        VectorExpression[] reduceSinkValueExpressions;
        ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
        for (int i = 0; i < valueDescs.size(); ++i) {
            VectorExpression ve = allValueExpressions[i];
            reduceSinkValueColumnMap[i] = ve.getOutputColumnNum();
            reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
            reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
            if (!IdentityExpression.isColumnOnly(ve)) {
                reduceSinkValueExpressionsList.add(ve);
            }
        }
        if (reduceSinkValueExpressionsList.size() == 0) {
            reduceSinkValueExpressions = null;
        } else {
            reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
        }
        vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
        vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
        vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
        vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
    }
    boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
    vectorReduceSinkInfo.setUseUniformHash(useUniformHash);
    List<ExprNodeDesc> bucketDescs = desc.getBucketCols();
    final boolean isEmptyBuckets = (bucketDescs == null || bucketDescs.size() == 0);
    List<ExprNodeDesc> partitionDescs = desc.getPartitionCols();
    final boolean isEmptyPartitions = (partitionDescs == null || partitionDescs.size() == 0);
    if (useUniformHash || (isEmptyKey && isEmptyBuckets && isEmptyPartitions)) {
    // NOTE: For Uniform Hash or no buckets/partitions, when the key is empty, we will use the VectorReduceSinkEmptyKeyOperator instead.
    } else {
        // Collect bucket and/or partition information for object hashing.
        int[] reduceSinkBucketColumnMap = null;
        TypeInfo[] reduceSinkBucketTypeInfos = null;
        Type[] reduceSinkBucketColumnVectorTypes = null;
        VectorExpression[] reduceSinkBucketExpressions = null;
        if (!isEmptyBuckets) {
            VectorExpression[] allBucketExpressions = vContext.getVectorExpressions(bucketDescs);
            reduceSinkBucketColumnMap = new int[bucketDescs.size()];
            reduceSinkBucketTypeInfos = new TypeInfo[bucketDescs.size()];
            reduceSinkBucketColumnVectorTypes = new Type[bucketDescs.size()];
            ArrayList<VectorExpression> reduceSinkBucketExpressionsList = new ArrayList<VectorExpression>();
            for (int i = 0; i < bucketDescs.size(); ++i) {
                VectorExpression ve = allBucketExpressions[i];
                reduceSinkBucketColumnMap[i] = ve.getOutputColumnNum();
                reduceSinkBucketTypeInfos[i] = bucketDescs.get(i).getTypeInfo();
                reduceSinkBucketColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkBucketTypeInfos[i]);
                if (!IdentityExpression.isColumnOnly(ve)) {
                    reduceSinkBucketExpressionsList.add(ve);
                }
            }
            if (reduceSinkBucketExpressionsList.size() == 0) {
                reduceSinkBucketExpressions = null;
            } else {
                reduceSinkBucketExpressions = reduceSinkBucketExpressionsList.toArray(new VectorExpression[0]);
            }
        }
        int[] reduceSinkPartitionColumnMap = null;
        TypeInfo[] reduceSinkPartitionTypeInfos = null;
        Type[] reduceSinkPartitionColumnVectorTypes = null;
        VectorExpression[] reduceSinkPartitionExpressions = null;
        if (!isEmptyPartitions) {
            VectorExpression[] allPartitionExpressions = vContext.getVectorExpressions(partitionDescs);
            reduceSinkPartitionColumnMap = new int[partitionDescs.size()];
            reduceSinkPartitionTypeInfos = new TypeInfo[partitionDescs.size()];
            reduceSinkPartitionColumnVectorTypes = new Type[partitionDescs.size()];
            ArrayList<VectorExpression> reduceSinkPartitionExpressionsList = new ArrayList<VectorExpression>();
            for (int i = 0; i < partitionDescs.size(); ++i) {
                VectorExpression ve = allPartitionExpressions[i];
                reduceSinkPartitionColumnMap[i] = ve.getOutputColumnNum();
                reduceSinkPartitionTypeInfos[i] = partitionDescs.get(i).getTypeInfo();
                reduceSinkPartitionColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkPartitionTypeInfos[i]);
                if (!IdentityExpression.isColumnOnly(ve)) {
                    reduceSinkPartitionExpressionsList.add(ve);
                }
            }
            if (reduceSinkPartitionExpressionsList.size() == 0) {
                reduceSinkPartitionExpressions = null;
            } else {
                reduceSinkPartitionExpressions = reduceSinkPartitionExpressionsList.toArray(new VectorExpression[0]);
            }
        }
        vectorReduceSinkInfo.setReduceSinkBucketColumnMap(reduceSinkBucketColumnMap);
        vectorReduceSinkInfo.setReduceSinkBucketTypeInfos(reduceSinkBucketTypeInfos);
        vectorReduceSinkInfo.setReduceSinkBucketColumnVectorTypes(reduceSinkBucketColumnVectorTypes);
        vectorReduceSinkInfo.setReduceSinkBucketExpressions(reduceSinkBucketExpressions);
        vectorReduceSinkInfo.setReduceSinkPartitionColumnMap(reduceSinkPartitionColumnMap);
        vectorReduceSinkInfo.setReduceSinkPartitionTypeInfos(reduceSinkPartitionTypeInfos);
        vectorReduceSinkInfo.setReduceSinkPartitionColumnVectorTypes(reduceSinkPartitionColumnVectorTypes);
        vectorReduceSinkInfo.setReduceSinkPartitionExpressions(reduceSinkPartitionExpressions);
    }
    // Remember the condition variables for EXPLAIN regardless.
    vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo);
    vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setIsEmptyKey(isEmptyKey);
    vectorDesc.setIsEmptyValue(isEmptyValue);
    vectorDesc.setIsEmptyBuckets(isEmptyBuckets);
    vectorDesc.setIsEmptyPartitions(isEmptyPartitions);
    vectorDesc.setHasPTFTopN(hasPTFTopN);
    vectorDesc.setHasDistinctColumns(hasDistinctColumns);
    vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
    vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
    // This indicates we logged an inconsistency (from our point-of-view) and will not make this
    // operator native...
    vectorDesc.setIsUnexpectedCondition(isUnexpectedCondition);
    // Many restrictions.
    if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasPTFTopN || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary || isUnexpectedCondition) {
        return false;
    }
    return true;
}
Also used : ArrayList(java.util.ArrayList) LazyBinarySerDe(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) VectorReduceSinkInfo(org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorDeserializeType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) OperatorType(org.apache.hadoop.hive.ql.plan.api.OperatorType) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
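
The key, value, bucket, and partition loops above all follow the same pattern: record every expression's output column number in a map, but keep only the expressions that are not plain column references for runtime evaluation. A compact standalone sketch of that filtering pattern follows; Expr and its columnOnly flag are hypothetical stand-ins for VectorExpression and IdentityExpression.isColumnOnly(), not the real classes.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Sketch of the per-column filtering pattern used in canSpecializeReduceSink():
// every expression contributes its output column number to the column map, but
// only computed (non column-only) expressions are retained for later evaluation.
public class ReduceSinkExprFilterSketch {

    // Hypothetical stand-in for VectorExpression.
    record Expr(String name, int outputColumn, boolean columnOnly) {}

    static int[] buildColumnMap(List<Expr> exprs, List<Expr> computedOut) {
        int[] columnMap = new int[exprs.size()];
        for (int i = 0; i < exprs.size(); i++) {
            Expr e = exprs.get(i);
            columnMap[i] = e.outputColumn();
            if (!e.columnOnly()) {
                computedOut.add(e); // only computed expressions need evaluating at runtime
            }
        }
        return columnMap;
    }

    public static void main(String[] args) {
        List<Expr> keys = List.of(
                new Expr("col3", 3, true),            // plain column reference
                new Expr("upper(col5)", 12, false));  // computed into a scratch column
        List<Expr> computed = new ArrayList<>();
        int[] map = buildColumnMap(keys, computed);
        System.out.println("column map: " + Arrays.toString(map)); // [3, 12]
        System.out.println("computed expressions kept: " + computed.size()); // 1
    }
}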

Example 85 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class BaseSemanticAnalyzer, method createFetchTask:

/**
 * Create a FetchTask for a given schema.
 *
 * @param schema string
 */
protected FetchTask createFetchTask(String schema) {
    Properties prop = new Properties();
    // Sets delimiter to tab (ascii 9)
    prop.setProperty(serdeConstants.SERIALIZATION_FORMAT, Integer.toString(Utilities.tabCode));
    prop.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, " ");
    String[] colTypes = schema.split("#");
    prop.setProperty("columns", colTypes[0]);
    prop.setProperty("columns.types", colTypes[1]);
    prop.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName());
    FetchWork fetch = new FetchWork(ctx.getResFile(), new TableDesc(TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop), -1);
    fetch.setSerializationNullFormat(" ");
    return (FetchTask) TaskFactory.get(fetch);
}
Also used : TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) QueryProperties(org.apache.hadoop.hive.ql.QueryProperties) Properties(java.util.Properties) IgnoreKeyTextOutputFormat(org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
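
createFetchTask() expects its schema argument in the form "<columns>#<column types>" and splits it into the serde properties shown above. A minimal sketch of just that property assembly, with illustrative column names and the constant values (tab delimiter, blank null format) written out as literals:

import java.util.Properties;

// Sketch: split a "<columns>#<column types>" schema string into the serde
// Properties that createFetchTask() builds. Column names/types are illustrative;
// the keys mirror serdeConstants.SERIALIZATION_FORMAT etc. as literal strings.
public class FetchSchemaSketch {
    public static void main(String[] args) {
        String schema = "tab_name,comment#string,string";
        String[] colTypes = schema.split("#");

        Properties prop = new Properties();
        prop.setProperty("serialization.format", "9"); // tab (ASCII 9) as the field delimiter
        prop.setProperty("serialization.null.format", " ");
        prop.setProperty("columns", colTypes[0]);
        prop.setProperty("columns.types", colTypes[1]);

        System.out.println(prop);
    }
}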

Aggregations

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)93 ArrayList (java.util.ArrayList)47 Path (org.apache.hadoop.fs.Path)34 PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc)29 HashMap (java.util.HashMap)26 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)26 LinkedHashMap (java.util.LinkedHashMap)23 Properties (java.util.Properties)19 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)19 LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)18 Operator (org.apache.hadoop.hive.ql.exec.Operator)16 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)16 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)16 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)16 JobConf (org.apache.hadoop.mapred.JobConf)15 List (java.util.List)14 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)14 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)14 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)11 MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork)11