Search in sources :

Example 11 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In class HiveUnionVisitor, method genInputSelectForUnion:

/**
 * Aligns one input of a union with the union's output columns.
 *
 * <p>Input columns are paired positionally with {@code uColumnInfo}. Each input column is
 * wrapped in a column expression, with a conversion cast added when its type differs from the
 * paired union column. If any pair fails {@code isSameColumnForRR}, a select operator carrying
 * those expressions under the union's internal column names is created as a child of
 * {@code origInputOp}; otherwise {@code origInputOp} is returned untouched.
 *
 * <p>NOTE(review): assumes {@code uColumnInfo} has at least as many entries as the input
 * schema, and that any union column whose type differs from its input column is a
 * {@code PrimitiveTypeInfo} -- confirm both are guaranteed by the caller.
 *
 * @param origInputOp operator producing one branch of the union
 * @param uColumnInfo column infos describing the union's output row
 * @return a new select operator on top of {@code origInputOp}, or {@code origInputOp} itself
 * @throws SemanticException declared for the conversion-cast creation
 */
private Operator<? extends OperatorDesc> genInputSelectForUnion(Operator<? extends OperatorDesc> origInputOp, ArrayList<ColumnInfo> uColumnInfo) throws SemanticException {
    Iterator<ColumnInfo> inputCols = origInputOp.getSchema().getSignature().iterator();
    Iterator<ColumnInfo> unionCols = uColumnInfo.iterator();
    List<ExprNodeDesc> projections = new ArrayList<>();
    List<String> outputNames = new ArrayList<>();
    Map<String, ExprNodeDesc> exprByOutputName = new HashMap<>();
    boolean schemaDiffers = false;

    while (inputCols.hasNext()) {
        ColumnInfo inputCol = inputCols.next();
        ColumnInfo unionCol = unionCols.next();

        // Non-short-circuit accumulation: the comparison runs for every pair.
        schemaDiffers |= !inputCol.isSameColumnForRR(unionCol);

        ExprNodeDesc projection = new ExprNodeColumnDesc(
            inputCol.getType(),
            inputCol.getInternalName(),
            inputCol.getTabAlias(),
            inputCol.getIsVirtualCol(),
            inputCol.isSkewedCol());

        boolean sameType = inputCol.getType().equals(unionCol.getType());
        if (!sameType) {
            // Types disagree: convert the input column to the union column's type.
            PrimitiveTypeInfo targetType = (PrimitiveTypeInfo) unionCol.getType();
            projection = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(projection, targetType);
        }

        projections.add(projection);
        outputNames.add(unionCol.getInternalName());
        exprByOutputName.put(unionCol.getInternalName(), projection);
    }

    if (!schemaDiffers) {
        // Every column already matches the union schema; no extra operator is needed.
        return origInputOp;
    }
    SelectDesc selectDesc = new SelectDesc(projections, outputNames);
    RowSchema unionSchema = new RowSchema(uColumnInfo);
    return OperatorFactory.getAndMakeChild(selectDesc, unionSchema, exprByOutputName, origInputOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 12 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In class HiveOpConverterUtils, method genReduceSinkAndBacktrackSelect:

/**
 * Puts a reduce sink on top of {@code input} and then a select operator on top of that
 * reduce sink, built from the backtrack descriptors for {@code keepColNames}.
 *
 * <p>The reduce sink is generated with a single table alias picked from the input schema's
 * table names: null names are skipped, an empty name is used only until a non-empty one is
 * seen, and two different non-empty names are rejected.
 *
 * @param input         operator to place the reduce sink on
 * @param keys          passed through to {@code genReduceSink}
 * @param tag           passed through to {@code genReduceSink}
 * @param partitionCols passed through to {@code genReduceSink}
 * @param order         passed through to {@code genReduceSink}
 * @param nullOrder     passed through to {@code genReduceSink}
 * @param numReducers   passed through to {@code genReduceSink}
 * @param acidOperation passed through to {@code genReduceSink}
 * @param hiveConf      passed through to {@code genReduceSink}
 * @param keepColNames  names of the input columns the resulting select should expose
 * @return the select operator created as the child of the new reduce sink
 * @throws SemanticException if the input schema yields no table alias, or more than one
 *                           distinct non-empty alias
 */
static SelectOperator genReduceSinkAndBacktrackSelect(Operator<?> input, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf, List<String> keepColNames) throws SemanticException {
    // Step 1: build the reduce sink.
    // Step 1.1: settle on one table alias. Empty strings are tolerated (they are only
    // allowed as the alias of virtual columns) but never win over a non-empty name.
    String alias = null;
    for (String candidate : input.getSchema().getTableNames()) {
        if (candidate == null) {
            continue;
        }
        if (candidate.isEmpty()) {
            // Remember the empty alias only as a fallback.
            if (alias == null) {
                alias = candidate;
            }
            continue;
        }
        boolean noRealAliasYet = alias == null || alias.isEmpty();
        if (noRealAliasYet) {
            alias = candidate;
        } else if (!candidate.equals(alias)) {
            throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only " + "one tableAlias but there is more than one");
        }
    }
    if (alias == null) {
        throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
    }

    // Step 1.2: create the reduce sink itself.
    ReduceSinkOperator reduceSink = genReduceSink(input, alias, keys, tag, partitionCols, order, nullOrder, numReducers, acidOperation, hiveConf);

    // Step 2: build the select that backtracks through the reduce sink.
    Map<String, ExprNodeDesc> backtrackExprs = buildBacktrackFromReduceSink(
        keepColNames,
        reduceSink.getConf().getOutputKeyColumnNames(),
        reduceSink.getConf().getOutputValueColumnNames(),
        reduceSink.getValueIndex(),
        input);
    List<ExprNodeDesc> selectExprs = new ArrayList<>(backtrackExprs.values());
    List<String> selectNames = new ArrayList<>(backtrackExprs.keySet());
    SelectDesc selectDesc = new SelectDesc(selectExprs, selectNames);

    ArrayList<ColumnInfo> keptColInfos = createColInfosSubset(input, keepColNames);
    RowSchema selectSchema = new RowSchema(keptColInfos);
    SelectOperator backtrackSelect = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, selectSchema, reduceSink);
    backtrackSelect.setColumnExprMap(backtrackExprs);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + backtrackSelect + " with row schema: [" + backtrackSelect.getSchema() + "]");
    }
    return backtrackSelect;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 13 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In class SemanticAnalyzer, method genFileSinkPlan:

@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    QBMetaData qbm = qb.getMetaData();
    Integer destType = qbm.getDestTypeForAlias(dest);
    // destination table if any
    Table destinationTable = null;
    // true for full ACID table and MM table
    boolean destTableIsTransactional;
    // should the destination table be written to using ACID
    boolean destTableIsFullAcid;
    // should we add files directly to the final path
    boolean isDirectInsert = false;
    AcidUtils.Operation acidOperation = null;
    boolean destTableIsTemporary = false;
    boolean destTableIsMaterialization = false;
    // destination partition if any
    Partition destinationPartition = null;
    // the intermediate destination directory
    Path queryTmpdir = null;
    String moveTaskId = null;
    // the final destination directory
    Path destinationPath = null;
    TableDesc tableDescriptor = null;
    StructObjectInspector specificRowObjectInspector = null;
    int currentTableId = 0;
    boolean isLocal = false;
    SortBucketRSCtx rsCtx = new SortBucketRSCtx();
    DynamicPartitionCtx dpCtx = null;
    LoadTableDesc ltd = null;
    ListBucketingCtx lbCtx = null;
    Map<String, String> partSpec = null;
    boolean isMmTable = false, isMmCreate = false, isNonNativeTable = false;
    Long writeId = null;
    HiveTxnManager txnMgr = getTxnMgr();
    switch(destType.intValue()) {
        case QBMetaData.DEST_TABLE:
            {
                destinationTable = qbm.getDestTableForAlias(dest);
                destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable);
                destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
                destTableIsTemporary = destinationTable.isTemporary();
                // Is the user trying to insert into a external tables
                checkExternalTable(destinationTable);
                partSpec = qbm.getPartSpecForAlias(dest);
                destinationPath = destinationTable.getPath();
                checkImmutableTable(qb, destinationTable, destinationPath, false);
                // check for partition
                List<FieldSchema> parts = destinationTable.getPartitionKeys();
                if (parts != null && parts.size() > 0) {
                    // table is partitioned
                    if (partSpec == null || partSpec.size() == 0) {
                        // user did NOT specify partition
                        throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
                    }
                    dpCtx = qbm.getDPCtx(dest);
                    if (dpCtx == null) {
                        destinationTable.validatePartColumnNames(partSpec, false);
                        dpCtx = new DynamicPartitionCtx(partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
                        qbm.setDPCtx(dest, dpCtx);
                    }
                }
                // Check for dynamic partitions.
                dpCtx = checkDynPart(qb, qbm, destinationTable, partSpec, dest);
                if (dpCtx != null && dpCtx.getSPPath() != null) {
                    destinationPath = new Path(destinationTable.getPath(), dpCtx.getSPPath());
                }
                isNonNativeTable = destinationTable.isNonNative();
                isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters());
                AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                // this table_desc does not contain the partitioning columns
                tableDescriptor = Utilities.getTableDesc(destinationTable);
                if (!isNonNativeTable) {
                    if (destTableIsTransactional) {
                        acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
                    }
                }
                isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp);
                acidOperation = acidOp;
                queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath);
                moveTaskId = getMoveTaskId();
                if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                    Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir + " from " + destinationPath);
                }
                if (dpCtx != null) {
                    // set the root of the temporary path where dynamic partition columns will populate
                    dpCtx.setRootPath(queryTmpdir);
                }
                // Add NOT NULL constraint check
                input = genConstraintsPlan(dest, qb, input);
                if (!qb.getIsQuery()) {
                    input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), dpCtx, parts);
                }
                if (destinationTable.isMaterializedView() && mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) {
                    // Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
                    // TODO: We only do this for a full rebuild
                    String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS);
                    String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS);
                    if (sortColsStr != null || distributeColsStr != null) {
                        input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input);
                    }
                } else {
                    // Add sorting/bucketing if needed
                    input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
                }
                idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName());
                currentTableId = destTableId;
                destTableId++;
                // NOTE: specify Dynamic partitions in dest_tab for WriteEntity
                if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) {
                    if (destTableIsTransactional) {
                        acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
                        checkAcidConstraints();
                    } else {
                        lbCtx = constructListBucketingCtx(destinationTable.getSkewedColNames(), destinationTable.getSkewedColValues(), destinationTable.getSkewedColValueLocationMaps(), destinationTable.isStoredAsSubDirectories());
                    }
                    try {
                        if (ctx.getExplainConfig() != null) {
                            // For explain plan, txn won't be opened and doesn't make sense to allocate write id
                            writeId = null;
                        } else {
                            if (isMmTable) {
                                writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
                            } else {
                                writeId = acidOp == Operation.NOT_ACID ? null : txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
                            }
                        }
                    } catch (LockException ex) {
                        throw new SemanticException("Failed to allocate write Id", ex);
                    }
                    boolean isReplace = !qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName());
                    ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, isReplace, writeId);
                    if (writeId != null) {
                        ltd.setStmtId(txnMgr.getCurrentStmtId());
                    }
                    ltd.setMoveTaskId(moveTaskId);
                    // For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
                    // deltas and base and leave them up to the cleaner to clean up
                    boolean isInsertInto = qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName());
                    LoadFileType loadType;
                    if (isDirectInsert) {
                        loadType = LoadFileType.IGNORE;
                    } else if (!isInsertInto && !destTableIsTransactional) {
                        loadType = LoadFileType.REPLACE_ALL;
                    } else {
                        loadType = LoadFileType.KEEP_EXISTING;
                    }
                    ltd.setLoadFileType(loadType);
                    ltd.setInsertOverwrite(!isInsertInto);
                    ltd.setIsDirectInsert(isDirectInsert);
                    ltd.setLbCtx(lbCtx);
                    loadTableWork.add(ltd);
                } else {
                    // This is a non-native table.
                    // We need to set stats as inaccurate.
                    setStatsForNonNativeTable(destinationTable.getDbName(), destinationTable.getTableName());
                    // true if it is insert overwrite.
                    boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", destinationTable.getDbName(), destinationTable.getTableName()));
                    createPreInsertDesc(destinationTable, overwrite);
                    ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, partSpec == null ? ImmutableMap.of() : partSpec);
                    ltd.setInsertOverwrite(overwrite);
                    ltd.setLoadFileType(overwrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING);
                }
                if (destinationTable.isMaterializedView()) {
                    materializedViewUpdateDesc = new MaterializedViewUpdateDesc(destinationTable.getFullyQualifiedName(), false, false, true);
                }
                WriteEntity output = generateTableWriteEntity(dest, destinationTable, partSpec, ltd, dpCtx);
                ctx.getLoadTableOutputMap().put(ltd, output);
                break;
            }
        case QBMetaData.DEST_PARTITION:
            {
                destinationPartition = qbm.getDestPartitionForAlias(dest);
                destinationTable = destinationPartition.getTable();
                destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable);
                destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
                checkExternalTable(destinationTable);
                Path partPath = destinationPartition.getDataLocation();
                checkImmutableTable(qb, destinationTable, partPath, true);
                // Previous behavior (HIVE-1707) used to replace the partition's dfs with the table's dfs.
                // The changes in HIVE-19891 appears to no longer support that behavior.
                destinationPath = partPath;
                if (MetaStoreUtils.isArchived(destinationPartition.getTPartition())) {
                    try {
                        String conflictingArchive = ArchiveUtils.conflictingArchiveNameOrNull(db, destinationTable, destinationPartition.getSpec());
                        String message = String.format("Insert conflict with existing archive: %s", conflictingArchive);
                        throw new SemanticException(message);
                    } catch (SemanticException err) {
                        throw err;
                    } catch (HiveException err) {
                        throw new SemanticException(err);
                    }
                }
                isNonNativeTable = destinationTable.isNonNative();
                isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters());
                AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                // this table_desc does not contain the partitioning columns
                tableDescriptor = Utilities.getTableDesc(destinationTable);
                if (!isNonNativeTable) {
                    if (destTableIsTransactional) {
                        acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
                    }
                }
                isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp);
                acidOperation = acidOp;
                queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath);
                moveTaskId = getMoveTaskId();
                if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                    Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying " + queryTmpdir + " from " + destinationPath);
                }
                // Add NOT NULL constraint check
                input = genConstraintsPlan(dest, qb, input);
                if (!qb.getIsQuery()) {
                    input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), dpCtx, null);
                }
                if (destinationTable.isMaterializedView() && mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) {
                    // Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
                    // TODO: We only do this for a full rebuild
                    String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS);
                    String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS);
                    if (sortColsStr != null || distributeColsStr != null) {
                        input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input);
                    }
                } else {
                    // Add sorting/bucketing if needed
                    input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
                }
                idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName());
                currentTableId = destTableId;
                destTableId++;
                if (destTableIsTransactional) {
                    acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
                    checkAcidConstraints();
                } else {
                    // Transactional tables can't be list bucketed or have skewed cols
                    lbCtx = constructListBucketingCtx(destinationPartition.getSkewedColNames(), destinationPartition.getSkewedColValues(), destinationPartition.getSkewedColValueLocationMaps(), destinationPartition.isStoredAsSubDirectories());
                }
                try {
                    if (ctx.getExplainConfig() != null) {
                        // For explain plan, txn won't be opened and doesn't make sense to allocate write id
                        writeId = null;
                    } else {
                        if (isMmTable) {
                            writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
                        } else {
                            writeId = (acidOp == Operation.NOT_ACID) ? null : txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
                        }
                    }
                } catch (LockException ex) {
                    throw new SemanticException("Failed to allocate write Id", ex);
                }
                ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, destinationPartition.getSpec(), acidOp, writeId);
                if (writeId != null) {
                    ltd.setStmtId(txnMgr.getCurrentStmtId());
                }
                // For the current context for generating File Sink Operator, it is either INSERT INTO or INSERT OVERWRITE.
                // So the next line works.
                boolean isInsertInto = !qb.getParseInfo().isDestToOpTypeInsertOverwrite(dest);
                // For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
                // deltas and base and leave them up to the cleaner to clean up
                LoadFileType loadType;
                if (isDirectInsert) {
                    loadType = LoadFileType.IGNORE;
                } else if (!isInsertInto && !destTableIsTransactional) {
                    loadType = LoadFileType.REPLACE_ALL;
                } else {
                    loadType = LoadFileType.KEEP_EXISTING;
                }
                ltd.setLoadFileType(loadType);
                ltd.setInsertOverwrite(!isInsertInto);
                ltd.setIsDirectInsert(isDirectInsert);
                ltd.setLbCtx(lbCtx);
                ltd.setMoveTaskId(moveTaskId);
                loadTableWork.add(ltd);
                if (!outputs.add(new WriteEntity(destinationPartition, determineWriteType(ltd, dest)))) {
                    throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(destinationTable.getTableName() + "@" + destinationPartition.getName()));
                }
                break;
            }
        case QBMetaData.DEST_LOCAL_FILE:
            isLocal = true;
        // fall through
        case QBMetaData.DEST_DFS_FILE:
            {
                destinationPath = getDestinationFilePath(qbm.getDestFileForAlias(dest), isMmTable);
                // CTAS case: the file output format and serde are defined by the create
                // table command rather than taking the default value
                List<FieldSchema> fieldSchemas = null;
                List<FieldSchema> partitionColumns = null;
                List<String> partitionColumnNames = null;
                List<FieldSchema> sortColumns = null;
                List<String> sortColumnNames = null;
                List<FieldSchema> distributeColumns = null;
                List<String> distributeColumnNames = null;
                List<ColumnInfo> fileSinkColInfos = null;
                List<ColumnInfo> sortColInfos = null;
                List<ColumnInfo> distributeColInfos = null;
                TableName tableName = null;
                Map<String, String> tblProps = null;
                CreateTableDesc tblDesc = qb.getTableDesc();
                CreateMaterializedViewDesc viewDesc = qb.getViewDesc();
                if (tblDesc != null) {
                    fieldSchemas = new ArrayList<>();
                    partitionColumns = new ArrayList<>();
                    partitionColumnNames = tblDesc.getPartColNames();
                    fileSinkColInfos = new ArrayList<>();
                    destTableIsTemporary = tblDesc.isTemporary();
                    destTableIsMaterialization = tblDesc.isMaterialization();
                    tableName = TableName.fromString(tblDesc.getDbTableName(), null, tblDesc.getDatabaseName());
                    tblProps = tblDesc.getTblProps();
                } else if (viewDesc != null) {
                    fieldSchemas = new ArrayList<>();
                    partitionColumns = new ArrayList<>();
                    partitionColumnNames = viewDesc.getPartColNames();
                    sortColumns = new ArrayList<>();
                    sortColumnNames = viewDesc.getSortColNames();
                    distributeColumns = new ArrayList<>();
                    distributeColumnNames = viewDesc.getDistributeColNames();
                    fileSinkColInfos = new ArrayList<>();
                    sortColInfos = new ArrayList<>();
                    distributeColInfos = new ArrayList<>();
                    destTableIsTemporary = false;
                    destTableIsMaterialization = false;
                    tableName = HiveTableName.ofNullableWithNoDefault(viewDesc.getViewName());
                    tblProps = viewDesc.getTblProps();
                }
                destTableIsTransactional = tblProps != null && AcidUtils.isTablePropertyTransactional(tblProps);
                if (destTableIsTransactional) {
                    try {
                        if (ctx.getExplainConfig() != null) {
                            // For explain plan, txn won't be opened and doesn't make sense to allocate write id
                            writeId = 0L;
                        } else {
                            writeId = txnMgr.getTableWriteId(tableName.getDb(), tableName.getTable());
                        }
                    } catch (LockException ex) {
                        throw new SemanticException("Failed to allocate write Id", ex);
                    }
                    if (AcidUtils.isInsertOnlyTable(tblProps, true)) {
                        isMmTable = isMmCreate = true;
                        if (tblDesc != null) {
                            tblDesc.setInitialMmWriteId(writeId);
                        } else {
                            viewDesc.setInitialMmWriteId(writeId);
                        }
                    }
                }
                if (isLocal) {
                    assert !isMmTable;
                    // for local directory - we always write to map-red intermediate
                    // store and then copy to local fs
                    queryTmpdir = ctx.getMRTmpPath();
                } else {
                    // no copy is required. we may want to revisit this policy in future
                    try {
                        Path qPath = FileUtils.makeQualified(destinationPath, conf);
                        queryTmpdir = isMmTable ? qPath : ctx.getTempDirForFinalJobPath(qPath);
                        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                            Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir + " from " + destinationPath + " (" + isMmTable + ")");
                        }
                    } catch (Exception e) {
                        throw new SemanticException("Error creating temporary folder on: " + destinationPath, e);
                    }
                }
                // Check for dynamic partitions.
                final String cols, colTypes;
                final boolean isPartitioned;
                if (dpCtx != null) {
                    throw new SemanticException("Dynamic partition context has already been created, this should not happen");
                }
                if (!CollectionUtils.isEmpty(partitionColumnNames)) {
                    ColsAndTypes ct = deriveFileSinkColTypes(inputRR, partitionColumnNames, sortColumnNames, distributeColumnNames, fieldSchemas, partitionColumns, sortColumns, distributeColumns, fileSinkColInfos, sortColInfos, distributeColInfos);
                    cols = ct.cols;
                    colTypes = ct.colTypes;
                    dpCtx = new DynamicPartitionCtx(partitionColumnNames, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
                    qbm.setDPCtx(dest, dpCtx);
                    // set the root of the temporary path where dynamic partition columns will populate
                    dpCtx.setRootPath(queryTmpdir);
                    isPartitioned = true;
                } else {
                    ColsAndTypes ct = deriveFileSinkColTypes(inputRR, sortColumnNames, distributeColumnNames, fieldSchemas, sortColumns, distributeColumns, sortColInfos, distributeColInfos);
                    cols = ct.cols;
                    colTypes = ct.colTypes;
                    isPartitioned = false;
                }
                // update the create table descriptor with the resulting schema.
                if (tblDesc != null) {
                    tblDesc.setCols(new ArrayList<>(fieldSchemas));
                    tblDesc.setPartCols(new ArrayList<>(partitionColumns));
                } else if (viewDesc != null) {
                    viewDesc.setSchema(new ArrayList<>(fieldSchemas));
                    viewDesc.setPartCols(new ArrayList<>(partitionColumns));
                    if (viewDesc.isOrganized()) {
                        viewDesc.setSortCols(new ArrayList<>(sortColumns));
                        viewDesc.setDistributeCols(new ArrayList<>(distributeColumns));
                    }
                }
                boolean isDestTempFile = true;
                if (ctx.isMRTmpFileURI(destinationPath.toUri().toString()) == false && ctx.isResultCacheDir(destinationPath) == false) {
                    // not a temp dir and not a result cache dir
                    idToTableNameMap.put(String.valueOf(destTableId), destinationPath.toUri().toString());
                    currentTableId = destTableId;
                    destTableId++;
                    isDestTempFile = false;
                }
                if (tblDesc == null) {
                    if (viewDesc != null) {
                        tableDescriptor = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
                    } else if (qb.getIsQuery()) {
                        Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
                        String fileFormat = conf.getResultFileFormat().toString();
                        if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
                            serdeClass = ThriftJDBCBinarySerDe.class;
                            fileFormat = ResultFileFormat.SEQUENCEFILE.toString();
                            // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                            // write out formatted thrift objects to SequenceFile
                            conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
                        } else if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
                            // If this output format is Llap, check to see if Arrow is requested
                            boolean useArrow = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW);
                            serdeClass = useArrow ? ArrowColumnarBatchSerDe.class : LazyBinarySerDe2.class;
                        }
                        tableDescriptor = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
                    } else {
                        tableDescriptor = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
                    }
                } else {
                    tableDescriptor = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
                }
                // if available, set location in table desc properties
                if (tblDesc != null && tblDesc.getLocation() != null && tableDescriptor != null && !tableDescriptor.getProperties().containsKey(hive_metastoreConstants.META_TABLE_LOCATION)) {
                    tableDescriptor.getProperties().setProperty(hive_metastoreConstants.META_TABLE_LOCATION, tblDesc.getLocation());
                }
                // We need a specific rowObjectInspector in this case
                try {
                    specificRowObjectInspector = (StructObjectInspector) tableDescriptor.getDeserializer(conf).getObjectInspector();
                } catch (Exception e) {
                    throw new SemanticException(e.getMessage(), e);
                }
                boolean isDfsDir = (destType == QBMetaData.DEST_DFS_FILE);
                try {
                    destinationTable = tblDesc != null ? tblDesc.toTable(conf) : viewDesc != null ? viewDesc.toTable(conf) : null;
                } catch (HiveException e) {
                    throw new SemanticException(e);
                }
                destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
                // Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
                if (viewDesc != null && viewDesc.isOrganized()) {
                    input = genMaterializedViewDataOrgPlan(sortColInfos, distributeColInfos, inputRR, input);
                }
                moveTaskId = getMoveTaskId();
                if (isPartitioned) {
                    // Create a SELECT that may reorder the columns if needed
                    RowResolver rowResolver = new RowResolver();
                    List<ExprNodeDesc> columnExprs = new ArrayList<>();
                    List<String> colNames = new ArrayList<>();
                    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
                    for (int i = 0; i < fileSinkColInfos.size(); i++) {
                        ColumnInfo ci = fileSinkColInfos.get(i);
                        ExprNodeDesc columnExpr = new ExprNodeColumnDesc(ci);
                        String name = getColumnInternalName(i);
                        rowResolver.put("", name, new ColumnInfo(name, columnExpr.getTypeInfo(), "", false));
                        columnExprs.add(columnExpr);
                        colNames.add(name);
                        colExprMap.put(name, columnExpr);
                    }
                    input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(columnExprs, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
                    input.setColumnExprMap(colExprMap);
                    // If this is a partitioned CTAS or MV statement, we are going to create a LoadTableDesc
                    // object. Although the table does not exist in metastore, we will swap the CreateTableTask
                    // and MoveTask resulting from this LoadTable so in this specific case, first we create
                    // the metastore table, then we move and commit the partitions. At least for the time being,
                    // this order needs to be enforced because metastore expects a table to exist before we can
                    // add any partitions to it.
                    isNonNativeTable = tableDescriptor.isNonNative();
                    if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) {
                        AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                        if (destTableIsTransactional) {
                            acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
                            checkAcidConstraints();
                        }
                        // isReplace = false in case concurrent operation is executed
                        ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, false, writeId);
                        if (writeId != null) {
                            ltd.setStmtId(txnMgr.getCurrentStmtId());
                        }
                        ltd.setLoadFileType(LoadFileType.KEEP_EXISTING);
                        ltd.setInsertOverwrite(false);
                        loadTableWork.add(ltd);
                    } else {
                        // This is a non-native table.
                        // We need to set stats as inaccurate.
                        setStatsForNonNativeTable(tableDescriptor.getDbName(), tableDescriptor.getTableName());
                        ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx.getPartSpec());
                        ltd.setInsertOverwrite(false);
                        ltd.setLoadFileType(LoadFileType.KEEP_EXISTING);
                    }
                    ltd.setMoveTaskId(moveTaskId);
                    ltd.setMdTable(destinationTable);
                    WriteEntity output = generateTableWriteEntity(dest, destinationTable, dpCtx.getPartSpec(), ltd, dpCtx);
                    ctx.getLoadTableOutputMap().put(ltd, output);
                } else {
                    // Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats.
                    LoadFileDesc loadFileDesc = new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, destinationPath, isDfsDir, cols, colTypes, // there is a change here - prev version had 'transactional', one before 'acid'
                    destTableIsFullAcid ? Operation.INSERT : Operation.NOT_ACID, isMmCreate);
                    loadFileDesc.setMoveTaskId(moveTaskId);
                    loadFileWork.add(loadFileDesc);
                    try {
                        Path qualifiedPath = destinationPath.getFileSystem(conf).makeQualified(destinationPath);
                        if (!outputs.add(new WriteEntity(qualifiedPath, !isDfsDir, isDestTempFile))) {
                            throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(destinationPath.toUri().toString()));
                        }
                    } catch (IOException ex) {
                        throw new SemanticException("Error while getting the full qualified path for the given directory: " + ex.getMessage());
                    }
                }
                break;
            }
        default:
            throw new SemanticException("Unknown destination type: " + destType);
    }
    inputRR = opParseCtx.get(input).getRowResolver();
    List<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
    if (updating(dest) || deleting(dest)) {
        vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
    } else {
        try {
            // If we already have a specific inspector (view or directory as a target) use that
            // Otherwise use the table deserializer to get the inspector
            StructObjectInspector rowObjectInspector = specificRowObjectInspector != null ? specificRowObjectInspector : (StructObjectInspector) destinationTable.getDeserializer().getObjectInspector();
            List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
            for (StructField field : fields) {
                vecCol.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()), "", false));
            }
        } catch (Exception e) {
            throw new SemanticException(e.getMessage(), e);
        }
    }
    RowSchema fsRS = new RowSchema(vecCol);
    // The output files of a FileSink can be merged if they are either not being written to a table
    // or are being written to a table which is not bucketed
    // and table the table is not sorted
    boolean canBeMerged = (destinationTable == null || !((destinationTable.getNumBuckets() > 0) || (destinationTable.getSortCols() != null && destinationTable.getSortCols().size() > 0)));
    // If this table is working with ACID semantics, turn off merging
    canBeMerged &= !destTableIsFullAcid;
    // Generate the partition columns from the parent input
    if (destType == QBMetaData.DEST_TABLE || destType == QBMetaData.DEST_PARTITION) {
        genPartnCols(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
    }
    FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, tableDescriptor, destinationPartition, // this was 1/4 acid
    destinationPath, // this was 1/4 acid
    currentTableId, // this was 1/4 acid
    destTableIsFullAcid, // this was 1/4 acid
    destTableIsTemporary, destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, canBeMerged, destinationTable, writeId, isMmCreate, destType, qb, isDirectInsert, acidOperation, moveTaskId);
    if (isMmCreate) {
        // Add FSD so that the LoadTask compilation could fix up its path to avoid the move.
        if (tableDesc != null) {
            tableDesc.setWriter(fileSinkDesc);
        } else {
            createVwDesc.setWriter(fileSinkDesc);
        }
    }
    if (fileSinkDesc.getInsertOverwrite()) {
        if (ltd != null) {
            ltd.setInsertOverwrite(true);
        }
    }
    if (null != tableDescriptor && useBatchingSerializer(tableDescriptor.getSerdeClassName())) {
        fileSinkDesc.setIsUsingBatchingSerDe(true);
    } else {
        fileSinkDesc.setIsUsingBatchingSerDe(false);
    }
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
    // in case of a merge statement.
    if (!isDirectInsert || acidOperation == AcidUtils.Operation.INSERT) {
        handleLineage(ltd, output);
    }
    setWriteIdForSurrogateKeys(ltd, input);
    LOG.debug("Created FileSink Plan for clause: {}dest_path: {} row schema: {}", dest, destinationPath, inputRR);
    FileSinkOperator fso = (FileSinkOperator) output;
    fso.getConf().setTable(destinationTable);
    // and it is an insert overwrite or insert into table
    if (conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && destinationTable != null && (!destinationTable.isNonNative() || destinationTable.getStorageHandler().commitInMoveTask()) && !destTableIsTemporary && !destTableIsMaterialization && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
        if (destType == QBMetaData.DEST_TABLE) {
            genAutoColumnStatsGatheringPipeline(destinationTable, partSpec, input, qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName()), false);
        } else if (destType == QBMetaData.DEST_PARTITION) {
            genAutoColumnStatsGatheringPipeline(destinationTable, destinationPartition.getSpec(), input, qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName()), false);
        } else if (destType == QBMetaData.DEST_LOCAL_FILE || destType == QBMetaData.DEST_DFS_FILE) {
            // CTAS or CMV statement
            genAutoColumnStatsGatheringPipeline(destinationTable, null, input, false, true);
        }
    }
    return output;
}
Also used : LoadFileType(org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) ListBucketingCtx(org.apache.hadoop.hive.ql.plan.ListBucketingCtx) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableName(org.apache.hadoop.hive.common.TableName) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) PreInsertTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) 
JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) HiveOperation(org.apache.hadoop.hive.ql.plan.HiveOperation) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) Table(org.apache.hadoop.hive.ql.metadata.Table) IOException(java.io.IOException) CreateMaterializedViewDesc(org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) 
SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) MaterializedViewUpdateDesc(org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation)

Example 14 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

the class SemanticAnalyzer method genReduceSinkPlan.

/**
 * Adds a ReduceSink operator on top of {@code input}, followed by a Select operator that
 * restores one output column per input column.
 * <p>
 * Every column of the input row resolver is classified into one of three groups, recorded in a
 * local {@code index} array that the second half of the method decodes:
 * <ul>
 *   <li>{@code Integer.MAX_VALUE}: the column backtracks to a constant and constants are being
 *       pulled; it is not shipped through the reduce sink and is emitted by the Select instead;</li>
 *   <li>{@code >= 0}: the column matches the sort column at that position and is read from the
 *       reduce key;</li>
 *   <li>{@code < 0}: the column is read from the reduce value at position {@code -index - 1}.</li>
 * </ul>
 *
 * @param input operator the reduce sink is attached to
 * @param partitionCols partition expressions, handed unchanged to {@code PlanUtils.getReduceSinkDesc}
 * @param sortCols sort expressions; constant ones are dropped when {@code pullConstants} is set
 * @param sortOrder string read one character per entry of {@code sortCols} (same index)
 * @param nullOrder string read one character per entry of {@code sortCols} (same index)
 * @param numReducers handed unchanged to {@code PlanUtils.getReduceSinkDesc}
 * @param acidOp handed unchanged to {@code PlanUtils.getReduceSinkDesc}
 * @param pullConstants when true, constant expressions are kept out of the reduce sink keys and
 *          values and are re-created by the Select operator above it
 * @param isCompaction handed unchanged to {@code PlanUtils.getReduceSinkDesc}
 * @return the Select operator created on top of the new ReduceSink operator
 * @throws SemanticException declared by this method; presumably raised by the helpers it calls
 */
@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input, List<ExprNodeDesc> partitionCols, List<ExprNodeDesc> sortCols, String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean pullConstants, boolean isCompaction) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    // Temporary operator whose only parent is the input; it is the starting point handed to
    // ExprNodeDescUtils.backtrack below and is detached again once classification is done.
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(input));
    List<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>();
    StringBuilder newSortOrder = new StringBuilder();
    StringBuilder newNullOrder = new StringBuilder();
    // Backtracked form of every retained sort column, kept in the same order as newSortCols
    List<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < sortCols.size(); i++) {
        ExprNodeDesc sortCol = sortCols.get(i);
        // Keep the sort column unless we are pulling constants and it is a constant;
        // its sort-order and null-order characters are kept along with it.
        if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) {
            newSortCols.add(sortCol);
            newSortOrder.append(sortOrder.charAt(i));
            newNullOrder.append(nullOrder.charAt(i));
            sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input));
        }
    }
    // For the generation of the values expression just get the inputs
    // signature and generate field expressions for those
    RowResolver rsRR = new RowResolver();
    List<String> outputColumns = new ArrayList<String>();
    List<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    // Backtracked form of every value column, parallel to valueCols (entries may be null)
    List<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    // Constants skipped by the reduce sink, in input-column order; replayed by the Select below
    List<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>();
    List<ColumnInfo> columnInfos = inputRR.getColumnInfos();
    // index[i] tells where input column i ends up: Integer.MAX_VALUE = pulled constant,
    // k >= 0 = reduce key k, negative v = reduce value (-v - 1)
    int[] index = new int[columnInfos.size()];
    for (int i = 0; i < index.length; i++) {
        ColumnInfo colInfo = columnInfos.get(i);
        // nm is used as {table alias, column alias}; nm2 is an optional alternate mapping
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input);
        if (pullConstants && valueBack instanceof ExprNodeConstantDesc) {
            // ignore, it will be generated by SEL op
            index[i] = Integer.MAX_VALUE;
            constantCols.add(valueBack);
            continue;
        }
        // Case 1: the column is one of the sort keys -> expose it under the key field name
        int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack);
        if (kindex >= 0) {
            index[i] = kindex;
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setTabAlias(nm[0]);
            rsRR.put(nm[0], nm[1], newColInfo);
            if (nm2 != null) {
                rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
            }
            continue;
        }
        // Case 2: an identical expression is already a value column -> reuse that slot;
        // no additional entry is added to rsRR for this column
        int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack);
        if (vindex >= 0) {
            index[i] = -vindex - 1;
            continue;
        }
        // Case 3: new value column; its slot is the current size of valueCols
        index[i] = -valueCols.size() - 1;
        String outputColName = getColumnInternalName(valueCols.size());
        valueCols.add(value);
        valueColsBack.add(valueBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setTabAlias(nm[0]);
        rsRR.put(nm[0], nm[1], newColInfo);
        if (nm2 != null) {
            rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
        }
        outputColumns.add(outputColName);
    }
    // Backtracking is finished: detach the dummy operator from the input
    dummy.setParentOperators(null);
    ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), defaultNullOrder, numReducers, acidOp, isCompaction);
    Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR);
    // Map every KEY.* / VALUE.* output field of the reduce sink back to the expression producing it
    List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), newSortCols.get(i));
    }
    List<String> valueColNames = rsdesc.getOutputValueColumnNames();
    for (int i = 0; i < valueColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i));
    }
    interim.setColumnExprMap(colExprMap);
    // Build the Select on top of the reduce sink: one output column per original input column,
    // decoded from index[]
    RowResolver selectRR = new RowResolver();
    List<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>();
    List<String> selOutputCols = new ArrayList<String>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
    // Consumed in the same order in which the constants were collected above
    Iterator<ExprNodeDesc> constants = constantCols.iterator();
    for (int i = 0; i < index.length; i++) {
        ColumnInfo prev = columnInfos.get(i);
        String[] nm = inputRR.reverseLookup(prev.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
        ColumnInfo info = new ColumnInfo(prev);
        ExprNodeDesc desc;
        if (index[i] == Integer.MAX_VALUE) {
            // Pulled constant: emit the constant expression directly
            desc = constants.next();
        } else {
            // Otherwise reference the matching KEY.* or VALUE.* field of the reduce sink
            String field;
            if (index[i] >= 0) {
                field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
            } else {
                field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
            }
            desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
        }
        selCols.add(desc);
        // Output columns are renamed positionally; aliases are carried over from the input resolver
        String internalName = getColumnInternalName(i);
        info.setInternalName(internalName);
        selectRR.put(nm[0], nm[1], info);
        if (nm2 != null) {
            selectRR.addMappingOnly(nm2[0], nm2[1], info);
        }
        selOutputCols.add(internalName);
        selColExprMap.put(internalName, desc);
    }
    SelectDesc select = new SelectDesc(selCols, selOutputCols);
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, new RowSchema(selectRR.getColumnInfos()), interim), selectRR);
    output.setColumnExprMap(selColExprMap);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Example 15 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

the class SemanticAnalyzer method insertSelectForSemijoin.

/**
 * Places a selection operator above {@code input} for a semijoin so that only
 * the requested fields (the group by keys) are projected and every other
 * field is filtered out.
 * <p>
 * Each field has to resolve to a column reference or to a constant; the
 * aliases of the input column it originates from are copied into the row
 * resolver of the new operator. As soon as a field resolves to any other kind
 * of expression, no selection operator is created and {@code input} is
 * returned unchanged.
 *
 * @param fields
 *          list of fields need to be output
 * @param input
 *          input operator
 * @return the selection operator, or {@code input} itself when some field is
 *         neither a column reference nor a constant
 * @throws SemanticException
 */
private Operator insertSelectForSemijoin(List<ASTNode> fields, Operator<?> input) throws SemanticException {
    RowResolver sourceResolver = opParseCtx.get(input).getRowResolver();
    RowResolver projectedResolver = new RowResolver();
    List<ExprNodeDesc> projections = new ArrayList<>();
    List<String> projectedNames = new ArrayList<>();
    Map<String, ExprNodeDesc> exprByName = new HashMap<>();

    // Walk the requested fields, assigning positional internal names as we go
    int position = 0;
    for (ASTNode field : fields) {
        ExprNodeDesc projection = genExprNodeDesc(field, sourceResolver);

        // Work out which input column the expression comes from
        String originColumn;
        if (projection instanceof ExprNodeColumnDesc) {
            // The usual case: a plain column reference
            originColumn = ((ExprNodeColumnDesc) projection).getColumn();
        } else if (projection instanceof ExprNodeConstantDesc) {
            // A constant: use the column it was folded from so that the
            // aliases of that column can still be propagated
            originColumn = ((ExprNodeConstantDesc) projection).getFoldedFromCol();
        } else {
            // Any other kind of expression: give up and hand back the input
            // without adding a selection operator
            return input;
        }
        String[] aliases = sourceResolver.reverseLookup(originColumn);
        String[] alternateAliases = sourceResolver.getAlternateMappings(originColumn);

        String internalName = getColumnInternalName(position);
        projectedNames.add(internalName);
        ColumnInfo projectedInfo = new ColumnInfo(internalName, projection.getTypeInfo(), "", false);
        projectedResolver.put(aliases[0], aliases[1], projectedInfo);
        if (alternateAliases != null) {
            projectedResolver.addMappingOnly(alternateAliases[0], alternateAliases[1], projectedInfo);
        }
        projections.add(projection);
        exprByName.put(internalName, projection);
        position++;
    }

    // Create the selection operator as a child of the input and register it
    SelectDesc selectDesc = new SelectDesc(projections, projectedNames, false);
    RowSchema projectedSchema = new RowSchema(projectedResolver.getColumnInfos());
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(selectDesc, projectedSchema, input), projectedResolver);
    output.setColumnExprMap(exprByName);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Aggregations

SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)55 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)50 ArrayList (java.util.ArrayList)43 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)32 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)31 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)30 HashMap (java.util.HashMap)28 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)25 LinkedHashMap (java.util.LinkedHashMap)20 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)16 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)15 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)13 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)13 SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)13 SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint)13 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)13 DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint)13 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)13 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)12