Search in sources :

Example 21 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method createNewGroupingKey.

/*
   * Create a new grouping key for grouping id.
   * A dummy grouping id. is added. At runtime, the group by operator
   * creates 'n' rows per input row, where 'n' is the number of grouping sets.
   */
private void createNewGroupingKey(List<ExprNodeDesc> groupByKeys, List<String> outputColumnNames, RowResolver groupByOutputRowResolver, Map<String, ExprNodeDesc> colExprMap) {
    // The value for the constant does not matter. It is replaced by the grouping set
    // value for the actual implementation
    ExprNodeConstantDesc constant = new ExprNodeConstantDesc(VirtualColumn.GROUPINGID.getTypeInfo(), 0L);
    groupByKeys.add(constant);
    String field = getColumnInternalName(groupByKeys.size() - 1);
    outputColumnNames.add(field);
    groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), new ColumnInfo(field, VirtualColumn.GROUPINGID.getTypeInfo(), null, true));
    colExprMap.put(field, constant);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)

Example 22 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method getColForInsertStmtSpec.

private RowResolver getColForInsertStmtSpec(Map<String, ExprNodeDesc> targetCol2Projection, final Table target, Map<String, ColumnInfo> targetCol2ColumnInfo, int colListPos, List<TypeInfo> targetTableColTypes, ArrayList<ExprNodeDesc> new_col_list, List<String> targetTableColNames) throws SemanticException {
    RowResolver newOutputRR = new RowResolver();
    Map<String, String> colNameToDefaultVal = null;
    // see if we need to fetch default constraints from metastore
    if (targetCol2Projection.size() < targetTableColNames.size()) {
        try {
            DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(target.getDbName(), target.getTableName());
            colNameToDefaultVal = dc.getColNameToDefaultValueMap();
        } catch (Exception e) {
            if (e instanceof SemanticException) {
                throw (SemanticException) e;
            } else {
                throw (new RuntimeException(e));
            }
        }
    }
    boolean defaultConstraintsFetch = true;
    for (int i = 0; i < targetTableColNames.size(); i++) {
        String f = targetTableColNames.get(i);
        if (targetCol2Projection.containsKey(f)) {
            // put existing column in new list to make sure it is in the right position
            new_col_list.add(targetCol2Projection.get(f));
            ColumnInfo ci = targetCol2ColumnInfo.get(f);
            ci.setInternalName(getColumnInternalName(colListPos));
            newOutputRR.put(ci.getTabAlias(), ci.getInternalName(), ci);
        } else {
            // add new 'synthetic' columns for projections not provided by Select
            assert (colNameToDefaultVal != null);
            ExprNodeDesc exp = null;
            if (colNameToDefaultVal.containsKey(f)) {
                // make an expression for default value
                String defaultValue = colNameToDefaultVal.get(f);
                ParseDriver parseDriver = new ParseDriver();
                try {
                    ASTNode defValAst = parseDriver.parseExpression(defaultValue);
                    exp = TypeCheckProcFactory.genExprNode(defValAst, new TypeCheckCtx(null)).get(defValAst);
                } catch (Exception e) {
                    throw new SemanticException("Error while parsing default value: " + defaultValue + ". Error message: " + e.getMessage());
                }
                LOG.debug("Added default value from metastore: " + exp);
            } else {
                exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null);
            }
            new_col_list.add(exp);
            // this column doesn't come from any table
            final String tableAlias = null;
            ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), exp.getWritableObjectInspector(), tableAlias, false);
            newOutputRR.put(colInfo.getTabAlias(), colInfo.getInternalName(), colInfo);
        }
        colListPos++;
    }
    return newOutputRR;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Example 23 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genFileSinkPlan.

@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    QBMetaData qbm = qb.getMetaData();
    Integer dest_type = qbm.getDestTypeForAlias(dest);
    // destination table if any
    Table dest_tab = null;
    // true for full ACID table and MM table
    boolean destTableIsTransactional;
    // should the destination table be written to using ACID
    boolean destTableIsFullAcid;
    boolean destTableIsTemporary = false;
    boolean destTableIsMaterialization = false;
    // destination partition if any
    Partition dest_part = null;
    // the intermediate destination directory
    Path queryTmpdir = null;
    // the final destination directory
    Path dest_path = null;
    TableDesc table_desc = null;
    int currentTableId = 0;
    boolean isLocal = false;
    SortBucketRSCtx rsCtx = new SortBucketRSCtx();
    DynamicPartitionCtx dpCtx = null;
    LoadTableDesc ltd = null;
    ListBucketingCtx lbCtx = null;
    Map<String, String> partSpec = null;
    boolean isMmTable = false, isMmCtas = false;
    Long writeId = null;
    HiveTxnManager txnMgr = SessionState.get().getTxnMgr();
    switch(dest_type.intValue()) {
        case QBMetaData.DEST_TABLE:
            {
                dest_tab = qbm.getDestTableForAlias(dest);
                destTableIsTransactional = AcidUtils.isTransactionalTable(dest_tab);
                destTableIsFullAcid = AcidUtils.isFullAcidTable(dest_tab);
                destTableIsTemporary = dest_tab.isTemporary();
                // Is the user trying to insert into a external tables
                checkExternalTable(dest_tab);
                partSpec = qbm.getPartSpecForAlias(dest);
                dest_path = dest_tab.getPath();
                checkImmutableTable(qb, dest_tab, dest_path, false);
                // check for partition
                List<FieldSchema> parts = dest_tab.getPartitionKeys();
                if (parts != null && parts.size() > 0) {
                    // table is partitioned
                    if (partSpec == null || partSpec.size() == 0) {
                        // user did NOT specify partition
                        throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
                    }
                    dpCtx = qbm.getDPCtx(dest);
                    if (dpCtx == null) {
                        dest_tab.validatePartColumnNames(partSpec, false);
                        dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
                        qbm.setDPCtx(dest, dpCtx);
                    }
                }
                // Check for dynamic partitions.
                dpCtx = checkDynPart(qb, qbm, dest_tab, partSpec, dest);
                if (dpCtx != null && dpCtx.getSPPath() != null) {
                    dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
                }
                boolean isNonNativeTable = dest_tab.isNonNative();
                isMmTable = AcidUtils.isInsertOnlyTable(dest_tab.getParameters());
                if (isNonNativeTable || isMmTable) {
                    queryTmpdir = dest_path;
                } else {
                    queryTmpdir = ctx.getTempDirForFinalJobPath(dest_path);
                }
                if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                    Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir + " from " + dest_path);
                }
                if (dpCtx != null) {
                    // set the root of the temporary path where dynamic partition columns will populate
                    dpCtx.setRootPath(queryTmpdir);
                }
                // this table_desc does not contain the partitioning columns
                table_desc = Utilities.getTableDesc(dest_tab);
                // Add NOT NULL constraint check
                input = genConstraintsPlan(dest, qb, input);
                // Add sorting/bucketing if needed
                input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
                idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
                currentTableId = destTableId;
                destTableId++;
                lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(), dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(), dest_tab.isStoredAsSubDirectories(), conf);
                // NOTE: specify Dynamic partitions in dest_tab for WriteEntity
                if (!isNonNativeTable) {
                    AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                    if (destTableIsFullAcid) {
                        acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
                        // todo: should this be done for MM?  is it ok to use CombineHiveInputFormat with MM
                        checkAcidConstraints(qb, table_desc, dest_tab);
                    }
                    try {
                        if (ctx.getExplainConfig() != null) {
                            // For explain plan, txn won't be opened and doesn't make sense to allocate write id
                            writeId = 0L;
                        } else {
                            if (isMmTable) {
                                writeId = txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
                            } else {
                                writeId = acidOp == Operation.NOT_ACID ? null : txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
                            }
                        }
                    } catch (LockException ex) {
                        throw new SemanticException("Failed to allocate write Id", ex);
                    }
                    boolean isReplace = !qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName());
                    ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp, isReplace, writeId);
                    // For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
                    // deltas and base and leave them up to the cleaner to clean up
                    boolean isInsertInto = qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName());
                    LoadFileType loadType = (!isInsertInto && !destTableIsTransactional) ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING;
                    ltd.setLoadFileType(loadType);
                    ltd.setInsertOverwrite(!isInsertInto);
                    ltd.setLbCtx(lbCtx);
                    loadTableWork.add(ltd);
                } else {
                    // This is a non-native table.
                    // We need to set stats as inaccurate.
                    setStatsForNonNativeTable(dest_tab);
                    // true if it is insert overwrite.
                    boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName()));
                    createInsertDesc(dest_tab, overwrite);
                }
                if (dest_tab.isMaterializedView()) {
                    materializedViewUpdateDesc = new MaterializedViewDesc(dest_tab.getFullyQualifiedName(), false, false, true);
                }
                WriteEntity output = generateTableWriteEntity(dest, dest_tab, partSpec, ltd, dpCtx, isNonNativeTable);
                ctx.getLoadTableOutputMap().put(ltd, output);
                break;
            }
        case QBMetaData.DEST_PARTITION:
            {
                dest_part = qbm.getDestPartitionForAlias(dest);
                dest_tab = dest_part.getTable();
                destTableIsTransactional = AcidUtils.isTransactionalTable(dest_tab);
                destTableIsFullAcid = AcidUtils.isFullAcidTable(dest_tab);
                checkExternalTable(dest_tab);
                Path tabPath = dest_tab.getPath();
                Path partPath = dest_part.getDataLocation();
                checkImmutableTable(qb, dest_tab, partPath, true);
                // if the table is in a different dfs than the partition,
                // replace the partition's dfs with the table's dfs.
                dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
                isMmTable = AcidUtils.isInsertOnlyTable(dest_tab.getParameters());
                queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForFinalJobPath(dest_path);
                if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                    Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying " + queryTmpdir + " from " + dest_path);
                }
                table_desc = Utilities.getTableDesc(dest_tab);
                // Add NOT NULL constraint check
                input = genConstraintsPlan(dest, qb, input);
                // Add sorting/bucketing if needed
                input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
                idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
                currentTableId = destTableId;
                destTableId++;
                lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf);
                AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                if (destTableIsFullAcid) {
                    acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
                    // todo: should this be done for MM?  is it ok to use CombineHiveInputFormat with MM?
                    checkAcidConstraints(qb, table_desc, dest_tab);
                }
                try {
                    if (ctx.getExplainConfig() != null) {
                        // For explain plan, txn won't be opened and doesn't make sense to allocate write id
                        writeId = 0L;
                    } else {
                        if (isMmTable) {
                            writeId = txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
                        } else {
                            writeId = (acidOp == Operation.NOT_ACID) ? null : txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
                        }
                    }
                } catch (LockException ex) {
                    throw new SemanticException("Failed to allocate write Id", ex);
                }
                ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp, writeId);
                // For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
                // deltas and base and leave them up to the cleaner to clean up
                boolean isInsertInto = qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName());
                LoadFileType loadType = (!isInsertInto && !destTableIsTransactional) ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING;
                ltd.setLoadFileType(loadType);
                ltd.setInsertOverwrite(!isInsertInto);
                ltd.setLbCtx(lbCtx);
                loadTableWork.add(ltd);
                if (!outputs.add(new WriteEntity(dest_part, determineWriteType(ltd, dest_tab.isNonNative(), dest)))) {
                    throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
                }
                break;
            }
        case QBMetaData.DEST_LOCAL_FILE:
            isLocal = true;
        // fall through
        case QBMetaData.DEST_DFS_FILE:
            {
                dest_path = new Path(qbm.getDestFileForAlias(dest));
                ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
                // CTAS case: the file output format and serde are defined by the create
                // table command rather than taking the default value
                List<FieldSchema> field_schemas = null;
                CreateTableDesc tblDesc = qb.getTableDesc();
                CreateViewDesc viewDesc = qb.getViewDesc();
                boolean isCtas = false;
                if (tblDesc != null) {
                    field_schemas = new ArrayList<FieldSchema>();
                    destTableIsTemporary = tblDesc.isTemporary();
                    destTableIsMaterialization = tblDesc.isMaterialization();
                    if (AcidUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) {
                        isMmTable = isMmCtas = true;
                        try {
                            if (ctx.getExplainConfig() != null) {
                                // For explain plan, txn won't be opened and doesn't make sense to allocate write id
                                writeId = 0L;
                            } else {
                                writeId = txnMgr.getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
                            }
                        } catch (LockException ex) {
                            throw new SemanticException("Failed to allocate write Id", ex);
                        }
                        tblDesc.setInitialMmWriteId(writeId);
                    }
                } else if (viewDesc != null) {
                    field_schemas = new ArrayList<FieldSchema>();
                    destTableIsTemporary = false;
                }
                if (isLocal) {
                    assert !isMmTable;
                    // for local directory - we always write to map-red intermediate
                    // store and then copy to local fs
                    queryTmpdir = ctx.getMRTmpPath();
                } else {
                    // no copy is required. we may want to revisit this policy in future
                    try {
                        Path qPath = FileUtils.makeQualified(dest_path, conf);
                        queryTmpdir = isMmTable ? qPath : ctx.getTempDirForFinalJobPath(qPath);
                        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                            Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir + " from " + dest_path + " (" + isMmTable + ")");
                        }
                    } catch (Exception e) {
                        throw new SemanticException("Error creating temporary folder on: " + dest_path, e);
                    }
                }
                ColsAndTypes ct = deriveFileSinkColTypes(inputRR, field_schemas);
                String cols = ct.cols, colTypes = ct.colTypes;
                // update the create table descriptor with the resulting schema.
                if (tblDesc != null) {
                    tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
                } else if (viewDesc != null) {
                    viewDesc.setSchema(new ArrayList<FieldSchema>(field_schemas));
                }
                destTableIsTransactional = tblDesc != null && AcidUtils.isTransactionalTable(tblDesc);
                destTableIsFullAcid = tblDesc != null && AcidUtils.isFullAcidTable(tblDesc);
                boolean isDestTempFile = true;
                if (!ctx.isMRTmpFileURI(dest_path.toUri().toString())) {
                    idToTableNameMap.put(String.valueOf(destTableId), dest_path.toUri().toString());
                    currentTableId = destTableId;
                    destTableId++;
                    isDestTempFile = false;
                }
                boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
                // Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats.
                loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes, // there is a change here - prev version had 'transadtional', one beofre' acid'
                destTableIsFullAcid ? Operation.INSERT : Operation.NOT_ACID, isMmCtas));
                if (tblDesc == null) {
                    if (viewDesc != null) {
                        table_desc = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
                    } else if (qb.getIsQuery()) {
                        String fileFormat;
                        if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
                            fileFormat = "SequenceFile";
                            HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT, fileFormat);
                            table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, ThriftJDBCBinarySerDe.class);
                            // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                            // write out formatted thrift objects to SequenceFile
                            conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
                        } else {
                            fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
                            Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
                            if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
                                serdeClass = LazyBinarySerDe2.class;
                            }
                            table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
                        }
                    } else {
                        table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
                    }
                } else {
                    table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
                }
                if (!outputs.add(new WriteEntity(dest_path, !isDfsDir, isDestTempFile))) {
                    throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_path.toUri().toString()));
                }
                break;
            }
        default:
            throw new SemanticException("Unknown destination type: " + dest_type);
    }
    if (!(dest_type.intValue() == QBMetaData.DEST_DFS_FILE && qb.getIsQuery())) {
        input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
    }
    inputRR = opParseCtx.get(input).getRowResolver();
    ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
    if (updating(dest) || deleting(dest)) {
        vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
    } else {
        try {
            StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc.getDeserializer(conf).getObjectInspector();
            List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
            for (int i = 0; i < fields.size(); i++) {
                vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), "", false));
            }
        } catch (Exception e) {
            throw new SemanticException(e.getMessage(), e);
        }
    }
    RowSchema fsRS = new RowSchema(vecCol);
    // The output files of a FileSink can be merged if they are either not being written to a table
    // or are being written to a table which is not bucketed
    // and table the table is not sorted
    boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0) || (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0)));
    // If this table is working with ACID semantics, turn off merging
    canBeMerged &= !destTableIsFullAcid;
    // Generate the partition columns from the parent input
    if (dest_type.intValue() == QBMetaData.DEST_TABLE || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
        genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
    }
    FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, table_desc, dest_part, // this was 1/4 acid
    dest_path, // this was 1/4 acid
    currentTableId, // this was 1/4 acid
    destTableIsFullAcid, // this was 1/4 acid
    destTableIsTemporary, destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, canBeMerged, dest_tab, writeId, isMmCtas, dest_type, qb);
    if (isMmCtas) {
        // Add FSD so that the LoadTask compilation could fix up its path to avoid the move.
        tableDesc.setWriter(fileSinkDesc);
    }
    if (fileSinkDesc.getInsertOverwrite()) {
        if (ltd != null) {
            ltd.setInsertOverwrite(true);
        }
    }
    if (SessionState.get().isHiveServerQuery() && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
        fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true);
    } else {
        fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false);
    }
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
    handleLineage(ltd, output);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + dest_path + " row schema: " + inputRR.toString());
    }
    FileSinkOperator fso = (FileSinkOperator) output;
    fso.getConf().setTable(dest_tab);
    // and it is an insert overwrite or insert into table
    if (dest_tab != null && !dest_tab.isNonNative() && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
        if (dest_type.intValue() == QBMetaData.DEST_TABLE) {
            genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
        } else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) {
            genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
        }
    }
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LoadFileType(org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType) MaterializedViewDesc(org.apache.hadoop.hive.ql.exec.MaterializedViewDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) HiveOperation(org.apache.hadoop.hive.ql.plan.HiveOperation) CreateViewDesc(org.apache.hadoop.hive.ql.plan.CreateViewDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) ListBucketingCtx(org.apache.hadoop.hive.ql.plan.ListBucketingCtx) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) List(java.util.List) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Table(org.apache.hadoop.hive.ql.metadata.Table) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) InsertTableDesc(org.apache.hadoop.hive.ql.plan.InsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 24 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genJoinReduceSinkChild.

@SuppressWarnings("nls")
private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, Operator<?> child, String[] srcs, int tag) throws SemanticException {
    // dummy for backtracking
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(child));
    RowResolver inputRR = opParseCtx.get(child).getRowResolver();
    RowResolver outputRR = new RowResolver();
    ArrayList<String> outputColumns = new ArrayList<String>();
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
    // Compute join keys and store in reduceKeys
    for (ExprNodeDesc joinKey : joinKeys) {
        reduceKeys.add(joinKey);
        reduceKeysBack.add(ExprNodeDescUtils.backtrack(joinKey, dummy, child));
    }
    // Walk over the input row resolver and copy in the output
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    int[] index = new int[columns.size()];
    for (int i = 0; i < columns.size(); i++) {
        ColumnInfo colInfo = columns.get(i);
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child);
        int kindex;
        if (exprBack == null) {
            kindex = -1;
        } else if (ExprNodeDescUtils.isConstant(exprBack)) {
            kindex = reduceKeysBack.indexOf(exprBack);
        } else {
            kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
        }
        if (kindex >= 0) {
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setTabAlias(nm[0]);
            outputRR.put(nm[0], nm[1], newColInfo);
            if (nm2 != null) {
                outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
            }
            index[i] = kindex;
            continue;
        }
        index[i] = -reduceValues.size() - 1;
        String outputColName = getColumnInternalName(reduceValues.size());
        reduceValues.add(expr);
        reduceValuesBack.add(exprBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setTabAlias(nm[0]);
        outputRR.put(nm[0], nm[1], newColInfo);
        if (nm2 != null) {
            outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
        }
        outputColumns.add(outputColName);
    }
    dummy.setParentOperators(null);
    int numReds = -1;
    // Use only 1 reducer in case of cartesian product
    if (reduceKeys.size() == 0) {
        numReds = 1;
        String error = StrictChecks.checkCartesian(conf);
        if (error != null) {
            throw new SemanticException(error);
        }
    }
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, tag, reduceKeys.size(), numReds, AcidUtils.Operation.NOT_ACID);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputRR.getColumnInfos()), child), outputRR);
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
    }
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    for (int i = 0; i < valColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
    }
    rsOp.setValueIndex(index);
    rsOp.setColumnExprMap(colExprMap);
    rsOp.setInputAliases(srcs);
    return rsOp;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Example 25 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method handleInsertStatementSpec.

/**
 * This modifies the Select projections when the Select is part of an insert statement and
 * the insert statement specifies a column list for the target table, e.g.
 * create table source (a int, b int);
 * create table target (x int, y int, z int);
 * insert into target(z,x) select * from source
 *
 * Once the * is resolved to 'a,b', this list needs to rewritten to 'b,null,a' so that it looks
 * as if the original query was written as
 * insert into target select b, null, a from source
 *
 * if target schema is not specified, this is no-op
 *
 * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
 * @throws SemanticException
 */
public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, RowResolver outputRR, RowResolver inputRR, QB qb, ASTNode selExprList) throws SemanticException {
    // (z,x)
    // specified in the query
    List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);
    if (targetTableSchema == null) {
        // no insert schema was specified
        return outputRR;
    }
    if (targetTableSchema.size() != col_list.size()) {
        Table target = qb.getMetaData().getDestTableForAlias(dest);
        Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
        throw new SemanticException(generateErrorMessage(selExprList, "Expected " + targetTableSchema.size() + " columns for " + dest + (target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) + "; select produces " + col_list.size() + " columns"));
    }
    // e.g. map z->expr for a
    Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
    // e.g. map z->ColumnInfo for a
    Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
    int colListPos = 0;
    for (String targetCol : targetTableSchema) {
        targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
        targetCol2Projection.put(targetCol, col_list.get(colListPos++));
    }
    Table target = qb.getMetaData().getDestTableForAlias(dest);
    Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
    if (target == null && partition == null) {
        throw new SemanticException(generateErrorMessage(selExprList, "No table/partition found in QB metadata for dest='" + dest + "'"));
    }
    ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
    colListPos = 0;
    List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
    List<String> targetTableColNames = new ArrayList<String>();
    List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
    for (FieldSchema fs : targetTableCols) {
        targetTableColNames.add(fs.getName());
        targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
    }
    Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
    if (partSpec != null) {
        // relies on consistent order via LinkedHashMap
        for (Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
            if (partKeyVal.getValue() == null) {
                // these must be after non-partition cols
                targetTableColNames.add(partKeyVal.getKey());
                targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
            }
        }
    }
    // now make the select produce <regular columns>,<dynamic partition columns> with
    // where missing columns are NULL-filled
    Table tbl = target == null ? partition.getTable() : target;
    RowResolver newOutputRR = getColForInsertStmtSpec(targetCol2Projection, tbl, targetCol2ColumnInfo, colListPos, targetTableColTypes, new_col_list, targetTableColNames);
    col_list.clear();
    col_list.addAll(new_col_list);
    return newOutputRR;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) Table(org.apache.hadoop.hive.ql.metadata.Table) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)132 ArrayList (java.util.ArrayList)82 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)79 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)62 HashMap (java.util.HashMap)53 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)49 LinkedHashMap (java.util.LinkedHashMap)41 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)39 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)33 Operator (org.apache.hadoop.hive.ql.exec.Operator)32 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)32 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)31 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)31 NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint)31 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)30 SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)30 SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint)30 CheckConstraint (org.apache.hadoop.hive.ql.metadata.CheckConstraint)30 DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint)30 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)29