Search in sources :

Example 1 with DynamicPartitionCtx

use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.

the class FileSinkOperator method jobCloseOp.

@Override
public void jobCloseOp(Configuration hconf, boolean success) throws HiveException {
    try {
        if ((conf != null) && isNativeTable) {
            Path specPath = conf.getDirName();
            DynamicPartitionCtx dpCtx = conf.getDynPartCtx();
            if (conf.isLinkedFileSink() && (dpCtx != null)) {
                specPath = conf.getParentDir();
            }
            Utilities.mvFileToFinalPath(specPath, hconf, success, LOG, dpCtx, conf, reporter);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    }
    super.jobCloseOp(hconf, success);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) IOException(java.io.IOException)

Example 2 with DynamicPartitionCtx

use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.

the class StatsTask method getPartitionsList.

/**
   * Get the list of partitions that need to update statistics.
   * TODO: we should reuse the Partitions generated at compile time
   * since getting the list of partitions is quite expensive.
   *
   * @return a list of partitions that need to update statistics.
   * @throws HiveException
   */
private List<Partition> getPartitionsList(Hive db) throws HiveException {
    if (work.getLoadFileDesc() != null) {
        //we are in CTAS, so we know there are no partitions
        return null;
    }
    List<Partition> list = new ArrayList<Partition>();
    if (work.getTableSpecs() != null) {
        // ANALYZE command
        TableSpec tblSpec = work.getTableSpecs();
        table = tblSpec.tableHandle;
        if (!table.isPartitioned()) {
            return null;
        }
        // get all partitions that matches with the partition spec
        List<Partition> partitions = tblSpec.partitions;
        if (partitions != null) {
            for (Partition partn : partitions) {
                list.add(partn);
            }
        }
    } else if (work.getLoadTableDesc() != null) {
        // INSERT OVERWRITE command
        LoadTableDesc tbd = work.getLoadTableDesc();
        table = db.getTable(tbd.getTable().getTableName());
        if (!table.isPartitioned()) {
            return null;
        }
        DynamicPartitionCtx dpCtx = tbd.getDPCtx();
        if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
            // If no dynamic partitions are generated, dpPartSpecs may not be initialized
            if (dpPartSpecs != null) {
                // load the list of DP partitions and return the list of partition specs
                list.addAll(dpPartSpecs);
            }
        } else {
            // static partition
            Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
            list.add(partn);
        }
    }
    return list;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableSpec(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec) ArrayList(java.util.ArrayList) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx)

Example 3 with DynamicPartitionCtx

use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.

the class GenMapRedUtils method createMRWorkForMergingFiles.

/**
   * @param fsInput The FileSink operator.
   * @param ctx The MR processing context.
   * @param finalName the final destination path the merge job should output.
   * @param dependencyTask
   * @param mvTasks
   * @param conf
   * @param currTask
   * @throws SemanticException

   * create a Map-only merge job using CombineHiveInputFormat for all partitions with
   * following operators:
   *          MR job J0:
   *          ...
   *          |
   *          v
   *          FileSinkOperator_1 (fsInput)
   *          |
   *          v
   *          Merge job J1:
   *          |
   *          v
   *          TableScan (using CombineHiveInputFormat) (tsMerge)
   *          |
   *          v
   *          FileSinkOperator (fsMerge)
   *
   *          Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths
   *          do
   *          not contain the dynamic partitions (their parent). So after the dynamic partitions are
   *          created (after the first job finished before the moveTask or ConditionalTask start),
   *          we need to change the pathToPartitionInfo & pathToAlias to include the dynamic
   *          partition
   *          directories.
   *
   */
public static void createMRWorkForMergingFiles(FileSinkOperator fsInput, Path finalName, DependencyCollectionTask dependencyTask, List<Task<MoveWork>> mvTasks, HiveConf conf, Task<? extends Serializable> currTask) throws SemanticException {
    //
    // 1. create the operator tree
    //
    FileSinkDesc fsInputDesc = fsInput.getConf();
    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    TableScanOperator tsMerge = GenMapRedUtils.createTemporaryTableScanOperator(fsInput.getCompilationOpContext(), inputRS);
    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);
    // If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema
    // needs to include the partition column, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
        // adding DP ColumnInfo to the RowSchema signature
        ArrayList<ColumnInfo> signature = inputRS.getSignature();
        String tblAlias = fsInputDesc.getTableInfo().getTableName();
        for (String dpCol : dpCtx.getDPColNames()) {
            ColumnInfo colInfo = new ColumnInfo(dpCol, // all partition column type should be string
            TypeInfoFactory.stringTypeInfo, tblAlias, // partition column is virtual column
            true);
            signature.add(colInfo);
        }
        inputRS.setSignature(signature);
        // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
        DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
        fsOutputDesc.setDynPartCtx(dpCtx2);
        // update the FileSinkOperator to include partition columns
        usePartitionColumns(fsInputDesc.getTableInfo().getProperties(), dpCtx.getDPColNames());
    } else {
        // non-partitioned table
        fsInputDesc.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    }
    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MoveWork dummyMv = new MoveWork(null, null, null, new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false);
    MapWork cplan;
    Serializable work;
    if ((conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) || (conf.getBoolVar(ConfVars.HIVEMERGEORCFILESTRIPELEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class))) {
        cplan = GenMapRedUtils.createMergeTask(fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, fsInput.getCompilationOpContext());
        if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
            work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
            cplan.setName("File Merge");
            ((TezWork) work).add(cplan);
        } else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
            work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
            cplan.setName("Spark Merge File Work");
            ((SparkWork) work).add(cplan);
        } else {
            work = cplan;
        }
    } else {
        cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
        if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
            work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
            cplan.setName("File Merge");
            ((TezWork) work).add(cplan);
        } else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
            work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
            cplan.setName("Spark Merge File Work");
            ((SparkWork) work).add(cplan);
        } else {
            work = new MapredWork();
            ((MapredWork) work).setMapWork(cplan);
        }
    }
    // use CombineHiveInputFormat for map-only merging
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
    // know if merge MR2 will be triggered at execution time
    Task<MoveWork> mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOutput);
    ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work, fsInputDesc.getFinalDirName(), finalName, mvTask, dependencyTask);
    // keep the dynamic partition context in conditional task resolver context
    ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
    mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());
    mrCtx.setLbCtx(fsInputDesc.getLbCtx());
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) Serializable(java.io.Serializable) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ConditionalResolverMergeFilesCtx(org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Example 4 with DynamicPartitionCtx

use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.

the class SemanticAnalyzer method genFileSinkPlan.

@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    QBMetaData qbm = qb.getMetaData();
    Integer dest_type = qbm.getDestTypeForAlias(dest);
    // destination table if any
    Table dest_tab = null;
    // should the destination table be written to using ACID
    boolean destTableIsAcid = false;
    boolean destTableIsTemporary = false;
    boolean destTableIsMaterialization = false;
    // destination partition if any
    Partition dest_part = null;
    // the intermediate destination directory
    Path queryTmpdir = null;
    // the final destination directory
    Path dest_path = null;
    TableDesc table_desc = null;
    int currentTableId = 0;
    boolean isLocal = false;
    SortBucketRSCtx rsCtx = new SortBucketRSCtx();
    DynamicPartitionCtx dpCtx = null;
    LoadTableDesc ltd = null;
    ListBucketingCtx lbCtx = null;
    Map<String, String> partSpec = null;
    switch(dest_type.intValue()) {
        case QBMetaData.DEST_TABLE:
            {
                dest_tab = qbm.getDestTableForAlias(dest);
                destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
                destTableIsTemporary = dest_tab.isTemporary();
                // Is the user trying to insert into a external tables
                if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) {
                    throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
                }
                partSpec = qbm.getPartSpecForAlias(dest);
                dest_path = dest_tab.getPath();
                // verify that our destination is empty before proceeding
                if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
                    try {
                        FileSystem fs = dest_path.getFileSystem(conf);
                        if (!MetaStoreUtils.isDirEmpty(fs, dest_path)) {
                            LOG.warn("Attempted write into an immutable table : " + dest_tab.getTableName() + " : " + dest_path);
                            throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
                        }
                    } catch (IOException ioe) {
                        LOG.warn("Error while trying to determine if immutable table has any data : " + dest_tab.getTableName() + " : " + dest_path);
                        throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
                    }
                }
                // check for partition
                List<FieldSchema> parts = dest_tab.getPartitionKeys();
                if (parts != null && parts.size() > 0) {
                    // table is partitioned
                    if (partSpec == null || partSpec.size() == 0) {
                        // user did NOT specify partition
                        throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
                    }
                    dpCtx = qbm.getDPCtx(dest);
                    if (dpCtx == null) {
                        dest_tab.validatePartColumnNames(partSpec, false);
                        dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
                        qbm.setDPCtx(dest, dpCtx);
                    }
                    if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) {
                        // allow DP
                        throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
                    }
                    if (dpCtx.getSPPath() != null) {
                        dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
                    }
                    if ((dest_tab.getNumBuckets() > 0)) {
                        dpCtx.setNumBuckets(dest_tab.getNumBuckets());
                    }
                }
                boolean isNonNativeTable = dest_tab.isNonNative();
                if (isNonNativeTable) {
                    queryTmpdir = dest_path;
                } else {
                    queryTmpdir = ctx.getTempDirForPath(dest_path, true);
                }
                if (dpCtx != null) {
                    // set the root of the temporary path where dynamic partition columns will populate
                    dpCtx.setRootPath(queryTmpdir);
                }
                // this table_desc does not contain the partitioning columns
                table_desc = Utilities.getTableDesc(dest_tab);
                // Add sorting/bucketing if needed
                input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
                idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
                currentTableId = destTableId;
                destTableId++;
                lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(), dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(), dest_tab.isStoredAsSubDirectories(), conf);
                // NOTE: specify Dynamic partitions in dest_tab for WriteEntity
                if (!isNonNativeTable) {
                    AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                    if (destTableIsAcid) {
                        acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
                        checkAcidConstraints(qb, table_desc, dest_tab);
                    }
                    ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp);
                    ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
                    ltd.setLbCtx(lbCtx);
                    loadTableWork.add(ltd);
                } else {
                    // This is a non-native table.
                    // We need to set stats as inaccurate.
                    setStatsForNonNativeTable(dest_tab);
                    // true if it is insert overwrite.
                    boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName()));
                    createInsertDesc(dest_tab, overwrite);
                }
                WriteEntity output = null;
                // list of dynamically created partitions are known.
                if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) {
                    output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest));
                    if (!outputs.add(output)) {
                        throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName()));
                    }
                }
                if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) {
                    // No static partition specified
                    if (dpCtx.getNumSPCols() == 0) {
                        output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest), false);
                        outputs.add(output);
                        output.setDynamicPartitionWrite(true);
                    } else // part of the partition specified
                    // Create a DummyPartition in this case. Since, the metastore does not store partial
                    // partitions currently, we need to store dummy partitions
                    {
                        try {
                            String ppath = dpCtx.getSPPath();
                            ppath = ppath.substring(0, ppath.length() - 1);
                            DummyPartition p = new DummyPartition(dest_tab, dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath, partSpec);
                            output = new WriteEntity(p, getWriteType(dest), false);
                            output.setDynamicPartitionWrite(true);
                            outputs.add(output);
                        } catch (HiveException e) {
                            throw new SemanticException(e.getMessage(), e);
                        }
                    }
                }
                ctx.getLoadTableOutputMap().put(ltd, output);
                break;
            }
        case QBMetaData.DEST_PARTITION:
            {
                dest_part = qbm.getDestPartitionForAlias(dest);
                dest_tab = dest_part.getTable();
                destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
                if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) {
                    throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
                }
                Path tabPath = dest_tab.getPath();
                Path partPath = dest_part.getDataLocation();
                // verify that our destination is empty before proceeding
                if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
                    try {
                        FileSystem fs = partPath.getFileSystem(conf);
                        if (!MetaStoreUtils.isDirEmpty(fs, partPath)) {
                            LOG.warn("Attempted write into an immutable table partition : " + dest_tab.getTableName() + " : " + partPath);
                            throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
                        }
                    } catch (IOException ioe) {
                        LOG.warn("Error while trying to determine if immutable table partition has any data : " + dest_tab.getTableName() + " : " + partPath);
                        throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
                    }
                }
                // if the table is in a different dfs than the partition,
                // replace the partition's dfs with the table's dfs.
                dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
                queryTmpdir = ctx.getTempDirForPath(dest_path, true);
                table_desc = Utilities.getTableDesc(dest_tab);
                // Add sorting/bucketing if needed
                input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
                idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
                currentTableId = destTableId;
                destTableId++;
                lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf);
                AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                if (destTableIsAcid) {
                    acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
                    checkAcidConstraints(qb, table_desc, dest_tab);
                }
                ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
                ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
                ltd.setLbCtx(lbCtx);
                loadTableWork.add(ltd);
                if (!outputs.add(new WriteEntity(dest_part, determineWriteType(ltd, dest_tab.isNonNative(), dest)))) {
                    throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
                }
                break;
            }
        case QBMetaData.DEST_LOCAL_FILE:
            isLocal = true;
        // fall through
        case QBMetaData.DEST_DFS_FILE:
            {
                dest_path = new Path(qbm.getDestFileForAlias(dest));
                if (isLocal) {
                    // for local directory - we always write to map-red intermediate
                    // store and then copy to local fs
                    queryTmpdir = ctx.getMRTmpPath();
                } else {
                    try {
                        Path qPath = FileUtils.makeQualified(dest_path, conf);
                        queryTmpdir = ctx.getTempDirForPath(qPath, true);
                    } catch (Exception e) {
                        throw new SemanticException("Error creating temporary folder on: " + dest_path, e);
                    }
                }
                String cols = "";
                String colTypes = "";
                ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
                // CTAS case: the file output format and serde are defined by the create
                // table command rather than taking the default value
                List<FieldSchema> field_schemas = null;
                CreateTableDesc tblDesc = qb.getTableDesc();
                CreateViewDesc viewDesc = qb.getViewDesc();
                if (tblDesc != null) {
                    field_schemas = new ArrayList<FieldSchema>();
                    destTableIsTemporary = tblDesc.isTemporary();
                    destTableIsMaterialization = tblDesc.isMaterialization();
                } else if (viewDesc != null) {
                    field_schemas = new ArrayList<FieldSchema>();
                    destTableIsTemporary = false;
                }
                boolean first = true;
                for (ColumnInfo colInfo : colInfos) {
                    String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
                    if (nm[1] != null) {
                        // non-null column alias
                        colInfo.setAlias(nm[1]);
                    }
                    //default column name
                    String colName = colInfo.getInternalName();
                    if (field_schemas != null) {
                        FieldSchema col = new FieldSchema();
                        if (!("".equals(nm[0])) && nm[1] != null) {
                            // remove ``
                            colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase();
                        }
                        colName = fixCtasColumnName(colName);
                        col.setName(colName);
                        String typeName = colInfo.getType().getTypeName();
                        // CTAS should NOT create a VOID type
                        if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
                            throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName));
                        }
                        col.setType(typeName);
                        field_schemas.add(col);
                    }
                    if (!first) {
                        cols = cols.concat(",");
                        colTypes = colTypes.concat(":");
                    }
                    first = false;
                    cols = cols.concat(colName);
                    // Replace VOID type with string when the output is a temp table or
                    // local files.
                    // A VOID type can be generated under the query:
                    //
                    // select NULL from tt;
                    // or
                    // insert overwrite local directory "abc" select NULL from tt;
                    //
                    // where there is no column type to which the NULL value should be
                    // converted.
                    //
                    String tName = colInfo.getType().getTypeName();
                    if (tName.equals(serdeConstants.VOID_TYPE_NAME)) {
                        colTypes = colTypes.concat(serdeConstants.STRING_TYPE_NAME);
                    } else {
                        colTypes = colTypes.concat(tName);
                    }
                }
                // update the create table descriptor with the resulting schema.
                if (tblDesc != null) {
                    tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
                } else if (viewDesc != null) {
                    viewDesc.setSchema(new ArrayList<FieldSchema>(field_schemas));
                }
                boolean isDestTempFile = true;
                if (!ctx.isMRTmpFileURI(dest_path.toUri().toString())) {
                    idToTableNameMap.put(String.valueOf(destTableId), dest_path.toUri().toString());
                    currentTableId = destTableId;
                    destTableId++;
                    isDestTempFile = false;
                }
                boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
                loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes));
                if (tblDesc == null) {
                    if (viewDesc != null) {
                        table_desc = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
                    } else if (qb.getIsQuery()) {
                        String fileFormat;
                        if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
                            fileFormat = "SequenceFile";
                            HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT, fileFormat);
                            table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, ThriftJDBCBinarySerDe.class);
                            // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                            // write out formatted thrift objects to SequenceFile
                            conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
                        } else {
                            fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
                            table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, LazySimpleSerDe.class);
                        }
                    } else {
                        table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
                    }
                } else {
                    table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
                }
                if (!outputs.add(new WriteEntity(dest_path, !isDfsDir, isDestTempFile))) {
                    throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_path.toUri().toString()));
                }
                break;
            }
        default:
            throw new SemanticException("Unknown destination type: " + dest_type);
    }
    input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
    inputRR = opParseCtx.get(input).getRowResolver();
    ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
    if (updating(dest) || deleting(dest)) {
        vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
    } else {
        try {
            StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc.getDeserializer(conf).getObjectInspector();
            List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
            for (int i = 0; i < fields.size(); i++) {
                vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), "", false));
            }
        } catch (Exception e) {
            throw new SemanticException(e.getMessage(), e);
        }
    }
    RowSchema fsRS = new RowSchema(vecCol);
    // The output files of a FileSink can be merged if they are either not being written to a table
    // or are being written to a table which is not bucketed
    // and table the table is not sorted
    boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0) || (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0)));
    // If this table is working with ACID semantics, turn off merging
    canBeMerged &= !destTableIsAcid;
    // Generate the partition columns from the parent input
    if (dest_type.intValue() == QBMetaData.DEST_TABLE || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
        genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
    }
    FileSinkDesc fileSinkDesc = new FileSinkDesc(queryTmpdir, table_desc, conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId, rsCtx.isMultiFileSpray(), canBeMerged, rsCtx.getNumFiles(), rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx, dest_path);
    boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
    fileSinkDesc.setHiveServerQuery(isHiveServerQuery);
    // FileSinkOperator knows how to properly write to it.
    if (destTableIsAcid) {
        AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT);
        fileSinkDesc.setWriteType(wt);
        acidFileSinks.add(fileSinkDesc);
    }
    fileSinkDesc.setTemporary(destTableIsTemporary);
    fileSinkDesc.setMaterialization(destTableIsMaterialization);
    /* Set List Bucketing context. */
    if (lbCtx != null) {
        lbCtx.processRowSkewedIndex(fsRS);
        lbCtx.calculateSkewedValueSubDirList();
    }
    fileSinkDesc.setLbCtx(lbCtx);
    // set the stats publishing/aggregating key prefix
    // the same as directory name. The directory name
    // can be changed in the optimizer but the key should not be changed
    // it should be the same as the MoveWork's sourceDir.
    fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
    if (!destTableIsMaterialization && HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
        String statsTmpLoc = ctx.getTempDirForPath(dest_path).toString();
        fileSinkDesc.setStatsTmpDir(statsTmpLoc);
        LOG.debug("Set stats collection dir : " + statsTmpLoc);
    }
    if (dest_part != null) {
        try {
            String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
            fileSinkDesc.setStaticSpec(staticSpec);
        } catch (MetaException e) {
            throw new SemanticException(e);
        }
    } else if (dpCtx != null) {
        fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
    }
    if (isHiveServerQuery && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
        fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true);
    } else {
        fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false);
    }
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
    if (ltd != null && SessionState.get() != null) {
        SessionState.get().getLineageState().mapDirToOp(ltd.getSourcePath(), (FileSinkOperator) output);
    } else if (queryState.getCommandType().equals(HiveOperation.CREATETABLE_AS_SELECT.getOperationName())) {
        Path tlocation = null;
        String tName = Utilities.getDbTableName(tableDesc.getTableName())[1];
        try {
            Warehouse wh = new Warehouse(conf);
            tlocation = wh.getTablePath(db.getDatabase(tableDesc.getDatabaseName()), tName);
        } catch (MetaException | HiveException e) {
            throw new SemanticException(e);
        }
        SessionState.get().getLineageState().mapDirToOp(tlocation, (FileSinkOperator) output);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + dest_path + " row schema: " + inputRR.toString());
    }
    FileSinkOperator fso = (FileSinkOperator) output;
    fso.getConf().setTable(dest_tab);
    fsopToTable.put(fso, dest_tab);
    // and it is an insert overwrite or insert into table
    if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
        if (dest_type.intValue() == QBMetaData.DEST_TABLE) {
            genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
        } else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) {
            genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
        }
    }
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) HiveOperation(org.apache.hadoop.hive.ql.plan.HiveOperation) CreateViewDesc(org.apache.hadoop.hive.ql.plan.CreateViewDesc) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) FileSystem(org.apache.hadoop.fs.FileSystem) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) ListBucketingCtx(org.apache.hadoop.hive.ql.plan.ListBucketingCtx) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) List(java.util.List) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Table(org.apache.hadoop.hive.ql.metadata.Table) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) IOException(java.io.IOException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) InsertTableDesc(org.apache.hadoop.hive.ql.plan.InsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)

Example 5 with DynamicPartitionCtx

use of org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx in project hive by apache.

the class MoveTask method execute.

@Override
public int execute(DriverContext driverContext) {
    try {
        if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
            return 0;
        }
        Hive db = getHive();
        // Do any hive related operations like moving tables and files
        // to appropriate locations
        LoadFileDesc lfd = work.getLoadFileWork();
        if (lfd != null) {
            Path targetPath = lfd.getTargetDir();
            Path sourcePath = lfd.getSourcePath();
            moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
        }
        // Multi-file load is for dynamic partitions when some partitions do not
        // need to merge and they can simply be moved to the target directory.
        LoadMultiFilesDesc lmfd = work.getLoadMultiFilesWork();
        if (lmfd != null) {
            boolean isDfsDir = lmfd.getIsDfsDir();
            int i = 0;
            while (i < lmfd.getSourceDirs().size()) {
                Path srcPath = lmfd.getSourceDirs().get(i);
                Path destPath = lmfd.getTargetDirs().get(i);
                FileSystem fs = destPath.getFileSystem(conf);
                if (!fs.exists(destPath.getParent())) {
                    fs.mkdirs(destPath.getParent());
                }
                moveFile(srcPath, destPath, isDfsDir);
                i++;
            }
        }
        // Next we do this for tables and partitions
        LoadTableDesc tbd = work.getLoadTableWork();
        if (tbd != null) {
            StringBuilder mesg = new StringBuilder("Loading data to table ").append(tbd.getTable().getTableName());
            if (tbd.getPartitionSpec().size() > 0) {
                mesg.append(" partition (");
                Map<String, String> partSpec = tbd.getPartitionSpec();
                for (String key : partSpec.keySet()) {
                    mesg.append(key).append('=').append(partSpec.get(key)).append(", ");
                }
                mesg.setLength(mesg.length() - 2);
                mesg.append(')');
            }
            String mesg_detail = " from " + tbd.getSourcePath();
            console.printInfo(mesg.toString(), mesg_detail);
            Table table = db.getTable(tbd.getTable().getTableName());
            if (work.getCheckFileFormat()) {
                // Get all files from the src directory
                FileStatus[] dirs;
                ArrayList<FileStatus> files;
                // source filesystem
                FileSystem srcFs;
                try {
                    srcFs = tbd.getSourcePath().getFileSystem(conf);
                    dirs = srcFs.globStatus(tbd.getSourcePath());
                    files = new ArrayList<FileStatus>();
                    for (int i = 0; (dirs != null && i < dirs.length); i++) {
                        files.addAll(Arrays.asList(srcFs.listStatus(dirs[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER)));
                        // one.
                        if (files.size() > 0) {
                            break;
                        }
                    }
                } catch (IOException e) {
                    throw new HiveException("addFiles: filesystem error in check phase", e);
                }
                // handle file format check for table level
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
                    boolean flag = true;
                    // dynamic partition context is null
                    if (tbd.getDPCtx() == null) {
                        if (tbd.getPartitionSpec() == null || tbd.getPartitionSpec().isEmpty()) {
                            // Check if the file format of the file matches that of the table.
                            flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
                        } else {
                            // Check if the file format of the file matches that of the partition
                            Partition oldPart = db.getPartition(table, tbd.getPartitionSpec(), false);
                            if (oldPart == null) {
                                // this means we have just created a table and are specifying partition in the
                                // load statement (without pre-creating the partition), in which case lets use
                                // table input format class. inheritTableSpecs defaults to true so when a new
                                // partition is created later it will automatically inherit input format
                                // from table object
                                flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
                            } else {
                                flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, oldPart.getInputFormatClass(), files);
                            }
                        }
                        if (!flag) {
                            throw new HiveException("Wrong file format. Please check the file's format.");
                        }
                    } else {
                        LOG.warn("Skipping file format check as dpCtx is not null");
                    }
                }
            }
            // Create a data container
            DataContainer dc = null;
            if (tbd.getPartitionSpec().size() == 0) {
                dc = new DataContainer(table.getTTable());
                db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), work.isSrcLocal(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask());
                if (work.getOutputs() != null) {
                    DDLTask.addIfAbsentByName(new WriteEntity(table, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
                }
            } else {
                LOG.info("Partition is: " + tbd.getPartitionSpec().toString());
                // Check if the bucketing and/or sorting columns were inferred
                List<BucketCol> bucketCols = null;
                List<SortCol> sortCols = null;
                int numBuckets = -1;
                Task task = this;
                String path = tbd.getSourcePath().toUri().toString();
                // (Either standard, local, or a merge)
                while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
                    task = (Task) task.getParentTasks().get(0);
                    // If it was a merge task or a local map reduce task, nothing can be inferred
                    if (task instanceof MergeFileTask || task instanceof MapredLocalTask) {
                        break;
                    }
                    // the directory this move task is moving
                    if (task instanceof MapRedTask) {
                        MapredWork work = (MapredWork) task.getWork();
                        MapWork mapWork = work.getMapWork();
                        bucketCols = mapWork.getBucketedColsByDirectory().get(path);
                        sortCols = mapWork.getSortedColsByDirectory().get(path);
                        if (work.getReduceWork() != null) {
                            numBuckets = work.getReduceWork().getNumReduceTasks();
                        }
                        if (bucketCols != null || sortCols != null) {
                            // operator that writes the final output)
                            assert work.isFinalMapRed();
                        }
                        break;
                    }
                    // condition for merging is not met, see GenMRFileSink1.
                    if (task instanceof MoveTask) {
                        if (((MoveTask) task).getWork().getLoadFileWork() != null) {
                            path = ((MoveTask) task).getWork().getLoadFileWork().getSourcePath().toUri().toString();
                        }
                    }
                }
                // deal with dynamic partitions
                DynamicPartitionCtx dpCtx = tbd.getDPCtx();
                if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
                    // dynamic partitions
                    List<LinkedHashMap<String, String>> dps = Utilities.getFullDPSpecs(conf, dpCtx);
                    console.printInfo(System.getProperty("line.separator"));
                    long startTime = System.currentTimeMillis();
                    // load the list of DP partitions and return the list of partition specs
                    // TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions
                    // to use Utilities.getFullDPSpecs() to get the list of full partSpecs.
                    // After that check the number of DPs created to not exceed the limit and
                    // iterate over it and call loadPartition() here.
                    // The reason we don't do inside HIVE-1361 is the latter is large and we
                    // want to isolate any potential issue it may introduce.
                    Map<Map<String, String>, Partition> dp = db.loadDynamicPartitions(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), dpCtx.getNumDPCols(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(), work.getLoadTableWork().getWriteType());
                    // publish DP columns to its subscribers
                    if (dps != null && dps.size() > 0) {
                        pushFeed(FeedType.DYNAMIC_PARTITIONS, dp.values());
                    }
                    String loadTime = "\t Time taken to load dynamic partitions: " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds";
                    console.printInfo(loadTime);
                    LOG.info(loadTime);
                    if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) {
                        throw new HiveException("This query creates no partitions." + " To turn off this error, set hive.error.on.empty.partition=false.");
                    }
                    startTime = System.currentTimeMillis();
                    // and put it to WriteEntity for post-exec hook
                    for (Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) {
                        Partition partn = entry.getValue();
                        if (bucketCols != null || sortCols != null) {
                            updatePartitionBucketSortColumns(db, table, partn, bucketCols, numBuckets, sortCols);
                        }
                        WriteEntity enty = new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType()));
                        if (work.getOutputs() != null) {
                            DDLTask.addIfAbsentByName(enty, work.getOutputs());
                        }
                        // queryPlan here.
                        if (queryPlan.getOutputs() == null) {
                            queryPlan.setOutputs(new LinkedHashSet<WriteEntity>());
                        }
                        queryPlan.getOutputs().add(enty);
                        // update columnar lineage for each partition
                        dc = new DataContainer(table.getTTable(), partn.getTPartition());
                        // Don't set lineage on delete as we don't have all the columns
                        if (SessionState.get() != null && work.getLoadTableWork().getWriteType() != AcidUtils.Operation.DELETE && work.getLoadTableWork().getWriteType() != AcidUtils.Operation.UPDATE) {
                            SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, table.getCols());
                        }
                        LOG.info("\tLoading partition " + entry.getKey());
                    }
                    console.printInfo("\t Time taken for adding to write entity : " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
                    // reset data container to prevent it being added again.
                    dc = null;
                } else {
                    // static partitions
                    List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec());
                    db.validatePartitionNameCharacters(partVals);
                    db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask());
                    Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
                    if (bucketCols != null || sortCols != null) {
                        updatePartitionBucketSortColumns(db, table, partn, bucketCols, numBuckets, sortCols);
                    }
                    dc = new DataContainer(table.getTTable(), partn.getTPartition());
                    // add this partition to post-execution hook
                    if (work.getOutputs() != null) {
                        DDLTask.addIfAbsentByName(new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
                    }
                }
            }
            if (SessionState.get() != null && dc != null) {
                // If we are doing an update or a delete the number of columns in the table will not
                // match the number of columns in the file sink.  For update there will be one too many
                // (because of the ROW__ID), and in the case of the delete there will be just the
                // ROW__ID, which we don't need to worry about from a lineage perspective.
                List<FieldSchema> tableCols = null;
                switch(work.getLoadTableWork().getWriteType()) {
                    case DELETE:
                    case UPDATE:
                        // Pass an empty list as no columns will be written to the file.
                        // TODO I should be able to make this work for update
                        tableCols = new ArrayList<FieldSchema>();
                        break;
                    default:
                        tableCols = table.getCols();
                        break;
                }
                SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, tableCols);
            }
            releaseLocks(tbd);
        }
        return 0;
    } catch (Exception e) {
        console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
        setException(e);
        return (1);
    }
}
Also used : MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) MergeFileTask(org.apache.hadoop.hive.ql.io.merge.MergeFileTask) FileStatus(org.apache.hadoop.fs.FileStatus) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) LinkedHashMap(java.util.LinkedHashMap) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DataContainer(org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) FileSystem(org.apache.hadoop.fs.FileSystem) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) Table(org.apache.hadoop.hive.ql.metadata.Table) BucketCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol) SortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol) IOException(java.io.IOException) LoadMultiFilesDesc(org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) Hive(org.apache.hadoop.hive.ql.metadata.Hive) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) MergeFileTask(org.apache.hadoop.hive.ql.io.merge.MergeFileTask)

Aggregations

DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx)8 Path (org.apache.hadoop.fs.Path)4 FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc)4 LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 Partition (org.apache.hadoop.hive.ql.metadata.Partition)3 LoadFileDesc (org.apache.hadoop.hive.ql.plan.LoadFileDesc)3 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)3 LinkedHashMap (java.util.LinkedHashMap)2 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)2 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)2 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)2 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)2 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)2 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)2 FileNotFoundException (java.io.FileNotFoundException)1 Serializable (java.io.Serializable)1