Search in sources :

Example 11 with FileSinkDesc

use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

the class GenMapRedUtils method createMoveTask.

/**
   * Create and add any dependent move tasks
   *
   * @param currTask
   * @param chDir
   * @param fsOp
   * @param parseCtx
   * @param mvTasks
   * @param hconf
   * @param dependencyTask
   * @return
   */
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
    Path dest = null;
    if (chDir) {
        FileSinkDesc fileSinkDesc = fsOp.getConf();
        dest = fileSinkDesc.getFinalDirName();
        // generate the temporary file
        // it must be on the same file system as the current destination
        Context baseCtx = parseCtx.getContext();
        // Create the required temporary file in the HDFS location if the destination
        // path of the FileSinkOperator table is a blobstore path.
        Path tmpDir = baseCtx.getTempDirForPath(fileSinkDesc.getDestPath(), true);
        // Change all the linked file sink descriptors
        if (fileSinkDesc.isLinkedFileSink()) {
            for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
                fsConf.setParentDir(tmpDir);
                fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName()));
            }
        } else {
            fileSinkDesc.setDirName(tmpDir);
        }
    }
    Task<MoveWork> mvTask = null;
    if (!chDir) {
        mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOp);
    }
    // Set the move task to be dependent on the current task
    if (mvTask != null) {
        GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)

Example 12 with FileSinkDesc

use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

the class SemanticAnalyzer method genFileSinkPlan.

@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    QBMetaData qbm = qb.getMetaData();
    Integer dest_type = qbm.getDestTypeForAlias(dest);
    // destination table if any
    Table dest_tab = null;
    // should the destination table be written to using ACID
    boolean destTableIsAcid = false;
    boolean destTableIsTemporary = false;
    boolean destTableIsMaterialization = false;
    // destination partition if any
    Partition dest_part = null;
    // the intermediate destination directory
    Path queryTmpdir = null;
    // the final destination directory
    Path dest_path = null;
    TableDesc table_desc = null;
    int currentTableId = 0;
    boolean isLocal = false;
    SortBucketRSCtx rsCtx = new SortBucketRSCtx();
    DynamicPartitionCtx dpCtx = null;
    LoadTableDesc ltd = null;
    ListBucketingCtx lbCtx = null;
    Map<String, String> partSpec = null;
    switch(dest_type.intValue()) {
        case QBMetaData.DEST_TABLE:
            {
                dest_tab = qbm.getDestTableForAlias(dest);
                destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
                destTableIsTemporary = dest_tab.isTemporary();
                // Is the user trying to insert into a external tables
                if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) {
                    throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
                }
                partSpec = qbm.getPartSpecForAlias(dest);
                dest_path = dest_tab.getPath();
                // verify that our destination is empty before proceeding
                if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
                    try {
                        FileSystem fs = dest_path.getFileSystem(conf);
                        if (!MetaStoreUtils.isDirEmpty(fs, dest_path)) {
                            LOG.warn("Attempted write into an immutable table : " + dest_tab.getTableName() + " : " + dest_path);
                            throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
                        }
                    } catch (IOException ioe) {
                        LOG.warn("Error while trying to determine if immutable table has any data : " + dest_tab.getTableName() + " : " + dest_path);
                        throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
                    }
                }
                // check for partition
                List<FieldSchema> parts = dest_tab.getPartitionKeys();
                if (parts != null && parts.size() > 0) {
                    // table is partitioned
                    if (partSpec == null || partSpec.size() == 0) {
                        // user did NOT specify partition
                        throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
                    }
                    dpCtx = qbm.getDPCtx(dest);
                    if (dpCtx == null) {
                        dest_tab.validatePartColumnNames(partSpec, false);
                        dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
                        qbm.setDPCtx(dest, dpCtx);
                    }
                    if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) {
                        // allow DP
                        throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
                    }
                    if (dpCtx.getSPPath() != null) {
                        dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
                    }
                    if ((dest_tab.getNumBuckets() > 0)) {
                        dpCtx.setNumBuckets(dest_tab.getNumBuckets());
                    }
                }
                boolean isNonNativeTable = dest_tab.isNonNative();
                if (isNonNativeTable) {
                    queryTmpdir = dest_path;
                } else {
                    queryTmpdir = ctx.getTempDirForPath(dest_path, true);
                }
                if (dpCtx != null) {
                    // set the root of the temporary path where dynamic partition columns will populate
                    dpCtx.setRootPath(queryTmpdir);
                }
                // this table_desc does not contain the partitioning columns
                table_desc = Utilities.getTableDesc(dest_tab);
                // Add sorting/bucketing if needed
                input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
                idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
                currentTableId = destTableId;
                destTableId++;
                lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(), dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(), dest_tab.isStoredAsSubDirectories(), conf);
                // NOTE: specify Dynamic partitions in dest_tab for WriteEntity
                if (!isNonNativeTable) {
                    AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                    if (destTableIsAcid) {
                        acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
                        checkAcidConstraints(qb, table_desc, dest_tab);
                    }
                    ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp);
                    ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
                    ltd.setLbCtx(lbCtx);
                    loadTableWork.add(ltd);
                } else {
                    // This is a non-native table.
                    // We need to set stats as inaccurate.
                    setStatsForNonNativeTable(dest_tab);
                    // true if it is insert overwrite.
                    boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName()));
                    createInsertDesc(dest_tab, overwrite);
                }
                WriteEntity output = null;
                // list of dynamically created partitions are known.
                if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) {
                    output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest));
                    if (!outputs.add(output)) {
                        throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName()));
                    }
                }
                if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) {
                    // No static partition specified
                    if (dpCtx.getNumSPCols() == 0) {
                        output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest), false);
                        outputs.add(output);
                        output.setDynamicPartitionWrite(true);
                    } else // part of the partition specified
                    // Create a DummyPartition in this case. Since, the metastore does not store partial
                    // partitions currently, we need to store dummy partitions
                    {
                        try {
                            String ppath = dpCtx.getSPPath();
                            ppath = ppath.substring(0, ppath.length() - 1);
                            DummyPartition p = new DummyPartition(dest_tab, dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath, partSpec);
                            output = new WriteEntity(p, getWriteType(dest), false);
                            output.setDynamicPartitionWrite(true);
                            outputs.add(output);
                        } catch (HiveException e) {
                            throw new SemanticException(e.getMessage(), e);
                        }
                    }
                }
                ctx.getLoadTableOutputMap().put(ltd, output);
                break;
            }
        case QBMetaData.DEST_PARTITION:
            {
                dest_part = qbm.getDestPartitionForAlias(dest);
                dest_tab = dest_part.getTable();
                destTableIsAcid = AcidUtils.isAcidTable(dest_tab);
                if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) {
                    throw new SemanticException(ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName()));
                }
                Path tabPath = dest_tab.getPath();
                Path partPath = dest_part.getDataLocation();
                // verify that our destination is empty before proceeding
                if (dest_tab.isImmutable() && qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())) {
                    try {
                        FileSystem fs = partPath.getFileSystem(conf);
                        if (!MetaStoreUtils.isDirEmpty(fs, partPath)) {
                            LOG.warn("Attempted write into an immutable table partition : " + dest_tab.getTableName() + " : " + partPath);
                            throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName()));
                        }
                    } catch (IOException ioe) {
                        LOG.warn("Error while trying to determine if immutable table partition has any data : " + dest_tab.getTableName() + " : " + partPath);
                        throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage()));
                    }
                }
                // if the table is in a different dfs than the partition,
                // replace the partition's dfs with the table's dfs.
                dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
                queryTmpdir = ctx.getTempDirForPath(dest_path, true);
                table_desc = Utilities.getTableDesc(dest_tab);
                // Add sorting/bucketing if needed
                input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
                idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
                currentTableId = destTableId;
                destTableId++;
                lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf);
                AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
                if (destTableIsAcid) {
                    acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
                    checkAcidConstraints(qb, table_desc, dest_tab);
                }
                ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
                ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
                ltd.setLbCtx(lbCtx);
                loadTableWork.add(ltd);
                if (!outputs.add(new WriteEntity(dest_part, determineWriteType(ltd, dest_tab.isNonNative(), dest)))) {
                    throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
                }
                break;
            }
        case QBMetaData.DEST_LOCAL_FILE:
            isLocal = true;
        // fall through
        case QBMetaData.DEST_DFS_FILE:
            {
                dest_path = new Path(qbm.getDestFileForAlias(dest));
                if (isLocal) {
                    // for local directory - we always write to map-red intermediate
                    // store and then copy to local fs
                    queryTmpdir = ctx.getMRTmpPath();
                } else {
                    try {
                        Path qPath = FileUtils.makeQualified(dest_path, conf);
                        queryTmpdir = ctx.getTempDirForPath(qPath, true);
                    } catch (Exception e) {
                        throw new SemanticException("Error creating temporary folder on: " + dest_path, e);
                    }
                }
                String cols = "";
                String colTypes = "";
                ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
                // CTAS case: the file output format and serde are defined by the create
                // table command rather than taking the default value
                List<FieldSchema> field_schemas = null;
                CreateTableDesc tblDesc = qb.getTableDesc();
                CreateViewDesc viewDesc = qb.getViewDesc();
                if (tblDesc != null) {
                    field_schemas = new ArrayList<FieldSchema>();
                    destTableIsTemporary = tblDesc.isTemporary();
                    destTableIsMaterialization = tblDesc.isMaterialization();
                } else if (viewDesc != null) {
                    field_schemas = new ArrayList<FieldSchema>();
                    destTableIsTemporary = false;
                }
                boolean first = true;
                for (ColumnInfo colInfo : colInfos) {
                    String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
                    if (nm[1] != null) {
                        // non-null column alias
                        colInfo.setAlias(nm[1]);
                    }
                    //default column name
                    String colName = colInfo.getInternalName();
                    if (field_schemas != null) {
                        FieldSchema col = new FieldSchema();
                        if (!("".equals(nm[0])) && nm[1] != null) {
                            // remove ``
                            colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase();
                        }
                        colName = fixCtasColumnName(colName);
                        col.setName(colName);
                        String typeName = colInfo.getType().getTypeName();
                        // CTAS should NOT create a VOID type
                        if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
                            throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName));
                        }
                        col.setType(typeName);
                        field_schemas.add(col);
                    }
                    if (!first) {
                        cols = cols.concat(",");
                        colTypes = colTypes.concat(":");
                    }
                    first = false;
                    cols = cols.concat(colName);
                    // Replace VOID type with string when the output is a temp table or
                    // local files.
                    // A VOID type can be generated under the query:
                    //
                    // select NULL from tt;
                    // or
                    // insert overwrite local directory "abc" select NULL from tt;
                    //
                    // where there is no column type to which the NULL value should be
                    // converted.
                    //
                    String tName = colInfo.getType().getTypeName();
                    if (tName.equals(serdeConstants.VOID_TYPE_NAME)) {
                        colTypes = colTypes.concat(serdeConstants.STRING_TYPE_NAME);
                    } else {
                        colTypes = colTypes.concat(tName);
                    }
                }
                // update the create table descriptor with the resulting schema.
                if (tblDesc != null) {
                    tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
                } else if (viewDesc != null) {
                    viewDesc.setSchema(new ArrayList<FieldSchema>(field_schemas));
                }
                boolean isDestTempFile = true;
                if (!ctx.isMRTmpFileURI(dest_path.toUri().toString())) {
                    idToTableNameMap.put(String.valueOf(destTableId), dest_path.toUri().toString());
                    currentTableId = destTableId;
                    destTableId++;
                    isDestTempFile = false;
                }
                boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
                loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes));
                if (tblDesc == null) {
                    if (viewDesc != null) {
                        table_desc = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
                    } else if (qb.getIsQuery()) {
                        String fileFormat;
                        if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
                            fileFormat = "SequenceFile";
                            HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT, fileFormat);
                            table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, ThriftJDBCBinarySerDe.class);
                            // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                            // write out formatted thrift objects to SequenceFile
                            conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
                        } else {
                            fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
                            table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, LazySimpleSerDe.class);
                        }
                    } else {
                        table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
                    }
                } else {
                    table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
                }
                if (!outputs.add(new WriteEntity(dest_path, !isDfsDir, isDestTempFile))) {
                    throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_path.toUri().toString()));
                }
                break;
            }
        default:
            throw new SemanticException("Unknown destination type: " + dest_type);
    }
    input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
    inputRR = opParseCtx.get(input).getRowResolver();
    ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
    if (updating(dest) || deleting(dest)) {
        vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
    } else {
        try {
            StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc.getDeserializer(conf).getObjectInspector();
            List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
            for (int i = 0; i < fields.size(); i++) {
                vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), "", false));
            }
        } catch (Exception e) {
            throw new SemanticException(e.getMessage(), e);
        }
    }
    RowSchema fsRS = new RowSchema(vecCol);
    // The output files of a FileSink can be merged if they are either not being written to a table
    // or are being written to a table which is not bucketed
    // and table the table is not sorted
    boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0) || (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0)));
    // If this table is working with ACID semantics, turn off merging
    canBeMerged &= !destTableIsAcid;
    // Generate the partition columns from the parent input
    if (dest_type.intValue() == QBMetaData.DEST_TABLE || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
        genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
    }
    FileSinkDesc fileSinkDesc = new FileSinkDesc(queryTmpdir, table_desc, conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId, rsCtx.isMultiFileSpray(), canBeMerged, rsCtx.getNumFiles(), rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx, dest_path);
    boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
    fileSinkDesc.setHiveServerQuery(isHiveServerQuery);
    // FileSinkOperator knows how to properly write to it.
    if (destTableIsAcid) {
        AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT);
        fileSinkDesc.setWriteType(wt);
        acidFileSinks.add(fileSinkDesc);
    }
    fileSinkDesc.setTemporary(destTableIsTemporary);
    fileSinkDesc.setMaterialization(destTableIsMaterialization);
    /* Set List Bucketing context. */
    if (lbCtx != null) {
        lbCtx.processRowSkewedIndex(fsRS);
        lbCtx.calculateSkewedValueSubDirList();
    }
    fileSinkDesc.setLbCtx(lbCtx);
    // set the stats publishing/aggregating key prefix
    // the same as directory name. The directory name
    // can be changed in the optimizer but the key should not be changed
    // it should be the same as the MoveWork's sourceDir.
    fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
    if (!destTableIsMaterialization && HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
        String statsTmpLoc = ctx.getTempDirForPath(dest_path).toString();
        fileSinkDesc.setStatsTmpDir(statsTmpLoc);
        LOG.debug("Set stats collection dir : " + statsTmpLoc);
    }
    if (dest_part != null) {
        try {
            String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
            fileSinkDesc.setStaticSpec(staticSpec);
        } catch (MetaException e) {
            throw new SemanticException(e);
        }
    } else if (dpCtx != null) {
        fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
    }
    if (isHiveServerQuery && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
        fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true);
    } else {
        fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false);
    }
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
    if (ltd != null && SessionState.get() != null) {
        SessionState.get().getLineageState().mapDirToOp(ltd.getSourcePath(), (FileSinkOperator) output);
    } else if (queryState.getCommandType().equals(HiveOperation.CREATETABLE_AS_SELECT.getOperationName())) {
        Path tlocation = null;
        String tName = Utilities.getDbTableName(tableDesc.getTableName())[1];
        try {
            Warehouse wh = new Warehouse(conf);
            tlocation = wh.getTablePath(db.getDatabase(tableDesc.getDatabaseName()), tName);
        } catch (MetaException | HiveException e) {
            throw new SemanticException(e);
        }
        SessionState.get().getLineageState().mapDirToOp(tlocation, (FileSinkOperator) output);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + dest_path + " row schema: " + inputRR.toString());
    }
    FileSinkOperator fso = (FileSinkOperator) output;
    fso.getConf().setTable(dest_tab);
    fsopToTable.put(fso, dest_tab);
    // and it is an insert overwrite or insert into table
    if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
        if (dest_type.intValue() == QBMetaData.DEST_TABLE) {
            genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
        } else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) {
            genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
        }
    }
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) HiveOperation(org.apache.hadoop.hive.ql.plan.HiveOperation) CreateViewDesc(org.apache.hadoop.hive.ql.plan.CreateViewDesc) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) FileSystem(org.apache.hadoop.fs.FileSystem) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) ListBucketingCtx(org.apache.hadoop.hive.ql.plan.ListBucketingCtx) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) List(java.util.List) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Table(org.apache.hadoop.hive.ql.metadata.Table) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) IOException(java.io.IOException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) InsertTableDesc(org.apache.hadoop.hive.ql.plan.InsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)

Example 13 with FileSinkDesc

use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

the class GenMapRedUtils method createTemporaryFile.

/**
   * Break the pipeline between parent and child, and then
   * output data generated by parent to a temporary file stored in taskTmpDir.
   * A FileSinkOperator is added after parent to output the data.
   * Before child, we add a TableScanOperator to load data stored in the temporary
   * file back.
   * @param parent
   * @param child
   * @param taskTmpDir
   * @param tt_desc
   * @param parseCtx
   * @return The TableScanOperator inserted before child.
   */
public static TableScanOperator createTemporaryFile(Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child, Path taskTmpDir, TableDesc tt_desc, ParseContext parseCtx) {
    // Create a FileSinkOperator for the file name of taskTmpDir
    boolean compressIntermediate = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
    FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc, compressIntermediate);
    if (compressIntermediate) {
        desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
        desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
    }
    Operator<? extends OperatorDesc> fileSinkOp = OperatorFactory.get(parent.getCompilationOpContext(), desc, parent.getSchema());
    // Connect parent to fileSinkOp
    parent.replaceChild(child, fileSinkOp);
    fileSinkOp.setParentOperators(Utilities.makeList(parent));
    // Create a dummy TableScanOperator for the file generated through fileSinkOp
    TableScanOperator tableScanOp = createTemporaryTableScanOperator(parent.getCompilationOpContext(), parent.getSchema());
    // Connect this TableScanOperator to child.
    tableScanOp.setChildOperators(Utilities.makeList(child));
    child.replaceParent(parent, tableScanOp);
    return tableScanOp;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)

Example 14 with FileSinkDesc

use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

the class Vectorizer method vectorizeOperator.

public Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    boolean isNative;
    switch(op.getType()) {
        case TABLESCAN:
            vectorOp = vectorizeTableScanOperator(op, vContext);
            isNative = true;
            break;
        case MAPJOIN:
            {
                if (op instanceof MapJoinOperator) {
                    VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
                    MapJoinDesc desc = (MapJoinDesc) op.getConf();
                    boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo);
                    if (!specialize) {
                        Class<? extends Operator<?>> opClass = null;
                        // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
                        List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
                        boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
                        if (!isOuterAndFiltered) {
                            opClass = VectorMapJoinOperator.class;
                        } else {
                            opClass = VectorMapJoinOuterFilteredOperator.class;
                        }
                        vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
                        isNative = false;
                    } else {
                        // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
                        // HiveConf.setBoolVar(physicalContext.getConf(),
                        //    HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
                        vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo);
                        isNative = true;
                        if (vectorTaskColumnInfo != null) {
                            if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                            if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                        }
                    }
                } else {
                    Preconditions.checkState(op instanceof SMBMapJoinOperator);
                    SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
                    VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
                    smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc);
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext);
                    isNative = false;
                }
            }
            break;
        case REDUCESINK:
            {
                VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
                ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
                boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo);
                if (!specialize) {
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), op.getConf(), vContext);
                    isNative = false;
                } else {
                    vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo);
                    isNative = true;
                    if (vectorTaskColumnInfo != null) {
                        if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                        if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
            }
            break;
        case FILTER:
            {
                vectorOp = vectorizeFilterOperator(op, vContext);
                isNative = true;
                if (vectorTaskColumnInfo != null) {
                    VectorFilterDesc vectorFilterDesc = (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
                    if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                }
            }
            break;
        case SELECT:
            {
                vectorOp = vectorizeSelectOperator(op, vContext);
                isNative = true;
                if (vectorTaskColumnInfo != null) {
                    VectorSelectDesc vectorSelectDesc = (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
                    if (usesVectorUDFAdaptor(vectorSelectExprs)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                }
            }
            break;
        case GROUPBY:
            {
                vectorOp = vectorizeGroupByOperator(op, vContext);
                isNative = false;
                if (vectorTaskColumnInfo != null) {
                    VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    if (!vectorGroupByDesc.isVectorOutput()) {
                        vectorTaskColumnInfo.setGroupByVectorOutput(false);
                    }
                    VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
                    if (usesVectorUDFAdaptor(vecKeyExpressions)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                    VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
                    for (VectorAggregateExpression vecAggr : vecAggregators) {
                        if (usesVectorUDFAdaptor(vecAggr.inputExpression())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
            }
            break;
        case FILESINK:
            {
                FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
                VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
                fileSinkDesc.setVectorDesc(vectorFileSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext);
                isNative = false;
            }
            break;
        case LIMIT:
            {
                LimitDesc limitDesc = (LimitDesc) op.getConf();
                VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
                limitDesc.setVectorDesc(vectorLimitDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext);
                isNative = true;
            }
            break;
        case EVENT:
            {
                AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
                VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
                eventDesc.setVectorDesc(vectorEventDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext);
                isNative = true;
            }
            break;
        case HASHTABLESINK:
            {
                SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
                VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
                sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext);
                isNative = true;
            }
            break;
        case SPARKPRUNINGSINK:
            {
                SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
                VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
                sparkPartitionPruningSinkDesc.setVectorDesc(vectorSparkPartitionPruningSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext);
                isNative = true;
            }
            break;
        default:
            // These are children of GROUP BY operators with non-vector outputs.
            isNative = false;
            vectorOp = op;
            break;
    }
    Preconditions.checkState(vectorOp != null);
    if (vectorTaskColumnInfo != null && !isNative) {
        vectorTaskColumnInfo.setAllNative(false);
    }
    LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
    LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
    if (vectorOp != op) {
        fixupParentChildOperators(op, vectorOp);
        ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
    }
    return vectorOp;
}
Also used : VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) AppMasterEventDesc(org.apache.hadoop.hive.ql.plan.AppMasterEventDesc) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) VectorReduceSinkInfo(org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) ArrayList(java.util.ArrayList) List(java.util.List) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) SparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 15 with FileSinkDesc

use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

the class Utilities method getStatsTmpDirs.

public static List<String> getStatsTmpDirs(BaseWork work, Configuration conf) {
    List<String> statsTmpDirs = new ArrayList<>();
    if (!StatsSetupConst.StatDB.fs.name().equalsIgnoreCase(HiveConf.getVar(conf, ConfVars.HIVESTATSDBCLASS))) {
        // no-op for non-fs stats collection
        return statsTmpDirs;
    }
    // if its auto-stats gather for inserts or CTAS, stats dir will be in FileSink
    Set<Operator<? extends OperatorDesc>> ops = work.getAllLeafOperators();
    if (work instanceof MapWork) {
        // if its an anlayze statement, stats dir will be in TableScan
        ops.addAll(work.getAllRootOperators());
    }
    for (Operator<? extends OperatorDesc> op : ops) {
        OperatorDesc desc = op.getConf();
        String statsTmpDir = null;
        if (desc instanceof FileSinkDesc) {
            statsTmpDir = ((FileSinkDesc) desc).getStatsTmpDir();
        } else if (desc instanceof TableScanDesc) {
            statsTmpDir = ((TableScanDesc) desc).getTmpStatsDir();
        }
        if (statsTmpDir != null && !statsTmpDir.isEmpty()) {
            statsTmpDirs.add(statsTmpDir);
        }
    }
    return statsTmpDirs;
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc)22 Path (org.apache.hadoop.fs.Path)13 ArrayList (java.util.ArrayList)11 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)8 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)6 ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork)6 SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)6 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)4 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)4 DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx)4 FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc)4 FileSystem (org.apache.hadoop.fs.FileSystem)3 HiveConf (org.apache.hadoop.hive.conf.HiveConf)3 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)3 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)3 IOException (java.io.IOException)2 Serializable (java.io.Serializable)2 LinkedHashMap (java.util.LinkedHashMap)2 List (java.util.List)2 Context (org.apache.hadoop.hive.ql.Context)2