
Example 1 with ColumnStatsTask

Use of org.apache.hadoop.hive.ql.exec.ColumnStatsTask in project hive by apache.

From the class TaskCompiler, method genColumnStatsTask:

/**
   * A helper function to generate a column stats task on top of a map-red task. The column stats
   * task fetches from the output of the map-red task, constructs the column stats object and
   * persists it to the metastore.
   *
   * This method generates a plan with a column stats task on top of the map-red task and sets up
   * the appropriate metadata to be used during execution.
   *
   * @param analyzeRewrite context describing the table, columns and column types being analyzed
   * @param loadFileWork file sink descriptors of the map-red task; the first entry provides the
   *          columns, column types and source path of the statistics output
   * @param leafTasks leaf tasks of the generated plan; the column stats task is added as a
   *          dependent of each of them
   * @param outerQueryLimit row limit applied to the fetch over the map-red output
   * @param numBitVector number of bit vectors to use for the column statistics, passed through to
   *          ColumnStatsDesc
   */
@SuppressWarnings("unchecked")
protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, List<LoadFileDesc> loadFileWork,
        Set<Task<? extends Serializable>> leafTasks, int outerQueryLimit, int numBitVector) {
    ColumnStatsTask cStatsTask = null;
    ColumnStatsWork cStatsWork = null;
    FetchWork fetch = null;
    String tableName = analyzeRewrite.getTableName();
    List<String> colName = analyzeRewrite.getColName();
    List<String> colType = analyzeRewrite.getColType();
    boolean isTblLevel = analyzeRewrite.isTblLvl();
    String cols = loadFileWork.get(0).getColumns();
    String colTypes = loadFileWork.get(0).getColumnTypes();
    String resFileFormat;
    TableDesc resultTab;
    // For HiveServer2 queries that serialize the result set in tasks, the fetch reads the map-red
    // output as a SequenceFile through ThriftJDBCBinarySerDe; otherwise it uses the configured
    // query result file format with LazySimpleSerDe.
    if (SessionState.get().isHiveServerQuery() && conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
        resFileFormat = "SequenceFile";
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
    } else {
        resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
    }
    // The fetch pulls the rows produced by the map-red task from its output path, and the column
    // stats task converts them into column statistics and persists them to the metastore.
    fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
    ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector);
    cStatsWork = new ColumnStatsWork(fetch, cStatsDesc);
    cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf);
    // Attach the stats task to every leaf so it runs only after the map-red plan completes.
    for (Task<? extends Serializable> tsk : leafTasks) {
        tsk.addDependentTask(cStatsTask);
    }
}
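
For context, a compiler pass that owns the map-red plan could wire this helper in roughly as follows. This is a minimal sketch, not code from the Hive source: the method name attachColumnStatsTask and the local names rewriteCtx, fileSinks, leaves, limit and bitVectors are illustrative assumptions; only genColumnStatsTask and the parameter types it takes come from the snippet above.

// Hypothetical helper inside the same TaskCompiler subclass (sketch only).
private void attachColumnStatsTask(AnalyzeRewriteContext rewriteCtx, List<LoadFileDesc> fileSinks,
        Set<Task<? extends Serializable>> leaves, int limit, int bitVectors) {
    // Only ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS plans carry an
    // AnalyzeRewriteContext; skip everything else.
    if (rewriteCtx == null || fileSinks.isEmpty() || leaves.isEmpty()) {
        return;
    }
    // genColumnStatsTask builds the FetchWork/ColumnStatsWork pair and makes the resulting
    // ColumnStatsTask a dependent of every leaf task, so it only runs after the map-red
    // stage has written the aggregated statistics rows.
    genColumnStatsTask(rewriteCtx, fileSinks, leaves, limit, bitVectors);
}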
Also used : ColumnStatsDesc(org.apache.hadoop.hive.ql.plan.ColumnStatsDesc) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) ColumnStatsWork(org.apache.hadoop.hive.ql.plan.ColumnStatsWork) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) ColumnStatsTask(org.apache.hadoop.hive.ql.exec.ColumnStatsTask)

Aggregations

ColumnStatsTask (org.apache.hadoop.hive.ql.exec.ColumnStatsTask)1 ColumnStatsDesc (org.apache.hadoop.hive.ql.plan.ColumnStatsDesc)1 ColumnStatsWork (org.apache.hadoop.hive.ql.plan.ColumnStatsWork)1 CreateTableDesc (org.apache.hadoop.hive.ql.plan.CreateTableDesc)1 FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork)1 LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)1 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)1 LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe)1 ThriftJDBCBinarySerDe (org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe)1