
Example 1 with ColumnStatsDesc

Use of org.apache.hadoop.hive.ql.plan.ColumnStatsDesc in project hive by apache.

From the class TaskCompiler, method genColumnStatsTask:

/**
   * A helper function to generate a column stats task on top of a map-red task. The column stats
   * task fetches from the output of the map-red task, constructs the column stats object, and
   * persists it to the metastore.
   *
   * This method generates a plan with a column stats task on top of the map-red task and sets up
   * the appropriate metadata to be used during execution.
   *
   * @param analyzeRewrite rewrite context carrying the table name, column names, and column types
   * @param loadFileWork load-file descriptors; the first entry points at the map-red output
   * @param leafTasks leaf tasks of the plan that the column stats task is chained after
   * @param outerQueryLimit row limit applied when fetching the map-red output
   * @param numBitVector number of bit vectors used when computing the column statistics
   */
@SuppressWarnings("unchecked")
protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, List<LoadFileDesc> loadFileWork, Set<Task<? extends Serializable>> leafTasks, int outerQueryLimit, int numBitVector) {
    ColumnStatsTask cStatsTask = null;
    ColumnStatsWork cStatsWork = null;
    FetchWork fetch = null;
    String tableName = analyzeRewrite.getTableName();
    List<String> colName = analyzeRewrite.getColName();
    List<String> colType = analyzeRewrite.getColType();
    boolean isTblLevel = analyzeRewrite.isTblLvl();
    String cols = loadFileWork.get(0).getColumns();
    String colTypes = loadFileWork.get(0).getColumnTypes();
    String resFileFormat;
    TableDesc resultTab;
    if (SessionState.get().isHiveServerQuery() && conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
        resFileFormat = "SequenceFile";
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
    } else {
        resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
    }
    fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
    ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector);
    cStatsWork = new ColumnStatsWork(fetch, cStatsDesc);
    cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf);
    for (Task<? extends Serializable> tsk : leafTasks) {
        tsk.addDependentTask(cStatsTask);
    }
}
Also used:
ColumnStatsDesc (org.apache.hadoop.hive.ql.plan.ColumnStatsDesc)
ThriftJDBCBinarySerDe (org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe)
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe)
FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork)
ColumnStatsWork (org.apache.hadoop.hive.ql.plan.ColumnStatsWork)
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)
CreateTableDesc (org.apache.hadoop.hive.ql.plan.CreateTableDesc)
ColumnStatsTask (org.apache.hadoop.hive.ql.exec.ColumnStatsTask)
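
For context, the wiring in this older variant can be condensed into a small standalone helper: the map-red output path feeds a FetchWork, the FetchWork and a ColumnStatsDesc are paired in a ColumnStatsWork, and the resulting ColumnStatsTask is chained after every leaf task of the plan. The sketch below is illustrative only; it reuses the constructor signatures shown in the snippet above (they may differ in other Hive versions), and the class and helper names are hypothetical.

import java.io.Serializable;
import java.util.List;
import java.util.Set;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ColumnStatsTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public final class ColumnStatsWiringSketch {

    // Hypothetical helper condensing the wiring done by genColumnStatsTask above.
    static void attachColumnStatsTask(Path mrOutputPath, TableDesc resultTab, int outerQueryLimit,
            String tableName, List<String> colName, List<String> colType, boolean isTblLevel,
            int numBitVector, Set<Task<? extends Serializable>> leafTasks, HiveConf conf) {
        // Fetch the rows written by the map-red task from its output path.
        FetchWork fetch = new FetchWork(mrOutputPath, resultTab, outerQueryLimit);
        // Describe which table and columns the statistics belong to.
        ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector);
        // Pair the fetch with the descriptor and turn the pair into an executable task.
        ColumnStatsWork cStatsWork = new ColumnStatsWork(fetch, cStatsDesc);
        ColumnStatsTask cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf);
        // Run the column stats task only after every leaf task of the plan has finished.
        for (Task<? extends Serializable> tsk : leafTasks) {
            tsk.addDependentTask(cStatsTask);
        }
    }
}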

Example 2 with ColumnStatsDesc

Use of org.apache.hadoop.hive.ql.plan.ColumnStatsDesc in project hive by apache.

From the class TaskCompiler, method genColumnStatsTask:

/**
 * A helper function to set up column statistics collection on top of a map-red task. The column
 * stats work fetches from the output of the map-red task, constructs the column stats object, and
 * persists it to the metastore.
 *
 * Rather than creating a separate task, this method attaches the column stats descriptor
 * (including the fetch over the map-red output) to the StatsTask already planned for the table.
 */
protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, List<LoadFileDesc> loadFileWork, Map<String, StatsTask> map, int outerQueryLimit, int numBitVector) throws SemanticException {
    FetchWork fetch;
    String tableName = analyzeRewrite.getTableName();
    List<String> colName = analyzeRewrite.getColName();
    List<String> colType = analyzeRewrite.getColType();
    boolean isTblLevel = analyzeRewrite.isTblLvl();
    String cols = loadFileWork.get(0).getColumns();
    String colTypes = loadFileWork.get(0).getColumnTypes();
    TableDesc resultTab;
    if (SessionState.get().isHiveServerQuery() && conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, ResultFileFormat.SEQUENCEFILE.toString(), ThriftJDBCBinarySerDe.class);
    } else {
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, conf.getResultFileFormat().toString(), LazySimpleSerDe.class);
    }
    fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
    ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector, fetch);
    StatsTask columnStatsTask = map.get(tableName);
    if (columnStatsTask == null) {
        throw new SemanticException("Can not find " + tableName + " in genColumnStatsTask");
    } else {
        columnStatsTask.getWork().setColStats(cStatsDesc);
    }
}
Also used:
ColumnStatsDesc (org.apache.hadoop.hive.ql.plan.ColumnStatsDesc)
ThriftJDBCBinarySerDe (org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe)
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe)
StatsTask (org.apache.hadoop.hive.ql.exec.StatsTask)
FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork)
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)
CreateTableDesc (org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc)
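
By contrast, this newer variant creates no separate ColumnStatsTask: the FetchWork is handed directly to the six-argument ColumnStatsDesc constructor, and the descriptor is attached to the StatsTask already registered for the table. The sketch below isolates just that attachment step under the same API assumptions as the snippet above; the class and helper names are hypothetical.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public final class ColumnStatsAttachSketch {

    // Hypothetical helper showing only the attachment step of the newer genColumnStatsTask.
    static void attachColStatsToStatsTask(Path mrOutputPath, TableDesc resultTab, int outerQueryLimit,
            String tableName, List<String> colName, List<String> colType, boolean isTblLevel,
            int numBitVector, Map<String, StatsTask> statsTaskByTable) throws SemanticException {
        // The fetch over the map-red output now lives inside the descriptor itself.
        FetchWork fetch = new FetchWork(mrOutputPath, resultTab, outerQueryLimit);
        ColumnStatsDesc cStatsDesc =
                new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector, fetch);
        // Reuse the StatsTask already planned for this table instead of creating a new task.
        StatsTask statsTask = statsTaskByTable.get(tableName);
        if (statsTask == null) {
            throw new SemanticException("Can not find " + tableName + " in genColumnStatsTask");
        }
        statsTask.getWork().setColStats(cStatsDesc);
    }
}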

Aggregations

ColumnStatsDesc (org.apache.hadoop.hive.ql.plan.ColumnStatsDesc): 2 usages
FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork): 2 usages
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 2 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 2 usages
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 2 usages
ThriftJDBCBinarySerDe (org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe): 2 usages
CreateTableDesc (org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc): 1 usage
ColumnStatsTask (org.apache.hadoop.hive.ql.exec.ColumnStatsTask): 1 usage
StatsTask (org.apache.hadoop.hive.ql.exec.StatsTask): 1 usage
ColumnStatsWork (org.apache.hadoop.hive.ql.plan.ColumnStatsWork): 1 usage
CreateTableDesc (org.apache.hadoop.hive.ql.plan.CreateTableDesc): 1 usage