Search in sources :

Example 1 with PartialScanWork

use of org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork in project hive by apache.

the class GenMRTableScan1 method handlePartialScanCommand.

/**
   * Handles a partial scan command by putting a partial scan task in front of the stats task.
   *
   * <p>A {@code PartialScanWork} is built from the input paths reported by
   * {@code GenMapRedUtils.getInputPathsForPartialScan}, wrapped in a task, and registered as a
   * root task in place of {@code currTask}. The stats task is then attached as its dependent.
   *
   * @param op table scan operator supplying the stats aggregation prefix, the temporary stats
   *          directory and the compilation operator context
   * @param ctx processing context whose root task list is modified
   * @param parseCtx parse context supplying the configuration and the query state
   * @param currTask task that is removed from the root tasks of {@code ctx}
   * @param statsWork stats work that gets flagged as a partial scan analyze command
   * @param statsTask stats task that is made dependent on the new partial scan task
   * @throws SemanticException propagated from the calls made while assembling the tasks
   */
private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx, ParseContext parseCtx, Task<? extends Serializable> currTask, StatsWork statsWork, Task<StatsWork> statsTask) throws SemanticException {
    // The builder is handed to the helper and read back afterwards, so the final key is taken
    // from the builder rather than from the original prefix (presumably the helper extends it).
    String statsAggPrefix = op.getConf().getStatsAggPrefix();
    StringBuilder keyBuilder = new StringBuilder(statsAggPrefix);
    List<Path> scanInputs = GenMapRedUtils.getInputPathsForPartialScan(op, keyBuilder);
    String fullAggKey = keyBuilder.toString();

    // Describe the partial scan itself.
    PartialScanWork partialScan = new PartialScanWork(scanInputs);
    partialScan.setMapperCannotSpanPartns(true);
    partialScan.setAggKey(fullAggKey);
    partialScan.setStatsTmpDir(op.getConf().getTmpStatsDir(), parseCtx.getConf());

    // Flag the stats work as belonging to a partial scan analyze command.
    statsWork.setPartialScanAnalyzeCommand(true);

    // Create and initialize the task that runs the partial scan.
    DriverContext freshDriverContext = new DriverContext();
    Task<PartialScanWork> scanTask = TaskFactory.get(partialScan, parseCtx.getConf());
    scanTask.initialize(parseCtx.getQueryState(), null, freshDriverContext, op.getCompilationOpContext());
    scanTask.setWork(partialScan);

    // Swap the current root task for the scan task and chain the stats task behind it.
    ctx.getRootTasks().remove(currTask);
    ctx.getRootTasks().add(scanTask);
    scanTask.addDependentTask(statsTask);

    // The stats task's parent list is replaced by one holding only the scan task.
    List<Task<? extends Serializable>> soleParent = new ArrayList<Task<? extends Serializable>>();
    soleParent.add(scanTask);
    statsTask.setParentTasks(soleParent);
}
Also used : Path(org.apache.hadoop.fs.Path) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) Serializable(java.io.Serializable) PartialScanWork(org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork) ArrayList(java.util.ArrayList)

Example 2 with PartialScanWork

use of org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork in project hive by apache.

the class SparkProcessAnalyzeTable method handlePartialScanCommand.

/**
   * Handles a partial scan command by putting a partial scan task in front of the stats task.
   *
   * <p>A {@code PartialScanWork} is built from the input paths reported by
   * {@code GenMapRedUtils.getInputPathsForPartialScan}, wrapped in a task, recorded as the
   * source task of {@code statsWork}, and registered as a root task in place of
   * {@code context.currentTask}. The stats task is then attached as its dependent.
   *
   * @param tableScan table scan operator supplying the stats aggregation prefix, the temporary
   *          stats directory and the compilation operator context
   * @param parseContext parse context supplying the configuration and the query state
   * @param statsWork stats work that is flagged as a partial scan analyze command and given the
   *          new task as its source task
   * @param context Spark processing context whose root tasks are modified
   * @param statsTask stats task that is made dependent on the new partial scan task
   * @throws SemanticException propagated from the calls made while assembling the tasks
   */
private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, StatsWork statsWork, GenSparkProcContext context, Task<StatsWork> statsTask) throws SemanticException {
    // The builder is handed to the helper and read back afterwards, so the final key is taken
    // from the builder rather than from the original prefix (presumably the helper extends it).
    String statsAggPrefix = tableScan.getConf().getStatsAggPrefix();
    StringBuilder keyBuilder = new StringBuilder(statsAggPrefix);
    List<Path> scanInputs = GenMapRedUtils.getInputPathsForPartialScan(tableScan, keyBuilder);
    String fullAggKey = keyBuilder.toString();

    // Describe the partial scan itself.
    PartialScanWork partialScan = new PartialScanWork(scanInputs);
    partialScan.setMapperCannotSpanPartns(true);
    partialScan.setAggKey(fullAggKey);
    partialScan.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());

    // Flag the stats work as belonging to a partial scan analyze command.
    statsWork.setPartialScanAnalyzeCommand(true);

    // Create and initialize the task that runs the partial scan, then expose it to the stats work.
    DriverContext freshDriverContext = new DriverContext();
    @SuppressWarnings("unchecked") Task<PartialScanWork> scanTask = TaskFactory.get(partialScan, parseContext.getConf());
    scanTask.initialize(parseContext.getQueryState(), null, freshDriverContext, tableScan.getCompilationOpContext());
    scanTask.setWork(partialScan);
    statsWork.setSourceTask(scanTask);

    // Swap the current root task for the scan task and chain the stats task behind it.
    context.rootTasks.remove(context.currentTask);
    context.rootTasks.add(scanTask);
    scanTask.addDependentTask(statsTask);
}
Also used : Path(org.apache.hadoop.fs.Path) DriverContext(org.apache.hadoop.hive.ql.DriverContext) PartialScanWork(org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork)

Example 3 with PartialScanWork

use of org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork in project hive by apache.

the class ProcessAnalyzeTable method handlePartialScanCommand.

/**
   * Handles a partial scan command by putting a partial scan task in front of the stats task.
   *
   * <p>A {@code PartialScanWork} is built from the input paths reported by
   * {@code GenMapRedUtils.getInputPathsForPartialScan}, wrapped in a task, recorded as the
   * source task of {@code statsWork}, and registered as a root task in place of
   * {@code context.currentTask}. The stats task is then attached as its dependent.
   *
   * @param tableScan table scan operator supplying the stats aggregation prefix, the temporary
   *          stats directory and the compilation operator context
   * @param parseContext parse context supplying the configuration and the query state
   * @param statsWork stats work that is flagged as a partial scan analyze command and given the
   *          new task as its source task
   * @param context Tez processing context whose root tasks are modified
   * @param statsTask stats task that is made dependent on the new partial scan task
   * @throws SemanticException propagated from the calls made while assembling the tasks
   */
private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, StatsWork statsWork, GenTezProcContext context, Task<StatsWork> statsTask) throws SemanticException {
    // The builder is handed to the helper and read back afterwards, so the final key is taken
    // from the builder rather than from the original prefix (presumably the helper extends it).
    String statsAggPrefix = tableScan.getConf().getStatsAggPrefix();
    StringBuilder keyBuilder = new StringBuilder(statsAggPrefix);
    List<Path> scanInputs = GenMapRedUtils.getInputPathsForPartialScan(tableScan, keyBuilder);
    String fullAggKey = keyBuilder.toString();

    // Describe the partial scan itself.
    PartialScanWork partialScan = new PartialScanWork(scanInputs);
    partialScan.setMapperCannotSpanPartns(true);
    partialScan.setAggKey(fullAggKey);
    partialScan.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());

    // Flag the stats work as belonging to a partial scan analyze command.
    statsWork.setPartialScanAnalyzeCommand(true);

    // Create and initialize the task that runs the partial scan, then expose it to the stats work.
    DriverContext freshDriverContext = new DriverContext();
    Task<PartialScanWork> scanTask = TaskFactory.get(partialScan, parseContext.getConf());
    scanTask.initialize(parseContext.getQueryState(), null, freshDriverContext, tableScan.getCompilationOpContext());
    scanTask.setWork(partialScan);
    statsWork.setSourceTask(scanTask);

    // Swap the current root task for the scan task and chain the stats task behind it.
    context.rootTasks.remove(context.currentTask);
    context.rootTasks.add(scanTask);
    scanTask.addDependentTask(statsTask);
}
Also used : Path(org.apache.hadoop.fs.Path) DriverContext(org.apache.hadoop.hive.ql.DriverContext) PartialScanWork(org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork)

Aggregations

Path (org.apache.hadoop.fs.Path): 3, DriverContext (org.apache.hadoop.hive.ql.DriverContext): 3, PartialScanWork (org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork): 3, Serializable (java.io.Serializable): 1, ArrayList (java.util.ArrayList): 1, Task (org.apache.hadoop.hive.ql.exec.Task): 1, MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask): 1