
Example 16 with TezWork

Use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.

From the class DDLTask, method mergeFiles.

/**
   * First, make sure the source table/partition is not
   * archived, has no indexes, and is stored as RCFile or ORC. If any of
   * these checks fail, throw an exception.
   *
   * The merge itself is done by building a MergeFileWork from the
   * mergeFilesDesc and running it as a TezTask (on Tez) or a
   * MergeFileTask (on MapReduce).
   *
   * @param db the Hive database handle
   * @param mergeFilesDesc descriptor holding the input/output directories and the input format
   * @param driverContext the driver context of the current query
   * @return the exit code of the merge task (0 on success)
   * @throws HiveException
   */
private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc, DriverContext driverContext) throws HiveException {
    ListBucketingCtx lbCtx = mergeFilesDesc.getLbCtx();
    boolean lbatc = lbCtx == null ? false : lbCtx.isSkewedStoredAsDir();
    int lbd = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel();
    // merge work only needs input and output.
    MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(), mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName());
    LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
    ArrayList<String> inputDirstr = new ArrayList<String>(1);
    inputDirstr.add(mergeFilesDesc.getInputDir().toString());
    pathToAliases.put(mergeFilesDesc.getInputDir().get(0), inputDirstr);
    mergeWork.setPathToAliases(pathToAliases);
    mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
    mergeWork.resolveConcatenateMerge(db.getConf());
    mergeWork.setMapperCannotSpanPartns(true);
    mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());
    final FileMergeDesc fmd;
    if (mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)) {
        fmd = new RCFileMergeDesc();
    } else {
        // safe to assume else is ORC as semantic analyzer will check for RC/ORC
        fmd = new OrcFileMergeDesc();
    }
    fmd.setDpCtx(null);
    fmd.setHasDynamicPartitions(false);
    fmd.setListBucketingAlterTableConcatenate(lbatc);
    fmd.setListBucketingDepth(lbd);
    fmd.setOutputPath(mergeFilesDesc.getOutputDir());
    CompilationOpContext opContext = driverContext.getCtx().getOpContext();
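    // build the file merge operator (RCFileMergeOperator or OrcFileMergeOperator) from the descriptor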
    Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(opContext, fmd);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
    mergeWork.setAliasToWork(aliasToWork);
    DriverContext driverCxt = new DriverContext();
    Task task;
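    // on Tez, wrap the merge work in a single-vertex TezWork and run it as a TezTask;
    // otherwise run it directly as a MapReduce MergeFileTask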
    if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        TezWork tezWork = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
        mergeWork.setName("File Merge");
        tezWork.add(mergeWork);
        task = new TezTask();
        task.setWork(tezWork);
    } else {
        task = new MergeFileTask();
        task.setWork(mergeWork);
    }
    // initialize the task and execute
    task.initialize(queryState, getQueryPlan(), driverCxt, opContext);
    subtask = task;
    int ret = task.execute(driverCxt);
    if (subtask.getException() != null) {
        setException(subtask.getException());
    }
    return ret;
}
Also used : Path (org.apache.hadoop.fs.Path), DriverContext (org.apache.hadoop.hive.ql.DriverContext), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), MergeFileWork (org.apache.hadoop.hive.ql.io.merge.MergeFileWork), MergeFileTask (org.apache.hadoop.hive.ql.io.merge.MergeFileTask), ColumnTruncateTask (org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask), TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask), FileMergeDesc (org.apache.hadoop.hive.ql.plan.FileMergeDesc), RCFileMergeDesc (org.apache.hadoop.hive.ql.plan.RCFileMergeDesc), OrcFileMergeDesc (org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc), ListBucketingCtx (org.apache.hadoop.hive.ql.plan.ListBucketingCtx), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc), TezWork (org.apache.hadoop.hive.ql.plan.TezWork), ArrayList (java.util.ArrayList), LinkedHashMap (java.util.LinkedHashMap)
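For reference, a minimal sketch of the TezWork API that the example above exercises, written for illustration rather than taken from the Hive code base: vertices (BaseWork subclasses) are registered with add(), wired together with connect() and a TezEdgeProperty, and the resulting DAG is handed to a TezTask. The helper name buildTwoVertexDag, the vertex names "Map 1" / "Reducer 2", and the choice of SIMPLE_EDGE are assumptions made for the sketch; the single-argument TezEdgeProperty constructor is assumed from its use elsewhere in Hive.

private TezTask buildTwoVertexDag(HiveConf conf) {
    // hypothetical helper, not part of DDLTask: builds a map -> reduce DAG
    TezWork tezWork = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);

    MapWork mapWork = new MapWork("Map 1");              // vertex that scans the input
    ReduceWork reduceWork = new ReduceWork("Reducer 2"); // vertex that consumes the shuffled map output

    // add() registers a vertex; connect() records the edge and its shuffle semantics
    tezWork.add(mapWork);
    tezWork.add(reduceWork);
    tezWork.connect(mapWork, reduceWork, new TezEdgeProperty(TezEdgeProperty.EdgeType.SIMPLE_EDGE));

    // as in mergeFiles, the finished DAG is wrapped in a TezTask for execution
    TezTask tezTask = new TezTask();
    tezTask.setWork(tezWork);
    return tezTask;
}

The mergeFiles method above is the degenerate case of this pattern: a single MergeFileWork vertex, no edges, executed immediately via task.execute().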

Aggregations

TezWork (org.apache.hadoop.hive.ql.plan.TezWork): 16
BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 8
ArrayList (java.util.ArrayList): 5
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 5
Operator (org.apache.hadoop.hive.ql.exec.Operator): 4
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 4
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 4
TezEdgeProperty (org.apache.hadoop.hive.ql.plan.TezEdgeProperty): 4
List (java.util.List): 3
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 3
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 3
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 3
SparkWork (org.apache.hadoop.hive.ql.plan.SparkWork): 3
IOException (java.io.IOException): 2
Serializable (java.io.Serializable): 2
LinkedHashMap (java.util.LinkedHashMap): 2
LinkedList (java.util.LinkedList): 2
Path (org.apache.hadoop.fs.Path): 2
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 2
Schema (org.apache.hadoop.hive.llap.Schema): 2