
Example 21 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class TaskCompiler method compile.

@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks, final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {
    Context ctx = pCtx.getContext();
    GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
    List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>();
    List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
    List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
    boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
    if (pCtx.getFetchTask() != null) {
        if (pCtx.getFetchTask().getTblDesc() == null) {
            return;
        }
        pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
        TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
        // If the result serde is not ThriftJDBCBinarySerDe,
        // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
        if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
            if (SessionState.get().isHiveServerQuery()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
            } else {
                String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
                if (formatterName == null || formatterName.isEmpty()) {
                    conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
                }
            }
        }
        return;
    }
    optimizeOperatorPlan(pCtx, inputs, outputs);
    /*
     * In case of a select, use a fetch task instead of a move task.
     * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
     * a column stats task later.
     */
    if (pCtx.getQueryProperties().isQuery() && !isCStats) {
        if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
            throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
        }
        LoadFileDesc loadFileDesc = loadFileWork.get(0);
        String cols = loadFileDesc.getColumns();
        String colTypes = loadFileDesc.getColumnTypes();
        String resFileFormat;
        TableDesc resultTab = pCtx.getFetchTableDesc();
        if (resultTab == null) {
            resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
            if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            } else {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
            }
        } else {
            if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB).equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            }
        }
        FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
        boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
        fetch.setHiveServerQuery(isHiveServerQuery);
        fetch.setSource(pCtx.getFetchSource());
        fetch.setSink(pCtx.getFetchSink());
        if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
            fetch.setIsUsingThriftJDBCBinarySerDe(true);
        } else {
            fetch.setIsUsingThriftJDBCBinarySerDe(false);
        }
        pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf));
        // For the FetchTask, the limit optimization requires we fetch all the rows
        // in memory and count how many rows we get. It's not practical if the
        // limit factor is too big
        int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
            LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
            globalLimitCtx.disableOpt();
        }
        if (outerQueryLimit == 0) {
            // Believe it or not, some tools do generate queries with limit 0 and then expect
            // the query to run quickly. Let's meet their requirement.
            LOG.info("Limit 0. No query execution needed.");
            return;
        }
    } else if (!isCStats) {
        for (LoadTableDesc ltd : loadTableWork) {
            Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
            mvTask.add(tsk);
            // Check to see if we are making any indexes stale, and auto-update them if we want
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
                IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf);
                try {
                    List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
                    for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
                        tsk.addDependentTask(updateTask);
                    }
                } catch (HiveException e) {
                    console.printInfo("WARNING: could not auto-update stale indexes, which are not in sync");
                }
            }
        }
        boolean oneLoadFile = true;
        for (LoadFileDesc lfd : loadFileWork) {
            if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
                // should not have more than 1 load file for CTAS
                assert (oneLoadFile);
                // make the movetask's destination directory the table's destination.
                Path location;
                String loc = pCtx.getQueryProperties().isCTAS() ? pCtx.getCreateTable().getLocation() : pCtx.getCreateViewDesc().getLocation();
                if (loc == null) {
                    // get the default location
                    Path targetPath;
                    try {
                        String protoName = null;
                        if (pCtx.getQueryProperties().isCTAS()) {
                            protoName = pCtx.getCreateTable().getTableName();
                        } else if (pCtx.getQueryProperties().isMaterializedView()) {
                            protoName = pCtx.getCreateViewDesc().getViewName();
                        }
                        String[] names = Utilities.getDbTableName(protoName);
                        if (!db.databaseExists(names[0])) {
                            throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
                        }
                        Warehouse wh = new Warehouse(conf);
                        targetPath = wh.getTablePath(db.getDatabase(names[0]), names[1]);
                    } catch (HiveException e) {
                        throw new SemanticException(e);
                    } catch (MetaException e) {
                        throw new SemanticException(e);
                    }
                    location = targetPath;
                } else {
                    location = new Path(loc);
                }
                lfd.setTargetDir(location);
                oneLoadFile = false;
            }
            mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf));
        }
    }
    generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
    }
    // If a task contains an operator which instructs bucketizedhiveinputformat
    // to be used, please do so
    for (Task<? extends Serializable> rootTask : rootTasks) {
        setInputFormat(rootTask);
    }
    optimizeTaskPlan(rootTasks, pCtx, ctx);
    /*
     * If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     */
    if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
        Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
        getLeafTasks(rootTasks, leafTasks);
        if (isCStats) {
            genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0);
        } else {
            for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
                if (!columnStatsAutoGatherContext.isInsertInto()) {
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0);
                } else {
                    int numBitVector;
                    try {
                        numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
                    } catch (Exception e) {
                        throw new SemanticException(e.getMessage());
                    }
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, numBitVector);
                }
            }
        }
    }
    decideExecMode(rootTasks, ctx, globalLimitCtx);
    if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateTableDesc crtTblDesc = pCtx.getCreateTable();
        crtTblDesc.validate(conf);
        Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc), conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
        Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc), conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
    }
    if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
        LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
        pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }
    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
        LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
        globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
        List<ExecDriver> mrTasks = Utilities.getMRTasks(rootTasks);
        for (ExecDriver tsk : mrTasks) {
            tsk.setRetryCmdWhenFail(true);
        }
        List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks);
        for (SparkTask sparkTask : sparkTasks) {
            sparkTask.setRetryCmdWhenFail(true);
        }
    }
    Interner<TableDesc> interner = Interners.newStrongInterner();
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.internTableDesc(rootTask, interner);
        GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
    }
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LinkedHashSet(java.util.LinkedHashSet) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) Task(org.apache.hadoop.hive.ql.exec.Task) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) ColumnStatsTask(org.apache.hadoop.hive.ql.exec.ColumnStatsTask) Serializable(java.io.Serializable) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) ArrayList(java.util.ArrayList) ThriftFormatter(org.apache.hadoop.hive.serde2.thrift.ThriftFormatter) CreateViewDesc(org.apache.hadoop.hive.ql.plan.CreateViewDesc) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) Path(org.apache.hadoop.fs.Path) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) DefaultFetchFormatter(org.apache.hadoop.hive.serde2.DefaultFetchFormatter) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc)
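
The early-return branch of compile() decides which fetch formatter HiveServer2 and CLI results use. Below is a minimal sketch (not part of TaskCompiler; the class and method names are invented) that isolates that decision using only the SessionState and HiveConf calls shown above.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.DefaultFetchFormatter;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.thrift.ThriftFormatter;

// Hypothetical helper: HiveServer2 queries get the ThriftFormatter; everything else
// falls back to the DefaultFetchFormatter unless a formatter is already configured.
public class FetchFormatterChoice {
    public static void chooseFetchFormatter(HiveConf conf) {
        if (SessionState.get().isHiveServerQuery()) {
            conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
        } else {
            String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
            if (formatterName == null || formatterName.isEmpty()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
            }
        }
    }
}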

Example 22 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class UpdateDeleteSemanticAnalyzer method analyzeMerge.

/**
   * Here we take a Merge statement AST and generate a semantically equivalent multi-insert
   * statement to execute.  Each Insert leg represents a single WHEN clause.  As much as possible,
   * the new SQL statement is made to look like the input SQL statement so that it's easier to map
   * Query Compiler errors from the generated SQL back to the original one.
   * The generated SQL is a complete representation of the original input for the same reason.
   * In many places SemanticAnalyzer throws exceptions that contain (line, position) coordinates.
   * If the generated SQL doesn't have everything and is patched up later, these coordinates point to
   * the wrong place.
   *
   * @throws SemanticException
   */
private void analyzeMerge(ASTNode tree) throws SemanticException {
    currentOperation = Context.Operation.MERGE;
    quotedIdenfierHelper = new IdentifierQuoter(ctx.getTokenRewriteStream());
    /*
     * See org.apache.hadoop.hive.ql.parse.TestMergeStatement for some examples of the merge AST
      For example, given:
      merge into acidTbl using nonAcidPart2 source ON acidTbl.a = source.a2
      WHEN MATCHED THEN UPDATE set b = source.b2
      WHEN NOT MATCHED THEN INSERT VALUES(source.a2, source.b2)

      We get AST like this:
      "(tok_merge " +
        "(tok_tabname acidtbl) (tok_tabref (tok_tabname nonacidpart2) source) " +
        "(= (. (tok_table_or_col acidtbl) a) (. (tok_table_or_col source) a2)) " +
        "(tok_matched " +
        "(tok_update " +
        "(tok_set_columns_clause (= (tok_table_or_col b) (. (tok_table_or_col source) b2))))) " +
        "(tok_not_matched " +
        "tok_insert " +
        "(tok_value_row (. (tok_table_or_col source) a2) (. (tok_table_or_col source) b2))))");

        And need to produce a multi-insert like this to execute:
        FROM acidTbl right outer join nonAcidPart2 ON acidTbl.a = source.a2
        Insert into table acidTbl select nonAcidPart2.a2, nonAcidPart2.b2 where acidTbl.a is null
        INSERT INTO TABLE acidTbl select target.ROW__ID, nonAcidPart2.a2, nonAcidPart2.b2 where nonAcidPart2.a2=acidTbl.a sort by acidTbl.ROW__ID
    */
    /*todo: we need some sort of validation phase over original AST to make things user friendly; for example, if
     original command refers to a column that doesn't exist, this will be caught when processing the rewritten query but
     the errors will point at locations that the user can't map to anything
     - VALUES clause must have the same number of values as target table (including partition cols).  Part cols go last in Select clause of Insert as Select
     todo: do we care to preserve comments in original SQL?
     todo: check if identifiers are properly escaped/quoted in the generated SQL - it's currently inconsistent
      Look at UnparseTranslator.addIdentifierTranslation() - it does unescape + unparse...
     todo: consider "WHEN NOT MATCHED BY SOURCE THEN UPDATE SET TargetTable.Col1 = SourceTable.Col1 "; what happens when source is empty?  This should be a runtime error - maybe not
      the outer side of ROJ is empty => the join produces 0 rows.  If supporting WHEN NOT MATCHED BY SOURCE, then this should be a runtime error
    */
    ASTNode target = (ASTNode) tree.getChild(0);
    ASTNode source = (ASTNode) tree.getChild(1);
    String targetName = getSimpleTableName(target);
    String sourceName = getSimpleTableName(source);
    ASTNode onClause = (ASTNode) tree.getChild(2);
    String onClauseAsText = getMatchedText(onClause);
    Table targetTable = getTargetTable(target);
    validateTargetTable(targetTable);
    List<ASTNode> whenClauses = findWhenClauses(tree);
    StringBuilder rewrittenQueryStr = new StringBuilder("FROM\n");
    rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(target));
    if (isAliased(target)) {
        rewrittenQueryStr.append(" ").append(targetName);
    }
    rewrittenQueryStr.append('\n');
    rewrittenQueryStr.append(Indent).append(chooseJoinType(whenClauses)).append("\n");
    if (source.getType() == HiveParser.TOK_SUBQUERY) {
        //this includes the mandatory alias
        rewrittenQueryStr.append(Indent).append(getMatchedText(source));
    } else {
        rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(source));
        if (isAliased(source)) {
            rewrittenQueryStr.append(" ").append(sourceName);
        }
    }
    rewrittenQueryStr.append('\n');
    rewrittenQueryStr.append(Indent).append("ON ").append(onClauseAsText).append('\n');
    /**
     * We allow at most 2 WHEN MATCHED clauses, in which case one must be an Update and the other a Delete.
     * If we have both update and delete, the 1st one (in SQL code) must have "AND <extra predicate>"
     * so that the 2nd can ensure not to process the same rows.
     * Update and Delete may be in any order.  (Insert is always last)
     */
    String extraPredicate = null;
    int numWhenMatchedUpdateClauses = 0, numWhenMatchedDeleteClauses = 0;
    for (ASTNode whenClause : whenClauses) {
        switch(getWhenClauseOperation(whenClause).getType()) {
            case HiveParser.TOK_INSERT:
                handleInsert(whenClause, rewrittenQueryStr, target, onClause, targetTable, targetName, onClauseAsText);
                break;
            case HiveParser.TOK_UPDATE:
                numWhenMatchedUpdateClauses++;
                String s = handleUpdate(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
                if (numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
                    //i.e. it's the 1st WHEN MATCHED
                    extraPredicate = s;
                }
                break;
            case HiveParser.TOK_DELETE:
                numWhenMatchedDeleteClauses++;
                String s1 = handleDelete(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
                if (numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
                    //i.e. it's the 1st WHEN MATCHED
                    extraPredicate = s1;
                }
                break;
            default:
                throw new IllegalStateException("Unexpected WHEN clause type: " + whenClause.getType() + addParseInfo(whenClause));
        }
        if (numWhenMatchedDeleteClauses > 1) {
            throw new SemanticException(ErrorMsg.MERGE_TOO_MANY_DELETE, ctx.getCmd());
        }
        if (numWhenMatchedUpdateClauses > 1) {
            throw new SemanticException(ErrorMsg.MERGE_TOO_MANY_UPDATE, ctx.getCmd());
        }
    }
    if (numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) {
        throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd());
    }
    boolean validating = handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText, targetTable, numWhenMatchedDeleteClauses == 0 && numWhenMatchedUpdateClauses == 0);
    ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
    Context rewrittenCtx = rr.rewrittenCtx;
    ASTNode rewrittenTree = rr.rewrittenTree;
    rewrittenCtx.setOperation(Context.Operation.MERGE);
    //set dest name mapping on new context; 1st child is TOK_FROM
    for (int insClauseIdx = 1, whenClauseIdx = 0; insClauseIdx < rewrittenTree.getChildCount() - (validating ? 1 : 0); insClauseIdx++, whenClauseIdx++) {
        //we've added Insert clauses in the order of the WHEN items in whenClauses
        ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx);
        switch(getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
            case HiveParser.TOK_INSERT:
                rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.INSERT);
                break;
            case HiveParser.TOK_UPDATE:
                rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.UPDATE);
                break;
            case HiveParser.TOK_DELETE:
                rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.DELETE);
                break;
            default:
                assert false;
        }
    }
    if (validating) {
        //here means the last branch of the multi-insert is Cardinality Validation
        rewrittenCtx.addDestNamePrefix(rewrittenTree.getChildCount() - 1, Context.DestClausePrefix.INSERT);
    }
    try {
        useSuper = true;
        super.analyze(rewrittenTree, rewrittenCtx);
    } finally {
        useSuper = false;
    }
    updateOutputs(targetTable);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Table(org.apache.hadoop.hive.ql.metadata.Table)
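
The Context calls that carry the MERGE rewrite forward are setOperation and addDestNamePrefix. A minimal sketch follows, assuming a Context already produced by parseRewrittenQuery; the class and method names are invented for illustration.

import org.apache.hadoop.hive.ql.Context;

// Hypothetical helper: mark the rewritten Context as a MERGE and map one insert branch
// of the generated multi-insert back to the MERGE action it was generated from.
public class MergeBranchTagger {
    public static void tagBranch(Context rewrittenCtx, int insClauseIdx, Context.DestClausePrefix action) {
        rewrittenCtx.setOperation(Context.Operation.MERGE);
        // child 0 of the rewritten tree is TOK_FROM, so insert branches start at index 1
        rewrittenCtx.addDestNamePrefix(insClauseIdx, action);
    }
}

For example, tagBranch(rewrittenCtx, 1, Context.DestClausePrefix.UPDATE) would record that the first insert branch came from a WHEN MATCHED ... UPDATE clause, matching the loop at the end of analyzeMerge.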

Example 23 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class MoveTask method releaseLocks.

// Release all the locks acquired for this object
// This becomes important for multi-table inserts when one branch may take much more
// time than the others. It is better to release the lock for this particular insert.
// The other option is to wait for all the branches to finish, or set
// hive.multi.insert.move.tasks.share.dependencies to true, which will mean that the
// first multi-insert results will be available when all of the branches of multi-table
// inserts are done.
private void releaseLocks(LoadTableDesc ltd) throws HiveException {
    // nothing needs to be done
    if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY)) {
        return;
    }
    Context ctx = driverContext.getCtx();
    if (ctx.getHiveTxnManager().supportsAcid()) {
        //Acid LM doesn't maintain getOutputLockObjects(); this 'if' just makes it more explicit
        return;
    }
    HiveLockManager lockMgr = ctx.getHiveTxnManager().getLockManager();
    WriteEntity output = ctx.getLoadTableOutputMap().get(ltd);
    List<HiveLockObj> lockObjects = ctx.getOutputLockObjects().get(output);
    if (lockObjects == null) {
        return;
    }
    for (HiveLockObj lockObj : lockObjects) {
        List<HiveLock> locks = lockMgr.getLocks(lockObj.getObj(), false, true);
        for (HiveLock lock : locks) {
            if (lock.getHiveLockMode() == lockObj.getMode()) {
                if (ctx.getHiveLocks().remove(lock)) {
                    LOG.info("about to release lock for output: " + output.toString() + " lock: " + lock.getHiveLockObject().getName());
                    try {
                        lockMgr.unlock(lock);
                    } catch (LockException le) {
                        // should be OK since the lock is ephemeral and will eventually be deleted
                        // when the query finishes and zookeeper session is closed.
                        LOG.warn("Could not release lock " + lock.getHiveLockObject().getName());
                    }
                }
            }
        }
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) HiveLockObj(org.apache.hadoop.hive.ql.lockmgr.HiveLockObj) HiveLock(org.apache.hadoop.hive.ql.lockmgr.HiveLock) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) HiveLockManager(org.apache.hadoop.hive.ql.lockmgr.HiveLockManager)
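
The Context pieces doing the work here are getHiveLocks() (the locks the query currently holds) and the lock manager reached through the transaction manager. A minimal sketch, assuming the Context, lock manager, and lock were obtained as in releaseLocks above; the class name is invented.

import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
import org.apache.hadoop.hive.ql.lockmgr.LockException;

// Hypothetical helper: drop the lock from the Context's bookkeeping first, then unlock,
// tolerating a LockException since the ephemeral ZooKeeper lock will expire on its own.
public class SingleLockRelease {
    public static void release(Context ctx, HiveLockManager lockMgr, HiveLock lock) {
        if (ctx.getHiveLocks().remove(lock)) {
            try {
                lockMgr.unlock(lock);
            } catch (LockException le) {
                // ignore: the lock is cleaned up when the query finishes and the session closes
            }
        }
    }
}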

Example 24 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class GenMapRedUtils method createMoveTask.

/**
   * Create and add any dependent move tasks
   *
   * @param currTask the task producing the FileSinkOperator output; any move task found is made dependent on it
   * @param chDir whether the FileSinkOperator output should first be redirected to a temporary directory
   * @param fsOp the FileSinkOperator whose output is being moved
   * @param parseCtx parse context providing the compilation Context
   * @param mvTasks move tasks already generated for the query (searched when chDir is false)
   * @param hconf Hive configuration
   * @param dependencyTask optional dependency collection task, see addDependentMoveTasks
   * @return the original final directory of the FileSinkOperator when chDir is true, null otherwise
   */
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
    Path dest = null;
    if (chDir) {
        FileSinkDesc fileSinkDesc = fsOp.getConf();
        dest = fileSinkDesc.getFinalDirName();
        // generate the temporary file
        // it must be on the same file system as the current destination
        Context baseCtx = parseCtx.getContext();
        // Create the required temporary file in the HDFS location if the destination
        // path of the FileSinkOperator table is a blobstore path.
        Path tmpDir = baseCtx.getTempDirForPath(fileSinkDesc.getDestPath(), true);
        // Change all the linked file sink descriptors
        if (fileSinkDesc.isLinkedFileSink()) {
            for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
                fsConf.setParentDir(tmpDir);
                fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName()));
            }
        } else {
            fileSinkDesc.setDirName(tmpDir);
        }
    }
    Task<MoveWork> mvTask = null;
    if (!chDir) {
        mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOp);
    }
    // Set the move task to be dependent on the current task
    if (mvTask != null) {
        GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
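
The Context call of interest here is getTempDirForPath. A minimal sketch, assuming a ParseContext from compilation; the class name is invented.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.parse.ParseContext;

// Hypothetical helper: pick a temporary directory for a FileSink destination. Passing
// true as the second argument requests an HDFS temp dir when the destination is a
// blobstore path, as in createMoveTask above.
public class ScratchDirSketch {
    public static Path scratchFor(ParseContext parseCtx, Path destPath) {
        Context baseCtx = parseCtx.getContext();
        return baseCtx.getTempDirForPath(destPath, true);
    }
}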

Example 25 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class SparkSkewJoinProcFactory method splitTask.

/**
   * If the join is not in a leaf ReduceWork, the spark task has to be split into 2 tasks.
   */
private static void splitTask(SparkTask currentTask, ReduceWork reduceWork, ParseContext parseContext) throws SemanticException {
    SparkWork currentWork = currentTask.getWork();
    Set<Operator<?>> reduceSinkSet = SparkMapJoinResolver.getOp(reduceWork, ReduceSinkOperator.class);
    if (currentWork.getChildren(reduceWork).size() == 1 && canSplit(currentWork) && reduceSinkSet.size() == 1) {
        ReduceSinkOperator reduceSink = (ReduceSinkOperator) reduceSinkSet.iterator().next();
        BaseWork childWork = currentWork.getChildren(reduceWork).get(0);
        SparkEdgeProperty originEdge = currentWork.getEdgeProperty(reduceWork, childWork);
        // disconnect the reduce work from its child. this should produce two isolated sub graphs
        currentWork.disconnect(reduceWork, childWork);
        // move works following the current reduce work into a new spark work
        SparkWork newWork = new SparkWork(parseContext.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
        newWork.add(childWork);
        copyWorkGraph(currentWork, newWork, childWork);
        // remove them from current spark work
        for (BaseWork baseWork : newWork.getAllWorkUnsorted()) {
            currentWork.remove(baseWork);
            currentWork.getCloneToWork().remove(baseWork);
        }
        // create TS to read intermediate data
        Context baseCtx = parseContext.getContext();
        Path taskTmpDir = baseCtx.getMRTmpPath();
        Operator<? extends OperatorDesc> rsParent = reduceSink.getParentOperators().get(0);
        TableDesc tableDesc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(rsParent.getSchema(), "temporarycol"));
        // this will insert FS and TS between the RS and its parent
        TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(rsParent, reduceSink, taskTmpDir, tableDesc, parseContext);
        // create new MapWork
        MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
        mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
        newWork.add(mapWork);
        newWork.connect(mapWork, childWork, originEdge);
        // setup the new map work
        String streamDesc = taskTmpDir.toUri().toString();
        if (GenMapRedUtils.needsTagging((ReduceWork) childWork)) {
            Operator<? extends OperatorDesc> childReducer = ((ReduceWork) childWork).getReducer();
            String id = null;
            if (childReducer instanceof JoinOperator) {
                if (parseContext.getJoinOps().contains(childReducer)) {
                    id = ((JoinOperator) childReducer).getConf().getId();
                }
            } else if (childReducer instanceof MapJoinOperator) {
                if (parseContext.getMapJoinOps().contains(childReducer)) {
                    id = ((MapJoinOperator) childReducer).getConf().getId();
                }
            } else if (childReducer instanceof SMBMapJoinOperator) {
                if (parseContext.getSmbMapJoinOps().contains(childReducer)) {
                    id = ((SMBMapJoinOperator) childReducer).getConf().getId();
                }
            }
            if (id != null) {
                streamDesc = id + ":$INTNAME";
            } else {
                streamDesc = "$INTNAME";
            }
            String origStreamDesc = streamDesc;
            int pos = 0;
            while (mapWork.getAliasToWork().get(streamDesc) != null) {
                streamDesc = origStreamDesc.concat(String.valueOf(++pos));
            }
        }
        GenMapRedUtils.setTaskPlan(taskTmpDir, streamDesc, tableScanOp, mapWork, false, tableDesc);
        // insert the new task between current task and its child
        @SuppressWarnings("unchecked") Task<? extends Serializable> newTask = TaskFactory.get(newWork, parseContext.getConf());
        List<Task<? extends Serializable>> childTasks = currentTask.getChildTasks();
        // must have at most one child
        if (childTasks != null && childTasks.size() > 0) {
            Task<? extends Serializable> childTask = childTasks.get(0);
            currentTask.removeDependentTask(childTask);
            newTask.addDependentTask(childTask);
        }
        currentTask.addDependentTask(newTask);
        newTask.setFetchSource(currentTask.isFetchSource());
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Path(org.apache.hadoop.fs.Path) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SparkEdgeProperty(org.apache.hadoop.hive.ql.plan.SparkEdgeProperty) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)
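
Here the Context supplies the MapReduce scratch path used to stage the data passed between the two split Spark tasks. A minimal sketch, assuming a ParseContext available during optimization; the class name is invented.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.parse.ParseContext;

// Hypothetical helper: obtain the scratch directory where the intermediate table
// between the split tasks is written, as splitTask does above.
public class IntermediateDirSketch {
    public static Path intermediateDir(ParseContext parseContext) {
        Context baseCtx = parseContext.getContext();
        return baseCtx.getMRTmpPath();
    }
}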

Aggregations

Context (org.apache.hadoop.hive.ql.Context) - 47
Path (org.apache.hadoop.fs.Path) - 19
IOException (java.io.IOException) - 15
DriverContext (org.apache.hadoop.hive.ql.DriverContext) - 15
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext) - 13
FileSystem (org.apache.hadoop.fs.FileSystem) - 9
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) - 9
Serializable (java.io.Serializable) - 8
Task (org.apache.hadoop.hive.ql.exec.Task) - 7
HiveConf (org.apache.hadoop.hive.conf.HiveConf) - 5
Table (org.apache.hadoop.hive.ql.metadata.Table) - 5
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext) - 5
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc) - 5
ArrayList (java.util.ArrayList) - 4
EnvironmentContext (org.apache.hadoop.hive.metastore.api.EnvironmentContext) - 4
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) - 4
HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) - 4
MapWork (org.apache.hadoop.hive.ql.plan.MapWork) - 4
JobClient (org.apache.hadoop.mapred.JobClient) - 4
JobConf (org.apache.hadoop.mapred.JobConf) - 4