Examples with LinkedHashSet - java.util.LinkedHashSet

Example 21 with LinkedHashSet

use of java.util.LinkedHashSet in project hive by apache.

the class TaskCompiler method compile.

@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks, final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {
    Context ctx = pCtx.getContext();
    GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
    List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>();
    List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
    List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
    boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
    if (pCtx.getFetchTask() != null) {
        if (pCtx.getFetchTask().getTblDesc() == null) {
            return;
        }
        pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
        TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
        // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
        if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
            if (SessionState.get().isHiveServerQuery()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
            } else {
                String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
                if (formatterName == null || formatterName.isEmpty()) {
                    conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
                }
            }
        }
        return;
    }
    optimizeOperatorPlan(pCtx, inputs, outputs);
    /*
     * In case of a select, use a fetch task instead of a move task.
     * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
     * a column stats task later.
     */
    if (pCtx.getQueryProperties().isQuery() && !isCStats) {
        if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
            throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
        }
        LoadFileDesc loadFileDesc = loadFileWork.get(0);
        String cols = loadFileDesc.getColumns();
        String colTypes = loadFileDesc.getColumnTypes();
        String resFileFormat;
        TableDesc resultTab = pCtx.getFetchTableDesc();
        if (resultTab == null) {
            resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
            if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            } else {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
            }
        } else {
            if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB).equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            }
        }
        FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
        boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
        fetch.setHiveServerQuery(isHiveServerQuery);
        fetch.setSource(pCtx.getFetchSource());
        fetch.setSink(pCtx.getFetchSink());
        if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
            fetch.setIsUsingThriftJDBCBinarySerDe(true);
        } else {
            fetch.setIsUsingThriftJDBCBinarySerDe(false);
        }
        pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf));
        // For the FetchTask, the limit optimization requires we fetch all the rows
        // in memory and count how many rows we get. It's not practical if the
        // limit factor is too big
        int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
            LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
            globalLimitCtx.disableOpt();
        }
        if (outerQueryLimit == 0) {
            // Believe it or not, some tools do generate queries with limit 0 and than expect
            // query to run quickly. Lets meet their requirement.
            LOG.info("Limit 0. No query execution needed.");
            return;
        }
    } else if (!isCStats) {
        for (LoadTableDesc ltd : loadTableWork) {
            Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
            mvTask.add(tsk);
            // Check to see if we are stale'ing any indexes and auto-update them if we want
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
                IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf);
                try {
                    List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
                    for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
                        tsk.addDependentTask(updateTask);
                    }
                } catch (HiveException e) {
                    console.printInfo("WARNING: could not auto-update stale indexes, which are not in sync");
                }
            }
        }
        boolean oneLoadFile = true;
        for (LoadFileDesc lfd : loadFileWork) {
            if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
                // should not have more than 1 load file for
                assert (oneLoadFile);
                // CTAS
                // make the movetask's destination directory the table's destination.
                Path location;
                String loc = pCtx.getQueryProperties().isCTAS() ? pCtx.getCreateTable().getLocation() : pCtx.getCreateViewDesc().getLocation();
                if (loc == null) {
                    // get the default location
                    Path targetPath;
                    try {
                        String protoName = null;
                        if (pCtx.getQueryProperties().isCTAS()) {
                            protoName = pCtx.getCreateTable().getTableName();
                        } else if (pCtx.getQueryProperties().isMaterializedView()) {
                            protoName = pCtx.getCreateViewDesc().getViewName();
                        }
                        String[] names = Utilities.getDbTableName(protoName);
                        if (!db.databaseExists(names[0])) {
                            throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
                        }
                        Warehouse wh = new Warehouse(conf);
                        targetPath = wh.getTablePath(db.getDatabase(names[0]), names[1]);
                    } catch (HiveException e) {
                        throw new SemanticException(e);
                    } catch (MetaException e) {
                        throw new SemanticException(e);
                    }
                    location = targetPath;
                } else {
                    location = new Path(loc);
                }
                lfd.setTargetDir(location);
                oneLoadFile = false;
            }
            mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf));
        }
    }
    generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
    }
    // to be used, please do so
    for (Task<? extends Serializable> rootTask : rootTasks) {
        setInputFormat(rootTask);
    }
    optimizeTaskPlan(rootTasks, pCtx, ctx);
    /*
     * If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     */
    if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
        Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
        getLeafTasks(rootTasks, leafTasks);
        if (isCStats) {
            genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0);
        } else {
            for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
                if (!columnStatsAutoGatherContext.isInsertInto()) {
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0);
                } else {
                    int numBitVector;
                    try {
                        numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
                    } catch (Exception e) {
                        throw new SemanticException(e.getMessage());
                    }
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, numBitVector);
                }
            }
        }
    }
    decideExecMode(rootTasks, ctx, globalLimitCtx);
    if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateTableDesc crtTblDesc = pCtx.getCreateTable();
        crtTblDesc.validate(conf);
        Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc), conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
        Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc), conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
    }
    if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
        LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
        pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }
    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
        LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
        globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
        List<ExecDriver> mrTasks = Utilities.getMRTasks(rootTasks);
        for (ExecDriver tsk : mrTasks) {
            tsk.setRetryCmdWhenFail(true);
        }
        List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks);
        for (SparkTask sparkTask : sparkTasks) {
            sparkTask.setRetryCmdWhenFail(true);
        }
    }
    Interner<TableDesc> interner = Interners.newStrongInterner();
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.internTableDesc(rootTask, interner);
        GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
    }
}

Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LinkedHashSet(java.util.LinkedHashSet) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) Task(org.apache.hadoop.hive.ql.exec.Task) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) ColumnStatsTask(org.apache.hadoop.hive.ql.exec.ColumnStatsTask) Serializable(java.io.Serializable) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) ArrayList(java.util.ArrayList) ThriftFormatter(org.apache.hadoop.hive.serde2.thrift.ThriftFormatter) CreateViewDesc(org.apache.hadoop.hive.ql.plan.CreateViewDesc) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) Path(org.apache.hadoop.fs.Path) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) DefaultFetchFormatter(org.apache.hadoop.hive.serde2.DefaultFetchFormatter) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc)

Example 22 with LinkedHashSet

use of java.util.LinkedHashSet in project hive by apache.

the class Driver method execute.

public int execute(boolean deferClose) throws CommandNeedRetryException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);
    boolean noName = StringUtils.isEmpty(conf.get(MRJobConfig.JOB_NAME));
    int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
    Metrics metrics = MetricsFactory.getInstance();
    String queryId = conf.getVar(HiveConf.ConfVars.HIVEQUERYID);
    // Get the query string from the conf file as the compileInternal() method might
    // hide sensitive information during query redaction.
    String queryStr = conf.getQueryString();
    lDrvState.stateLock.lock();
    try {
        // a combined compile/execute in runInternal, throws the error
        if (lDrvState.driverState != DriverState.COMPILED && lDrvState.driverState != DriverState.EXECUTING) {
            SQLState = "HY008";
            errorMessage = "FAILED: query " + queryStr + " has " + (lDrvState.driverState == DriverState.INTERRUPT ? "been cancelled" : "not been compiled.");
            console.printError(errorMessage);
            return 1000;
        } else {
            lDrvState.driverState = DriverState.EXECUTING;
        }
    } finally {
        lDrvState.stateLock.unlock();
    }
    maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
    HookContext hookContext = null;
    // Whether there's any error occurred during query execution. Used for query lifetime hook.
    boolean executionError = false;
    try {
        LOG.info("Executing command(queryId=" + queryId + "): " + queryStr);
        // compile and execute can get called from different threads in case of HS2
        // so clear timing in this thread's Hive object before proceeding.
        Hive.get().clearMetaCallTiming();
        plan.setStarted();
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
            SessionState.get().getHiveHistory().logPlanProgress(plan);
        }
        resStream = null;
        SessionState ss = SessionState.get();
        hookContext = new HookContext(plan, queryState, ctx.getPathToCS(), ss.getUserFromAuthenticator(), ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId, ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger);
        hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
        for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
            if (peh instanceof ExecuteWithHookContext) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
                ((ExecuteWithHookContext) peh).run(hookContext);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
            } else if (peh instanceof PreExecute) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
                ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), Utils.getUGI());
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
            }
        }
        // Trigger query hooks before query execution.
        if (queryHooks != null && !queryHooks.isEmpty()) {
            QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
            qhc.setHiveConf(conf);
            qhc.setCommand(queryStr);
            qhc.setHookContext(hookContext);
            for (QueryLifeTimeHook hook : queryHooks) {
                hook.beforeExecution(qhc);
            }
        }
        setQueryDisplays(plan.getRootTasks());
        int mrJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
        int jobs = mrJobs + Utilities.getTezTasks(plan.getRootTasks()).size() + Utilities.getSparkTasks(plan.getRootTasks()).size();
        if (jobs > 0) {
            logMrWarning(mrJobs);
            console.printInfo("Query ID = " + queryId);
            console.printInfo("Total jobs = " + jobs);
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
            SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
        }
        String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
        if (isInterrupted()) {
            return handleInterruption("before running tasks.");
        }
        DriverContext driverCxt = new DriverContext(ctx);
        driverCxt.prepare(plan);
        ctx.setHDFSCleanup(true);
        // for canceling the query (should be bound to session?)
        this.driverCxt = driverCxt;
        SessionState.get().setMapRedStats(new LinkedHashMap<String, MapRedStats>());
        SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
        SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());
        // Add root Tasks to runnable
        for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
            // incorrect results.
            assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
            driverCxt.addToRunnable(tsk);
            if (metrics != null) {
                tsk.updateTaskMetrics(metrics);
            }
        }
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS);
        // Loop while you either have tasks running, or tasks queued up
        while (driverCxt.isRunning()) {
            // Launch upto maxthreads tasks
            Task<? extends Serializable> task;
            while ((task = driverCxt.getRunnable(maxthreads)) != null) {
                TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt);
                if (!runner.isRunning()) {
                    break;
                }
            }
            // poll the Tasks to see which one completed
            TaskRunner tskRun = driverCxt.pollFinished();
            if (tskRun == null) {
                continue;
            }
            hookContext.addCompleteTask(tskRun);
            queryDisplay.setTaskResult(tskRun.getTask().getId(), tskRun.getTaskResult());
            Task<? extends Serializable> tsk = tskRun.getTask();
            TaskResult result = tskRun.getTaskResult();
            int exitVal = result.getExitVal();
            if (isInterrupted()) {
                return handleInterruption("when checking the execution result.");
            }
            if (exitVal != 0) {
                if (tsk.ifRetryCmdWhenFail()) {
                    driverCxt.shutdown();
                    // in case we decided to run everything in local mode, restore the
                    // the jobtracker setting to its initial value
                    ctx.restoreOriginalTracker();
                    throw new CommandNeedRetryException();
                }
                Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
                if (backupTask != null) {
                    setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
                    console.printError(errorMessage);
                    errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
                    console.printError(errorMessage);
                    // add backup task to runnable
                    if (DriverContext.isLaunchable(backupTask)) {
                        driverCxt.addToRunnable(backupTask);
                    }
                    continue;
                } else {
                    setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
                    invokeFailureHooks(perfLogger, hookContext, errorMessage + Strings.nullToEmpty(tsk.getDiagnosticsMessage()), result.getTaskError());
                    SQLState = "08S01";
                    console.printError(errorMessage);
                    driverCxt.shutdown();
                    // in case we decided to run everything in local mode, restore the
                    // the jobtracker setting to its initial value
                    ctx.restoreOriginalTracker();
                    return exitVal;
                }
            }
            driverCxt.finished(tskRun);
            if (SessionState.get() != null) {
                SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
                SessionState.get().getHiveHistory().endTask(queryId, tsk);
            }
            if (tsk.getChildTasks() != null) {
                for (Task<? extends Serializable> child : tsk.getChildTasks()) {
                    if (DriverContext.isLaunchable(child)) {
                        driverCxt.addToRunnable(child);
                    }
                }
            }
        }
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS);
        // in case we decided to run everything in local mode, restore the
        // the jobtracker setting to its initial value
        ctx.restoreOriginalTracker();
        if (driverCxt.isShutdown()) {
            SQLState = "HY008";
            errorMessage = "FAILED: Operation cancelled";
            invokeFailureHooks(perfLogger, hookContext, errorMessage, null);
            console.printError(errorMessage);
            return 1000;
        }
        // remove incomplete outputs.
        // Some incomplete outputs may be added at the beginning, for eg: for dynamic partitions.
        // remove them
        HashSet<WriteEntity> remOutputs = new LinkedHashSet<WriteEntity>();
        for (WriteEntity output : plan.getOutputs()) {
            if (!output.isComplete()) {
                remOutputs.add(output);
            }
        }
        for (WriteEntity output : remOutputs) {
            plan.getOutputs().remove(output);
        }
        hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
        // Get all the post execution hooks and execute them.
        for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
            if (peh instanceof ExecuteWithHookContext) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
                ((ExecuteWithHookContext) peh).run(hookContext);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
            } else if (peh instanceof PostExecute) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
                ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo() : null), Utils.getUGI());
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
            }
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
            SessionState.get().getHiveHistory().printRowCount(queryId);
        }
        releasePlan(plan);
    } catch (CommandNeedRetryException e) {
        executionError = true;
        throw e;
    } catch (Throwable e) {
        executionError = true;
        if (isInterrupted()) {
            return handleInterruption("during query execution: \n" + e.getMessage());
        }
        ctx.restoreOriginalTracker();
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
        }
        // TODO: do better with handling types of Exception here
        errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
        if (hookContext != null) {
            try {
                invokeFailureHooks(perfLogger, hookContext, errorMessage, e);
            } catch (Exception t) {
                LOG.warn("Failed to invoke failure hook", t);
            }
        }
        SQLState = "08S01";
        downstreamError = e;
        console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return (12);
    } finally {
        // Trigger query hooks after query completes its execution.
        try {
            if (queryHooks != null && !queryHooks.isEmpty()) {
                QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
                qhc.setHiveConf(conf);
                qhc.setCommand(queryStr);
                qhc.setHookContext(hookContext);
                for (QueryLifeTimeHook hook : queryHooks) {
                    hook.afterExecution(qhc, executionError);
                }
            }
        } catch (Exception e) {
            LOG.warn("Failed when invoking query after execution hook", e);
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().endQuery(queryId);
        }
        if (noName) {
            conf.set(MRJobConfig.JOB_NAME, "");
        }
        double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE) / 1000.00;
        ImmutableMap<String, Long> executionHMSTimings = dumpMetaCallTimingWithoutEx("execution");
        queryDisplay.setHmsTimings(QueryDisplay.Phase.EXECUTION, executionHMSTimings);
        Map<String, MapRedStats> stats = SessionState.get().getMapRedStats();
        if (stats != null && !stats.isEmpty()) {
            long totalCpu = 0;
            console.printInfo("MapReduce Jobs Launched: ");
            for (Map.Entry<String, MapRedStats> entry : stats.entrySet()) {
                console.printInfo("Stage-" + entry.getKey() + ": " + entry.getValue());
                totalCpu += entry.getValue().getCpuMSec();
            }
            console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
        }
        boolean isInterrupted = isInterrupted();
        if (isInterrupted && !deferClose) {
            closeInProcess(true);
        }
        lDrvState.stateLock.lock();
        try {
            if (isInterrupted) {
                if (!deferClose) {
                    lDrvState.driverState = DriverState.ERROR;
                }
            } else {
                lDrvState.driverState = executionError ? DriverState.ERROR : DriverState.EXECUTED;
            }
        } finally {
            lDrvState.stateLock.unlock();
        }
        if (isInterrupted) {
            LOG.info("Executing command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
        } else {
            LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
        }
    }
    if (console != null) {
        console.printInfo("OK");
    }
    return (0);
}

Also used : LinkedHashSet(java.util.LinkedHashSet) SessionState(org.apache.hadoop.hive.ql.session.SessionState) QueryLifeTimeHookContextImpl(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContextImpl) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ExecuteWithHookContext(org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext) HookContext(org.apache.hadoop.hive.ql.hooks.HookContext) QueryLifeTimeHookContext(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext) HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) QueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook) MetricsQueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.MetricsQueryLifeTimeHook) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner) Metrics(org.apache.hadoop.hive.common.metrics.common.Metrics) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) LinkedList(java.util.LinkedList) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) HiveSemanticAnalyzerHook(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook) QueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook) Hook(org.apache.hadoop.hive.ql.hooks.Hook) MetricsQueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.MetricsQueryLifeTimeHook) PreExecute(org.apache.hadoop.hive.ql.hooks.PreExecute) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) PostExecute(org.apache.hadoop.hive.ql.hooks.PostExecute) ExecuteWithHookContext(org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext) TaskResult(org.apache.hadoop.hive.ql.exec.TaskResult) QueryLifeTimeHookContext(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 23 with LinkedHashSet

use of java.util.LinkedHashSet in project hive by apache.

the class HiveRelFieldTrimmer method trimFields.

/**
   * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
   * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
   */
public TrimResult trimFields(HiveMultiJoin join, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final int fieldCount = join.getRowType().getFieldCount();
    final RexNode conditionExpr = join.getCondition();
    final List<RexNode> joinFilters = join.getJoinFilters();
    // Add in fields used in the condition.
    final Set<RelDataTypeField> combinedInputExtraFields = new LinkedHashSet<RelDataTypeField>(extraFields);
    RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(combinedInputExtraFields);
    inputFinder.inputBitSet.addAll(fieldsUsed);
    conditionExpr.accept(inputFinder);
    final ImmutableBitSet fieldsUsedPlus = inputFinder.inputBitSet.build();
    int inputStartPos = 0;
    int changeCount = 0;
    int newFieldCount = 0;
    List<RelNode> newInputs = new ArrayList<RelNode>();
    List<Mapping> inputMappings = new ArrayList<Mapping>();
    for (RelNode input : join.getInputs()) {
        final RelDataType inputRowType = input.getRowType();
        final int inputFieldCount = inputRowType.getFieldCount();
        // Compute required mapping.
        ImmutableBitSet.Builder inputFieldsUsed = ImmutableBitSet.builder();
        for (int bit : fieldsUsedPlus) {
            if (bit >= inputStartPos && bit < inputStartPos + inputFieldCount) {
                inputFieldsUsed.set(bit - inputStartPos);
            }
        }
        Set<RelDataTypeField> inputExtraFields = Collections.<RelDataTypeField>emptySet();
        TrimResult trimResult = trimChild(join, input, inputFieldsUsed.build(), inputExtraFields);
        newInputs.add(trimResult.left);
        if (trimResult.left != input) {
            ++changeCount;
        }
        final Mapping inputMapping = trimResult.right;
        inputMappings.add(inputMapping);
        // Move offset to point to start of next input.
        inputStartPos += inputFieldCount;
        newFieldCount += inputMapping.getTargetCount();
    }
    Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, newFieldCount);
    int offset = 0;
    int newOffset = 0;
    for (int i = 0; i < inputMappings.size(); i++) {
        Mapping inputMapping = inputMappings.get(i);
        for (IntPair pair : inputMapping) {
            mapping.set(pair.source + offset, pair.target + newOffset);
        }
        offset += inputMapping.getSourceCount();
        newOffset += inputMapping.getTargetCount();
    }
    if (changeCount == 0 && mapping.isIdentity()) {
        return new TrimResult(join, Mappings.createIdentity(fieldCount));
    }
    // Build new join.
    final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(mapping, newInputs.toArray(new RelNode[newInputs.size()]));
    RexNode newConditionExpr = conditionExpr.accept(shuttle);
    List<RexNode> newJoinFilters = Lists.newArrayList();
    for (RexNode joinFilter : joinFilters) {
        newJoinFilters.add(joinFilter.accept(shuttle));
    }
    final RelDataType newRowType = RelOptUtil.permute(join.getCluster().getTypeFactory(), join.getRowType(), mapping);
    final RelNode newJoin = new HiveMultiJoin(join.getCluster(), newInputs, newConditionExpr, newRowType, join.getJoinInputs(), join.getJoinTypes(), newJoinFilters);
    return new TrimResult(newJoin, mapping);
}

Also used : LinkedHashSet(java.util.LinkedHashSet) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) Mapping(org.apache.calcite.util.mapping.Mapping) RelDataType(org.apache.calcite.rel.type.RelDataType) IntPair(org.apache.calcite.util.mapping.IntPair) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) RexNode(org.apache.calcite.rex.RexNode)

Example 24 with LinkedHashSet

use of java.util.LinkedHashSet in project hive by apache.

the class PartitionPruner method prune.

/**
   * Get the partition list for the table that satisfies the partition pruner
   * condition.
   *
   * @param tab
   *          the table object for the alias
   * @param prunerExpr
   *          the pruner expression for the alias
   * @param conf
   *          for checking whether "strict" mode is on.
   * @param alias
   *          for generating error message only.
   * @param prunedPartitionsMap
   *          cached result for the table
   * @return the partition list for the table that satisfies the partition
   *         pruner condition.
   * @throws SemanticException
   */
public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, HiveConf conf, String alias, Map<String, PrunedPartitionList> prunedPartitionsMap) throws SemanticException {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Started pruning partition");
        LOG.trace("dbname = " + tab.getDbName());
        LOG.trace("tabname = " + tab.getTableName());
        LOG.trace("prune Expression = " + (prunerExpr == null ? "" : prunerExpr));
    }
    String key = tab.getDbName() + "." + tab.getTableName() + ";";
    if (!tab.isPartitioned()) {
        // If the table is not partitioned, return empty list.
        return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
    }
    if (!hasColumnExpr(prunerExpr)) {
        // If the "strict" mode is on, we have to provide partition pruner for each table.
        String error = StrictChecks.checkNoPartitionFilter(conf);
        if (error != null) {
            throw new SemanticException(error + " No partition predicate for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\"");
        }
    }
    if (prunerExpr == null) {
        // In non-strict mode and there is no predicates at all - get everything.
        return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
    }
    Set<String> partColsUsedInFilter = new LinkedHashSet<String>();
    // Replace virtual columns with nulls. See javadoc for details.
    prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
    // Remove all parts that are not partition columns. See javadoc for details.
    ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
    String oldFilter = prunerExpr.getExprString();
    if (compactExpr == null || isBooleanExpr(compactExpr)) {
        if (isFalseExpr(compactExpr)) {
            return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(0), new ArrayList<String>(0), false);
        }
        // For null and true values, return every partition
        return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Filter w/ compacting: " + compactExpr.getExprString() + "; filter w/o compacting: " + oldFilter);
    }
    key = key + compactExpr.getExprString();
    PrunedPartitionList ppList = prunedPartitionsMap.get(key);
    if (ppList != null) {
        return ppList;
    }
    ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc) compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
    prunedPartitionsMap.put(key, ppList);
    return ppList;
}

Also used : LinkedHashSet(java.util.LinkedHashSet) Partition(org.apache.hadoop.hive.ql.metadata.Partition) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 25 with LinkedHashSet

use of java.util.LinkedHashSet in project hive by apache.

the class TaskExecutorService method getExecutorsStatus.

@Override
public Set<String> getExecutorsStatus() {
    // TODO Change this method to make the output easier to parse (parse programmatically)
    Set<String> result = new LinkedHashSet<>();
    Set<String> running = new LinkedHashSet<>();
    Set<String> waiting = new LinkedHashSet<>();
    StringBuilder value = new StringBuilder();
    for (Map.Entry<String, TaskWrapper> e : knownTasks.entrySet()) {
        boolean isWaiting;
        value.setLength(0);
        value.append(e.getKey());
        TaskWrapper task = e.getValue();
        boolean isFirst = true;
        TaskRunnerCallable c = task.getTaskRunnerCallable();
        if (c != null && c.getVertexSpec() != null) {
            SignableVertexSpec fs = c.getVertexSpec();
            value.append(isFirst ? " (" : ", ").append(c.getQueryId()).append("/").append(fs.getVertexName());
            isFirst = false;
        }
        value.append(isFirst ? " (" : ", ");
        if (task.isInWaitQueue()) {
            isWaiting = true;
            value.append("in queue");
        } else if (c != null) {
            long startTime = c.getStartTime();
            if (startTime != 0) {
                isWaiting = false;
                value.append("started at ").append(sdf.get().format(new Date(startTime)));
            } else {
                isWaiting = false;
                value.append("not started");
            }
        } else {
            isWaiting = true;
            value.append("has no callable");
        }
        if (task.isInPreemptionQueue()) {
            value.append(", ").append("preemptable");
        }
        value.append(")");
        if (isWaiting) {
            waiting.add(value.toString());
        } else {
            running.add(value.toString());
        }
    }
    result.addAll(waiting);
    result.addAll(running);
    return result;
}

Also used : LinkedHashSet(java.util.LinkedHashSet) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) ConcurrentMap(java.util.concurrent.ConcurrentMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Date(java.util.Date)

Aggregations

LinkedHashSet (java.util.LinkedHashSet)1252 ArrayList (java.util.ArrayList)241 Set (java.util.Set)154 HashSet (java.util.HashSet)128 HashMap (java.util.HashMap)115 File (java.io.File)102 IOException (java.io.IOException)97 Map (java.util.Map)97 Test (org.junit.Test)94 List (java.util.List)86 LinkedHashMap (java.util.LinkedHashMap)77 ProcessResult (org.asqatasun.entity.audit.ProcessResult)77 SourceCodeRemark (org.asqatasun.entity.audit.SourceCodeRemark)73 LinkedList (java.util.LinkedList)51 Iterator (java.util.Iterator)38 TreeSet (java.util.TreeSet)34 URL (java.net.URL)33 Collection (java.util.Collection)28 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)24 ProcessRemark (org.asqatasun.entity.audit.ProcessRemark)23