
Example 36 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class MapRedTask method execute.

@Override
public int execute(DriverContext driverContext) {
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    try {
        if (ctx == null) {
            ctx = new Context(conf);
            ctxCreated = true;
        }
        // estimate number of reducers
        setNumberOfReducers();
        // auto-determine local mode if allowed
        if (!ctx.isLocalOnlyExecutionMode() && conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
            if (inputSummary == null) {
                inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null);
            }
            // set the values of totalInputFileSize and totalInputNumFiles, estimating them
            // if percentage block sampling is being used
            double samplePercentage = Utilities.getHighestSamplePercentage(work.getMapWork());
            totalInputFileSize = Utilities.getTotalInputFileSize(inputSummary, work.getMapWork(), samplePercentage);
            totalInputNumFiles = Utilities.getTotalInputNumFiles(inputSummary, work.getMapWork(), samplePercentage);
            // at this point the number of reducers is precisely defined in the plan
            int numReducers = work.getReduceWork() == null ? 0 : work.getReduceWork().getNumReduceTasks();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + getId() + ", Summary: " + totalInputFileSize + "," + totalInputNumFiles + "," + numReducers);
            }
            String reason = MapRedTask.isEligibleForLocalMode(conf, numReducers, totalInputFileSize, totalInputNumFiles);
            if (reason == null) {
                // clone configuration before modifying it on per-task basis
                cloneConf();
                ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
                console.printInfo("Selecting local mode for task: " + getId());
                this.setLocalMode(true);
            } else {
                console.printInfo("Cannot run job locally: " + reason);
                this.setLocalMode(false);
            }
        }
        runningViaChild = conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD);
        if (!runningViaChild) {
            // since we are running this mapred task in the same jvm, set the local-mode
            // properties on the job conf here so that ExecDriver sees them as well.
            if (this.isLocalMode()) {
                // save the original job tracker
                ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(job));
                // change it to local
                ShimLoader.getHadoopShims().setJobLauncherRpcAddress(job, "local");
            }
            // we are not running this mapred task via child jvm
            // so directly invoke ExecDriver
            int ret = super.execute(driverContext);
            // restore the previous properties for framework name, RM address etc.
            if (this.isLocalMode()) {
                // restore the local job tracker back to original
                ctx.restoreOriginalTracker();
            }
            return ret;
        }
        // we need to edit the configuration to setup cmdline. clone it first
        cloneConf();
        // propagate input format if necessary
        super.setInputAttributes(conf);
        // enable assertion
        String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
        String hiveJar = conf.getJar();
        String libJars = super.getResource(conf, ResourceType.JAR);
        String libJarsOption = StringUtils.isEmpty(libJars) ? " " : " -libjars " + libJars + " ";
        // Generate the hiveConfArgs after potentially adding the jars
        String hiveConfArgs = generateCmdLine(conf, ctx);
        // write out the plan to a local file
        Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
        MapredWork plan = getWork();
        LOG.info("Generating plan file " + planPath.toString());
        OutputStream out = null;
        try {
            out = FileSystem.getLocal(conf).create(planPath);
            SerializationUtilities.serializePlan(plan, out);
            out.close();
            out = null;
        } finally {
            IOUtils.closeQuietly(out);
        }
        String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
        String jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
        String cmdLine = hadoopExec + " jar " + jarCmd + " -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
        String workDir = (new File(".")).getCanonicalPath();
        String files = super.getResource(conf, ResourceType.FILE);
        if (!files.isEmpty()) {
            cmdLine = cmdLine + " -files " + files;
            workDir = ctx.getLocalTmpPath().toUri().getPath();
            if (!(new File(workDir)).mkdir()) {
                throw new IOException("Cannot create tmp working dir: " + workDir);
            }
            for (String f : StringUtils.split(files, ',')) {
                Path p = new Path(f);
                String target = p.toUri().getPath();
                String link = workDir + Path.SEPARATOR + p.getName();
                if (FileUtil.symLink(target, link) != 0) {
                    throw new IOException("Cannot link to added file: " + target + " from: " + link);
                }
            }
        }
        LOG.info("Executing: " + cmdLine);
        // Inherit Java system variables
        String hadoopOpts;
        StringBuilder sb = new StringBuilder();
        Properties p = System.getProperties();
        for (String element : HIVE_SYS_PROP) {
            if (p.containsKey(element)) {
                sb.append(" -D" + element + "=" + p.getProperty(element));
            }
        }
        hadoopOpts = sb.toString();
        // Inherit the environment variables
        String[] env;
        Map<String, String> variables = new HashMap<String, String>(System.getenv());
        if (ShimLoader.getHadoopShims().isLocalMode(conf)) {
            // if we are running in local mode - then the amount of memory used
            // by the child jvm can no longer default to the memory used by the
            // parent jvm
            int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
            if (hadoopMem == 0) {
                // remove env var that would default child jvm to use parent's memory
                // as default. child jvm would use default memory for a hadoop client
                variables.remove(HADOOP_MEM_KEY);
            } else {
                // user specified the memory for local mode hadoop run
                variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
            }
        } else {
            // nothing to do - we are not running in local mode - only submitting
            // the job via a child process. in this case it's appropriate that the
            // child jvm use the same memory as the parent jvm
        }
        if (variables.containsKey(HADOOP_OPTS_KEY)) {
            variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
        } else {
            variables.put(HADOOP_OPTS_KEY, hadoopOpts);
        }
        if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
            configureDebugVariablesForChildJVM(variables);
        }
        env = new String[variables.size()];
        int pos = 0;
        for (Map.Entry<String, String> entry : variables.entrySet()) {
            String name = entry.getKey();
            String value = entry.getValue();
            env[pos++] = name + "=" + value;
        }
        // Run ExecDriver in another JVM
        executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
        CachingPrintStream errPrintStream = new CachingPrintStream(SessionState.getConsole().getChildErrStream());
        StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, SessionState.getConsole().getChildOutStream());
        StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
        outPrinter.start();
        errPrinter.start();
        int exitVal = jobExecHelper.progressLocal(executor, getId());
        // wait for stream threads to finish
        outPrinter.join();
        errPrinter.join();
        if (exitVal != 0) {
            LOG.error("Execution failed with exit status: " + exitVal);
            if (SessionState.get() != null) {
                SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
            }
        } else {
            LOG.info("Execution completed successfully");
        }
        return exitVal;
    } catch (Exception e) {
        LOG.error("Got exception", e);
        return (1);
    } finally {
        try {
            // if we created the context, make sure to clear it out
            if (ctxCreated) {
                ctx.clear();
            }
        } catch (Exception e) {
            LOG.error("Exception: ", e);
        }
    }
}
Also used: Context (org.apache.hadoop.hive.ql.Context), DriverContext (org.apache.hadoop.hive.ql.DriverContext), Path (org.apache.hadoop.fs.Path), HashMap (java.util.HashMap), OutputStream (java.io.OutputStream), IOException (java.io.IOException), Properties (java.util.Properties), CachingPrintStream (org.apache.hadoop.hive.common.io.CachingPrintStream), MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork), StreamPrinter (org.apache.hive.common.util.StreamPrinter), File (java.io.File), Map (java.util.Map)
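
The interesting part of the non-child branch above is how ExecDriver is launched in a separate JVM: an environment array is assembled from a map, the process is started with Runtime.exec, and its stdout/stderr are drained on separate threads before the exit code is read. Below is a minimal sketch of that launch-and-drain pattern using only the JDK; the command line and the HADOOP_OPTS value are placeholders, not Hive's actual invocation.

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

/**
 * Minimal sketch (plain JDK, not Hive code) of the child-JVM launch pattern above:
 * build an env array from a map, start the process, drain stdout/stderr on
 * separate threads, and wait for the exit code.
 */
public class ChildProcessSketch {

    private static Thread drain(InputStream in, String prefix) {
        Thread t = new Thread(() -> {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = r.readLine()) != null) {
                    System.out.println(prefix + line);
                }
            } catch (IOException ignored) {
                // stream closes when the child exits
            }
        });
        t.start();
        return t;
    }

    public static void main(String[] args) throws Exception {
        Map<String, String> variables = new HashMap<>(System.getenv());
        // Same idea as the HADOOP_OPTS handling above: append rather than overwrite.
        variables.merge("HADOOP_OPTS", " -Dtest.silent=true", (old, add) -> old + add);

        String[] env = variables.entrySet().stream()
                .map(e -> e.getKey() + "=" + e.getValue())
                .toArray(String[]::new);

        // Placeholder command standing in for "hadoop jar ... ExecDriver -plan plan.xml ...".
        String cmdLine = "java -version";
        Process child = Runtime.getRuntime().exec(cmdLine, env, new File("."));

        Thread out = drain(child.getInputStream(), "[child out] ");
        Thread err = drain(child.getErrorStream(), "[child err] ");
        int exitVal = child.waitFor();
        // wait for stream threads to finish, as the Hive task does with its StreamPrinters
        out.join();
        err.join();
        System.out.println("child exited with " + exitVal);
    }
}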

Example 37 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class HiveMaterializedViewsRegistry method parseQuery.

private static RelNode parseQuery(String viewQuery) {
    try {
        final ASTNode node = ParseUtils.parse(viewQuery);
        final QueryState qs = new QueryState(SessionState.get().getConf());
        CalcitePlanner analyzer = new CalcitePlanner(qs);
        analyzer.initCtx(new Context(SessionState.get().getConf()));
        analyzer.init(false);
        return analyzer.genLogicalPlan(node);
    } catch (Exception e) {
        // We could not parse the view
        return null;
    }
}
Also used: Context (org.apache.hadoop.hive.ql.Context), ASTNode (org.apache.hadoop.hive.ql.parse.ASTNode), CalcitePlanner (org.apache.hadoop.hive.ql.parse.CalcitePlanner), QueryState (org.apache.hadoop.hive.ql.QueryState), CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
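
Note the error-handling contract here: any failure while parsing or planning the view query is swallowed and reported as null, so the caller can simply skip materialized views whose definitions no longer parse. The following stand-alone sketch illustrates that contract with a hypothetical parser in place of CalcitePlanner.genLogicalPlan(); it is not Hive code.

import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Sketch of the "return null on any failure" contract used by parseQuery() above.
 * The parser here is a toy stand-in; callers filter out the nulls.
 */
public class ParseOrNullSketch {

    static <R> R parseOrNull(String query, Function<String, R> parser) {
        try {
            return parser.apply(query);
        } catch (Exception e) {
            // Mirror the snippet: do not propagate, just signal "no plan".
            return null;
        }
    }

    public static void main(String[] args) {
        // Toy "parser": accepts only strings that look like SELECT statements.
        Function<String, String> parser = q -> {
            if (!q.trim().toLowerCase().startsWith("select")) {
                throw new IllegalArgumentException("cannot parse: " + q);
            }
            return "PLAN(" + q + ")";
        };

        List<String> definitions = List.of("select 1", "definitely not sql", "select a from t");
        List<String> plans = definitions.stream()
                .map(q -> parseOrNull(q, parser))
                .filter(Objects::nonNull)
                .collect(Collectors.toList());

        System.out.println("usable plans: " + plans.size() + " of " + definitions.size());
    }
}

The trade-off of this style is that the root cause is discarded; logging the exception before returning null would make silently skipped views easier to diagnose.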

Example 38 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class TestContext method setUp.

@Before
public void setUp() throws IOException {
    /* Only called to create session directories used by the Context class */
    SessionState.start(conf);
    SessionState.detachSession();
    context = new Context(conf);
}
Also used: Context (org.apache.hadoop.hive.ql.Context), Before (org.junit.Before)
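
For completeness, here is a hedged sketch of the full test lifecycle this setUp implies. The field types are assumed and this is not the actual TestContext class: session directories are created first, the Context is built from the same conf, and it is cleared again after each test, mirroring the ctx.clear() calls in the task examples above.

import java.io.IOException;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertNotNull;

/** Sketch of a Context test lifecycle; assumes HiveConf/Context fields like TestContext's. */
public class ContextLifecycleSketch {

    private final HiveConf conf = new HiveConf();
    private Context context;

    @Before
    public void setUp() throws IOException {
        // Session directories must exist before a Context can be created.
        SessionState.start(conf);
        SessionState.detachSession();
        context = new Context(conf);
    }

    @Test
    public void contextIsCreated() {
        assertNotNull(context);
    }

    @After
    public void tearDown() {
        try {
            if (context != null) {
                // Best-effort cleanup, same as the finally blocks in the task examples.
                context.clear();
            }
        } catch (Exception e) {
            // nothing more we can do during cleanup
        }
    }
}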

Example 39 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class ColumnTruncateTask method execute.

/**
 * start a new map-reduce job to do the truncation, almost the same as ExecDriver.
 */
@Override
public int execute(DriverContext driverContext) {
    HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, BucketizedHiveInputFormat.class.getName());
    success = true;
    HiveFileFormatUtils.prepareJobOutput(job);
    job.setOutputFormat(HiveOutputFormatImpl.class);
    job.setMapperClass(work.getMapperClass());
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    try {
        if (ctx == null) {
            ctx = new Context(job);
            ctxCreated = true;
        }
    } catch (IOException e) {
        e.printStackTrace();
        console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 5;
    }
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    if (work.getNumMapTasks() != null) {
        job.setNumMapTasks(work.getNumMapTasks());
    }
    // zero reducers
    job.setNumReduceTasks(0);
    if (work.getMinSplitSize() != null) {
        HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
    }
    if (work.getInputformat() != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
    }
    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
    LOG.info("Using " + inpFormat);
    try {
        job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    Path outputPath = this.work.getOutputDir();
    Path tempOutPath = Utilities.toTempPath(outputPath);
    try {
        FileSystem fs = tempOutPath.getFileSystem(job);
        if (!fs.exists(tempOutPath)) {
            fs.mkdirs(tempOutPath);
        }
    } catch (IOException e) {
        console.printError("Can't make path " + outputPath + " : " + e.getMessage());
        return 6;
    }
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    int returnVal = 0;
    RunningJob rj = null;
    boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
    String jobName = null;
    if (noName && this.getQueryPlan() != null) {
        int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
        jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
    }
    if (noName) {
        // This is for a special case to ensure unit tests pass
        job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
    }
    try {
        addInputPaths(job, work);
        MapredWork mrWork = new MapredWork();
        mrWork.setMapWork(work);
        Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
        // remove the pwd from the conf file so that the job tracker doesn't show it
        // in its logs
        String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
        if (pwd != null) {
            HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
        }
        JobClient jc = new JobClient(job);
        String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
        if (!addedJars.isEmpty()) {
            job.set("tmpjars", addedJars);
        }
        // make this client wait if the job tracker is not behaving well.
        Throttle.checkJobTracker(job, LOG);
        // Finally SUBMIT the JOB!
        rj = jc.submitJob(job);
        this.jobID = rj.getJobID();
        returnVal = jobExecHelper.progress(rj, jc, ctx);
        success = (returnVal == 0);
    } catch (Exception e) {
        e.printStackTrace();
        setException(e);
        String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
        if (rj != null) {
            mesg = "Ended Job = " + rj.getJobID() + mesg;
        } else {
            mesg = "Job Submission failed" + mesg;
        }
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        success = false;
        returnVal = 1;
    } finally {
        try {
            if (ctxCreated) {
                ctx.clear();
            }
            if (rj != null) {
                if (returnVal != 0) {
                    rj.killJob();
                }
            }
            ColumnTruncateMapper.jobClose(outputPath, success, job, console, work.getDynPartCtx(), null);
        } catch (Exception e) {
            LOG.warn("Failed while cleaning up ", e);
        } finally {
            HadoopJobExecHelper.runningJobs.remove(rj);
        }
    }
    return (returnVal);
}
Also used: Context (org.apache.hadoop.hive.ql.Context), DriverContext (org.apache.hadoop.hive.ql.DriverContext), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), Path (org.apache.hadoop.fs.Path), BucketizedHiveInputFormat (org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat), IOException (java.io.IOException), JobClient (org.apache.hadoop.mapred.JobClient), MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork), FileSystem (org.apache.hadoop.fs.FileSystem), RunningJob (org.apache.hadoop.mapred.RunningJob)
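
Stripped of the Hive-specific plumbing, the shape of this method is: configure a JobConf, submit it through JobClient, poll the RunningJob, and kill anything still running during cleanup. The reduced sketch below shows only that skeleton; it assumes the caller has already set the input paths, mapper, and output format on the JobConf, and the polling loop stands in for jobExecHelper.progress().

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

/**
 * Sketch of the submit / monitor / clean-up skeleton used above, reduced to the
 * plain mapred API. Hive-specific setup (plan serialization, tmpjars, Throttle,
 * jobClose) is intentionally omitted.
 */
public class SubmitAndWatchSketch {

    /** Returns 0 on success, 1 on failure, killing the job if it is still running on exit. */
    static int runJob(JobConf job) {
        RunningJob rj = null;
        try {
            JobClient jc = new JobClient(job);
            // Finally SUBMIT the JOB!
            rj = jc.submitJob(job);
            // Simple polling loop standing in for jobExecHelper.progress(rj, jc, ctx).
            while (!rj.isComplete()) {
                Thread.sleep(1000L);
            }
            return rj.isSuccessful() ? 0 : 1;
        } catch (Exception e) {
            return 1;
        } finally {
            try {
                if (rj != null && !rj.isComplete()) {
                    // Mirror the finally block above: kill anything still running on failure.
                    rj.killJob();
                }
            } catch (IOException ignored) {
                // nothing more we can do during cleanup
            }
        }
    }
}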

Example 40 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class GenMRUnion1 method processSubQueryUnionCreateIntermediate.

/**
 * Process the union when the parent is a map-reduce job. Create a temporary
 * output, and let the union task read from the temporary output.
 *
 * The files created for all the inputs are in the union context and later
 * used to initialize the union plan
 *
 * @param parent
 * @param child
 * @param uTask
 * @param ctx
 * @param uCtxTask
 */
private void processSubQueryUnionCreateIntermediate(Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child, Task<? extends Serializable> uTask, GenMRProcContext ctx, GenMRUnionCtx uCtxTask) {
    ParseContext parseCtx = ctx.getParseCtx();
    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    Path taskTmpDir = baseCtx.getMRTmpPath();
    // Create the temporary file, its corresponding FileSinkOperator, and
    // its corresponding TableScanOperator.
    TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);
    // Add the path to alias mapping
    uCtxTask.addTaskTmpDir(taskTmpDir.toUri().toString());
    uCtxTask.addTTDesc(tt_desc);
    uCtxTask.addListTopOperators(tableScanOp);
    // The union task is empty. The files created for all the inputs are
    // assembled in the union context and later used to initialize the union
    // plan
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    currTask.addDependentTask(uTask);
    if (ctx.getRootTasks().contains(uTask)) {
        ctx.getRootTasks().remove(uTask);
        if (!ctx.getRootTasks().contains(currTask) && shouldBeRootTask(currTask)) {
            ctx.getRootTasks().add(currTask);
        }
    }
}
Also used: ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext), Context (org.apache.hadoop.hive.ql.Context), UnionProcContext (org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext), UnionParseContext (org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext), Path (org.apache.hadoop.fs.Path), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)
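
The final block is pure task-graph bookkeeping: once the union task reads the intermediate output of the current task it can no longer be a root task, and the current task is promoted to a root if nothing feeds into it. Below is a toy sketch of that rewiring; the Task class here is a stand-in, not Hive's org.apache.hadoop.hive.ql.exec.Task.

import java.util.ArrayList;
import java.util.List;

/**
 * Toy sketch of the root-task bookkeeping at the end of
 * processSubQueryUnionCreateIntermediate: wire the dependency, drop the union
 * task from the root list, and add the current task if it has no parents.
 */
public class RootTaskSketch {

    static class Task {
        final String name;
        final List<Task> parents = new ArrayList<>();
        final List<Task> children = new ArrayList<>();

        Task(String name) { this.name = name; }

        void addDependentTask(Task child) {
            children.add(child);
            child.parents.add(this);
        }

        boolean shouldBeRootTask() {
            // A task is a root only if nothing feeds into it.
            return parents.isEmpty();
        }
    }

    public static void main(String[] args) {
        Task currTask = new Task("map-reduce subquery");
        Task unionTask = new Task("union");

        List<Task> rootTasks = new ArrayList<>();
        rootTasks.add(unionTask); // the union was planned as a root before the rewrite

        // Same shape as the snippet: add the dependency, then fix up the root list.
        currTask.addDependentTask(unionTask);
        if (rootTasks.contains(unionTask)) {
            rootTasks.remove(unionTask);
            if (!rootTasks.contains(currTask) && currTask.shouldBeRootTask()) {
                rootTasks.add(currTask);
            }
        }

        rootTasks.forEach(t -> System.out.println("root: " + t.name));
    }
}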

Aggregations

Context (org.apache.hadoop.hive.ql.Context): 57
Path (org.apache.hadoop.fs.Path): 25
IOException (java.io.IOException): 19
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 16
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 14
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 12
FileSystem (org.apache.hadoop.fs.FileSystem): 11
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 10
JobConf (org.apache.hadoop.mapred.JobConf): 10
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 9
Serializable (java.io.Serializable): 8
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 8
Test (org.junit.Test): 8
Task (org.apache.hadoop.hive.ql.exec.Task): 7
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 7
ArrayList (java.util.ArrayList): 6
Table (org.apache.hadoop.hive.ql.metadata.Table): 6
DAG (org.apache.tez.dag.api.DAG): 6
HashMap (java.util.HashMap): 5
LinkedHashMap (java.util.LinkedHashMap): 5