Search in sources:

Example 76 with Context

Use of org.apache.hadoop.hive.ql.Context in the Apache Hive project.

Method execute of class MapRedTask.

/**
 * Runs this map-reduce task, either directly in the current JVM or by
 * launching {@code ExecDriver} in a child JVM through the hadoop executable.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Use the task's {@code context}, or create a temporary {@link Context}
 *       when none is set (a context created here is cleared before returning).</li>
 *   <li>Set the number of reducers.</li>
 *   <li>If the context is not local-only and {@code LOCALMODEAUTO} is enabled,
 *       decide from the input summary whether the task can run in local mode.</li>
 *   <li>If {@code SUBMITVIACHILD} is off, delegate to {@code super.execute()}.</li>
 *   <li>Otherwise serialize the plan to {@code plan.xml} under the context's local
 *       tmp path, assemble the command line and environment, spawn the child
 *       process, relay its output, and wait for it to finish.</li>
 * </ol>
 *
 * @return the result of {@code super.execute()} when running in-process, the
 *         child's exit status when running via a child JVM, or {@code 1} if any
 *         exception is raised along the way (the exception is logged, not rethrown)
 */
@Override
public int execute() {
    boolean ctxCreated = false;
    Context ctx = context;
    try {
        if (ctx == null) {
            ctx = new Context(conf);
            ctxCreated = true;
        }
        // estimate number of reducers
        setNumberOfReducers();
        // auto-determine local mode if allowed
        if (!ctx.isLocalOnlyExecutionMode() && conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
            if (inputSummary == null) {
                inputSummary = Utilities.getInputSummary(ctx, work.getMapWork(), null);
            }
            // set the values of totalInputFileSize and totalInputNumFiles, estimating them
            // if percentage block sampling is being used
            double samplePercentage = Utilities.getHighestSamplePercentage(work.getMapWork());
            totalInputFileSize = Utilities.getTotalInputFileSize(inputSummary, work.getMapWork(), samplePercentage);
            totalInputNumFiles = Utilities.getTotalInputNumFiles(inputSummary, work.getMapWork(), samplePercentage);
            // at this point the number of reducers is precisely defined in the plan
            int numReducers = work.getReduceWork() == null ? 0 : work.getReduceWork().getNumReduceTasks();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + getId() + ", Summary: " + totalInputFileSize + "," + totalInputNumFiles + "," + numReducers);
            }
            // a null reason means the task is eligible for local mode
            String reason = MapRedTask.isEligibleForLocalMode(conf, numReducers, totalInputFileSize, totalInputNumFiles);
            if (reason == null) {
                // clone configuration before modifying it on per-task basis
                cloneConf();
                ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
                console.printInfo("Selecting local mode for task: " + getId());
                this.setLocalMode(true);
            } else {
                console.printInfo("Cannot run job locally: " + reason);
                this.setLocalMode(false);
            }
        }
        runningViaChild = conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD);
        if (!runningViaChild) {
            // The task runs in this JVM, so in local mode the job configuration
            // itself is pointed at "local" for the duration of the call.
            if (this.isLocalMode()) {
                // save the original job tracker
                ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(job));
                // change it to local
                ShimLoader.getHadoopShims().setJobLauncherRpcAddress(job, "local");
            }
            try {
                // we are not running this mapred task via child jvm
                // so directly invoke ExecDriver
                return super.execute();
            } finally {
                // Restore the previous launcher address even when super.execute()
                // throws; otherwise the job configuration would stay at "local".
                if (this.isLocalMode()) {
                    ctx.restoreOriginalTracker();
                }
            }
        }
        // we need to edit the configuration to setup cmdline. clone it first
        cloneConf();
        // propagate input format if necessary
        super.setInputAttributes(conf);
        // collect the pieces of the child command line
        String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
        String hiveJar = conf.getJar();
        String libJars = super.getResource(conf, ResourceType.JAR);
        String libJarsOption = StringUtils.isEmpty(libJars) ? " " : " -libjars " + libJars + " ";
        // Generate the hiveConfArgs after potentially adding the jars
        String hiveConfArgs = generateCmdLine(conf, ctx);
        // write out the plan to a local file
        Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
        MapredWork plan = getWork();
        LOG.info("Generating plan file " + planPath.toString());
        // try-with-resources closes the stream on every path; a failure while
        // closing after a successful write still propagates to the caller.
        try (OutputStream out = FileSystem.getLocal(conf).create(planPath)) {
            SerializationUtilities.serializePlan(plan, out);
        }
        String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
        String jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
        String cmdLine = hadoopExec + " jar " + jarCmd + " -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
        String workDir = (new File(".")).getCanonicalPath();
        String files = super.getResource(conf, ResourceType.FILE);
        if (!files.isEmpty()) {
            // added files are made visible to the child by running it in a fresh
            // working directory that contains a symlink to each of them
            cmdLine = cmdLine + " -files " + files;
            workDir = ctx.getLocalTmpPath().toUri().getPath();
            if (!(new File(workDir)).mkdir()) {
                throw new IOException("Cannot create tmp working dir: " + workDir);
            }
            for (String f : StringUtils.split(files, ',')) {
                Path p = new Path(f);
                String target = p.toUri().getPath();
                String link = workDir + Path.SEPARATOR + p.getName();
                if (FileUtil.symLink(target, link) != 0) {
                    throw new IOException("Cannot link to added file: " + target + " from: " + link);
                }
            }
        }
        LOG.info("Executing: " + cmdLine);
        // Inherit Java system variables
        StringBuilder sb = new StringBuilder();
        Properties p = System.getProperties();
        for (String element : HIVE_SYS_PROP) {
            if (p.containsKey(element)) {
                sb.append(" -D").append(element).append("=").append(p.getProperty(element));
            }
        }
        String hadoopOpts = sb.toString();
        // Inherit the environment variables
        Map<String, String> variables = new HashMap<String, String>(System.getenv());
        if (ShimLoader.getHadoopShims().isLocalMode(conf)) {
            // if we are running in local mode - then the amount of memory used
            // by the child jvm can no longer default to the memory used by the
            // parent jvm
            int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
            if (hadoopMem == 0) {
                // remove env var that would default child jvm to use parent's memory
                // as default. child jvm would use default memory for a hadoop client
                variables.remove(HADOOP_MEM_KEY);
            } else {
                // user specified the memory for local mode hadoop run
                variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
            }
        }
        // otherwise nothing to do - we are not running in local mode - only submitting
        // the job via a child process. in this case it's appropriate that the
        // child jvm use the same memory as the parent jvm
        if (variables.containsKey(HADOOP_OPTS_KEY)) {
            variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
        } else {
            variables.put(HADOOP_OPTS_KEY, hadoopOpts);
        }
        if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
            configureDebugVariablesForChildJVM(variables);
        }
        if (PROXY == Utils.getUGI().getAuthenticationMethod()) {
            variables.put(HADOOP_PROXY_USER, Utils.getUGI().getShortUserName());
        }
        // flatten the environment map into NAME=value entries for spawn()
        String[] env = new String[variables.size()];
        int pos = 0;
        for (Map.Entry<String, String> entry : variables.entrySet()) {
            String name = entry.getKey();
            String value = entry.getValue();
            env[pos++] = name + "=" + value;
        }
        // Run ExecDriver in another JVM
        executor = spawn(cmdLine, workDir, env);
        // stderr goes through a caching stream so it can be reported on failure
        CachingPrintStream errPrintStream = new CachingPrintStream(SessionState.getConsole().getChildErrStream());
        StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, SessionState.getConsole().getChildOutStream());
        StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
        outPrinter.start();
        errPrinter.start();
        int exitVal = jobExecHelper.progressLocal(executor, getId());
        // wait for stream threads to finish
        outPrinter.join();
        errPrinter.join();
        if (exitVal != 0) {
            LOG.error("Execution failed with exit status: " + exitVal);
            if (SessionState.get() != null) {
                SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
            }
        } else {
            LOG.info("Execution completed successfully");
        }
        return exitVal;
    } catch (Exception e) {
        LOG.error("Got exception", e);
        return (1);
    } finally {
        try {
            // only a context created by this call is cleared here; a context
            // supplied through the task is left untouched
            if (ctxCreated) {
                ctx.clear();
            }
        } catch (Exception e) {
            LOG.error("Exception: ", e);
        }
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) IOException(java.io.IOException) Properties(java.util.Properties) JSONException(org.json.JSONException) IOException(java.io.IOException) CachingPrintStream(org.apache.hadoop.hive.common.io.CachingPrintStream) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) StreamPrinter(org.apache.hive.common.util.StreamPrinter) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Example 77 with Context

Use of org.apache.hadoop.hive.ql.Context in the Apache Hive project.

Method testBuildDag of class TestTezTask.

/**
 * Builds a DAG from the test work graph and checks that every work unit has a
 * vertex of the same name, and that each vertex lists a same-named output
 * vertex for every child of its work unit.
 */
@Test
public void testBuildDag() throws Exception {
    Context dagContext = new Context(conf);
    DAG dag = task.build(conf, work, path, dagContext, DagUtils.createTezLrMap(appLr, null));
    for (BaseWork parent : work.getAllWork()) {
        Vertex parentVertex = dag.getVertex(parent.getName());
        assertNotNull(parentVertex);
        List<Vertex> downstream = parentVertex.getOutputVertices();
        for (BaseWork child : work.getChildren(parent)) {
            // the child must show up, by name, among the parent's output vertices
            String childName = child.getName();
            boolean linked = downstream.stream().anyMatch(candidate -> candidate.getName().equals(childName));
            assertTrue(linked);
        }
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Vertex(org.apache.tez.dag.api.Vertex) DAG(org.apache.tez.dag.api.DAG) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) Test(org.junit.Test)

Example 78 with Context

Use of org.apache.hadoop.hive.ql.Context in the Apache Hive project.

Method setUp of class DbTxnManagerEndToEndTestBase.

/**
 * Prepares the fixture for each test: starts a session, creates the context
 * and two drivers, adjusts lock/transaction settings on {@code conf}, cleans
 * the transaction database, initializes the transaction manager and store,
 * and recreates the warehouse directory from scratch.
 *
 * @throws Exception if any setup step fails; a {@link RuntimeException} is
 *         thrown when the warehouse directory cannot be created
 */
@Before
public void setUp() throws Exception {
    // set up metastore client cache
    boolean metastoreCacheEnabled = HiveConf.getBoolVar(conf, HiveConf.ConfVars.MSC_CACHE_ENABLED);
    if (metastoreCacheEnabled) {
        HiveMetaStoreClientWithLocalCache.init(conf);
    }

    SessionState.start(conf);
    ctx = new Context(conf);

    // the first driver is built non-isolated, the second with builder defaults
    QueryState sharedState = new QueryState.Builder().withHiveConf(conf).nonIsolated().build();
    driver = new Driver(sharedState);
    QueryState defaultState = new QueryState.Builder().withHiveConf(conf).build();
    driver2 = new Driver(defaultState);

    HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_LOCKS_PARTITION_THRESHOLD, -1);
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED, false);
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.TXN_WRITE_X_LOCK, false);
    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.TXN_USE_MIN_HISTORY_LEVEL, true);
    TestTxnDbUtil.cleanDb(conf);

    SessionState session = SessionState.get();
    session.initTxnMgr(conf);
    txnMgr = session.getTxnMgr();
    Assert.assertTrue(txnMgr instanceof DbTxnManager);
    txnHandler = TxnUtils.getTxnStore(conf);

    // start from an empty warehouse directory
    File staleWarehouse = new File(getWarehouseDir());
    if (staleWarehouse.exists()) {
        FileUtil.fullyDelete(staleWarehouse);
    }
    boolean warehouseCreated = new File(getWarehouseDir()).mkdirs();
    if (!warehouseCreated) {
        throw new RuntimeException("Could not create " + getWarehouseDir());
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) SessionState(org.apache.hadoop.hive.ql.session.SessionState) Driver(org.apache.hadoop.hive.ql.Driver) QueryState(org.apache.hadoop.hive.ql.QueryState) File(java.io.File) Before(org.junit.Before)

Example 79 with Context

Use of org.apache.hadoop.hive.ql.Context in the Apache Hive project.

Method execute of class AlterMaterializedViewRewriteOperation.

/**
 * Switches the rewrite-enabled flag of the materialized view named in
 * {@code desc}.
 *
 * <p>When the view already has the requested setting nothing is changed.
 * When rewriting is being enabled, the view's expanded text is first planned
 * and checked; the change is then applied to a copy of the table, which is
 * passed to {@code alterTable} with the do-not-update-stats property set.
 *
 * @return always {@code 0}
 * @throws HiveException if the metastore calls fail, or (wrapping the
 *         underlying exception) if the view cannot be planned or is not a
 *         valid materialization for automatic rewriting
 */
@Override
public int execute() throws HiveException {
    Table currentView = context.getDb().getTable(desc.getMaterializedViewName());
    boolean alreadyInRequestedState = currentView.isRewriteEnabled() == desc.isRewriteEnable();
    if (alreadyInRequestedState) {
        // This is a noop, return successfully
        return 0;
    }

    // Work on a copy so the fetched Table instance is left alone
    Table updatedView = currentView.copy();
    if (desc.isRewriteEnable()) {
        try {
            checkViewSupportsRewriting(updatedView);
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }

    updatedView.setRewriteEnabled(desc.isRewriteEnable());
    EnvironmentContext keepStats = new EnvironmentContext();
    keepStats.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
    context.getDb().alterTable(updatedView, false, keepStats, true);
    return 0;
}

/**
 * Plans the expanded text of the given view and throws when no logical plan
 * is produced or the planner reports the materialization as invalid for
 * automatic rewriting.
 */
private void checkViewSupportsRewriting(Table view) throws Exception {
    QueryState queryState = new QueryState.Builder().withHiveConf(context.getConf()).build();
    CalcitePlanner planner = new CalcitePlanner(queryState);
    Context planningCtx = new Context(context.getConf());
    planningCtx.setIsLoadingMaterializedView(true);
    planner.initCtx(planningCtx);
    planner.init(false);

    RelNode logicalPlan = planner.genLogicalPlan(ParseUtils.parse(view.getViewExpandedText(), planningCtx));
    if (logicalPlan == null) {
        StringBuilder message = new StringBuilder("Cannot enable automatic rewriting for materialized view.");
        if (planningCtx.getCboInfo() != null) {
            message.append(" ").append(planningCtx.getCboInfo());
        }
        throw new HiveException(message.toString());
    }
    if (planner.isValidAutomaticRewritingMaterialization()) {
        return;
    }
    throw new HiveException("Cannot enable rewriting for materialized view. " + planner.getInvalidAutomaticRewritingMaterializationReason());
}
Also used : DDLOperationContext(org.apache.hadoop.hive.ql.ddl.DDLOperationContext) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Context(org.apache.hadoop.hive.ql.Context) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) RelNode(org.apache.calcite.rel.RelNode) CalcitePlanner(org.apache.hadoop.hive.ql.parse.CalcitePlanner) QueryState(org.apache.hadoop.hive.ql.QueryState) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 80 with Context

Use of org.apache.hadoop.hive.ql.Context in the Apache Hive project.

Method execute of class AlterTableConcatenateOperation.

/**
 * Builds the merge-file work from the operation context's compilation
 * context, wraps it in a task, and runs that task.
 *
 * @return the value returned by {@code executeTask}
 * @throws HiveException if building or executing the task fails
 */
@Override
public int execute() throws HiveException {
    Context queryContext = context.getContext();
    Task<?> mergeTask = getTask(getMergeFileWork(queryContext.getOpContext()));
    return executeTask(queryContext, mergeTask);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) DDLOperationContext(org.apache.hadoop.hive.ql.ddl.DDLOperationContext) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MergeFileWork(org.apache.hadoop.hive.ql.io.merge.MergeFileWork)

Aggregations

Context (org.apache.hadoop.hive.ql.Context)103 Path (org.apache.hadoop.fs.Path)45 IOException (java.io.IOException)26 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)21 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)20 Test (org.junit.Test)19 FileSystem (org.apache.hadoop.fs.FileSystem)16 HiveConf (org.apache.hadoop.hive.conf.HiveConf)16 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)16 DriverContext (org.apache.hadoop.hive.ql.DriverContext)15 HashMap (java.util.HashMap)13 HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager)13 ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext)13 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)13 ArrayList (java.util.ArrayList)12 Task (org.apache.hadoop.hive.ql.exec.Task)12 Table (org.apache.hadoop.hive.ql.metadata.Table)12 JobConf (org.apache.hadoop.mapred.JobConf)12 DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork)9 QueryState (org.apache.hadoop.hive.ql.QueryState)8