
Example 1 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

The class RewriteParseContextGenerator, method generateOperatorTree:

/**
   * Parse the input {@link String} command and generate an operator tree.
   * @param queryState the query state carrying the Hive configuration to use
   * @param command the query text to parse and analyze
   * @return the root operator of the generated tree
   * @throws SemanticException if parsing or semantic analysis fails
   */
public static Operator<? extends OperatorDesc> generateOperatorTree(QueryState queryState, String command) throws SemanticException {
    Operator<? extends OperatorDesc> operatorTree;
    try {
        Context ctx = new Context(queryState.getConf());
        ASTNode tree = ParseUtils.parse(command, ctx);
        BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
        assert (sem instanceof SemanticAnalyzer);
        operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx);
        LOG.info("Sub-query Semantic Analysis Completed");
    } catch (IOException e) {
        LOG.error("IOException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    } catch (ParseException e) {
        LOG.error("ParseException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    } catch (SemanticException e) {
        LOG.error("SemanticException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    return operatorTree;
}
Also used : ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
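
For orientation, here is a minimal caller sketch. It is not taken from the Hive sources: the package of RewriteParseContextGenerator, the QueryState constructor, and the sample query are assumptions that vary across Hive versions.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.optimizer.index.RewriteParseContextGenerator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.session.SessionState;

public class GenerateOperatorTreeDemo {
    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        // The Context created inside generateOperatorTree needs session scratch directories.
        SessionState.start(conf);
        // Assumption: older Hive versions expose this constructor; newer ones use a builder.
        QueryState queryState = new QueryState(conf);
        Operator<? extends OperatorDesc> root = RewriteParseContextGenerator
                .generateOperatorTree(queryState, "SELECT key, count(1) FROM src GROUP BY key");
        System.out.println("Root operator: " + root.getName());
    }
}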

Example 2 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

The class TestUtilities, method runTestGetInputSummary:

private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties, int numOfPartitions, int bytesPerFile, Class<? extends InputFormat> inputFormatClass) throws IOException {
    // creates scratch directories needed by the Context object
    SessionState.start(new HiveConf());
    MapWork mapWork = new MapWork();
    Context context = new Context(jobConf);
    LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
    LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
    TableScanOperator scanOp = new TableScanOperator();
    PartitionDesc partitionDesc = new PartitionDesc(new TableDesc(inputFormatClass, null, properties), null);
    String testTableName = "testTable";
    Path testTablePath = new Path(testTableName);
    Path[] testPartitionsPaths = new Path[numOfPartitions];
    for (int i = 0; i < numOfPartitions; i++) {
        String testPartitionName = "p=" + i;
        testPartitionsPaths[i] = new Path(testTablePath, testPartitionName);
        pathToPartitionInfo.put(testPartitionsPaths[i], partitionDesc);
        pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
        mapWork.getAliasToWork().put(testPartitionName, scanOp);
    }
    mapWork.setPathToAliases(pathToAliasTable);
    mapWork.setPathToPartitionInfo(pathToPartitionInfo);
    FileSystem fs = FileSystem.getLocal(jobConf);
    try {
        fs.mkdirs(testTablePath);
        byte[] data = new byte[bytesPerFile];
        for (int i = 0; i < numOfPartitions; i++) {
            fs.mkdirs(testPartitionsPaths[i]);
            FSDataOutputStream out = fs.create(new Path(testPartitionsPaths[i], "test1.txt"));
            out.write(data);
            out.close();
        }
        return Utilities.getInputSummary(context, mapWork, null);
    } finally {
        if (fs.exists(testTablePath)) {
            fs.delete(testTablePath, true);
        }
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) FileSystem(org.apache.hadoop.fs.FileSystem) HiveConf(org.apache.hadoop.hive.conf.HiveConf) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream)
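
A hedged sketch of how a test in the same class might invoke this helper. The test name, input format choice, and assertions are illustrative; they assume the summary falls back to the local file system because TextInputFormat does not implement Hive's ContentSummaryInputFormat.

// Illustrative test inside TestUtilities; assumes the usual imports are present
// (org.junit.Test, org.junit.Assert.assertEquals, java.util.Properties,
// org.apache.hadoop.fs.ContentSummary, org.apache.hadoop.mapred.TextInputFormat).
@Test
public void testGetInputSummaryOnLocalFs() throws IOException {
    JobConf jobConf = new JobConf();
    Properties properties = new Properties();
    // Five partitions with one 300-byte file each; the expected totals follow directly.
    ContentSummary summary =
            runTestGetInputSummary(jobConf, properties, 5, 300, TextInputFormat.class);
    assertEquals(5 * 300, summary.getLength());
    assertEquals(5, summary.getFileCount());
}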

Example 3 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

The class GenericUDTFGetSplits, method getSplits:

public InputSplit[] getSplits(JobConf job, int numSplits, TezWork work, Schema schema) throws IOException {
    DAG dag = DAG.create(work.getName());
    dag.setCredentials(job.getCredentials());
    DagUtils utils = DagUtils.getInstance();
    Context ctx = new Context(job);
    MapWork mapWork = (MapWork) work.getAllWork().get(0);
    // A bunch of things get set up in the Context based on the conf, but we only need
    // the MR tmp directory for the following method.
    JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
    // TODO: should we also whitelist input formats here? from mapred.input.format.class
    Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
    FileSystem fs = scratchDir.getFileSystem(job);
    try {
        LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(), utils, job);
        Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, appJarLr, new ArrayList<LocalResource>(), fs, ctx, false, work, work.getVertexType(mapWork));
        String vertexName = wx.getName();
        dag.addVertex(wx);
        utils.addCredentials(mapWork, dag);
        // We have the DAG; now proceed to get the splits:
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
        HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork);
        List<Event> eventList = splitGenerator.initialize();
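        // The first event configures the vertex tasks and carries the location hints;
        // each remaining event describes a single split, hence the size() - 1 sizing below.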
        InputSplit[] result = new InputSplit[eventList.size() - 1];
        InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
        List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
        Preconditions.checkState(hints.size() == eventList.size() - 1);
        if (LOG.isDebugEnabled()) {
            LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
        }
        LlapCoordinator coordinator = LlapCoordinator.getInstance();
        if (coordinator == null) {
            throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
        }
        // See the discussion in the implementation as to why we generate app ID.
        ApplicationId applicationId = coordinator.createExtClientAppId();
        // This assumes LLAP cluster owner is always the HS2 user.
        String llapUser = UserGroupInformation.getLoginUser().getShortUserName();
        String queryUser = null;
        byte[] tokenBytes = null;
        LlapSigner signer = null;
        if (UserGroupInformation.isSecurityEnabled()) {
            signer = coordinator.getLlapSigner(job);
            // 1. Generate the token for query user (applies to all splits).
            queryUser = SessionState.getUserFromAuthenticator();
            if (queryUser == null) {
                queryUser = UserGroupInformation.getCurrentUser().getUserName();
                LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
            }
            LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
            // We put the query user, not LLAP user, into the message and token.
            Token<LlapTokenIdentifier> token = tokenClient.createToken(applicationId.toString(), queryUser, true);
            LOG.info("Created the token for remote user: {}", token);
            bos.reset();
            token.write(dos);
            tokenBytes = bos.toByteArray();
        } else {
            queryUser = UserGroupInformation.getCurrentUser().getUserName();
        }
        LOG.info("Number of splits: " + (eventList.size() - 1));
        SignedMessage signedSvs = null;
        for (int i = 0; i < eventList.size() - 1; i++) {
            TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName, eventList.size() - 1, applicationId, i);
            // 2. Generate the vertex/submit information for all events.
            if (i == 0) {
                // The queryId could either be picked up from the current request being processed, or
                // generated. The current request isn't exactly correct since the query is 'done' once we
                // return the results. Generating a new one has the added benefit of working once this
                // is moved out of a UDTF into a proper API.
                // Setting this to the generated AppId which is unique.
                // Despite the differences in TaskSpec, the vertex spec should be the same.
                signedSvs = createSignedVertexSpec(signer, taskSpec, applicationId, queryUser, applicationId.toString());
            }
            SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(applicationId, System.currentTimeMillis(), taskSpec.getVertexParallelism(), signedSvs.message, signedSvs.signature);
            byte[] submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);
            // 3. Generate input event.
            SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);
            // 4. Make location hints.
            SplitLocationInfo[] locations = makeLocationHints(hints.get(i));
            result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature, locations, schema, llapUser, tokenBytes);
        }
        return result;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) SubmitWorkInfo(org.apache.hadoop.hive.llap.SubmitWorkInfo) LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier) SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo) HiveSplitGenerator(org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator) TaskSpecBuilder(org.apache.tez.dag.api.TaskSpecBuilder) LlapSigner(org.apache.hadoop.hive.llap.security.LlapSigner) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) LlapTokenLocalClient(org.apache.hadoop.hive.llap.security.LlapTokenLocalClient) DagUtils(org.apache.hadoop.hive.ql.exec.tez.DagUtils) LlapInputSplit(org.apache.hadoop.hive.llap.LlapInputSplit) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Context(org.apache.hadoop.hive.ql.Context) Path(org.apache.hadoop.fs.Path) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) SignedMessage(org.apache.hadoop.hive.llap.security.LlapSigner.SignedMessage) DAG(org.apache.tez.dag.api.DAG) IOException(java.io.IOException) LlapCoordinator(org.apache.hadoop.hive.llap.coordinator.LlapCoordinator) LoginException(javax.security.auth.login.LoginException) URISyntaxException(java.net.URISyntaxException) UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) CommandNeedRetryException(org.apache.hadoop.hive.ql.CommandNeedRetryException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) Event(org.apache.tez.runtime.api.Event) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 4 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

The class DDLTask, method generateAddMmTasks:

private List<Task<?>> generateAddMmTasks(Table tbl) throws HiveException {
    // We will move all the files in the table/partition directories into the first MM
    // directory, then commit the first write ID.
    List<Path> srcs = new ArrayList<>(), tgts = new ArrayList<>();
    long mmWriteId = 0;
    try {
        HiveTxnManager txnManager = SessionState.get().getTxnMgr();
        if (txnManager.isTxnOpen()) {
            mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
        } else {
            txnManager.openTxn(new Context(conf), conf.getUser());
            mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
            txnManager.commitTxn();
        }
    } catch (Exception e) {
        String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
    int stmtId = 0;
    String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
    Hive db = getHive();
    if (tbl.getPartitionKeys().size() > 0) {
        PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
        Iterator<Partition> partIter = parts.iterator();
        while (partIter.hasNext()) {
            Partition part = partIter.next();
            checkMmLb(part);
            Path src = part.getDataLocation(), tgt = new Path(src, mmDir);
            srcs.add(src);
            tgts.add(tgt);
            if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
            }
        }
    } else {
        checkMmLb(tbl);
        Path src = tbl.getDataLocation(), tgt = new Path(src, mmDir);
        srcs.add(src);
        tgts.add(tgt);
        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
            Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
        }
    }
    // Don't set inputs and outputs - the locks have already been taken so it's pointless.
    MoveWork mw = new MoveWork(null, null, null, null, false);
    mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null));
    ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId);
    Task<?> mv = TaskFactory.get(mw), ic = TaskFactory.get(icw);
    mv.addDependentTask(ic);
    return Lists.<Task<?>>newArrayList(mv);
}
Also used : Path(org.apache.hadoop.fs.Path) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) DriverContext(org.apache.hadoop.hive.ql.DriverContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ColumnTruncateTask(org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) MergeFileTask(org.apache.hadoop.hive.ql.io.merge.MergeFileTask) ArrayList(java.util.ArrayList) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) LoadMultiFilesDesc(org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc) Hive(org.apache.hadoop.hive.ql.metadata.Hive) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager)
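
The move target for every source directory is the first delta directory for the MM write ID. As a hedged illustration only, assuming the standard AcidUtils naming of zero-padded write IDs and statement IDs, the generated subdirectory name looks like this:

public class DeltaSubdirDemo {
    public static void main(String[] args) {
        // Not taken from the Hive sources shown above; a quick check of the delta
        // subdirectory name that generateAddMmTasks builds for the move targets.
        String mmDir = org.apache.hadoop.hive.ql.io.AcidUtils.deltaSubdir(1L, 1L, 0);
        System.out.println(mmDir);   // expected (assumed format): delta_0000001_0000001_0000
    }
}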

Example 5 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

The class DDLTask, method lockDatabase:

/**
 * Lock the database
 *
 * @param lockDb
 *          the database to be locked along with the mode
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 */
private int lockDatabase(Hive db, LockDatabaseDesc lockDb) throws HiveException {
    Context ctx = driverContext.getCtx();
    HiveTxnManager txnManager = ctx.getHiveTxnManager();
    return txnManager.lockDatabase(db, lockDb);
}
Also used : EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) DriverContext(org.apache.hadoop.hive.ql.DriverContext) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager)
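
For comparison, the unlock side of DDLTask follows the same pattern through the driver's Context. The sketch below is reconstructed from memory of the HiveTxnManager API; the unlockDatabase signature and the UnlockDatabaseDesc type are assumptions, not taken from this page.

// Hedged mirror-image sketch; verify the names against your Hive version.
private int unlockDatabase(Hive db, UnlockDatabaseDesc unlockDb) throws HiveException {
    Context ctx = driverContext.getCtx();
    HiveTxnManager txnManager = ctx.getHiveTxnManager();
    return txnManager.unlockDatabase(db, unlockDb);
}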

Aggregations

Context (org.apache.hadoop.hive.ql.Context): 103
Path (org.apache.hadoop.fs.Path): 45
IOException (java.io.IOException): 26
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 20
Test (org.junit.Test): 19
FileSystem (org.apache.hadoop.fs.FileSystem): 16
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 16
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 15
HashMap (java.util.HashMap): 13
HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager): 13
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 13
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 13
ArrayList (java.util.ArrayList): 12
Task (org.apache.hadoop.hive.ql.exec.Task): 12
Table (org.apache.hadoop.hive.ql.metadata.Table): 12
JobConf (org.apache.hadoop.mapred.JobConf): 12
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 9
QueryState (org.apache.hadoop.hive.ql.QueryState): 8