Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class RewriteParseContextGenerator, method generateOperatorTree.
/**
 * Parse the input {@link String} command and generate an operator tree.
 * @param queryState
 * @param command
 * @return the root {@link Operator} of the generated operator tree
 * @throws SemanticException
 */
public static Operator<? extends OperatorDesc> generateOperatorTree(QueryState queryState, String command) throws SemanticException {
  Operator<? extends OperatorDesc> operatorTree;
  try {
    Context ctx = new Context(queryState.getConf());
    ASTNode tree = ParseUtils.parse(command, ctx);
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
    assert (sem instanceof SemanticAnalyzer);
    operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx);
    LOG.info("Sub-query Semantic Analysis Completed");
  } catch (IOException e) {
    LOG.error("IOException in generating the operator tree for input command - " + command + " ", e);
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  } catch (ParseException e) {
    LOG.error("ParseException in generating the operator tree for input command - " + command + " ", e);
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  } catch (SemanticException e) {
    LOG.error("SemanticException in generating the operator tree for input command - " + command + " ", e);
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  return operatorTree;
}
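For orientation, a minimal caller might look like the sketch below. The QueryState.Builder construction is an assumption about the surrounding Hive version (recent releases expose it), and the query text is purely illustrative.

// Hypothetical caller of the helper above; QueryState.Builder is assumed to be
// available in this Hive version, and the query string is illustrative only.
HiveConf conf = new HiveConf();
QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
Operator<? extends OperatorDesc> root =
    RewriteParseContextGenerator.generateOperatorTree(queryState, "SELECT key, value FROM src");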
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class TestUtilities, method runTestGetInputSummary.
private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties, int numOfPartitions, int bytesPerFile, Class<? extends InputFormat> inputFormatClass) throws IOException {
  // creates scratch directories needed by the Context object
  SessionState.start(new HiveConf());
  MapWork mapWork = new MapWork();
  Context context = new Context(jobConf);
  LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
  LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
  TableScanOperator scanOp = new TableScanOperator();
  PartitionDesc partitionDesc = new PartitionDesc(new TableDesc(inputFormatClass, null, properties), null);
  String testTableName = "testTable";
  Path testTablePath = new Path(testTableName);
  Path[] testPartitionsPaths = new Path[numOfPartitions];
  for (int i = 0; i < numOfPartitions; i++) {
    String testPartitionName = "p=" + i;
    testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
    pathToPartitionInfo.put(testPartitionsPaths[i], partitionDesc);
    pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
    mapWork.getAliasToWork().put(testPartitionName, scanOp);
  }
  mapWork.setPathToAliases(pathToAliasTable);
  mapWork.setPathToPartitionInfo(pathToPartitionInfo);
  FileSystem fs = FileSystem.getLocal(jobConf);
  try {
    fs.mkdirs(testTablePath);
    byte[] data = new byte[bytesPerFile];
    for (int i = 0; i < numOfPartitions; i++) {
      fs.mkdirs(testPartitionsPaths[i]);
      FSDataOutputStream out = fs.create(new Path(testPartitionsPaths[i], "test1.txt"));
      out.write(data);
      out.close();
    }
    return Utilities.getInputSummary(context, mapWork, null);
  } finally {
    if (fs.exists(testTablePath)) {
      fs.delete(testTablePath, true);
    }
  }
}
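A hypothetical call site for this helper is sketched below; the partition count, file size, and choice of TextInputFormat are assumptions for illustration, and the assertions rely only on the standard Hadoop ContentSummary accessors.

// Illustrative invocation; the parameter values and input format are assumptions.
JobConf jobConf = new JobConf();
Properties properties = new Properties();
ContentSummary summary = runTestGetInputSummary(jobConf, properties,
    5 /* partitions */, 300 /* bytes per file */, org.apache.hadoop.mapred.TextInputFormat.class);
// One 300-byte file is written per partition, so the summary should cover 5 * 300 bytes.
assertEquals(5 * 300, summary.getLength());
assertEquals(5, summary.getFileCount());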
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class GenericUDTFGetSplits, method getSplits.
public InputSplit[] getSplits(JobConf job, int numSplits, TezWork work, Schema schema) throws IOException {
  DAG dag = DAG.create(work.getName());
  dag.setCredentials(job.getCredentials());
  DagUtils utils = DagUtils.getInstance();
  Context ctx = new Context(job);
  MapWork mapWork = (MapWork) work.getAllWork().get(0);
  // A number of things get set up in the Context based on the conf, but we only need
  // the MR tmp directory for the following method.
  JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
  // TODO: should we also whitelist input formats here? from mapred.input.format.class
  Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
  FileSystem fs = scratchDir.getFileSystem(job);
  try {
    LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(), utils, job);
    Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, appJarLr, new ArrayList<LocalResource>(), fs, ctx, false, work, work.getVertexType(mapWork));
    String vertexName = wx.getName();
    dag.addVertex(wx);
    utils.addCredentials(mapWork, dag);
    // We have the DAG; now proceed to get the splits.
    Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
    Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
    HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork);
    List<Event> eventList = splitGenerator.initialize();
    InputSplit[] result = new InputSplit[eventList.size() - 1];
    InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
    List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
    Preconditions.checkState(hints.size() == eventList.size() - 1);
    if (LOG.isDebugEnabled()) {
      LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
    }
    LlapCoordinator coordinator = LlapCoordinator.getInstance();
    if (coordinator == null) {
      throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
    }
    // See the discussion in the implementation as to why we generate the app ID.
    ApplicationId applicationId = coordinator.createExtClientAppId();
    // This assumes the LLAP cluster owner is always the HS2 user.
    String llapUser = UserGroupInformation.getLoginUser().getShortUserName();
    String queryUser = null;
    byte[] tokenBytes = null;
    LlapSigner signer = null;
    if (UserGroupInformation.isSecurityEnabled()) {
      signer = coordinator.getLlapSigner(job);
      // 1. Generate the token for the query user (applies to all splits).
      queryUser = SessionState.getUserFromAuthenticator();
      if (queryUser == null) {
        queryUser = UserGroupInformation.getCurrentUser().getUserName();
        LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
      }
      LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
      // We put the query user, not the LLAP user, into the message and token.
      Token<LlapTokenIdentifier> token = tokenClient.createToken(applicationId.toString(), queryUser, true);
      LOG.info("Created the token for remote user: {}", token);
      bos.reset();
      token.write(dos);
      tokenBytes = bos.toByteArray();
    } else {
      queryUser = UserGroupInformation.getCurrentUser().getUserName();
    }
    LOG.info("Number of splits: " + (eventList.size() - 1));
    SignedMessage signedSvs = null;
    for (int i = 0; i < eventList.size() - 1; i++) {
      TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName, eventList.size() - 1, applicationId, i);
      // 2. Generate the vertex/submit information for all events.
      if (i == 0) {
        // The queryId could either be picked up from the current request being processed, or
        // generated. The current request isn't exactly correct since the query is 'done' once we
        // return the results. Generating a new one has the added benefit of working once this
        // is moved out of a UDTF into a proper API.
        // Setting this to the generated AppId, which is unique.
        // Despite the differences in TaskSpec, the vertex spec should be the same.
        signedSvs = createSignedVertexSpec(signer, taskSpec, applicationId, queryUser, applicationId.toString());
      }
      SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(applicationId, System.currentTimeMillis(), taskSpec.getVertexParallelism(), signedSvs.message, signedSvs.signature);
      byte[] submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);
      // 3. Generate the input event.
      SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);
      // 4. Make location hints.
      SplitLocationInfo[] locations = makeLocationHints(hints.get(i));
      result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature, locations, schema, llapUser, tokenBytes);
    }
    return result;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
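As a rough illustration of how the returned array might be consumed, the sketch below uses only the generic org.apache.hadoop.mapred.InputSplit contract; the job, work, and schema objects are assumed to have been prepared elsewhere.

// Hypothetical consumer; job, tezWork, and schema are assumed to be set up already,
// and only the generic InputSplit API (getLength/getLocations) is used.
InputSplit[] splits = getSplits(job, 0, tezWork, schema);
for (InputSplit split : splits) {
  LOG.info("Split of length {} with preferred locations {}",
      split.getLength(), Arrays.toString(split.getLocations()));
}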
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class DDLTask, method generateAddMmTasks.
private List<Task<?>> generateAddMmTasks(Table tbl) throws HiveException {
  // We will move all the files in the table/partition directories into the first MM
  // directory, then commit the first write ID.
  List<Path> srcs = new ArrayList<>(), tgts = new ArrayList<>();
  long mmWriteId = 0;
  try {
    HiveTxnManager txnManager = SessionState.get().getTxnMgr();
    if (txnManager.isTxnOpen()) {
      mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
    } else {
      txnManager.openTxn(new Context(conf), conf.getUser());
      mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
      txnManager.commitTxn();
    }
  } catch (Exception e) {
    String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
    console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
  }
  int stmtId = 0;
  String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
  Hive db = getHive();
  if (tbl.getPartitionKeys().size() > 0) {
    PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
    Iterator<Partition> partIter = parts.iterator();
    while (partIter.hasNext()) {
      Partition part = partIter.next();
      checkMmLb(part);
      Path src = part.getDataLocation(), tgt = new Path(src, mmDir);
      srcs.add(src);
      tgts.add(tgt);
      if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
      }
    }
  } else {
    checkMmLb(tbl);
    Path src = tbl.getDataLocation(), tgt = new Path(src, mmDir);
    srcs.add(src);
    tgts.add(tgt);
    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
      Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
    }
  }
  // Don't set inputs and outputs - the locks have already been taken so it's pointless.
  MoveWork mw = new MoveWork(null, null, null, null, false);
  mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null));
  ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId);
  Task<?> mv = TaskFactory.get(mw), ic = TaskFactory.get(icw);
  mv.addDependentTask(ic);
  return Lists.<Task<?>>newArrayList(mv);
}
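To make the mmDir value concrete, the sketch below shows the kind of delta directory name AcidUtils.deltaSubdir produces for the first write ID; the exact zero-padding in the comment is an assumption based on current Hive behavior.

// Illustrative only: for write ID 1, statement 0, the resulting name is expected to
// look like "delta_0000001_0000001_0000" (padding widths assumed from current Hive).
long firstWriteId = 1L;
int firstStmtId = 0;
String firstMmDir = AcidUtils.deltaSubdir(firstWriteId, firstWriteId, firstStmtId);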
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class DDLTask, method lockDatabase.
/**
 * Lock the database.
 *
 * @param db
 *          the Hive object used to interact with the metastore
 * @param lockDb
 *          the database to be locked along with the mode
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 */
private int lockDatabase(Hive db, LockDatabaseDesc lockDb) throws HiveException {
  Context ctx = driverContext.getCtx();
  HiveTxnManager txnManager = ctx.getHiveTxnManager();
  return txnManager.lockDatabase(db, lockDb);
}
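For symmetry, a sketch of the matching unlock path is shown below; it mirrors lockDatabase and assumes an UnlockDatabaseDesc prepared by the DDL semantic analyzer, with HiveTxnManager.unlockDatabase as the counterpart call.

// Sketch of the counterpart operation, assuming an UnlockDatabaseDesc produced upstream.
private int unlockDatabase(Hive db, UnlockDatabaseDesc unlockDb) throws HiveException {
  Context ctx = driverContext.getCtx();
  HiveTxnManager txnManager = ctx.getHiveTxnManager();
  return txnManager.unlockDatabase(db, unlockDb);
}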