Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.
From the class Driver, method acquireLocks.
/**
* Acquire read and write locks needed by the statement. The list of objects to be locked are
* obtained from the inputs and outputs populated by the compiler. Locking strategy depends on
* HiveTxnManager and HiveLockManager configured
*
* This method also records the list of valid transactions. This must be done after any
* transactions have been opened.
* @throws CommandProcessorResponse
*/
private void acquireLocks() throws CommandProcessorResponse {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
  if (!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) {
    /*non acid txn managers don't support txns but fwd lock requests to lock managers
      acid txn manager requires all locks to be associated with a txn so if we
      end up here w/o an open txn it's because we are processing something like "use <database>
      which by definition needs no locks*/
    return;
  }
  try {
    String userFromUGI = getUserFromUGI();
    if (userFromUGI == null) {
      throw createProcessorResponse(10);
    }
    // Set the table write id in all of the acid file sinks
    if (haveAcidWrite()) {
      List<FileSinkDesc> acidSinks = new ArrayList<>(plan.getAcidSinks());
      // sorting makes tests easier to write since file names and ROW__IDs depend on statementId
      // so this makes (file name -> data) mapping stable
      acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) ->
          fsd1.getDirName().compareTo(fsd2.getDirName()));
      for (FileSinkDesc desc : acidSinks) {
        TableDesc tableInfo = desc.getTableInfo();
        long writeId = queryTxnMgr.getTableWriteId(
            Utilities.getDatabaseName(tableInfo.getTableName()),
            Utilities.getTableName(tableInfo.getTableName()));
        desc.setTableWriteId(writeId);
        // it's possible to have > 1 FileSink writing to the same table/partition
        // e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
        desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement());
      }
    }
    /*It's imperative that {@code acquireLocks()} is called for all commands so that
      HiveTxnManager can transition its state machine correctly*/
    queryTxnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
    if (queryTxnMgr.recordSnapshot(plan)) {
      recordValidTxns(queryTxnMgr);
    }
    if (plan.hasAcidResourcesInQuery()) {
      recordValidWriteIds(queryTxnMgr);
    }
  } catch (Exception e) {
    errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
    SQLState = ErrorMsg.findSQLState(e.getMessage());
    downstreamError = e;
    console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw createProcessorResponse(10);
  } finally {
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
  }
}
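The explicit comparator above keeps the (file name -> data) mapping deterministic: once the acid sinks are in a fixed order, getStmtIdAndIncrement hands out statement ids in a fixed order too. A slightly more compact way to express the same sort, shown here only as a sketch (it keys on the Path rendered as a String rather than relying on Path's compareTo):

// Sketch: same ordering as the explicit comparator above, keyed on each
// sink's output directory name rendered as a String.
acidSinks.sort(java.util.Comparator.comparing(
    (FileSinkDesc fsd) -> fsd.getDirName().toString()));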
Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.
From the class TaskCompiler, method setLoadFileLocation.
private void setLoadFileLocation(final ParseContext pCtx, LoadFileDesc lfd) throws SemanticException {
  // CTAS; make the movetask's destination directory the table's destination.
  Long txnIdForCtas = null;
  // CTAS cannot be part of multi-txn stmt
  int stmtId = 0;
  FileSinkDesc dataSinkForCtas = null;
  String loc = null;
  if (pCtx.getQueryProperties().isCTAS()) {
    CreateTableDesc ctd = pCtx.getCreateTable();
    dataSinkForCtas = ctd.getAndUnsetWriter();
    txnIdForCtas = ctd.getInitialMmWriteId();
    loc = ctd.getLocation();
  } else {
    loc = pCtx.getCreateViewDesc().getLocation();
  }
  Path location = (loc == null) ? getDefaultCtasLocation(pCtx) : new Path(loc);
  if (txnIdForCtas != null) {
    dataSinkForCtas.setDirName(location);
    location = new Path(location, AcidUtils.deltaSubdir(txnIdForCtas, txnIdForCtas, stmtId));
    lfd.setSourcePath(location);
    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
      Utilities.FILE_OP_LOGGER.trace("Setting MM CTAS to " + location);
    }
  }
  if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
    Utilities.FILE_OP_LOGGER.trace("Location for LFD is being set to " + location
        + "; moving from " + lfd.getSourcePath());
  }
  lfd.setTargetDir(location);
}
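For a micromanaged (MM) CTAS, the load source becomes a delta subdirectory under the table location. The fragment below is a minimal sketch of that path composition, assuming AcidUtils.deltaSubdir renders zero-padded names like delta_<writeId>_<writeId>_<stmtId>; the location and id values are hypothetical.

// Sketch only: mirrors the delta-subdirectory path composed in the MM CTAS
// branch above, with hypothetical values.
Path tableLocation = new Path("/warehouse/mydb.db/t"); // hypothetical CTAS location
long writeId = 1L;                                     // would come from ctd.getInitialMmWriteId()
int stmtId = 0;                                        // CTAS cannot be part of multi-txn stmt
Path delta = new Path(tableLocation, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
// delta resembles /warehouse/mydb.db/t/delta_0000001_0000001_0000 and is the
// path handed to lfd.setSourcePath(...) in the branch above.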
Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.
From the class GenSparkUtils, method createMoveTask.
/**
* Create and add any dependent move tasks.
*
* This is forked from {@link GenMapRedUtils}. The difference is that it doesn't check
* 'isLinkedFileSink' and does not set parent dir for the linked file sinks.
*/
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir,
    FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks,
    HiveConf hconf, DependencyCollectionTask dependencyTask) {
  Path dest = null;
  FileSinkDesc fileSinkDesc = fsOp.getConf();
  if (chDir) {
    dest = fsOp.getConf().getFinalDirName();
    // generate the temporary file
    // it must be on the same file system as the current destination
    Context baseCtx = parseCtx.getContext();
    Path tmpDir = baseCtx.getExternalTmpPath(dest);
    // Change all the linked file sink descriptors
    if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
      for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
        fsConf.setDirName(tmpDir);
      }
    } else {
      fileSinkDesc.setDirName(tmpDir);
    }
  }
  Task<MoveWork> mvTask = null;
  if (!chDir) {
    mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false);
  }
  // Set the move task to be dependent on the current task
  if (mvTask != null) {
    GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
  }
  return dest;
}
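In the non-chDir branch above, an existing MoveTask is looked up by the sink's final directory. The helper below is a simplified, hypothetical sketch of that matching logic, not the actual GenMapRedUtils.findMoveTaskForFsopOutput; it assumes the candidate MoveWork carries a LoadFileDesc whose source path equals the FileSinkOperator's final directory.

// Hypothetical sketch of matching a MoveTask to a file sink's output directory;
// the real GenMapRedUtils.findMoveTaskForFsopOutput covers more cases.
private static Task<MoveWork> findMoveTaskForOutputSketch(
    List<Task<MoveWork>> mvTasks, Path fsopFinalDir) {
  for (Task<MoveWork> mvTask : mvTasks) {
    LoadFileDesc loadFile = mvTask.getWork().getLoadFileWork();
    if (loadFile != null && fsopFinalDir.equals(loadFile.getSourcePath())) {
      return mvTask; // an existing move task already consumes this directory
    }
  }
  return null; // caller treats a null result as "no dependent move task"
}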
Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project SQLWindowing by hbutani.
From the class MRExecutor, method createReduceSideTree.
/**
* The reduce-side plan always looks like
* ExtractOperator->PTFOperator->FileSinkOperator.
* Use the data structures initialized in MRUtils to
* create the operators here.
* @param qdef
* @param mrUtils
* @param mr
* @throws WindowingException
*/
@SuppressWarnings("unchecked")
private void createReduceSideTree(QueryDef qdef, MRUtils mrUtils, MapredWork mr)
    throws WindowingException {
  // reduce side work
  Operator<FileSinkDesc> op4 = OperatorFactory.get(
      new FileSinkDesc(mrUtils.getOutputPath(), createOutputTableDesc(qdef), false));
  Operator<PTFDesc> op3 = RuntimeUtils.createPTFOperator(
      new PTFDesc(SerializationUtils.serializeQueryDef(qdef)), op4);
  Operator<ExtractDesc> op2 = OperatorFactory.get(
      new ExtractDesc(TranslateUtils.getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
  mr.setReducer(op2);
}
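The chain is assembled sink-first: each factory call receives its child, so op4 is created before op3, op3 before op2, and op2 is finally registered as the reducer. A commented restatement of the wiring above:

// Reduce-side operator tree produced by createReduceSideTree (built leaf-first,
// wired through the child arguments to OperatorFactory.get / createPTFOperator):
//
//   ExtractOperator  (op2, set as the reducer)  -- pulls the shuffled VALUE column
//        |
//   PTFOperator      (op3)                      -- evaluates the windowing query (qdef)
//        |
//   FileSinkOperator (op4)                      -- writes results to mrUtils.getOutputPath()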