Example 26 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

In class Driver, method acquireLocks:

/**
 * Acquire the read and write locks needed by the statement. The list of objects to be locked is
 * obtained from the inputs and outputs populated by the compiler. The locking strategy depends on
 * the configured HiveTxnManager and HiveLockManager.
 *
 * This method also records the list of valid transactions.  This must be done after any
 * transactions have been opened.
 * @throws CommandProcessorResponse
 */
private void acquireLocks() throws CommandProcessorResponse {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    if (!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) {
        /* Non-ACID txn managers don't support txns but forward lock requests to lock managers.
        The ACID txn manager requires all locks to be associated with a txn, so if we
        end up here without an open txn it's because we are processing something like "use <database>",
        which by definition needs no locks. */
        return;
    }
    try {
        String userFromUGI = getUserFromUGI();
        if (userFromUGI == null) {
            throw createProcessorResponse(10);
        }
        // Set the table write id in all of the acid file sinks
        if (haveAcidWrite()) {
            List<FileSinkDesc> acidSinks = new ArrayList<>(plan.getAcidSinks());
            // sorting makes tests easier to write since file names and ROW__IDs depend on statementId
            // so this makes (file name -> data) mapping stable
            acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) -> fsd1.getDirName().compareTo(fsd2.getDirName()));
            for (FileSinkDesc desc : acidSinks) {
                TableDesc tableInfo = desc.getTableInfo();
                long writeId = queryTxnMgr.getTableWriteId(Utilities.getDatabaseName(tableInfo.getTableName()), Utilities.getTableName(tableInfo.getTableName()));
                desc.setTableWriteId(writeId);
                // it's possible to have > 1 FileSink writing to the same table/partition
                // e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
                desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement());
            }
        }
        /* It's imperative that {@code acquireLocks()} is called for all commands so that
           HiveTxnManager can transition its state machine correctly. */
        queryTxnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
        if (queryTxnMgr.recordSnapshot(plan)) {
            recordValidTxns(queryTxnMgr);
        }
        if (plan.hasAcidResourcesInQuery()) {
            recordValidWriteIds(queryTxnMgr);
        }
    } catch (Exception e) {
        errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
        SQLState = ErrorMsg.findSQLState(e.getMessage());
        downstreamError = e;
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw createProcessorResponse(10);
    } finally {
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    }
}
Also used : FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException)
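
A minimal standalone sketch of the same sort-then-number pattern (the class name AcidSinkNumbering and the plain writeId parameter are illustrative placeholders; in the real Driver both the write id and the statement ids come from the HiveTxnManager):

import java.util.List;

import org.apache.hadoop.hive.ql.plan.FileSinkDesc;

public class AcidSinkNumbering {

    /**
     * Mirrors the loop in Driver.acquireLocks(): sinks are sorted by directory name so that the
     * statementId each one receives (and therefore file names and ROW__IDs) is deterministic.
     */
    public static void assignIds(List<FileSinkDesc> acidSinks, long writeId) {
        acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) -> fsd1.getDirName().compareTo(fsd2.getDirName()));
        int stmtId = 0;
        for (FileSinkDesc desc : acidSinks) {
            desc.setTableWriteId(writeId);
            // more than one FileSink can write the same table/partition (e.g. MERGE, multi-insert),
            // so each sink gets its own statement id
            desc.setStatementId(stmtId++);
        }
    }
}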

Example 27 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

In class TaskCompiler, method setLoadFileLocation:

private void setLoadFileLocation(final ParseContext pCtx, LoadFileDesc lfd) throws SemanticException {
    // CTAS: make the move task's destination directory the table's destination.
    Long txnIdForCtas = null;
    // CTAS cannot be part of a multi-statement transaction.
    int stmtId = 0;
    FileSinkDesc dataSinkForCtas = null;
    String loc = null;
    if (pCtx.getQueryProperties().isCTAS()) {
        CreateTableDesc ctd = pCtx.getCreateTable();
        dataSinkForCtas = ctd.getAndUnsetWriter();
        txnIdForCtas = ctd.getInitialMmWriteId();
        loc = ctd.getLocation();
    } else {
        loc = pCtx.getCreateViewDesc().getLocation();
    }
    Path location = (loc == null) ? getDefaultCtasLocation(pCtx) : new Path(loc);
    if (txnIdForCtas != null) {
        dataSinkForCtas.setDirName(location);
        location = new Path(location, AcidUtils.deltaSubdir(txnIdForCtas, txnIdForCtas, stmtId));
        lfd.setSourcePath(location);
        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
            Utilities.FILE_OP_LOGGER.trace("Setting MM CTAS to " + location);
        }
    }
    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("Location for LFD is being set to " + location + "; moving from " + lfd.getSourcePath());
    }
    lfd.setTargetDir(location);
}
Also used : Path(org.apache.hadoop.fs.Path) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
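
For an MM (insert-only ACID) CTAS, the interesting part is how the target location is extended with a delta subdirectory. A minimal sketch of just that path arithmetic, with a placeholder table location and hard-coded ids (in the real code the write id comes from CreateTableDesc.getInitialMmWriteId()):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;

public class CtasDeltaPath {

    public static void main(String[] args) {
        long writeId = 1L;  // assumed: the initial MM write id allocated for the CTAS
        int stmtId = 0;     // CTAS cannot be part of a multi-statement transaction
        Path tableDir = new Path("/warehouse/demo.db/ctas_target");  // placeholder table location
        // data is written into (and the move task later loads from) a delta directory under the table dir
        Path delta = new Path(tableDir, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
        System.out.println(delta);  // e.g. .../ctas_target/delta_0000001_0000001_0000 (exact format depends on the Hive version)
    }
}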

Example 28 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project hive by apache.

In class GenSparkUtils, method createMoveTask:

/**
 * Create and add any dependent move tasks.
 *
 * This is forked from {@link GenMapRedUtils}. The difference is that it doesn't check
 * 'isLinkedFileSink' and does not set parent dir for the linked file sinks.
 */
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
    Path dest = null;
    FileSinkDesc fileSinkDesc = fsOp.getConf();
    if (chDir) {
        dest = fsOp.getConf().getFinalDirName();
        // generate the temporary file
        // it must be on the same file system as the current destination
        Context baseCtx = parseCtx.getContext();
        Path tmpDir = baseCtx.getExternalTmpPath(dest);
        // Change all the linked file sink descriptors
        if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
            for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
                fsConf.setDirName(tmpDir);
            }
        } else {
            fileSinkDesc.setDirName(tmpDir);
        }
    }
    Task<MoveWork> mvTask = null;
    if (!chDir) {
        mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false);
    }
    // Set the move task to be dependent on the current task
    if (mvTask != null) {
        GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
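
The part that touches FileSinkDesc is the redirect to a temporary directory. A minimal standalone sketch of that step (the helper name SinkRedirect is a placeholder; the calls are the ones used above):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;

public class SinkRedirect {

    /**
     * When the output has to go through a MoveTask, the sink (and every linked sink sharing the
     * same destination) is pointed at a temporary directory first; the MoveTask later moves the
     * data to the final directory.
     */
    public static void redirectToTmp(FileSinkDesc fileSinkDesc, Path tmpDir) {
        if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
            for (FileSinkDesc linked : fileSinkDesc.getLinkedFileSinkDesc()) {
                linked.setDirName(tmpDir);
            }
        } else {
            fileSinkDesc.setDirName(tmpDir);
        }
    }
}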

Example 29 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in project SQLWindowing by hbutani.

In class MRExecutor, method createReduceSideTree:

/**
 * The reduce-side plan always looks like
 * ExtractOperator -> PTFOperator -> FileSinkOperator.
 * Use the data structures initialized in MRUtils to
 * create the operators here.
 * @param qdef
 * @param mrUtils
 * @param mr
 * @throws WindowingException
 */
@SuppressWarnings("unchecked")
private void createReduceSideTree(QueryDef qdef, MRUtils mrUtils, MapredWork mr) throws WindowingException {
    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(mrUtils.getOutputPath(), createOutputTableDesc(qdef), false));
    Operator<PTFDesc> op3 = RuntimeUtils.createPTFOperator(new PTFDesc(SerializationUtils.serializeQueryDef(qdef)), op4);
    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(TranslateUtils.getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
    mr.setReducer(op2);
}
Also used : FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ExtractDesc(org.apache.hadoop.hive.ql.plan.ExtractDesc)
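
The FileSinkDesc at the end of the chain is built with the three-argument constructor shown above. A minimal sketch of just that call, assuming the (Path, TableDesc, boolean) signature of current Hive and leaving the output path and table descriptor as parameters (MRUtils.getOutputPath() and createOutputTableDesc() supply them in the real code):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class ReduceSideSink {

    public static FileSinkDesc outputSink(Path outputPath, TableDesc outputTable) {
        // directory to write to, descriptor of the output table, compressed = false
        return new FileSinkDesc(outputPath, outputTable, false);
    }
}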

Aggregations

FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc): 29 usages
Path (org.apache.hadoop.fs.Path): 17 usages
ArrayList (java.util.ArrayList): 13 usages
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 9 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 7 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 7 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 6 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 6 usages
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 6 usages
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 6 usages
DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx): 5 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 4 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 4 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 4 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4 usages
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 4 usages
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 4 usages
Serializable (java.io.Serializable): 3 usages
List (java.util.List): 3 usages
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 3 usages