
Example 6 with Configurable

use of org.apache.hadoop.conf.Configurable in project hive by apache.

the class Driver method compile.

// deferClose indicates whether the close/destroy should be deferred when the process has been
// interrupted. It should be set to true if compile is called from within another method, such as
// runInternal, which defers the close until later in that method.
private void compile(String command, boolean resetTaskIds, boolean deferClose) throws CommandProcessorResponse {
    PerfLogger perfLogger = SessionState.getPerfLogger(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
    lDrvState.stateLock.lock();
    try {
        lDrvState.driverState = DriverState.COMPILING;
    } finally {
        lDrvState.stateLock.unlock();
    }
    command = new VariableSubstitution(new HiveVariableSource() {

        @Override
        public Map<String, String> getHiveVariable() {
            return SessionState.get().getHiveVariables();
        }
    }).substitute(conf, command);
    String queryStr = command;
    try {
        // command should be redacted to avoid logging sensitive data
        queryStr = HookUtils.redactLogString(conf, command);
    } catch (Exception e) {
        LOG.warn("WARNING! Query command could not be redacted." + e);
    }
    checkInterrupted("at beginning of compilation.", null, null);
    if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
        // close the existing ctx etc before compiling a new query, but does not destroy driver
        closeInProcess(false);
    }
    if (resetTaskIds) {
        TaskFactory.resetId();
    }
    LockedDriverState.setLockedDriverState(lDrvState);
    String queryId = queryState.getQueryId();
    if (ctx != null) {
        setTriggerContext(queryId);
    }
    // save some info for webUI for use after plan is freed
    this.queryDisplay.setQueryStr(queryStr);
    this.queryDisplay.setQueryId(queryId);
    LOG.info("Compiling command(queryId=" + queryId + "): " + queryStr);
    conf.setQueryString(queryStr);
    // FIXME: sideeffect will leave the last query set at the session level
    SessionState.get().getConf().setQueryString(queryStr);
    SessionState.get().setupQueryCurrentTimestamp();
    // Whether any error occurred during query compilation. Used for query lifetime hook.
    boolean compileError = false;
    boolean parseError = false;
    try {
        // Initialize the transaction manager.  This must be done before analyze is called.
        if (initTxnMgr != null) {
            queryTxnMgr = initTxnMgr;
        } else {
            queryTxnMgr = SessionState.get().initTxnMgr(conf);
        }
        if (queryTxnMgr instanceof Configurable) {
            ((Configurable) queryTxnMgr).setConf(conf);
        }
        queryState.setTxnManager(queryTxnMgr);
        // In case the user hits Ctrl-C twice to kill the Hive CLI JVM, we want to release locks.
        // If compile is called multiple times, clear the old shutdown hook first.
        ShutdownHookManager.removeShutdownHook(shutdownRunner);
        final HiveTxnManager txnMgr = queryTxnMgr;
        shutdownRunner = new Runnable() {

            @Override
            public void run() {
                try {
                    releaseLocksAndCommitOrRollback(false, txnMgr);
                } catch (LockException e) {
                    LOG.warn("Exception when releasing locks in ShutdownHook for Driver: " + e.getMessage());
                }
            }
        };
        ShutdownHookManager.addShutdownHook(shutdownRunner, SHUTDOWN_HOOK_PRIORITY);
        checkInterrupted("before parsing and analysing the query", null, null);
        if (ctx == null) {
            ctx = new Context(conf);
            setTriggerContext(queryId);
        }
        ctx.setRuntimeStatsSource(runtimeStatsSource);
        ctx.setCmd(command);
        ctx.setHDFSCleanup(true);
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
        // Trigger query hook before compilation
        hookRunner.runBeforeParseHook(command);
        ASTNode tree;
        try {
            tree = ParseUtils.parse(command, ctx);
        } catch (ParseException e) {
            parseError = true;
            throw e;
        } finally {
            hookRunner.runAfterParseHook(command, parseError);
        }
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);
        hookRunner.runBeforeCompileHook(command);
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
        // Flush the metastore cache.  This assures that we don't pick up objects from a previous
        // query running in this same thread.  This has to be done after we get our semantic
        // analyzer (this is when the connection to the metastore is made) but before we analyze,
        // because at that point we need access to the objects.
        Hive.get().getMSC().flushCache();
        BaseSemanticAnalyzer sem;
        // Do semantic analysis and plan generation
        if (hookRunner.hasPreAnalyzeHooks()) {
            HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
            hookCtx.setConf(conf);
            hookCtx.setUserName(userName);
            hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
            hookCtx.setCommand(command);
            hookCtx.setHiveOperation(queryState.getHiveOperation());
            tree = hookRunner.runPreAnalyzeHooks(hookCtx, tree);
            sem = SemanticAnalyzerFactory.get(queryState, tree);
            openTransaction();
            sem.analyze(tree, ctx);
            hookCtx.update(sem);
            hookRunner.runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
        } else {
            sem = SemanticAnalyzerFactory.get(queryState, tree);
            openTransaction();
            sem.analyze(tree, ctx);
        }
        LOG.info("Semantic Analysis Completed");
        // Retrieve information about cache usage for the query.
        if (conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
            cacheUsage = sem.getCacheUsage();
        }
        // validate the plan
        sem.validate();
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
        checkInterrupted("after analyzing query.", null, null);
        // get the output schema
        schema = getSchema(sem, conf);
        plan = new QueryPlan(queryStr, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId, queryState.getHiveOperation(), schema);
        conf.set("mapreduce.workflow.id", "hive_" + queryId);
        conf.set("mapreduce.workflow.name", queryStr);
        // initialize FetchTask right here
        if (plan.getFetchTask() != null) {
            plan.getFetchTask().initialize(queryState, plan, null, ctx.getOpContext());
        }
        // do the authorization check
        if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
            try {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
                doAuthorization(queryState.getHiveOperation(), sem, command);
            } catch (AuthorizationException authExp) {
                console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details.");
                errorMessage = authExp.getMessage();
                SQLState = "42000";
                throw createProcessorResponse(403);
            } finally {
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
            }
        }
        if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
            String explainOutput = getExplainOutput(sem, plan, tree);
            if (explainOutput != null) {
                LOG.info("EXPLAIN output for queryid " + queryId + " : " + explainOutput);
                if (conf.isWebUiQueryInfoCacheEnabled()) {
                    queryDisplay.setExplainPlan(explainOutput);
                }
            }
        }
    } catch (CommandProcessorResponse cpr) {
        throw cpr;
    } catch (Exception e) {
        checkInterrupted("during query compilation: " + e.getMessage(), null, null);
        compileError = true;
        ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
        errorMessage = "FAILED: " + e.getClass().getSimpleName();
        if (error != ErrorMsg.GENERIC_ERROR) {
            errorMessage += " [Error " + error.getErrorCode() + "]:";
        }
        // HIVE-4889
        if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
            errorMessage += " " + e.getCause().getMessage();
        } else {
            errorMessage += " " + e.getMessage();
        }
        if (error == ErrorMsg.TXNMGR_NOT_ACID) {
            errorMessage += ". Failed command: " + queryStr;
        }
        SQLState = error.getSQLState();
        downstreamError = e;
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw createProcessorResponse(error.getErrorCode());
    } finally {
        // Trigger the post-compilation hook. Note that if compilation fails here, the
        // before/after execution hooks will never be executed.
        if (!parseError) {
            try {
                hookRunner.runAfterCompilationHook(command, compileError);
            } catch (Exception e) {
                LOG.warn("Failed when invoking query after-compilation hook.", e);
            }
        }
        double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00;
        ImmutableMap<String, Long> compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation");
        queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings);
        boolean isInterrupted = lDrvState.isAborted();
        if (isInterrupted && !deferClose) {
            closeInProcess(true);
        }
        lDrvState.stateLock.lock();
        try {
            if (isInterrupted) {
                lDrvState.driverState = deferClose ? DriverState.EXECUTING : DriverState.ERROR;
            } else {
                lDrvState.driverState = compileError ? DriverState.ERROR : DriverState.COMPILED;
            }
        } finally {
            lDrvState.stateLock.unlock();
        }
        if (isInterrupted) {
            LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
        } else {
            LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
        }
    }
}
Also used : HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) HiveVariableSource(org.apache.hadoop.hive.conf.HiveVariableSource) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) Configurable(org.apache.hadoop.conf.Configurable) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) PrivateHookContext(org.apache.hadoop.hive.ql.hooks.PrivateHookContext) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) HiveAuthzContext(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) HookContext(org.apache.hadoop.hive.ql.hooks.HookContext) VariableSubstitution(org.apache.hadoop.hive.conf.VariableSubstitution) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveSemanticAnalyzerHookContextImpl(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
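The Configurable usage in this example is the transaction-manager setup: the Driver does not know the concrete HiveTxnManager implementation, so it probes for the interface and injects the HiveConf only when the object opts in. A minimal sketch of that idiom under illustrative names (ConfInjector is not a Hive class):

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;

// Illustrative helper, not part of Hive: push a Configuration into any object that
// implements Configurable, mirroring how queryTxnMgr is handled above.
public final class ConfInjector {

    private ConfInjector() {
    }

    public static <T> T injectIfConfigurable(T instance, Configuration conf) {
        if (instance instanceof Configurable) {
            ((Configurable) instance).setConf(conf);
        }
        return instance;
    }
}

Hadoop's own ReflectionUtils.setConf performs the same probe, and ReflectionUtils.newInstance applies it automatically to the instances it creates.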

Example 7 with Configurable

use of org.apache.hadoop.conf.Configurable in project flink by apache.

the class HadoopInputSplit method readObject.

private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    // read the parent fields and the final fields
    in.defaultReadObject();
    // the job conf knows how to deserialize itself
    jobConf = new JobConf();
    jobConf.readFields(in);
    try {
        hadoopInputSplit = (org.apache.hadoop.mapred.InputSplit) WritableFactories.newInstance(splitType);
    } catch (Exception e) {
        throw new RuntimeException("Unable to instantiate Hadoop InputSplit", e);
    }
    if (hadoopInputSplit instanceof Configurable) {
        ((Configurable) hadoopInputSplit).setConf(this.jobConf);
    } else if (hadoopInputSplit instanceof JobConfigurable) {
        ((JobConfigurable) hadoopInputSplit).configure(this.jobConf);
    }
    hadoopInputSplit.readFields(in);
}
Also used : JobConfigurable(org.apache.hadoop.mapred.JobConfigurable) Configurable(org.apache.hadoop.conf.Configurable) JobConf(org.apache.hadoop.mapred.JobConf) IOException(java.io.IOException)
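For the setConf branch above to take effect, the concrete split class has to implement Configurable itself. A minimal sketch of such a split, using the made-up name ConfAwareSplit; it receives the deserialized JobConf through setConf before readFields runs:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputSplit;

// Hypothetical split type: because it implements Configurable, wrappers like the
// HadoopInputSplit above hand it the JobConf before deserializing its fields.
public class ConfAwareSplit implements InputSplit, Configurable {

    private Configuration conf;
    private String path = ""; // illustrative payload
    private long length;

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    @Override
    public Configuration getConf() {
        return conf;
    }

    @Override
    public long getLength() {
        return length;
    }

    @Override
    public String[] getLocations() {
        return new String[0];
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(path);
        out.writeLong(length);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        path = in.readUTF();
        length = in.readLong();
    }
}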

Example 8 with Configurable

use of org.apache.hadoop.conf.Configurable in project hive by apache.

the class HadoopThriftAuthBridge23 method getHadoopSaslProperties.

/**
   * Read and return Hadoop SASL configuration which can be configured using
   * "hadoop.rpc.protection"
   *
   * @param conf Hadoop configuration to read the SASL settings from
   * @return Hadoop SASL configuration
   */
@SuppressWarnings("unchecked")
@Override
public Map<String, String> getHadoopSaslProperties(Configuration conf) {
    if (SASL_PROPS_FIELD != null) {
        // hadoop 2.4 and earlier way of finding the sasl property settings
        // Initialize the SaslRpcServer to ensure QOP parameters are read from
        // conf
        SaslRpcServer.init(conf);
        try {
            return (Map<String, String>) SASL_PROPS_FIELD.get(null);
        } catch (Exception e) {
            throw new IllegalStateException("Error finding hadoop SASL properties", e);
        }
    }
    // 2.5 and later way of finding sasl property
    try {
        Configurable saslPropertiesResolver = (Configurable) RES_GET_INSTANCE_METHOD.invoke(null, conf);
        saslPropertiesResolver.setConf(conf);
        return (Map<String, String>) GET_DEFAULT_PROP_METHOD.invoke(saslPropertiesResolver);
    } catch (Exception e) {
        throw new IllegalStateException("Error finding hadoop SASL properties", e);
    }
}
Also used : Configurable(org.apache.hadoop.conf.Configurable) Map(java.util.Map)
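On Hadoop 2.5 and later the reflective handles above resolve to the SaslPropertiesResolver API, so the lookup can also be written directly. A sketch, assuming RES_GET_INSTANCE_METHOD and GET_DEFAULT_PROP_METHOD point at getInstance() and getDefaultProperties():

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SaslPropertiesResolver;

// Direct, non-reflective equivalent of the 2.5+ branch above; only compiles against
// Hadoop versions that already ship SaslPropertiesResolver.
public class SaslPropsExample {

    public static Map<String, String> saslProperties(Configuration conf) {
        // getInstance() builds the resolver via ReflectionUtils, which already calls
        // setConf() on Configurable instances.
        SaslPropertiesResolver resolver = SaslPropertiesResolver.getInstance(conf);
        return resolver.getDefaultProperties();
    }
}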

Example 9 with Configurable

use of org.apache.hadoop.conf.Configurable in project mongo-hadoop by mongodb.

the class BSONSplitter method run.

/**
     * When run as a Tool, BSONSplitter can be used to pre-split and compress
     * BSON files. This can be especially useful before uploading large BSON
     * files to HDFS to save time. The compressed splits are written to the
     * given output path or to the directory containing the input file, if
     * the output path is unspecified. A ".splits" file is not generated, since
     * each output file is expected to be its own split.
     *
     * @param args command-line arguments. Run with zero arguments to see usage.
     * @return exit status
     * @throws Exception
     */
@Override
public int run(final String[] args) throws Exception {
    if (args.length < 1) {
        printUsage();
        return 1;
    }
    // Parse command-line arguments.
    Path filePath = new Path(args[0]);
    String compressorName = null, outputDirectoryStr = null;
    Path outputDirectory;
    CompressionCodec codec;
    Compressor compressor;
    for (int i = 1; i < args.length; ++i) {
        if ("-c".equals(args[i]) && args.length > i) {
            compressorName = args[++i];
        } else if ("-o".equals(args[i]) && args.length > i) {
            outputDirectoryStr = args[++i];
        } else {
            // CHECKSTYLE:OFF
            System.err.println("unrecognized option: " + args[i]);
            // CHECKSTYLE:ON
            printUsage();
            return 1;
        }
    }
    // Supply default values for unspecified arguments.
    if (null == outputDirectoryStr) {
        outputDirectory = filePath.getParent();
    } else {
        outputDirectory = new Path(outputDirectoryStr);
    }
    if (null == compressorName) {
        codec = new DefaultCodec();
    } else {
        Class<?> codecClass = Class.forName(compressorName);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, getConf());
    }
    if (codec instanceof Configurable) {
        ((Configurable) codec).setConf(getConf());
    }
    // Do not write a .splits file so as not to confuse BSONSplitter.
    // Each compressed file will be its own split.
    MongoConfigUtil.setBSONWriteSplits(getConf(), false);
    // Open the file.
    FileSystem inputFS = FileSystem.get(filePath.toUri(), getConf());
    FileSystem outputFS = FileSystem.get(outputDirectory.toUri(), getConf());
    FSDataInputStream inputStream = inputFS.open(filePath);
    // Use BSONSplitter to split the file.
    Path splitFilePath = getSplitsFilePath(filePath, getConf());
    try {
        loadSplitsFromSplitFile(inputFS.getFileStatus(filePath), splitFilePath);
    } catch (NoSplitFileException e) {
        LOG.info("did not find .splits file in " + splitFilePath.toUri());
        setInputPath(filePath);
        readSplits();
    }
    List<BSONFileSplit> splits = getAllSplits();
    LOG.info("compressing " + splits.size() + " splits.");
    byte[] buf = new byte[1024 * 1024];
    for (int i = 0; i < splits.size(); ++i) {
        // e.g., hdfs:///user/hive/warehouse/mongo/OutputFile-42.bz2
        Path splitOutputPath = new Path(outputDirectory, filePath.getName() + "-" + i + codec.getDefaultExtension());
        // Compress the split into a new file.
        compressor = CodecPool.getCompressor(codec);
        CompressionOutputStream compressionOutputStream = null;
        try {
            compressionOutputStream = codec.createOutputStream(outputFS.create(splitOutputPath), compressor);
            int totalBytes = 0, bytesRead = 0;
            BSONFileSplit split = splits.get(i);
            inputStream.seek(split.getStart());
            LOG.info("writing " + splitOutputPath.toUri() + ".");
            while (totalBytes < split.getLength() && bytesRead >= 0) {
                bytesRead = inputStream.read(buf, 0, (int) Math.min(buf.length, split.getLength() - totalBytes));
                if (bytesRead > 0) {
                    compressionOutputStream.write(buf, 0, bytesRead);
                    totalBytes += bytesRead;
                }
            }
        } finally {
            if (compressionOutputStream != null) {
                compressionOutputStream.close();
            }
            CodecPool.returnCompressor(compressor);
        }
    }
    LOG.info("done.");
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) BSONFileSplit(com.mongodb.hadoop.input.BSONFileSplit) Compressor(org.apache.hadoop.io.compress.Compressor) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Configurable(org.apache.hadoop.conf.Configurable) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
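Reading one of the compressed split files back does not need the manual Configurable check: CompressionCodecFactory picks the codec from the file extension and does the setConf wiring internally. A sketch with an illustrative class name:

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Illustrative reader for the split files written by the tool above.
public class ReadCompressedSplit {

    public static InputStream open(Configuration conf, Path splitFile) throws Exception {
        FileSystem fs = FileSystem.get(splitFile.toUri(), conf);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(splitFile);
        InputStream raw = fs.open(splitFile);
        // getCodec() returns null for an unknown extension; fall back to the raw stream.
        return codec == null ? raw : codec.createInputStream(raw);
    }
}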

Example 10 with Configurable

use of org.apache.hadoop.conf.Configurable in project presto by prestodb.

the class PrestoS3FileSystem method createEncryptionMaterialsProvider.

private static Optional<EncryptionMaterialsProvider> createEncryptionMaterialsProvider(Configuration hadoopConfig) {
    String kmsKeyId = hadoopConfig.get(S3_KMS_KEY_ID);
    if (kmsKeyId != null) {
        return Optional.of(new KMSEncryptionMaterialsProvider(kmsKeyId));
    }
    String empClassName = hadoopConfig.get(S3_ENCRYPTION_MATERIALS_PROVIDER);
    if (empClassName == null) {
        return Optional.empty();
    }
    try {
        Object instance = Class.forName(empClassName).getConstructor().newInstance();
        if (!(instance instanceof EncryptionMaterialsProvider)) {
            throw new RuntimeException("Invalid encryption materials provider class: " + instance.getClass().getName());
        }
        EncryptionMaterialsProvider emp = (EncryptionMaterialsProvider) instance;
        if (emp instanceof Configurable) {
            ((Configurable) emp).setConf(hadoopConfig);
        }
        return Optional.of(emp);
    } catch (ReflectiveOperationException e) {
        throw new RuntimeException("Unable to load or create S3 encryption materials provider: " + empClassName, e);
    }
}
Also used : KMSEncryptionMaterialsProvider(com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider) EncryptionMaterialsProvider(com.amazonaws.services.s3.model.EncryptionMaterialsProvider) Configurable(org.apache.hadoop.conf.Configurable)
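A provider named through the S3_ENCRYPTION_MATERIALS_PROVIDER setting can opt into this hook by implementing Configurable alongside EncryptionMaterialsProvider. A minimal sketch; the class name and the fs.s3.cse.key property are placeholders, not part of Presto:

import java.util.Base64;
import java.util.Map;

import javax.crypto.spec.SecretKeySpec;

import com.amazonaws.services.s3.model.EncryptionMaterials;
import com.amazonaws.services.s3.model.EncryptionMaterialsProvider;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;

// Illustrative provider: reads a base64-encoded AES key from a made-up config property
// and serves it as static symmetric encryption materials.
public class ConfiguredMaterialsProvider implements EncryptionMaterialsProvider, Configurable {

    private Configuration conf;
    private EncryptionMaterials materials;

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
        // "fs.s3.cse.key" is a placeholder property name used only in this sketch.
        byte[] key = Base64.getDecoder().decode(conf.get("fs.s3.cse.key"));
        this.materials = new EncryptionMaterials(new SecretKeySpec(key, "AES"));
    }

    @Override
    public Configuration getConf() {
        return conf;
    }

    @Override
    public EncryptionMaterials getEncryptionMaterials() {
        return materials;
    }

    @Override
    public EncryptionMaterials getEncryptionMaterials(Map<String, String> materialsDescription) {
        return materials;
    }

    @Override
    public void refresh() {
        // static key, nothing to refresh
    }
}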

Aggregations

Configurable (org.apache.hadoop.conf.Configurable) 16
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec) 6
IOException (java.io.IOException) 4
FileOutputStream (java.io.FileOutputStream) 3
Map (java.util.Map) 3
Configuration (org.apache.hadoop.conf.Configuration) 3
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 2
FileSystem (org.apache.hadoop.fs.FileSystem) 2
Path (org.apache.hadoop.fs.Path) 2
Writable (org.apache.hadoop.io.Writable) 2
Compressor (org.apache.hadoop.io.compress.Compressor) 2
EncryptionMaterialsProvider (com.amazonaws.services.s3.model.EncryptionMaterialsProvider) 1
KMSEncryptionMaterialsProvider (com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider) 1
ImmutableMap (com.google.common.collect.ImmutableMap) 1
BSONFileSplit (com.mongodb.hadoop.input.BSONFileSplit) 1
BufferOverflowException (java.nio.BufferOverflowException) 1
HashMap (java.util.HashMap) 1
LinkedHashMap (java.util.LinkedHashMap) 1
GoraException (org.apache.gora.util.GoraException) 1
FileStatus (org.apache.hadoop.fs.FileStatus) 1