Example 11 with LogHelper

Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.

From class HashTableSinkOperator, method initializeOp:

@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
    console = new LogHelper(LOG, isSilent);
    memoryExhaustionHandler = new MapJoinMemoryExhaustionHandler(console, conf.getHashtableMemoryUsage());
    emptyRowContainer.addRow(emptyObjectArray);
    // hash tables are built for the small tables only, so get the big table position first
    posBigTableAlias = conf.getPosBigTable();
    order = conf.getTagOrder();
    // initialize some variables, which used to be initialized in CommonJoinOperator
    this.hconf = hconf;
    filterMaps = conf.getFilterMap();
    int tagLen = conf.getTagLength();
    // process join keys
    joinKeys = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
    // process join values
    joinValues = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
    // process join filters
    joinFilters = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
    if (!conf.isNoOuterJoin()) {
        for (Byte alias : order) {
            if (alias == posBigTableAlias || joinValues[alias] == null) {
                continue;
            }
            List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
            if (filterMaps != null && filterMaps[alias] != null) {
                // for each alias, add object inspector for filter tag as the last element
                rcOIs = new ArrayList<ObjectInspector>(rcOIs);
                rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
            }
        }
    }
    mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
    if (hashTableScale <= 0) {
        hashTableScale = 1;
    }
    try {
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerde = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(keySerde, null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerde, false);
        for (Byte pos : order) {
            if (pos == posBigTableAlias) {
                continue;
            }
            mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
            TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used:
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper)
AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe)
MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe)
HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper)
MapJoinMemoryExhaustionHandler (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler)
MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext)
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)
SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
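
The snippet above shows the usual wiring: the operator reads the session's silent flag from the job Configuration and wraps its SLF4J logger in a LogHelper, so progress messages reach both the log and, unless silenced, the user's console. A minimal standalone sketch of that pattern, assuming Hive is on the classpath (the class name is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ConsoleWiringSketch {
    private static final Logger LOG = LoggerFactory.getLogger(ConsoleWiringSketch.class);

    // Build a console helper that honors hive.session.silent, as initializeOp does above.
    static LogHelper buildConsole(Configuration hconf) {
        boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
        return new LogHelper(LOG, isSilent);
    }
}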

Example 12 with LogHelper

Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.

From class PreExecutePrinter, method run:

public void run(QueryState queryState, Set<ReadEntity> inputs, Set<WriteEntity> outputs, UserGroupInformation ugi) throws Exception {
    LogHelper console = SessionState.getConsole();
    if (console == null) {
        return;
    }
    if (queryState != null) {
        console.printInfo("PREHOOK: query: " + queryState.getQueryString().trim(), false);
        console.printInfo("PREHOOK: type: " + queryState.getCommandType(), false);
    }
    printEntities(console, inputs, "PREHOOK: Input: ");
    printEntities(console, outputs, "PREHOOK: Output: ");
}
Also used: LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper)
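
The null check matters because a hook can run before a session console has been initialized. A hedged sketch of the same pattern in a hypothetical post-execution hook; the trailing boolean passed to printInfo is a per-call isSilent override, and passing false requests console output for that message:

import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;

public class PostHookPrinterSketch {
    public void run(String queryString) {
        LogHelper console = SessionState.getConsole();
        if (console == null) {
            return; // no session console available; nothing to print
        }
        // false is the per-call isSilent override: print even in a silent session
        console.printInfo("POSTHOOK: query: " + queryString.trim(), false);
    }
}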

Example 13 with LogHelper

Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.

From class RelOptHiveTable, method updateColStats:

private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnForMissingStats) {
    List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
    List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
    List<String> partColNamesThatRqrStats = new ArrayList<String>();
    List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
    Set<String> colNamesFailedStats = new HashSet<String>();
    // 1. Separate required columns into non-partition and partition cols
    ColumnInfo tmp;
    for (Integer pi : projIndxLst) {
        if (hiveColStatsMap.get(pi) == null) {
            if ((tmp = hiveNonPartitionColsMap.get(pi)) != null) {
                nonPartColNamesThatRqrStats.add(tmp.getInternalName());
                nonPartColIndxsThatRqrStats.add(pi);
            } else if ((tmp = hivePartitionColsMap.get(pi)) != null) {
                partColNamesThatRqrStats.add(tmp.getInternalName());
                partColIndxsThatRqrStats.add(pi);
            } else {
                noColsMissingStats.getAndIncrement();
                String logMsg = "Unable to find Column Index: " + pi + ", in " + hiveTblMetadata.getCompleteName();
                LOG.error(logMsg);
                throw new RuntimeException(logMsg);
            }
        }
    }
    if (null == partitionList) {
        // We could be here either because it's an unpartitioned table or because
        // there are no pruning predicates on a partitioned table.
        computePartitionList(hiveConf, null, new HashSet<Integer>());
    }
    ColumnStatsList colStatsCached = colStatsCache.get(partitionList.getKey());
    if (colStatsCached == null) {
        colStatsCached = new ColumnStatsList();
        colStatsCache.put(partitionList.getKey(), colStatsCached);
    }
    // 2. Obtain Col Stats for Non Partition Cols
    if (nonPartColNamesThatRqrStats.size() > 0) {
        List<ColStatistics> hiveColStats = new ArrayList<ColStatistics>();
        if (!hiveTblMetadata.isPartitioned()) {
            // 2.1 Handle the case for unpartitioned table.
            try {
                Statistics stats = StatsUtils.collectStatistics(hiveConf, null, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached, nonPartColNamesThatRqrStats, true);
                rowCount = stats.getNumRows();
                for (String c : nonPartColNamesThatRqrStats) {
                    ColStatistics cs = stats.getColumnStatisticsFromColName(c);
                    if (cs != null) {
                        hiveColStats.add(cs);
                    }
                }
                colStatsCached.updateState(stats.getColumnStatsState());
                // 2.1.1 Record column names that we needed stats for but couldn't obtain
                if (hiveColStats.isEmpty()) {
                    colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
                } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) {
                    Set<String> setOfFailedCols = new HashSet<String>(nonPartColNamesThatRqrStats);
                    Set<String> setOfObtainedColStats = new HashSet<String>();
                    for (ColStatistics cs : hiveColStats) {
                        setOfObtainedColStats.add(cs.getColumnName());
                    }
                    setOfFailedCols.removeAll(setOfObtainedColStats);
                    colNamesFailedStats.addAll(setOfFailedCols);
                } else {
                    // Column stats in hiveColStats might not be in the same order as the columns in
                    // nonPartColNamesThatRqrStats. reorder hiveColStats so we can build hiveColStatsMap
                    // using nonPartColIndxsThatRqrStats as below
                    Map<String, ColStatistics> columnStatsMap = new HashMap<String, ColStatistics>(hiveColStats.size());
                    for (ColStatistics cs : hiveColStats) {
                        columnStatsMap.put(cs.getColumnName(), cs);
                        // estimated stats are treated as missing
                        if (cs.isEstimated()) {
                            colNamesFailedStats.add(cs.getColumnName());
                        }
                    }
                    hiveColStats.clear();
                    for (String colName : nonPartColNamesThatRqrStats) {
                        hiveColStats.add(columnStatsMap.get(colName));
                    }
                }
            } catch (HiveException e) {
                String logMsg = "Collecting stats for table: " + hiveTblMetadata.getTableName() + " failed.";
                LOG.error(logMsg, e);
                throw new RuntimeException(logMsg, e);
            }
        } else {
            // 2.2 Obtain col stats for partitioned table.
            try {
                if (partitionList.getNotDeniedPartns().isEmpty()) {
                    // no need to make a metastore call
                    rowCount = 0;
                    hiveColStats = new ArrayList<ColStatistics>();
                    for (int i = 0; i < nonPartColNamesThatRqrStats.size(); i++) {
                        // add empty stats object for each column
                        hiveColStats.add(new ColStatistics(nonPartColNamesThatRqrStats.get(i), hiveNonPartitionColsMap.get(nonPartColIndxsThatRqrStats.get(i)).getTypeName()));
                    }
                    colNamesFailedStats.clear();
                    colStatsCached.updateState(State.COMPLETE);
                } else {
                    Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached, nonPartColNamesThatRqrStats, true);
                    rowCount = stats.getNumRows();
                    hiveColStats = new ArrayList<ColStatistics>();
                    for (String c : nonPartColNamesThatRqrStats) {
                        ColStatistics cs = stats.getColumnStatisticsFromColName(c);
                        if (cs != null) {
                            hiveColStats.add(cs);
                            if (cs.isEstimated()) {
                                colNamesFailedStats.add(c);
                            }
                        } else {
                            colNamesFailedStats.add(c);
                        }
                    }
                    colStatsCached.updateState(stats.getColumnStatsState());
                }
            } catch (HiveException e) {
                String logMsg = "Collecting stats failed.";
                LOG.error(logMsg, e);
                throw new RuntimeException(logMsg, e);
            }
        }
        if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
            for (int i = 0; i < hiveColStats.size(); i++) {
                // the columns in nonPartColIndxsThatRqrStats/nonPartColNamesThatRqrStats/hiveColStats
                // are in same order
                hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i));
                colStatsCached.put(hiveColStats.get(i).getColumnName(), hiveColStats.get(i));
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Stats for column " + hiveColStats.get(i).getColumnName() + " in table " + hiveTblMetadata.getTableName() + " stored in cache");
                    LOG.debug(hiveColStats.get(i).toString());
                }
            }
        }
    }
    // 3. Obtain Stats for Partition Cols
    if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) {
        ColStatistics cStats = null;
        for (int i = 0; i < partColNamesThatRqrStats.size(); i++) {
            cStats = StatsUtils.getColStatsForPartCol(hivePartitionColsMap.get(partColIndxsThatRqrStats.get(i)), new PartitionIterable(partitionList.getNotDeniedPartns()), hiveConf);
            hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats);
            colStatsCached.put(cStats.getColumnName(), cStats);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Stats for column " + cStats.getColumnName() + " in table " + hiveTblMetadata.getTableName() + " stored in cache");
                LOG.debug(cStats.toString());
            }
        }
    }
    // 4. Warn user if we could not get stats for all required columns
    if (!colNamesFailedStats.isEmpty()) {
        String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: " + getColNamesForLogging(colNamesFailedStats);
        noColsMissingStats.getAndAdd(colNamesFailedStats.size());
        if (allowNullColumnForMissingStats) {
            LOG.warn(logMsg);
            HiveConf conf = SessionState.getSessionConf();
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) {
                LogHelper console = SessionState.getConsole();
                console.printInfo(logMsg);
            }
        } else {
            LOG.error(logMsg);
            throw new RuntimeException(logMsg);
        }
    }
}
Also used:
ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)
Set (java.util.Set)
HashSet (java.util.HashSet)
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper)
ArrayList (java.util.ArrayList)
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)
Statistics (org.apache.hadoop.hive.ql.plan.Statistics)
ColStatistics (org.apache.hadoop.hive.ql.plan.ColStatistics)
UniqueConstraint (org.apache.hadoop.hive.ql.metadata.UniqueConstraint)
RelReferentialConstraint (org.apache.calcite.rel.RelReferentialConstraint)
AtomicInteger (java.util.concurrent.atomic.AtomicInteger)
PartitionIterable (org.apache.hadoop.hive.ql.metadata.PartitionIterable)
HiveConf (org.apache.hadoop.hive.conf.HiveConf)
ColumnStatsList (org.apache.hadoop.hive.ql.parse.ColumnStatsList)
Map (java.util.Map)
ImmutableMap (com.google.common.collect.ImmutableMap)
HashMap (java.util.HashMap)
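
Step 4 above uses a gated-warning idiom: the missing-stats problem is always logged, but it is surfaced on the user's console only when HIVE_CBO_SHOW_WARNINGS is enabled. A minimal sketch of just that gate, with a hypothetical message argument:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;

public class CboWarningSketch {
    // Surface a CBO warning on the console only when the user opted in.
    static void warnOnConsoleIfEnabled(String logMsg) {
        HiveConf conf = SessionState.getSessionConf();
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) {
            LogHelper console = SessionState.getConsole();
            console.printInfo(logMsg);
        }
    }
}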

Example 14 with LogHelper

Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.

From class ExecDriver, method main:

@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, HiveException {
    String planFileName = null;
    String jobConfFileName = null;
    boolean noLog = false;
    String files = null;
    String libjars = null;
    boolean localtask = false;
    try {
        for (int i = 0; i < args.length; i++) {
            if (args[i].equals("-plan")) {
                planFileName = args[++i];
            } else if (args[i].equals("-jobconffile")) {
                jobConfFileName = args[++i];
            } else if (args[i].equals("-nolog")) {
                noLog = true;
            } else if (args[i].equals("-files")) {
                files = args[++i];
            } else if (args[i].equals("-libjars")) {
                libjars = args[++i];
            } else if (args[i].equals("-localtask")) {
                localtask = true;
            }
        }
    } catch (IndexOutOfBoundsException e) {
        System.err.println("Missing argument to option");
        printUsage();
    }
    JobConf conf;
    if (localtask) {
        conf = new JobConf(MapredLocalTask.class);
    } else {
        conf = new JobConf(ExecDriver.class);
    }
    if (jobConfFileName != null) {
        conf.addResource(new Path(jobConfFileName));
    }
    // Initialize the resources from command line
    if (files != null) {
        conf.set("tmpfiles", files);
    }
    if (libjars != null) {
        conf.set("tmpjars", libjars);
    }
    if (UserGroupInformation.isSecurityEnabled()) {
        String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
        if (hadoopAuthToken != null) {
            conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
        }
    }
    boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
    String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
    if (queryId.isEmpty()) {
        queryId = "unknown-" + System.currentTimeMillis();
        HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, queryId);
    }
    System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
    LogUtils.registerLoggingContext(conf);
    if (noLog) {
        // If started from main(), and noLog is on, we should not output
        // any logs. To turn the log on, please set -Dtest.silent=false
        org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
        NullAppender appender = NullAppender.createNullAppender();
        appender.addToLogger(logger.getName(), Level.ERROR);
        appender.start();
    } else {
        setupChildLog4j(conf);
    }
    Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
    LogHelper console = new LogHelper(LOG, isSilent);
    if (planFileName == null) {
        console.printError("Must specify Plan File Name");
        printUsage();
    }
    // print the location of the execution log so that it's easy to find the reason for local mode execution failures
    for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger()).getAppenders().values()) {
        if (appender instanceof FileAppender) {
            console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
        } else if (appender instanceof RollingFileAppender) {
            console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
        }
    }
    // the plan file should always be in the local directory
    Path p = new Path(planFileName);
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream pathData = fs.open(p);
    // libjars are not automatically added to the classpath of the child process, so we add them here explicitly
    try {
        // see also - code in CliDriver.java
        ClassLoader loader = conf.getClassLoader();
        if (StringUtils.isNotBlank(libjars)) {
            loader = Utilities.addToClassPath(loader, StringUtils.split(libjars, ","));
        }
        conf.setClassLoader(loader);
        // Also set this as the thread context class loader, so new threads will
        // inherit this class loader and propagate it into newly created
        // Configurations by those new threads.
        Thread.currentThread().setContextClassLoader(loader);
    } catch (Exception e) {
        throw new HiveException(e.getMessage(), e);
    }
    int ret;
    if (localtask) {
        memoryMXBean = ManagementFactory.getMemoryMXBean();
        MapredLocalWork plan = SerializationUtilities.deserializePlan(pathData, MapredLocalWork.class);
        MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
        ret = ed.executeInProcess(new DriverContext());
    } else {
        MapredWork plan = SerializationUtilities.deserializePlan(pathData, MapredWork.class);
        ExecDriver ed = new ExecDriver(plan, conf, isSilent);
        ret = ed.execute(new DriverContext());
    }
    if (ret != 0) {
        System.exit(ret);
    }
}
Also used:
DriverContext (org.apache.hadoop.hive.ql.DriverContext)
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
NullAppender (org.apache.hadoop.hive.ql.log.NullAppender)
LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper)
RollingFileAppender (org.apache.logging.log4j.core.appender.RollingFileAppender)
Logger (org.slf4j.Logger)
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork)
FileSystem (org.apache.hadoop.fs.FileSystem)
JobConf (org.apache.hadoop.mapred.JobConf)
Path (org.apache.hadoop.fs.Path)
Appender (org.apache.logging.log4j.core.Appender)
LogDivertAppender (org.apache.hadoop.hive.ql.log.LogDivertAppender)
FileAppender (org.apache.logging.log4j.core.appender.FileAppender)
InputStream (java.io.InputStream)
LogInitializationException (org.apache.hadoop.hive.common.LogUtils.LogInitializationException)
IOException (java.io.IOException)
MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork)
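
The appender scan near the top of main is a reusable idiom: ask the log4j2 root logger which file-backed appenders are attached, so the user can be told where the execution log lives. A standalone sketch of that lookup (class name is hypothetical):

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.core.Appender;
import org.apache.logging.log4j.core.appender.FileAppender;
import org.apache.logging.log4j.core.appender.RollingFileAppender;

public class ExecutionLogLocator {
    // Returns the file behind the first file-backed root appender, or null if none exists.
    static String findExecutionLog() {
        org.apache.logging.log4j.core.Logger root =
                (org.apache.logging.log4j.core.Logger) LogManager.getRootLogger();
        for (Appender appender : root.getAppenders().values()) {
            if (appender instanceof FileAppender) {
                return ((FileAppender) appender).getFileName();
            }
            if (appender instanceof RollingFileAppender) {
                return ((RollingFileAppender) appender).getFileName();
            }
        }
        return null;
    }
}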

Example 15 with LogHelper

Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.

From class Task, method initialize:

public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext driverContext, CompilationOpContext opContext) {
    this.queryPlan = queryPlan;
    setInitialized();
    this.queryState = queryState;
    if (null == this.conf) {
        this.conf = queryState.getConf();
    }
    this.driverContext = driverContext;
    console = new LogHelper(LOG);
}
Also used: LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper)
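
For contrast with Example 11, Task uses the single-argument constructor, which in the Hive sources delegates to LogHelper(LOG, false), i.e. a console that is never silenced. A tiny usage sketch (class name is hypothetical):

import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.slf4j.LoggerFactory;

public class TaskConsoleSketch {
    public static void main(String[] args) {
        // one-argument constructor: equivalent to new LogHelper(LOG, false)
        LogHelper console = new LogHelper(LoggerFactory.getLogger(TaskConsoleSketch.class));
        console.printInfo("task initialized"); // reaches both the log and the console
    }
}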

Aggregations

LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper): 15
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 5
Map (java.util.Map): 4
QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3
IOException (java.io.IOException): 2
ArrayList (java.util.ArrayList): 2
LinkedHashMap (java.util.LinkedHashMap): 2
List (java.util.List): 2
FileSystem (org.apache.hadoop.fs.FileSystem): 2
Path (org.apache.hadoop.fs.Path): 2
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 2
SessionState (org.apache.hadoop.hive.ql.session.SessionState): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 1
InputStream (java.io.InputStream): 1
HashMap (java.util.HashMap): 1
HashSet (java.util.HashSet): 1
LinkedList (java.util.LinkedList): 1
Set (java.util.Set): 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1