Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.
The class HashTableSinkOperator, method initializeOp.
@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
  console = new LogHelper(LOG, isSilent);
  memoryExhaustionHandler = new MapJoinMemoryExhaustionHandler(console, conf.getHashtableMemoryUsage());
  emptyRowContainer.addRow(emptyObjectArray);
  // for small tables only; so get the big table position first
  posBigTableAlias = conf.getPosBigTable();
  order = conf.getTagOrder();
  // initialize some variables, which used to be initialized in CommonJoinOperator
  this.hconf = hconf;
  filterMaps = conf.getFilterMap();
  int tagLen = conf.getTagLength();
  // process join keys
  joinKeys = new List[tagLen];
  JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
  joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
  // process join values
  joinValues = new List[tagLen];
  JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
  joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
  // process join filters
  joinFilters = new List[tagLen];
  JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
  joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
  if (!conf.isNoOuterJoin()) {
    for (Byte alias : order) {
      if (alias == posBigTableAlias || joinValues[alias] == null) {
        continue;
      }
      List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
      if (filterMaps != null && filterMaps[alias] != null) {
        // for each alias, add object inspector for filter tag as the last element
        rcOIs = new ArrayList<ObjectInspector>(rcOIs);
        rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
      }
    }
  }
  mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
  mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
  hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
  if (hashTableScale <= 0) {
    hashTableScale = 1;
  }
  try {
    TableDesc keyTableDesc = conf.getKeyTblDesc();
    AbstractSerDe keySerde = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(keySerde, null, keyTableDesc.getProperties(), null);
    MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerde, false);
    for (Byte pos : order) {
      if (pos == posBigTableAlias) {
        continue;
      }
      mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
      TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
      AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
      mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
    }
  } catch (SerDeException e) {
    throw new HiveException(e);
  }
}
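The LogHelper usage here is at the top of the method: the operator reads the session's silent flag from the configuration and wraps its SLF4J logger in a console that both logs and, unless silent, prints to the user. A minimal self-contained sketch of that construction (the class and method names below are illustrative, not from the Hive source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Illustrative sketch, not Hive source: build a console that mirrors
// messages to the log and, unless the session is silent, to the user.
public class ConsoleSetupSketch {
  private static final Logger LOG = LoggerFactory.getLogger(ConsoleSetupSketch.class);

  static LogHelper buildConsole(Configuration hconf) {
    boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
    return new LogHelper(LOG, isSilent);
  }
}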
Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.
The class PreExecutePrinter, method run.
public void run(QueryState queryState, Set<ReadEntity> inputs, Set<WriteEntity> outputs, UserGroupInformation ugi) throws Exception {
  LogHelper console = SessionState.getConsole();
  if (console == null) {
    return;
  }
  if (queryState != null) {
    console.printInfo("PREHOOK: query: " + queryState.getQueryString().trim(), false);
    console.printInfo("PREHOOK: type: " + queryState.getCommandType(), false);
  }
  printEntities(console, inputs, "PREHOOK: Input: ");
  printEntities(console, outputs, "PREHOOK: Output: ");
}
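Note the defensive pattern: SessionState.getConsole() returns null when no session is active, so the hook exits early. The boolean passed to printInfo appears to act as an isSilent override, keeping hook output deterministic even in silent sessions. A sketch of the same guard (method and message are illustrative):

import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;

// Illustrative sketch: null-check the session console before printing.
public class ConsoleGuardSketch {
  static void printHookLine(String msg) {
    LogHelper console = SessionState.getConsole();
    if (console == null) {
      return; // no active session, nowhere to print
    }
    // Second argument is the isSilent override; false forces the line to
    // print even in silent sessions (hedged reading, matching the PREHOOK
    // lines above).
    console.printInfo(msg, false);
  }
}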
Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.
The class RelOptHiveTable, method updateColStats.
private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnForMissingStats) {
  List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
  List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
  List<String> partColNamesThatRqrStats = new ArrayList<String>();
  List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
  Set<String> colNamesFailedStats = new HashSet<String>();
  // 1. Separate required columns to Non Partition and Partition Cols
  ColumnInfo tmp;
  for (Integer pi : projIndxLst) {
    if (hiveColStatsMap.get(pi) == null) {
      if ((tmp = hiveNonPartitionColsMap.get(pi)) != null) {
        nonPartColNamesThatRqrStats.add(tmp.getInternalName());
        nonPartColIndxsThatRqrStats.add(pi);
      } else if ((tmp = hivePartitionColsMap.get(pi)) != null) {
        partColNamesThatRqrStats.add(tmp.getInternalName());
        partColIndxsThatRqrStats.add(pi);
      } else {
        noColsMissingStats.getAndIncrement();
        String logMsg = "Unable to find Column Index: " + pi + ", in " + hiveTblMetadata.getCompleteName();
        LOG.error(logMsg);
        throw new RuntimeException(logMsg);
      }
    }
  }
  if (null == partitionList) {
    // We could be here either because it's an unpartitioned table or because
    // there are no pruning predicates on a partitioned table.
    computePartitionList(hiveConf, null, new HashSet<Integer>());
  }
  ColumnStatsList colStatsCached = colStatsCache.get(partitionList.getKey());
  if (colStatsCached == null) {
    colStatsCached = new ColumnStatsList();
    colStatsCache.put(partitionList.getKey(), colStatsCached);
  }
  // 2. Obtain Col Stats for Non Partition Cols
  if (nonPartColNamesThatRqrStats.size() > 0) {
    List<ColStatistics> hiveColStats = new ArrayList<ColStatistics>();
    if (!hiveTblMetadata.isPartitioned()) {
      // 2.1 Handle the case for unpartitioned table.
      try {
        Statistics stats = StatsUtils.collectStatistics(hiveConf, null, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached, nonPartColNamesThatRqrStats, true);
        rowCount = stats.getNumRows();
        for (String c : nonPartColNamesThatRqrStats) {
          ColStatistics cs = stats.getColumnStatisticsFromColName(c);
          if (cs != null) {
            hiveColStats.add(cs);
          }
        }
        colStatsCached.updateState(stats.getColumnStatsState());
        // 2.1.1 Record Column Names that we needed stats for but couldn't
        if (hiveColStats.isEmpty()) {
          colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
        } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) {
          Set<String> setOfFiledCols = new HashSet<String>(nonPartColNamesThatRqrStats);
          Set<String> setOfObtainedColStats = new HashSet<String>();
          for (ColStatistics cs : hiveColStats) {
            setOfObtainedColStats.add(cs.getColumnName());
          }
          setOfFiledCols.removeAll(setOfObtainedColStats);
          colNamesFailedStats.addAll(setOfFiledCols);
        } else {
          // Column stats in hiveColStats might not be in the same order as the columns in
          // nonPartColNamesThatRqrStats. reorder hiveColStats so we can build hiveColStatsMap
          // using nonPartColIndxsThatRqrStats as below
          Map<String, ColStatistics> columnStatsMap = new HashMap<String, ColStatistics>(hiveColStats.size());
          for (ColStatistics cs : hiveColStats) {
            columnStatsMap.put(cs.getColumnName(), cs);
            // stats are not available
            if (cs.isEstimated()) {
              colNamesFailedStats.add(cs.getColumnName());
            }
          }
          hiveColStats.clear();
          for (String colName : nonPartColNamesThatRqrStats) {
            hiveColStats.add(columnStatsMap.get(colName));
          }
        }
      } catch (HiveException e) {
        String logMsg = "Collecting stats for table: " + hiveTblMetadata.getTableName() + " failed.";
        LOG.error(logMsg, e);
        throw new RuntimeException(logMsg, e);
      }
    } else {
      // 2.2 Obtain col stats for partitioned table.
      try {
        if (partitionList.getNotDeniedPartns().isEmpty()) {
          // no need to make a metastore call
          rowCount = 0;
          hiveColStats = new ArrayList<ColStatistics>();
          for (int i = 0; i < nonPartColNamesThatRqrStats.size(); i++) {
            // add empty stats object for each column
            hiveColStats.add(new ColStatistics(nonPartColNamesThatRqrStats.get(i), hiveNonPartitionColsMap.get(nonPartColIndxsThatRqrStats.get(i)).getTypeName()));
          }
          colNamesFailedStats.clear();
          colStatsCached.updateState(State.COMPLETE);
        } else {
          Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList, hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached, nonPartColNamesThatRqrStats, true);
          rowCount = stats.getNumRows();
          hiveColStats = new ArrayList<ColStatistics>();
          for (String c : nonPartColNamesThatRqrStats) {
            ColStatistics cs = stats.getColumnStatisticsFromColName(c);
            if (cs != null) {
              hiveColStats.add(cs);
              if (cs.isEstimated()) {
                colNamesFailedStats.add(c);
              }
            } else {
              colNamesFailedStats.add(c);
            }
          }
          colStatsCached.updateState(stats.getColumnStatsState());
        }
      } catch (HiveException e) {
        String logMsg = "Collecting stats failed.";
        LOG.error(logMsg, e);
        throw new RuntimeException(logMsg, e);
      }
    }
    if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
      for (int i = 0; i < hiveColStats.size(); i++) {
        // the columns in nonPartColIndxsThatRqrStats/nonPartColNamesThatRqrStats/hiveColStats
        // are in same order
        hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i));
        colStatsCached.put(hiveColStats.get(i).getColumnName(), hiveColStats.get(i));
        if (LOG.isDebugEnabled()) {
          LOG.debug("Stats for column " + hiveColStats.get(i).getColumnName() + " in table " + hiveTblMetadata.getTableName() + " stored in cache");
          LOG.debug(hiveColStats.get(i).toString());
        }
      }
    }
  }
  // 3. Obtain Stats for Partition Cols
  if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) {
    ColStatistics cStats = null;
    for (int i = 0; i < partColNamesThatRqrStats.size(); i++) {
      cStats = StatsUtils.getColStatsForPartCol(hivePartitionColsMap.get(partColIndxsThatRqrStats.get(i)), new PartitionIterable(partitionList.getNotDeniedPartns()), hiveConf);
      hiveColStatsMap.put(partColIndxsThatRqrStats.get(i), cStats);
      colStatsCached.put(cStats.getColumnName(), cStats);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Stats for column " + cStats.getColumnName() + " in table " + hiveTblMetadata.getTableName() + " stored in cache");
        LOG.debug(cStats.toString());
      }
    }
  }
  // 4. Warn user if we could not get stats for required columns
  if (!colNamesFailedStats.isEmpty()) {
    String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: " + getColNamesForLogging(colNamesFailedStats);
    noColsMissingStats.getAndAdd(colNamesFailedStats.size());
    if (allowNullColumnForMissingStats) {
      LOG.warn(logMsg);
      HiveConf conf = SessionState.getSessionConf();
      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) {
        LogHelper console = SessionState.getConsole();
        console.printInfo(logMsg);
      }
    } else {
      LOG.error(logMsg);
      throw new RuntimeException(logMsg);
    }
  }
}
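The LogHelper usage sits in step 4: missing column stats are always logged, but they reach the user's console only when hive.cbo.show.warnings is enabled for the session. A condensed sketch of that gate (class and method names are illustrative; the null check on the console is an added precaution, not in the original):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Illustrative sketch: echo a planner warning to the console only when
// the session opts in via HIVE_CBO_SHOW_WARNINGS.
public class CboWarningSketch {
  private static final Logger LOG = LoggerFactory.getLogger(CboWarningSketch.class);

  static void warnMissingStats(String logMsg) {
    LOG.warn(logMsg);
    HiveConf conf = SessionState.getSessionConf();
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) {
      LogHelper console = SessionState.getConsole();
      if (console != null) { // added precaution; getConsole() is null outside a session
        console.printInfo(logMsg);
      }
    }
  }
}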
Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.
The class ExecDriver, method main.
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, HiveException {
  String planFileName = null;
  String jobConfFileName = null;
  boolean noLog = false;
  String files = null;
  String libjars = null;
  boolean localtask = false;
  try {
    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-plan")) {
        planFileName = args[++i];
      } else if (args[i].equals("-jobconffile")) {
        jobConfFileName = args[++i];
      } else if (args[i].equals("-nolog")) {
        noLog = true;
      } else if (args[i].equals("-files")) {
        files = args[++i];
      } else if (args[i].equals("-libjars")) {
        libjars = args[++i];
      } else if (args[i].equals("-localtask")) {
        localtask = true;
      }
    }
  } catch (IndexOutOfBoundsException e) {
    System.err.println("Missing argument to option");
    printUsage();
  }
  JobConf conf;
  if (localtask) {
    conf = new JobConf(MapredLocalTask.class);
  } else {
    conf = new JobConf(ExecDriver.class);
  }
  if (jobConfFileName != null) {
    conf.addResource(new Path(jobConfFileName));
  }
  // Initialize the resources from command line
  if (files != null) {
    conf.set("tmpfiles", files);
  }
  if (libjars != null) {
    conf.set("tmpjars", libjars);
  }
  if (UserGroupInformation.isSecurityEnabled()) {
    String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
    if (hadoopAuthToken != null) {
      conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
    }
  }
  boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
  String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
  if (queryId.isEmpty()) {
    queryId = "unknown-" + System.currentTimeMillis();
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, queryId);
  }
  System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
  LogUtils.registerLoggingContext(conf);
  if (noLog) {
    // If started from main(), and noLog is on, we should not output
    // any logs. To turn the log on, please set -Dtest.silent=false
    org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
    NullAppender appender = NullAppender.createNullAppender();
    appender.addToLogger(logger.getName(), Level.ERROR);
    appender.start();
  } else {
    setupChildLog4j(conf);
  }
  Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
  LogHelper console = new LogHelper(LOG, isSilent);
  if (planFileName == null) {
    console.printError("Must specify Plan File Name");
    printUsage();
  }
  // print out the location of the log file for the user so
  // that it's easy to find the reason for local mode execution failures
  for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger()).getAppenders().values()) {
    if (appender instanceof FileAppender) {
      console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
    } else if (appender instanceof RollingFileAppender) {
      console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
    }
  }
  // the plan file should always be in local directory
  Path p = new Path(planFileName);
  FileSystem fs = FileSystem.getLocal(conf);
  InputStream pathData = fs.open(p);
  // libjars are not automatically added to the classpath of the
  // child process, so we add them here explicitly
  try {
    // see also - code in CliDriver.java
    ClassLoader loader = conf.getClassLoader();
    if (StringUtils.isNotBlank(libjars)) {
      loader = Utilities.addToClassPath(loader, StringUtils.split(libjars, ","));
    }
    conf.setClassLoader(loader);
    // Also set this to the Thread ContextClassLoader, so new threads will inherit
    // this class loader, and propagate into newly created Configurations by
    // those new threads.
    Thread.currentThread().setContextClassLoader(loader);
  } catch (Exception e) {
    throw new HiveException(e.getMessage(), e);
  }
  int ret;
  if (localtask) {
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    MapredLocalWork plan = SerializationUtilities.deserializePlan(pathData, MapredLocalWork.class);
    MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
    ret = ed.executeInProcess(new DriverContext());
  } else {
    MapredWork plan = SerializationUtilities.deserializePlan(pathData, MapredWork.class);
    ExecDriver ed = new ExecDriver(plan, conf, isSilent);
    ret = ed.execute(new DriverContext());
  }
  if (ret != 0) {
    System.exit(ret);
  }
}
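Unlike the session-based usages above, ExecDriver runs in a child JVM with no SessionState, so it constructs its own LogHelper from a freshly obtained logger and uses printError for fatal argument problems. A stripped-down sketch of that standalone pattern (class name and messages illustrative):

import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Illustrative sketch: a standalone entry point builds its own console
// rather than fetching one from SessionState.
public class StandaloneConsoleSketch {
  public static void main(String[] args) {
    Logger log = LoggerFactory.getLogger(StandaloneConsoleSketch.class);
    LogHelper console = new LogHelper(log, false); // not silent
    if (args.length == 0) {
      console.printError("Must specify Plan File Name"); // error stream plus log
      return;
    }
    console.printInfo("Plan file: " + args[0]);
  }
}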
Use of org.apache.hadoop.hive.ql.session.SessionState.LogHelper in project hive by apache.
The class Task, method initialize.
public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext driverContext, CompilationOpContext opContext) {
  this.queryPlan = queryPlan;
  setInitialized();
  this.queryState = queryState;
  if (null == this.conf) {
    this.conf = queryState.getConf();
  }
  this.driverContext = driverContext;
  console = new LogHelper(LOG);
}
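Here the single-argument constructor is used, so no explicit silent flag is supplied; a task keeps one console for its lifetime. A minimal sketch (class name illustrative; how silent mode is then resolved depends on LogHelper's defaults rather than on an explicit flag):

import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Illustrative sketch: initialize a long-lived console once per task.
public abstract class BaseTaskSketch {
  protected static final Logger LOG = LoggerFactory.getLogger(BaseTaskSketch.class);
  protected transient LogHelper console;

  public void initialize() {
    // Single-argument form: no explicit isSilent flag is passed here.
    console = new LogHelper(LOG);
  }
}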