Use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.
The class FileSinkOperator, method publishStats.
private void publishStats() throws HiveException {
  boolean isStatsReliable = conf.isStatsReliable();
  // Initializing a stats publisher
  StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
  if (statsPublisher == null) {
    // just return, stats gathering should not block the main query
    LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
    if (isStatsReliable) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }
    return;
  }
  StatsCollectionContext sContext = new StatsCollectionContext(hconf);
  sContext.setStatsTmpDir(conf.getStatsTmpDir());
  if (!statsPublisher.connect(sContext)) {
    // just return, stats gathering should not block the main query
    LOG.error("StatsPublishing error: cannot connect to database");
    if (isStatsReliable) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
    }
    return;
  }
  String spSpec = conf.getStaticSpec();
  for (Map.Entry<String, FSPaths> entry : valToPaths.entrySet()) {
    // DP/LB
    String fspKey = entry.getKey();
    FSPaths fspValue = entry.getValue();
    // in PARTITION_BUCKET_SORTED mode the taskId was appended to the fspKey; strip it below.
    if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
      String taskID = Utilities.getTaskIdFromFilename(fspKey);
      // If the length of (prefix/ds=__HIVE_DEFAULT_PARTITION__/000000_0) exceeds the max key
      // prefix while that of (prefix/ds=10/000000_0) does not, the former is hashed to a shorter
      // prefix (MD5hash/000000_0) whereas the latter is stored as is in the staging stats table.
      // When stats are aggregated in StatsTask, only the keys that start with "prefix" are
      // fetched, so the hashed key would never be retrieved from the staging table and hence
      // never aggregated. To avoid this, remove the taskId from the key; it is redundant anyway.
      fspKey = fspKey.split(taskID)[0];
    }
    // split[0] = DP, split[1] = LB
    String[] split = splitKey(fspKey);
    String dpSpec = split[0];
    // key = "database.table/SP/DP/LB/"
    // Hive stores lowercase table names in the metastore, and Counters are case sensitive, so we
    // use the lowercase table name as the prefix here, since StatsTask gets the table name from
    // the metastore when it fetches the counter.
    String prefix = conf.getTableInfo().getTableName().toLowerCase();
    prefix = Utilities.join(prefix, spSpec, dpSpec);
    prefix = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR;
    Map<String, String> statsToPublish = new HashMap<String, String>();
    for (String statType : fspValue.stat.getStoredStats()) {
      statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
    }
    if (!statsPublisher.publishStat(prefix, statsToPublish)) {
      // Not changing the interface to maintain backward compatibility
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
      }
    }
  }
  sContext.setIndexForTezUnion(this.getIndexForTezUnion());
  if (!statsPublisher.closeConnection(sContext)) {
    // Not changing the interface to maintain backward compatibility
    if (isStatsReliable) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
    }
  }
}
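Stripped of the dynamic-partition bookkeeping, the method above follows a fixed publisher lifecycle: connect, publish one stats map under a key prefix, close. A minimal sketch of just that lifecycle; the class name, helper name, and error messages are illustrative, not Hive code.

import java.util.Map;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;

public class PublisherLifecycleSketch {
  // Publish one stats map under one key prefix, following the
  // connect -> publishStat -> closeConnection order used above.
  static void publishOnce(StatsPublisher publisher, StatsCollectionContext ctx,
      String prefix, Map<String, String> stats) throws HiveException {
    if (!publisher.connect(ctx)) {
      throw new HiveException("StatsPublisher: cannot connect");
    }
    if (!publisher.publishStat(prefix, stats)) {
      throw new HiveException("StatsPublisher: publish failed for " + prefix);
    }
    if (!publisher.closeConnection(ctx)) {
      throw new HiveException("StatsPublisher: close failed");
    }
  }
}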
Use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.
The class AnnotateRunTimeStatsOptimizer, method setRuntimeStatsDir.
private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx)
    throws SemanticException {
  try {
    OperatorDesc conf = op.getConf();
    if (conf != null) {
      LOG.info("setRuntimeStatsDir for " + op.getOperatorId());
      String path = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(),
          op.getOperatorId()).toString();
      StatsPublisher statsPublisher = new FSStatsPublisher();
      StatsCollectionContext runtimeStatsContext = new StatsCollectionContext(pctx.getConf());
      runtimeStatsContext.setStatsTmpDir(path);
      if (!statsPublisher.init(runtimeStatsContext)) {
        LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
      }
      conf.setRuntimeStatsTmpDir(path);
    } else {
      LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId()
          + " because OperatorDesc is null");
    }
  } catch (HiveException e) {
    throw new SemanticException(e);
  }
}
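The optimizer only needs init() here: the stats directory is created up front, and each operator later publishes into it at run time. A hedged sketch of the same init pattern in isolation, assuming a writable tmpPath; the class and method names are illustrative.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher;

public class FsInitSketch {
  // Point an FSStatsPublisher at a per-operator tmp dir and create it via init().
  static void initFsPublisher(HiveConf conf, String tmpPath) throws HiveException {
    FSStatsPublisher publisher = new FSStatsPublisher();
    StatsCollectionContext ctx = new StatsCollectionContext(conf);
    ctx.setStatsTmpDir(tmpPath);
    if (!publisher.init(ctx)) {
      // failure to create the stats dir is treated as fatal here,
      // mirroring the optimizer above
      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }
  }
}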
Use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.
The class Operator, method publishRunTimeStats.
private void publishRunTimeStats() throws HiveException {
  StatsPublisher statsPublisher = new FSStatsPublisher();
  StatsCollectionContext sContext = new StatsCollectionContext(hconf);
  sContext.setIndexForTezUnion(indexForTezUnion);
  sContext.setStatsTmpDir(conf.getRuntimeStatsTmpDir());
  if (!statsPublisher.connect(sContext)) {
    LOG.error("StatsPublishing error: cannot connect to database");
    throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
  }
  String prefix = "";
  Map<String, String> statsToPublish = new HashMap<String, String>();
  statsToPublish.put(StatsSetupConst.RUN_TIME_ROW_COUNT, Long.toString(runTimeNumRows));
  if (!statsPublisher.publishStat(prefix, statsToPublish)) {
    // Not changing the interface to maintain backward compatibility
    throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
  }
  if (!statsPublisher.closeConnection(sContext)) {
    // Not changing the interface to maintain backward compatibility
    throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
  }
}
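The same publish sequence, pulled out of the operator for clarity. A hedged sketch assuming a caller-supplied tmp dir and row count; the class and method names are illustrative, and only the calls shown in the snippets above are used.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher;

public class RunTimeRowCountSketch {
  // Publish a single runtime row count the way publishRunTimeStats does.
  static void publishRowCount(Configuration conf, String tmpDir, long rowCount)
      throws HiveException {
    FSStatsPublisher publisher = new FSStatsPublisher();
    StatsCollectionContext ctx = new StatsCollectionContext(conf);
    ctx.setStatsTmpDir(tmpDir);
    if (!publisher.connect(ctx)) {
      throw new HiveException("cannot connect to stats publisher");
    }
    Map<String, String> stats = new HashMap<String, String>();
    stats.put(StatsSetupConst.RUN_TIME_ROW_COUNT, Long.toString(rowCount));
    // the empty prefix scopes the stat to the operator's own tmp dir
    if (!publisher.publishStat("", stats)) {
      throw new HiveException("publishStat failed");
    }
    if (!publisher.closeConnection(ctx)) {
      throw new HiveException("closeConnection failed");
    }
  }
}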
Use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.
The class StatsTask, method createStatsAggregator.
private StatsAggregator createStatsAggregator(StatsCollectionContext scc, HiveConf conf) throws HiveException {
  String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
  StatsFactory factory = StatsFactory.newFactory(statsImpl, conf);
  if (factory == null) {
    throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
  }
  // Initialize the stats publishing table for noscan, which has only a stats task;
  // for other queries, the MR tasks that precede the stats task initialize it in ExecDriver.java.
  StatsPublisher statsPublisher = factory.getStatsPublisher();
  // init() creates the stats table if it does not already exist
  if (!statsPublisher.init(scc)) {
    throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
  }
  // manufacture a StatsAggregator
  StatsAggregator statsAggregator = factory.getStatsAggregator();
  if (!statsAggregator.connect(scc)) {
    throw new HiveException(ErrorMsg.STATSAGGREGATOR_CONNECTION_ERROR.getErrorCodedMsg(statsImpl));
  }
  return statsAggregator;
}
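The aggregator returned here is what StatsTask uses to read back what the publishers wrote. A hedged sketch of one read, assuming StatsAggregator exposes an aggregateStats(keyPrefix, statType) accessor returning the aggregated value as a String; the class and method names are illustrative.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.apache.hadoop.hive.ql.stats.StatsFactory;

public class AggregatorSketch {
  // Fetch one aggregated stat for a key prefix via the configured stats implementation.
  static String fetchStat(HiveConf conf, StatsCollectionContext scc,
      String prefix, String statType) throws HiveException {
    String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
    StatsFactory factory = StatsFactory.newFactory(statsImpl, conf);
    if (factory == null) {
      throw new HiveException("no stats factory for " + statsImpl);
    }
    StatsAggregator aggregator = factory.getStatsAggregator();
    if (!aggregator.connect(scc)) {
      throw new HiveException("stats aggregator cannot connect");
    }
    try {
      // assumed accessor; the keyPrefix matching is what makes the
      // trailing-separator convention in the publishers above matter
      return aggregator.aggregateStats(prefix, statType);
    } finally {
      // best effort: the return value of closeConnection is ignored here
      aggregator.closeConnection(scc);
    }
  }
}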
Use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.
The class TableScanOperator, method publishStats.
private void publishStats() throws HiveException {
  boolean isStatsReliable = conf.isStatsReliable();
  // Initializing a stats publisher
  StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
  StatsCollectionContext sc = new StatsCollectionContext(jc);
  sc.setStatsTmpDir(conf.getTmpStatsDir());
  if (!statsPublisher.connect(sc)) {
    // just return, stats gathering should not block the main query.
    if (isLogInfoEnabled) {
      LOG.info("StatsPublishing error: cannot connect to database.");
    }
    if (isStatsReliable) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
    }
    return;
  }
  Map<String, String> statsToPublish = new HashMap<String, String>();
  for (String pspecs : stats.keySet()) {
    statsToPublish.clear();
    String prefix = Utilities.join(conf.getStatsAggPrefix(), pspecs);
    String key = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR;
    for (String statType : stats.get(pspecs).getStoredStats()) {
      statsToPublish.put(statType, Long.toString(stats.get(pspecs).getStat(statType)));
    }
    if (!statsPublisher.publishStat(key, statsToPublish)) {
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
      }
    }
    if (isLogInfoEnabled) {
      LOG.info("publishing : " + key + " : " + statsToPublish.toString());
    }
  }
  if (!statsPublisher.closeConnection(sc)) {
    if (isStatsReliable) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
    }
  }
}
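Both publishStats implementations build their keys the same way: join the components, then guarantee a trailing separator so that prefix matching in StatsTask works. A small sketch of just that normalization; the class name and the example values are illustrative.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class StatsKeySketch {
  // Build the normalized key prefix both publishStats methods use:
  // lowercase table name plus partition specs, always ending in the separator.
  static String statsKey(String tableName, String spSpec, String dpSpec) {
    String prefix = Utilities.join(tableName.toLowerCase(), spSpec, dpSpec);
    return prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR;
  }

  public static void main(String[] args) {
    // expected: db.tab/ds=2016-01-01/hr=00/
    System.out.println(statsKey("db.tab", "ds=2016-01-01", "hr=00"));
  }
}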