Search in sources :

Example 1 with StatsPublisher

use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.

the class FileSinkOperator method publishStats.

/**
 * Publishes the stats held by every entry of {@code valToPaths} through the
 * {@link StatsPublisher} obtained from {@code Utilities.getStatsPublisher(jc)}.
 *
 * <p>Failures (no publisher, connect/publish/close returning {@code false}) are
 * escalated to a {@link HiveException} only when {@code conf.isStatsReliable()}
 * is true; otherwise publishing is best effort and the method carries on or
 * returns quietly.
 *
 * @throws HiveException if a publishing step fails while stats are required to be reliable
 */
private void publishStats() throws HiveException {
    final boolean failOnStatsError = conf.isStatsReliable();

    StatsPublisher publisher = Utilities.getStatsPublisher(jc);
    if (publisher == null) {
        LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
        if (failOnStatsError) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
        }
        // Best effort: stats gathering should not block the main query.
        return;
    }

    StatsCollectionContext context = new StatsCollectionContext(hconf);
    context.setStatsTmpDir(conf.getStatsTmpDir());
    boolean connected = publisher.connect(context);
    if (!connected) {
        LOG.error("StatsPublishing error: cannot connect to database");
        if (failOnStatsError) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
        }
        // Best effort: stats gathering should not block the main query.
        return;
    }

    String staticSpec = conf.getStaticSpec();
    for (Map.Entry<String, FSPaths> pathEntry : valToPaths.entrySet()) {
        // The map key carries the DP/LB part of the stats key.
        String pathKey = pathEntry.getKey();
        FSPaths paths = pathEntry.getValue();

        if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
            // In this sort state the key ends with the task id. Long keys
            // (e.g. prefix/ds=__HIVE_DEFAULT_PARTITION__/000000_0) may get hashed to a
            // shorter prefix while short ones (e.g. prefix/ds=10/000000_0) are stored
            // as-is; StatsTask only fetches keys starting with "prefix", so hashed keys
            // would be missed during aggregation. Dropping the (redundant) task id
            // avoids that.
            String taskId = Utilities.getTaskIdFromFilename(pathKey);
            pathKey = pathKey.split(taskId)[0];
        }

        // keyParts[0] = DP, keyParts[1] = LB
        String[] keyParts = splitKey(pathKey);
        String dynamicSpec = keyParts[0];

        // Stats key layout: "database.table/SP/DP/"LB/
        // The table name is lower-cased because the metastore stores it in lowercase,
        // counters are case sensitive, and StatsTask looks the counter up by the
        // metastore table name.
        String tableName = conf.getTableInfo().getTableName().toLowerCase();
        String statsKey = Utilities.join(tableName, staticSpec, dynamicSpec);
        if (!statsKey.endsWith(Path.SEPARATOR)) {
            statsKey = statsKey + Path.SEPARATOR;
        }

        Map<String, String> statValues = new HashMap<String, String>();
        for (String statName : paths.stat.getStoredStats()) {
            statValues.put(statName, Long.toString(paths.stat.getStat(statName)));
        }

        // The publisher interface reports failure via its boolean result
        // (kept that way for backward compatibility).
        boolean published = publisher.publishStat(statsKey, statValues);
        if (!published && failOnStatsError) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
        }
    }

    context.setIndexForTezUnion(this.getIndexForTezUnion());
    // Same boolean-result convention as publishStat.
    boolean closed = publisher.closeConnection(context);
    if (!closed && failOnStatsError) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
    }
}
Also used : StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with StatsPublisher

use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.

the class AnnotateRunTimeStatsOptimizer method setRuntimeStatsDir.

/**
 * Derives a runtime-stats temp directory for {@code op} (explain root path plus the
 * operator id), initializes an {@link FSStatsPublisher} against it, and records the
 * directory on the operator's descriptor.
 *
 * <p>Operators whose descriptor is {@code null} are skipped with a debug message.
 *
 * @param op   operator whose descriptor receives the runtime stats directory
 * @param pctx parse context supplying the explain root path and the configuration
 * @throws SemanticException wrapping any {@link HiveException} raised here, including
 *                           the one thrown when the publisher fails to initialize
 */
private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx) throws SemanticException {
    try {
        OperatorDesc desc = op.getConf();
        if (desc == null) {
            LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId() + " because OperatorDesc is null");
            return;
        }

        LOG.info("setRuntimeStatsDir for " + op.getOperatorId());
        Path statsDir = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(), op.getOperatorId());
        String statsDirName = statsDir.toString();

        StatsPublisher publisher = new FSStatsPublisher();
        StatsCollectionContext statsContext = new StatsCollectionContext(pctx.getConf());
        statsContext.setStatsTmpDir(statsDirName);

        boolean initialized = publisher.init(statsContext);
        if (!initialized) {
            LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
            throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
        }

        // Only record the directory once the publisher accepted it.
        desc.setRuntimeStatsTmpDir(statsDirName);
    } catch (HiveException e) {
        throw new SemanticException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) FSStatsPublisher(org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 3 with StatsPublisher

use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.

the class Operator method publishRunTimeStats.

/**
 * Publishes this operator's runtime row count ({@code runTimeNumRows}) through an
 * {@link FSStatsPublisher}, using the runtime stats temp dir from {@code conf}.
 *
 * <p>Unlike best-effort stats publishing, every failed step (connect, publish,
 * close) raises an exception.
 *
 * @throws HiveException if the publisher cannot connect, publish, or close
 */
private void publishRunTimeStats() throws HiveException {
    StatsPublisher publisher = new FSStatsPublisher();
    StatsCollectionContext context = new StatsCollectionContext(hconf);
    context.setIndexForTezUnion(indexForTezUnion);
    context.setStatsTmpDir(conf.getRuntimeStatsTmpDir());

    boolean connected = publisher.connect(context);
    if (!connected) {
        LOG.error("StatsPublishing error: cannot connect to database");
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
    }

    Map<String, String> runtimeStats = new HashMap<String, String>();
    runtimeStats.put(StatsSetupConst.RUN_TIME_ROW_COUNT, Long.toString(runTimeNumRows));

    // The publisher interface signals failure through its boolean result
    // (kept that way for backward compatibility). The stats are published
    // under an empty key prefix.
    boolean published = publisher.publishStat("", runtimeStats);
    if (!published) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
    }

    boolean closed = publisher.closeConnection(context);
    if (!closed) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
    }
}
Also used : StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) FSStatsPublisher(org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap)

Example 4 with StatsPublisher

use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.

the class StatsTask method createStatsAggregator.

/**
 * Builds a connected {@link StatsAggregator} for the stats implementation named by
 * {@code HiveConf.ConfVars.HIVESTATSDBCLASS}.
 *
 * <p>Before the aggregator is created, the factory's stats publisher is initialized
 * with {@code scc}.
 *
 * @param scc  stats collection context handed to both the publisher and the aggregator
 * @param conf configuration from which the stats implementation is read
 * @return the aggregator, already connected with {@code scc}
 * @throws HiveException if no factory is available, the publisher fails to initialize,
 *                       or the aggregator cannot connect
 */
private StatsAggregator createStatsAggregator(StatsCollectionContext scc, HiveConf conf) throws HiveException {
    final String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
    final StatsFactory statsFactory = StatsFactory.newFactory(statsImpl, conf);
    if (statsFactory == null) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }

    // A noscan query has only the stats task, so the stats publishing table is
    // initialized here; for other queries the MR task following the stats task
    // initializes it in ExecDriver.java.
    boolean publisherReady = statsFactory.getStatsPublisher().init(scc);
    if (!publisherReady) {
        // init creates the stats table if it does not exist
        throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
    }

    StatsAggregator aggregator = statsFactory.getStatsAggregator();
    boolean aggregatorConnected = aggregator.connect(scc);
    if (aggregatorConnected) {
        return aggregator;
    }
    throw new HiveException(ErrorMsg.STATSAGGREGATOR_CONNECTION_ERROR.getErrorCodedMsg(statsImpl));
}
Also used : StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) StatsFactory(org.apache.hadoop.hive.ql.stats.StatsFactory) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StatsAggregator(org.apache.hadoop.hive.ql.stats.StatsAggregator)

Example 5 with StatsPublisher

use of org.apache.hadoop.hive.ql.stats.StatsPublisher in project hive by apache.

the class TableScanOperator method publishStats.

/**
 * Publishes the stats collected in {@code stats}, one entry per partition spec,
 * through the {@link StatsPublisher} obtained from {@code Utilities.getStatsPublisher(jc)}.
 *
 * <p>Publishing is best effort: a missing publisher or a failed connect/publish/close
 * step only raises a {@link HiveException} when {@code conf.isStatsReliable()} is true.
 *
 * @throws HiveException if a publishing step fails while stats are required to be reliable
 */
private void publishStats() throws HiveException {
    boolean isStatsReliable = conf.isStatsReliable();
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
    if (statsPublisher == null) {
        // No publisher available: report it through the stats-reliability policy
        // rather than failing with a NullPointerException on connect() below.
        LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
        if (isStatsReliable) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
        }
        // just return, stats gathering should not block the main query.
        return;
    }
    StatsCollectionContext sc = new StatsCollectionContext(jc);
    sc.setStatsTmpDir(conf.getTmpStatsDir());
    if (!statsPublisher.connect(sc)) {
        // just return, stats gathering should not block the main query.
        if (isLogInfoEnabled) {
            LOG.info("StatsPublishing error: cannot connect to database.");
        }
        if (isStatsReliable) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
        }
        return;
    }
    // One map is reused for all partition specs and cleared at the top of each pass.
    Map<String, String> statsToPublish = new HashMap<String, String>();
    for (String pspecs : stats.keySet()) {
        statsToPublish.clear();
        // key = stats aggregation prefix joined with the partition spec, ending in a separator
        String prefix = Utilities.join(conf.getStatsAggPrefix(), pspecs);
        String key = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR;
        for (String statType : stats.get(pspecs).getStoredStats()) {
            statsToPublish.put(statType, Long.toString(stats.get(pspecs).getStat(statType)));
        }
        if (!statsPublisher.publishStat(key, statsToPublish)) {
            if (isStatsReliable) {
                throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
            }
        }
        if (isLogInfoEnabled) {
            LOG.info("publishing : " + key + " : " + statsToPublish.toString());
        }
    }
    if (!statsPublisher.closeConnection(sc)) {
        if (isStatsReliable) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
        }
    }
}
Also used : StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap)

Aggregations

HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)10 StatsPublisher (org.apache.hadoop.hive.ql.stats.StatsPublisher)10 StatsCollectionContext (org.apache.hadoop.hive.ql.stats.StatsCollectionContext)9 StatsFactory (org.apache.hadoop.hive.ql.stats.StatsFactory)5 HashMap (java.util.HashMap)4 IOException (java.io.IOException)2 Path (org.apache.hadoop.fs.Path)2 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)2 Context (org.apache.hadoop.hive.ql.Context)2 DriverContext (org.apache.hadoop.hive.ql.DriverContext)2 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)2 ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork)2 FSStatsPublisher (org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher)2 JobClient (org.apache.hadoop.mapred.JobClient)2 Map (java.util.Map)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 LogInitializationException (org.apache.hadoop.hive.common.LogUtils.LogInitializationException)1 TezSessionState (org.apache.hadoop.hive.ql.exec.tez.TezSessionState)1 BucketizedHiveInputFormat (org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat)1