Use of org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader in project hive by apache: the aggregateStats method of the class StatsNoJobTask. The method resolves the target partitions, then either aggregates file-level stats for a non-partitioned table directly or fans per-partition collection out to a thread pool. A sketch of the record-reader contract it relies on is shown below, followed by the method itself.
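For context, StatsProvidingRecordReader is the contract the method probes for: a record reader that can report row count and raw data size (as a SerDeStats) without iterating over records, which is what lets this task collect statistics without a full scan. A minimal sketch of an implementing reader follows; the FixedStatsRecordReader name and its hard-coded constructor values are illustrative, not part of Hive:

import java.io.IOException;

import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

// Illustrative only: a reader for a hypothetical format that keeps row count
// and raw data size in its file footer, so getStats() can answer without
// reading any records.
public class FixedStatsRecordReader
    implements RecordReader<NullWritable, Text>, StatsProvidingRecordReader {

  private final SerDeStats stats = new SerDeStats();

  public FixedStatsRecordReader(long rowCount, long rawDataSize) {
    stats.setRowCount(rowCount);       // hypothetical values read from a footer
    stats.setRawDataSize(rawDataSize);
  }

  @Override
  public SerDeStats getStats() {
    return stats;                      // what StatsNoJobTask aggregates per file
  }

  // RecordReader plumbing, stubbed out for the sketch:
  @Override public boolean next(NullWritable key, Text value) { return false; }
  @Override public NullWritable createKey() { return NullWritable.get(); }
  @Override public Text createValue() { return new Text(); }
  @Override public long getPos() { return 0; }
  @Override public float getProgress() { return 0f; }
  @Override public void close() throws IOException { }
}

ORC's record reader, for example, implements this interface by answering from file metadata, which is why the instanceof probe in the loop below succeeds cheaply for ORC files.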
private int aggregateStats(ExecutorService threadPool, Hive db) {
  int ret = 0;
  try {
    Collection<Partition> partitions = null;
    if (work.getPrunedPartitionList() == null) {
      partitions = getPartitionsList();
    } else {
      partitions = work.getPrunedPartitionList().getPartitions();
    }
    // non-partitioned table: walk the table directory and aggregate per-file stats
    if (partitions == null) {
      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
      Map<String, String> parameters = tTable.getParameters();
      try {
        Path dir = new Path(tTable.getSd().getLocation());
        long numRows = 0;
        long rawDataSize = 0;
        long fileSize = 0;
        long numFiles = 0;
        FileSystem fs = dir.getFileSystem(conf);
        FileStatus[] fileList = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs);
        boolean statsAvailable = false;
        for (FileStatus file : fileList) {
          if (!file.isDir()) {
            InputFormat<?, ?> inputFormat =
                ReflectionUtil.newInstance(table.getInputFormatClass(), jc);
            InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0,
                new String[] { table.getDataLocation().toString() });
            if (file.getLen() == 0) {
              // empty files count toward numFiles but carry no row stats
              numFiles += 1;
              statsAvailable = true;
            } else {
              org.apache.hadoop.mapred.RecordReader<?, ?> recordReader =
                  inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
              StatsProvidingRecordReader statsRR;
              // only readers that self-report stats contribute to the aggregate
              if (recordReader instanceof StatsProvidingRecordReader) {
                statsRR = (StatsProvidingRecordReader) recordReader;
                numRows += statsRR.getStats().getRowCount();
                rawDataSize += statsRR.getStats().getRawDataSize();
                fileSize += file.getLen();
                numFiles += 1;
                statsAvailable = true;
              }
              recordReader.close();
            }
          }
        }
        if (statsAvailable) {
          parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
          parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
          parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
          parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
          // mark the stats as task-generated before writing them to the metastore
          EnvironmentContext environmentContext = new EnvironmentContext();
          environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED,
              StatsSetupConst.TASK);
          db.alterTable(tableFullName, new Table(tTable), environmentContext);
          String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']';
          LOG.debug(msg);
          console.printInfo(msg);
        } else {
          String msg = "Table " + tableFullName + " does not provide stats.";
          LOG.debug(msg);
        }
      } catch (Exception e) {
        console.printInfo("[Warning] could not update stats for " + tableFullName + ".",
            "Failed with exception " + e.getMessage() + "\n"
                + StringUtils.stringifyException(e));
      }
    } else {
      // partitioned table: collect stats for each partition on the thread pool
      for (Partition partn : partitions) {
        threadPool.execute(new StatsCollection(partn));
      }
      LOG.debug("Stats collection waiting for threadpool to shutdown..");
      shutdownAndAwaitTermination(threadPool);
      LOG.debug("Stats collection threadpool shutdown successful.");
      ret = updatePartitions(db);
    }
  } catch (Exception e) {
    // fail the query if the stats are supposed to be reliable
    if (work.isStatsReliable()) {
      ret = -1;
    }
  }
  // anything else indicates failure
  return ret;
}
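shutdownAndAwaitTermination above is a private helper of the task. A plausible sketch, following the two-phase shutdown idiom from the ExecutorService javadoc; the 100-second timeouts are assumptions, not Hive's actual values:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

// Sketch of the helper; lives inside the task class, so it can use its LOG.
private void shutdownAndAwaitTermination(ExecutorService threadPool) {
  threadPool.shutdown();                // stop accepting new stats tasks
  try {
    // give running StatsCollection tasks time to drain
    if (!threadPool.awaitTermination(100, TimeUnit.SECONDS)) {
      threadPool.shutdownNow();         // cancel stragglers
      if (!threadPool.awaitTermination(100, TimeUnit.SECONDS)) {
        LOG.debug("Stats collection thread pool did not terminate");
      }
    }
  } catch (InterruptedException e) {
    threadPool.shutdownNow();           // re-cancel and preserve interrupt status
    Thread.currentThread().interrupt();
  }
}

Awaiting a second time after shutdownNow gives cancelled tasks a chance to respond to interruption before the caller proceeds to updatePartitions.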