use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
the class DDLTask method describeTable.
/**
* Write the description of a table to a file.
*
* @param db
* The database in question.
* @param descTbl
* This is the table we're interested in.
* @return Returns 0 when execution succeeds and above 0 if it fails.
* @throws HiveException
* Throws this exception if an unexpected error occurs.
* @throws MetaException
*/
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
String colPath = descTbl.getColumnPath();
String tableName = descTbl.getTableName();
// describe the table - populate the output stream
Table tbl = db.getTable(tableName, false);
if (tbl == null) {
throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
}
Partition part = null;
if (descTbl.getPartSpec() != null) {
part = db.getPartition(tbl, descTbl.getPartSpec(), false);
if (part == null) {
throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
}
tbl = part.getTable();
}
DataOutputStream outStream = getOutputStream(descTbl.getResFile());
try {
LOG.debug("DDLTask: got data for " + tbl.getTableName());
List<FieldSchema> cols = null;
List<ColumnStatisticsObj> colStats = null;
Deserializer deserializer = tbl.getDeserializer(true);
if (deserializer instanceof AbstractSerDe) {
String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
if (errorMsgs != null && !errorMsgs.isEmpty()) {
throw new SQLException(errorMsgs);
}
}
if (colPath.equals(tableName)) {
cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
if (!descTbl.isFormatted()) {
cols.addAll(tbl.getPartCols());
}
if (tbl.isPartitioned() && part == null) {
// No partitioned specified for partitioned table, lets fetch all.
Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
for (String stat : StatsSetupConst.supportedStats) {
boolean state = true;
long statVal = 0l;
for (Partition partition : parts) {
Map<String, String> props = partition.getParameters();
state &= StatsSetupConst.areBasicStatsUptoDate(props);
if (props != null && props.get(stat) != null) {
statVal += Long.parseLong(props.get(stat));
}
}
StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(state));
tblProps.put(stat, String.valueOf(statVal));
}
tbl.setParameters(tblProps);
}
} else {
if (descTbl.isFormatted()) {
// when column name is specified in describe table DDL, colPath will
// will be table_name.column_name
String colName = colPath.split("\\.")[1];
String[] dbTab = Utilities.getDbTableName(tableName);
List<String> colNames = new ArrayList<String>();
colNames.add(colName.toLowerCase());
if (null == part) {
if (tbl.isPartitioned()) {
Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
if (tbl.isPartitionKey(colNames.get(0))) {
FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
cols = Collections.singletonList(partCol);
PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
ColumnStatisticsData data = new ColumnStatisticsData();
ColStatistics.Range r = cs.getRange();
StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(), cs.getNumNulls(), cs.getCountDistint(), cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
colStats = Collections.singletonList(cso);
StatsSetupConst.setColumnStatsState(tblProps, colNames);
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
colStats = aggrStats.getColStats();
if (parts.size() == aggrStats.getPartsFound()) {
StatsSetupConst.setColumnStatsState(tblProps, colNames);
} else {
StatsSetupConst.removeColumnStatsState(tblProps, colNames);
}
}
tbl.setParameters(tblProps);
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
}
} else {
List<String> partitions = new ArrayList<String>();
partitions.add(part.getName());
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
}
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
}
}
PrimaryKeyInfo pkInfo = null;
ForeignKeyInfo fkInfo = null;
if (descTbl.isExt() || descTbl.isFormatted()) {
pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
}
fixDecimalColumnTypeName(cols);
// In case the query is served by HiveServer2, don't pad it with spaces,
// as HiveServer2 output is consumed by JDBC/ODBC clients.
boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), descTbl.isPretty(), isOutputPadded, colStats, pkInfo, fkInfo);
LOG.debug("DDLTask: written data for " + tbl.getTableName());
} catch (SQLException e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
} finally {
IOUtils.closeStream(outStream);
}
return 0;
}
use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
the class Driver method getTablePartitionUsedColumns.
private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem, Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols, Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
// table to columns mapping (tab2Cols)
if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
ParseContext parseCtx = querySem.getParseContext();
for (Map.Entry<String, TableScanOperator> topOpMap : querySem.getParseContext().getTopOps().entrySet()) {
TableScanOperator tableScanOp = topOpMap.getValue();
if (!tableScanOp.isInsideView()) {
Table tbl = tableScanOp.getConf().getTableMetadata();
List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
List<FieldSchema> columns = tbl.getCols();
List<String> cols = new ArrayList<String>();
for (int i = 0; i < neededColumnIds.size(); i++) {
cols.add(columns.get(neededColumnIds.get(i)).getName());
}
// table permission
if (tbl.isPartitioned() && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
String alias_id = topOpMap.getKey();
PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id);
Set<Partition> parts = partsList.getPartitions();
for (Partition part : parts) {
List<String> existingCols = part2Cols.get(part);
if (existingCols == null) {
existingCols = new ArrayList<String>();
}
existingCols.addAll(cols);
part2Cols.put(part, existingCols);
}
} else {
List<String> existingCols = tab2Cols.get(tbl);
if (existingCols == null) {
existingCols = new ArrayList<String>();
}
existingCols.addAll(cols);
tab2Cols.put(tbl, existingCols);
}
}
}
}
}
use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
the class ArchiveUtils method conflictingArchiveNameOrNull.
/**
* Determines if one can insert into partition(s), or there's a conflict with
* archive. It can be because partition is itself archived or it is to be
* created inside existing archive. The second case is when partition doesn't
* exist yet, but it would be inside of an archive if it existed. This one is
* quite tricky to check, we need to find at least one partition inside of
* the parent directory. If it is archived and archiving level tells that
* the archival was done of directory partition is in it means we cannot
* insert; otherwise we can.
* This method works both for full specifications and partial ones - in second
* case it checks if any partition that could possibly match such
* specification is inside archive.
*
* @param db - Hive object
* @param tbl - table where partition is
* @param partSpec - partition specification with possible nulls in case of
* dynamic partiton inserts
* @return null if partition can be inserted, string with colliding archive
* name when it can't
* @throws HiveException
*/
public static String conflictingArchiveNameOrNull(Hive db, Table tbl, LinkedHashMap<String, String> partSpec) throws HiveException {
List<FieldSchema> partKeys = tbl.getPartitionKeys();
int partSpecLevel = 0;
for (FieldSchema partKey : partKeys) {
if (!partSpec.containsKey(partKey.getName())) {
break;
}
partSpecLevel++;
}
if (partSpecLevel != partSpec.size()) {
throw new HiveException("partspec " + partSpec + " is wrong for table " + tbl.getTableName());
}
Map<String, String> spec = new HashMap<String, String>(partSpec);
List<String> reversedKeys = new LinkedList<String>();
for (FieldSchema fs : tbl.getPartCols()) {
if (spec.containsKey(fs.getName())) {
reversedKeys.add(0, fs.getName());
}
}
for (String rk : reversedKeys) {
List<Partition> parts = db.getPartitions(tbl, spec, (short) 1);
if (parts.size() != 0) {
Partition p = parts.get(0);
if (!isArchived(p)) {
// no archiving was done neither at this nor at upper level
return null;
} else if (getArchivingLevel(p) > spec.size()) {
// it is not, which means no archiving at this or upper level
return null;
} else {
return getPartialName(p, getArchivingLevel(p));
}
}
spec.remove(rk);
}
return null;
}
use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
the class StatsTask method aggregateStats.
private int aggregateStats(Hive db) {
StatsAggregator statsAggregator = null;
int ret = 0;
StatsCollectionContext scc = null;
EnvironmentContext environmentContext = null;
try {
// Stats setup:
final Warehouse wh = new Warehouse(conf);
if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
try {
scc = getContext();
statsAggregator = createStatsAggregator(scc, conf);
} catch (HiveException e) {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw e;
}
console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
}
}
List<Partition> partitions = getPartitionsList(db);
boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
String tableFullName = table.getDbName() + "." + table.getTableName();
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
// acidTable will not have accurate stats unless it is set through analyze command.
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
// non-partitioned tables:
if (!existStats(parameters) && atomic) {
return 0;
}
// For eg. if a file is being loaded, the old number of rows are not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(wh, parameters, tTable.getSd());
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, null);
updateStats(statsAggregator, parameters, prefix, atomic);
}
// write table stats to metastore
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
}
LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
} else {
// Partitioned table:
// Need to get the old stats of the partition
// and update the table stats based on the old and new stats.
List<Partition> updates = new ArrayList<Partition>();
//Get the file status up-front for all partitions. Beneficial in cases of blob storage systems
final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
// In case thread count is set to 0, use single thread.
poolSize = Math.max(poolSize, 1);
final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
final List<Future<Void>> futures = Lists.newLinkedList();
LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
try {
for (final Partition partn : partitions) {
final String partitionName = partn.getName();
final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (!existStats(parameters) && atomic) {
continue;
}
futures.add(pool.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
fileStatusMap.put(partitionName, partfileStatus);
return null;
}
}));
}
pool.shutdown();
for (Future<Void> future : futures) {
future.get();
}
} catch (InterruptedException e) {
LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
//cancel other futures
for (Future future : futures) {
future.cancel(true);
}
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (pool != null) {
pool.shutdownNow();
}
LOG.debug("Finished getting file stats of all partitions");
}
for (Partition partn : partitions) {
//
// get the old partition stats
//
org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
//only when the stats exist, it is added to fileStatusMap
if (!fileStatusMap.containsKey(partn.getName())) {
continue;
}
// For eg. if a file is being loaded, the old number of rows are not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, partn);
updateStats(statsAggregator, parameters, prefix, atomic);
}
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
updates.add(new Partition(table, tPart));
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
if (!updates.isEmpty()) {
db.alterPartitions(tableFullName, updates, environmentContext);
}
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (statsAggregator != null) {
statsAggregator.closeConnection(scc);
}
}
// anything else indicates failure
return ret;
}
use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
the class StatsTask method getPartitionsList.
/**
* Get the list of partitions that need to update statistics.
* TODO: we should reuse the Partitions generated at compile time
* since getting the list of partitions is quite expensive.
*
* @return a list of partitions that need to update statistics.
* @throws HiveException
*/
private List<Partition> getPartitionsList(Hive db) throws HiveException {
if (work.getLoadFileDesc() != null) {
//we are in CTAS, so we know there are no partitions
return null;
}
List<Partition> list = new ArrayList<Partition>();
if (work.getTableSpecs() != null) {
// ANALYZE command
TableSpec tblSpec = work.getTableSpecs();
table = tblSpec.tableHandle;
if (!table.isPartitioned()) {
return null;
}
// get all partitions that matches with the partition spec
List<Partition> partitions = tblSpec.partitions;
if (partitions != null) {
for (Partition partn : partitions) {
list.add(partn);
}
}
} else if (work.getLoadTableDesc() != null) {
// INSERT OVERWRITE command
LoadTableDesc tbd = work.getLoadTableDesc();
table = db.getTable(tbd.getTable().getTableName());
if (!table.isPartitioned()) {
return null;
}
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// If no dynamic partitions are generated, dpPartSpecs may not be initialized
if (dpPartSpecs != null) {
// load the list of DP partitions and return the list of partition specs
list.addAll(dpPartSpecs);
}
} else {
// static partition
Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
list.add(partn);
}
}
return list;
}
Aggregations