Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project flink by apache.
Class HiveCatalog, method getPartitionColumnStatistics.
/**
 * Fetches the column statistics that the Hive metastore holds for a single partition.
 *
 * @param tablePath path of the table that owns the partition
 * @param partitionSpec spec identifying the partition
 * @return the partition's column statistics, or {@link CatalogColumnStatistics#UNKNOWN} when the
 *     metastore has no (or only empty) statistics for the partition
 * @throws PartitionNotExistException if resolving the table or partition fails with a
 *     {@code TableNotExistException} or {@code PartitionSpecInvalidException}
 * @throws CatalogException if the metastore call fails with a {@code TException}
 */
@Override
public CatalogColumnStatistics getPartitionColumnStatistics(ObjectPath tablePath, CatalogPartitionSpec partitionSpec) throws PartitionNotExistException, CatalogException {
    try {
        Partition partition = getHivePartition(tablePath, partitionSpec);
        Table hiveTable = getHiveTable(tablePath);
        String partName = getEscapedPartitionName(tablePath, partitionSpec, hiveTable);
        List<String> partNames = new ArrayList<>();
        partNames.add(partName);
        // The metastore keys its result by partition name, so the same name is used for the lookup below.
        Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics = client.getPartitionColumnStatistics(partition.getDbName(), partition.getTableName(), partNames, getFieldNames(partition.getSd().getCols()));
        List<ColumnStatisticsObj> columnStatisticsObjs = partitionColumnStatistics.get(partName);
        if (columnStatisticsObjs != null && !columnStatisticsObjs.isEmpty()) {
            return new CatalogColumnStatistics(HiveStatsUtil.createCatalogColumnStats(columnStatisticsObjs, hiveVersion));
        } else {
            return CatalogColumnStatistics.UNKNOWN;
        }
    } catch (TableNotExistException | PartitionSpecInvalidException e) {
        // Keep the original failure as the cause so the root problem stays visible in the stack trace.
        throw new PartitionNotExistException(getName(), tablePath, partitionSpec, e);
    } catch (TException e) {
        throw new CatalogException(String.format("Failed to get column stats of table %s 's partition %s", tablePath.getFullName(), partitionSpec), e);
    }
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
Class TestMRCompactorOnTez, method testCompactorGatherStats.
/**
 * Runs three major compactions with stats gathering enabled and checks after each one that the
 * column statistics of column {@code a} reflect the rows inserted so far.
 *
 * <p>The compaction queue is supplied three different ways: through the configuration, through a
 * table property, and through the properties map passed to the compaction request.
 */
@Test
public void testCompactorGatherStats() throws Exception {
  conf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, true);
  conf.setVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE, CUSTOM_COMPACTION_QUEUE);
  conf.setBoolVar(HiveConf.ConfVars.HIVE_MR_COMPACTOR_GATHER_STATS, true);
  conf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
  String tmpFolder = folder.newFolder().getAbsolutePath();
  conf.setVar(HiveConf.ConfVars.HIVE_PROTO_EVENTS_BASE_PATH, tmpFolder);
  String dbName = "default";
  String tableName = "stats_comp_test";
  List<String> colNames = Arrays.asList("a");
  executeStatementOnDriver("drop table if exists " + dbName + "." + tableName, driver);
  executeStatementOnDriver("create table " + dbName + "." + tableName + " (a INT) STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
  executeStatementOnDriver("insert into " + dbName + "." + tableName + " values(1)", driver);
  IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
  try {
    // Compaction only regenerates statistics when some already exist, so compute them up front.
    executeStatementOnDriver("analyze table " + dbName + "." + tableName + " compute statistics for columns", driver);
    executeStatementOnDriver("insert into " + dbName + "." + tableName + " values(2)", driver);

    // Round 1: run major compaction and cleaner; the queue comes from the configuration.
    conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, HiveProtoLoggingHook.class.getName());
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MAJOR, false);
    conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, StringUtils.EMPTY);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessfulCompaction(1);
    List<ColumnStatisticsObj> colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, Constants.HIVE_ENGINE);
    assertEquals("Stats should be there", 1, colStats.size());
    assertEquals("Value should contain new data", 2, colStats.get(0).getStatsData().getLongStats().getHighValue());
    assertEquals("Value should contain new data", 1, colStats.get(0).getStatsData().getLongStats().getLowValue());

    // Round 2: run major compaction and cleaner; the queue is set as a table property.
    executeStatementOnDriver("insert into " + dbName + "." + tableName + " values(3)", driver);
    executeStatementOnDriver("alter table " + dbName + "." + tableName + " set tblproperties('compactor.mapred.job.queue.name'='" + CUSTOM_COMPACTION_QUEUE + "')", driver);
    conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, HiveProtoLoggingHook.class.getName());
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MAJOR, false);
    conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, StringUtils.EMPTY);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessfulCompaction(2);
    colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, Constants.HIVE_ENGINE);
    assertEquals("Stats should be there", 1, colStats.size());
    assertEquals("Value should contain new data", 3, colStats.get(0).getStatsData().getLongStats().getHighValue());
    assertEquals("Value should contain new data", 1, colStats.get(0).getStatsData().getLongStats().getLowValue());

    // Round 3: the queue is passed in the properties map of the compaction request.
    executeStatementOnDriver("insert into " + dbName + "." + tableName + " values(4)", driver);
    conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, HiveProtoLoggingHook.class.getName());
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MAJOR, false, Collections.singletonMap("compactor.mapred.job.queue.name", CUSTOM_COMPACTION_QUEUE));
    conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, StringUtils.EMPTY);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessfulCompaction(3);
    colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, Constants.HIVE_ENGINE);
    assertEquals("Stats should be there", 1, colStats.size());
    assertEquals("Value should contain new data", 4, colStats.get(0).getStatsData().getLongStats().getHighValue());
    assertEquals("Value should contain new data", 1, colStats.get(0).getStatsData().getLongStats().getLowValue());
  } finally {
    // Release the metastore connection even when an assertion above fails.
    msClient.close();
  }
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
Class DescTableOperation, method getColumnDataColPathSpecified.
/**
 * Fills {@code cols} and {@code colStats} for the single column named in the describe column path.
 *
 * <p>Three cases are handled: a specific partition was given, the table is unpartitioned, or the
 * table is partitioned and no partition was given (in which case the work is delegated to the
 * partition-key / non-partition-key helpers and the table parameters are written back).
 *
 * @param table table being described
 * @param part partition being described, or {@code null} when none was specified
 * @param cols output list that receives the column schema
 * @param colStats output list that receives the column statistics
 * @param deserializer deserializer used to resolve the column path into field schemas
 */
private void getColumnDataColPathSpecified(Table table, Partition part, List<FieldSchema> cols, List<ColumnStatisticsObj> colStats, Deserializer deserializer) throws SemanticException, HiveException, MetaException {
  // With a column named in the describe table DDL the path is db_name.table_name.column_name,
  // so the column name is the third dot-separated segment.
  String[] pathSegments = desc.getColumnPath().split("\\.");
  String lowerColName = pathSegments[2].toLowerCase();
  List<String> colNames = Lists.newArrayList(lowerColName);
  TableName tableName = HiveTableName.of(desc.getDbTableName());

  if (part != null) {
    // Stats are generated under the lower-cased partition name, so look them up with the same name.
    String partName = HMSHandler.lowerCaseConvertPartName(part.getName());
    List<String> partitions = new ArrayList<>();
    partitions.add(partName);
    cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer));
    List<ColumnStatisticsObj> partitionColStat = context.getDb()
        .getPartitionColumnStatistics(tableName.getDb().toLowerCase(), tableName.getTable().toLowerCase(), partitions, colNames, false)
        .get(partName);
    if (partitionColStat != null) {
      colStats.addAll(partitionColStat);
    }
    return;
  }

  if (!table.isPartitioned()) {
    cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer));
    colStats.addAll(context.getDb().getTableColumnStatistics(tableName.getDb().toLowerCase(), tableName.getTable().toLowerCase(), colNames, false));
    return;
  }

  // Partitioned table, no partition given: the helpers receive the table properties map,
  // which is written back to the table afterwards.
  Map<String, String> tableProps = table.getParameters();
  if (tableProps == null) {
    tableProps = new HashMap<>();
  }
  if (table.isPartitionKey(lowerColName)) {
    getColumnDataForPartitionKeyColumn(table, cols, colStats, colNames, tableProps);
  } else {
    getColumnsForNotPartitionKeyColumn(cols, colStats, deserializer, colNames, tableName, tableProps);
  }
  table.setParameters(tableProps);
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
Class DescTableOperation, method execute.
/**
 * Gathers the columns (and, for a formatted describe of a column path, the column statistics) of
 * the described table or partition and writes the description to the result file.
 *
 * @return 0 after the description has been written
 * @throws HiveException wrapping any {@code SQLException} raised while producing the output
 */
@Override
public int execute() throws Exception {
  Table table = getTable();
  Partition part = getPartition(table);
  final String qualifiedName = desc.getDbTableName();

  try (DataOutputStream out = ShowUtils.getOutputStream(new Path(desc.getResFile()), context)) {
    LOG.debug("DDLTask: got data for {}", qualifiedName);

    List<FieldSchema> columns = new ArrayList<>();
    List<ColumnStatisticsObj> columnStats = new ArrayList<>();
    Deserializer deserializer = getDeserializer(table);

    if (desc.getColumnPath() == null) {
      getColumnsNoColumnPath(table, part, columns);
    } else if (desc.isFormatted()) {
      getColumnDataColPathSpecified(table, part, columns, columnStats, deserializer);
    } else {
      columns.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer));
    }

    fixDecimalColumnTypeName(columns);
    setConstraintsAndStorageHandlerInfo(table);
    handleMaterializedView(table);

    // HiveServer2 output is consumed by JDBC/ODBC clients, so it must not be padded with spaces.
    boolean padOutput = !SessionState.get().isHiveServerQuery();
    DescTableFormatter formatter = DescTableFormatter.getFormatter(context.getConf());
    formatter.describeTable(context.getConf(), out, desc.getColumnPath(), qualifiedName, table, part, columns, desc.isFormatted(), desc.isExtended(), padOutput, columnStats);

    LOG.debug("DDLTask: written data for {}", qualifiedName);
  } catch (SQLException e) {
    throw new HiveException(e, ErrorMsg.GENERIC_ERROR, qualifiedName);
  }
  return 0;
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
Class DDLPlanUtils, method getAlterTableStmtTableStatsColsAll.
/**
 * Reads the column statistics of every column of the given table and produces, per column, the
 * alter table update statistics command. When {@code checkBitVectors} returns a non-null value
 * for a column, an extra statement rendered from the {@code EXIST_BIT_VECTORS} template is
 * appended right after that column's command.
 *
 * @param tbl table whose column statistics are turned into statements
 * @return the generated statements, in the order the statistics were returned
 * @throws HiveException if the column statistics cannot be fetched
 */
public List<String> getAlterTableStmtTableStatsColsAll(Table tbl) throws HiveException {
  List<String> statements = new ArrayList<>();
  List<String> columnNames = getTableColumnNames(tbl);
  List<ColumnStatisticsObj> allColumnStats = Hive.get().getTableColumnStatistics(tbl.getDbName(), tbl.getTableName(), columnNames, true);

  for (ColumnStatisticsObj columnStats : allColumnStats) {
    statements.add(getAlterTableStmtCol(columnStats.getStatsData(), columnStats.getColName(), tbl.getTableName(), tbl.getDbName()));

    String base64 = checkBitVectors(columnStats.getStatsData());
    if (base64 == null) {
      continue;
    }

    ST bitVectorStmt = new ST(EXIST_BIT_VECTORS);
    bitVectorStmt.add(DATABASE_NAME, tbl.getDbName());
    bitVectorStmt.add(TABLE_NAME, tbl.getTableName());
    bitVectorStmt.add(COLUMN_NAME, columnStats.getColName());
    bitVectorStmt.add(BASE_64_VALUE, base64);
    statements.add(bitVectorStmt.render());
  }
  return statements;
}
Aggregations