use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class Hive method setPartitionColumnStatistics.
public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException {
  try {
    ColumnStatistics colStat = request.getColStats().get(0);
    ColumnStatisticsDesc statsDesc = colStat.getStatsDesc();
    // In a replication flow the request already carries a valid writeId and a valid writeId
    // list obtained from the source. Just use it. Otherwise resolve both from the table snapshot.
    if (request.getWriteId() <= 0 || request.getValidWriteIdList() == null) {
      Table tbl = getTable(statsDesc.getDbName(), statsDesc.getTableName());
      AcidUtils.TableSnapshot tableSnapshot = AcidUtils.getTableSnapshot(conf, tbl, true);
      request.setValidWriteIdList(tableSnapshot != null ? tableSnapshot.getValidWriteIdList() : null);
      request.setWriteId(tableSnapshot != null ? tableSnapshot.getWriteId() : 0);
    }
    return getMSC().setPartitionColumnStatistics(request);
  } catch (Exception e) {
    LOG.debug("Failed setPartitionColumnStatistics", e);
    throw new HiveException(e);
  }
}
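For orientation, a hedged sketch (not taken from the Hive sources; the names db1, tab1, the ds=2023-01-01 partition, and the id column are illustrative) of how a caller might assemble the request this method consumes:

// Partition-level descriptor: first constructor argument false means "not table-level".
ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, "db1", "tab1");
desc.setPartName("ds=2023-01-01");

// One bigint column with 0 nulls and 42 distinct values.
ColumnStatisticsData data = new ColumnStatisticsData();
data.setLongStats(new LongColumnStatsData(0L, 42L));
ColumnStatisticsObj obj = new ColumnStatisticsObj("id", "bigint", data);

ColumnStatistics colStats = new ColumnStatistics(desc, Collections.singletonList(obj));
colStats.setEngine("hive");

SetPartitionsStatsRequest request = new SetPartitionsStatsRequest();
request.setColStats(Collections.singletonList(colStats));
// writeId/validWriteIdList are deliberately left unset here, so the method
// above resolves both from the current table snapshot.
Hive.get().setPartitionColumnStatistics(request);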
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class FSTableEvent method addPartitionDesc.
private AlterTableAddPartitionDesc addPartitionDesc(Path fromPath, ImportTableDesc tblDesc, Partition partition)
    throws SemanticException {
  try {
    Map<String, String> partitionSpec = EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues());
    StorageDescriptor sd = partition.getSd();
    String location = sd.getLocation();
    if (!tblDesc.isExternal()) {
      /**
       * This is required for listing all files in a partition of a managed table, as described in
       * {@link org.apache.hadoop.hive.ql.exec.repl.bootstrap.events.filesystem.BootstrapEventsIterator}.
       */
      location = new Path(fromPath, Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString();
    }
    ColumnStatistics columnStatistics = null;
    long writeId = -1;
    if (partition.isSetColStats()) {
      ColumnStatistics colStats = partition.getColStats();
      ColumnStatisticsDesc colStatsDesc = new ColumnStatisticsDesc(colStats.getStatsDesc());
      colStatsDesc.setTableName(tblDesc.getTableName());
      colStatsDesc.setDbName(tblDesc.getDatabaseName());
      columnStatistics = new ColumnStatistics(colStatsDesc, colStats.getStatsObj());
      columnStatistics.setEngine(colStats.getEngine());
      writeId = partition.getWriteId();
    }
    AlterTableAddPartitionDesc.PartitionDesc partitionDesc = new AlterTableAddPartitionDesc.PartitionDesc(
        partitionSpec, location, partition.getParameters(), sd.getInputFormat(), sd.getOutputFormat(),
        sd.getNumBuckets(), sd.getCols(), sd.getSerdeInfo().getSerializationLib(),
        sd.getSerdeInfo().getParameters(), sd.getBucketCols(), sd.getSortCols(), columnStatistics, writeId);
    AlterTableAddPartitionDesc addPartitionDesc = new AlterTableAddPartitionDesc(
        tblDesc.getDatabaseName(), tblDesc.getTableName(), true, ImmutableList.of(partitionDesc));
    addPartitionDesc.setReplicationSpec(replicationSpec());
    return addPartitionDesc;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
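The copy constructor new ColumnStatisticsDesc(colStats.getStatsDesc()) is what lets this event rebind imported statistics to the target table without mutating the source object. A minimal sketch of the same idiom, assuming a hypothetical sourceStats object and illustrative target names:

// Copy the descriptor, then re-point the copy at the replica table;
// the source ColumnStatistics is left untouched.
ColumnStatisticsDesc rebound = new ColumnStatisticsDesc(sourceStats.getStatsDesc());
rebound.setDbName("replica_db");    // illustrative target database
rebound.setTableName("replica_tab"); // illustrative target table
ColumnStatistics targetStats = new ColumnStatistics(rebound, sourceStats.getStatsObj());
targetStats.setEngine(sourceStats.getEngine());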
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class DbNotificationListener method onUpdatePartitionColumnStatInBatch.
@Override
public void onUpdatePartitionColumnStatInBatch(UpdatePartitionColumnStatEventBatch updatePartColStatEventBatch,
    Connection dbConn, SQLGenerator sqlGenerator) throws MetaException {
  List<NotificationEvent> eventBatch = new ArrayList<>();
  List<ListenerEvent> listenerEventBatch = new ArrayList<>();
  for (int i = 0; i < updatePartColStatEventBatch.getNumEntries(); i++) {
    UpdatePartitionColumnStatEvent updatePartColStatEvent = updatePartColStatEventBatch.getPartColStatEvent(i);
    UpdatePartitionColumnStatMessage msg = MessageBuilder.getInstance().buildUpdatePartitionColumnStatMessage(
        updatePartColStatEvent.getPartColStats(), updatePartColStatEvent.getPartVals(),
        updatePartColStatEvent.getPartParameters(), updatePartColStatEvent.getTableObj(),
        updatePartColStatEvent.getWriteId());
    ColumnStatisticsDesc statDesc = updatePartColStatEvent.getPartColStats().getStatsDesc();
    NotificationEvent event = new NotificationEvent(0, now(), EventType.UPDATE_PARTITION_COLUMN_STAT.toString(),
        msgEncoder.getSerializer().serialize(msg));
    event.setCatName(statDesc.isSetCatName() ? statDesc.getCatName() : DEFAULT_CATALOG_NAME);
    event.setDbName(statDesc.getDbName());
    event.setTableName(statDesc.getTableName());
    eventBatch.add(event);
    listenerEventBatch.add(updatePartColStatEvent);
  }
  try {
    addNotificationLogBatch(eventBatch, listenerEventBatch, dbConn, sqlGenerator);
  } catch (SQLException e) {
    throw new MetaException("Unable to execute direct SQL " + StringUtils.stringifyException(e));
  }
}
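Each event is stamped with the coordinates carried by the ColumnStatisticsDesc. A hypothetical helper (not part of DbNotificationListener, sketched only to isolate the idiom) that factors out that stamping could look like this:

// Thrift optional fields expose isSet* probes, so an absent catalog falls back
// to the default catalog name ("hive", Warehouse.DEFAULT_CATALOG_NAME).
private static void stampTarget(NotificationEvent event, ColumnStatisticsDesc statsDesc) {
  event.setCatName(statsDesc.isSetCatName() ? statsDesc.getCatName() : DEFAULT_CATALOG_NAME);
  event.setDbName(statsDesc.getDbName());
  event.setTableName(statsDesc.getTableName());
}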
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class TestStats method buildStatsForOneTableOrPartition.
private ColumnStatistics buildStatsForOneTableOrPartition(String catName, String dbName, String tableName,
    String partName, Collection<Column> cols) {
  ColumnStatisticsDesc desc = new ColumnStatisticsDesc(partName == null, dbName, tableName);
  if (!NO_CAT.equals(catName)) {
    desc.setCatName(catName);
  }
  if (partName != null) {
    desc.setPartName(partName);
  }
  List<ColumnStatisticsObj> objs = new ArrayList<>(cols.size());
  for (Column col : cols) {
    objs.add(col.generate());
  }
  ColumnStatistics columnStatistics = new ColumnStatistics(desc, objs);
  columnStatistics.setEngine(ENGINE);
  return columnStatistics;
}
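The first constructor argument is the table-level flag, which is why the helper passes partName == null. A minimal sketch of the two shapes, with illustrative names:

// Table-level statistics: flag true, no partition name.
ColumnStatisticsDesc tableLevel = new ColumnStatisticsDesc(true, "db1", "tab1");
// Partition-level statistics: flag false, and the partition name is required.
ColumnStatisticsDesc partLevel = new ColumnStatisticsDesc(false, "db1", "tab1");
partLevel.setPartName("ds=2023-01-01");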
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class HiveAlterHandler method alterTableUpdateTableColumnStats.
@VisibleForTesting
public static List<ColumnStatistics> alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable,
    EnvironmentContext ec, String validWriteIds, Configuration conf, List<String> deletedCols)
    throws MetaException, InvalidObjectException {
  String catName = normalizeIdentifier(oldTable.isSetCatName() ? oldTable.getCatName() : getDefaultCatalog(conf));
  String dbName = oldTable.getDbName().toLowerCase();
  String tableName = normalizeIdentifier(oldTable.getTableName());
  String newDbName = newTable.getDbName().toLowerCase();
  String newTableName = normalizeIdentifier(newTable.getTableName());
  // If this is not called from the cached store, then also update the table itself.
  boolean doAlterTable = deletedCols == null;
  List<ColumnStatistics> newMultiColStats = new ArrayList<>();
  try {
    List<FieldSchema> oldCols = oldTable.getSd().getCols();
    List<FieldSchema> newCols = newTable.getSd().getCols();
    List<ColumnStatistics> multiColStats = null;
    boolean updateColumnStats = !newDbName.equals(dbName) || !newTableName.equals(tableName)
        || !MetaStoreServerUtils.columnsIncludedByNameType(oldCols, newCols);
    // Don't bother in the case of ACID conversion.
    updateColumnStats = updateColumnStats && (TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable));
    if (updateColumnStats) {
      List<String> oldColNames = new ArrayList<>(oldCols.size());
      for (FieldSchema oldCol : oldCols) {
        oldColNames.add(oldCol.getName());
      }
      // NOTE: this doesn't check stats being compliant, but the alterTable call below does.
      // The worst we can do is delete the stats.
      // Collect column stats which need to be rewritten and remove old stats.
      multiColStats = msdb.getTableColumnStatistics(catName, dbName, tableName, oldColNames);
      if (multiColStats.isEmpty()) {
        updateColumnStats = false;
      } else {
        for (ColumnStatistics colStats : multiColStats) {
          List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
          if (statsObjs != null) {
            // As an out parameter, this value is initialized by the caller.
            if (deletedCols == null) {
              deletedCols = new ArrayList<>();
            }
            List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
            for (ColumnStatisticsObj statsObj : statsObjs) {
              boolean found = false;
              for (FieldSchema newCol : newCols) {
                if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
                    && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
                  found = true;
                  break;
                }
              }
              if (found) {
                if (!newDbName.equals(dbName) || !newTableName.equals(tableName)) {
                  if (doAlterTable) {
                    msdb.deleteTableColumnStatistics(catName, dbName, tableName, statsObj.getColName(),
                        colStats.getEngine());
                  }
                  newStatsObjs.add(statsObj);
                  deletedCols.add(statsObj.getColName());
                }
              } else {
                if (doAlterTable) {
                  msdb.deleteTableColumnStatistics(catName, dbName, tableName, statsObj.getColName(),
                      colStats.getEngine());
                }
                deletedCols.add(statsObj.getColName());
              }
            }
            if (doAlterTable) {
              StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols);
              // Change stats
              ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
              statsDesc.setDbName(newDbName);
              statsDesc.setTableName(newTableName);
              colStats.setStatsObj(newStatsObjs);
              newMultiColStats.add(colStats);
            }
          }
        }
      }
    }
    if (doAlterTable) {
      Deadline.checkTimeout();
      // Change to the new table and append stats for the new table.
      msdb.alterTable(catName, dbName, tableName, newTable, validWriteIds);
      if (updateColumnStats) {
        for (ColumnStatistics colStats : newMultiColStats) {
          msdb.updateTableColumnStatistics(colStats, validWriteIds, newTable.getWriteId());
        }
      }
    }
  } catch (NoSuchObjectException nsoe) {
    LOG.debug("Could not find db entry", nsoe);
  } catch (InvalidInputException e) {
    // Should not happen, since the inputs were verified before being passed in.
    throw new InvalidObjectException("Invalid inputs to update table column stats: " + e);
  }
  return newMultiColStats;
}
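Stripped of the bookkeeping, the rename path reduces to re-pointing each surviving ColumnStatistics at the new coordinates through its descriptor and writing it back. A condensed restatement, assuming the method's local variables are in scope:

// For each ColumnStatistics that survived the name/type comparison above:
ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
statsDesc.setDbName(newDbName);       // re-point at the renamed database
statsDesc.setTableName(newTableName); // and the renamed table
msdb.updateTableColumnStatistics(colStats, validWriteIds, newTable.getWriteId());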