use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class DbNotificationListener method onUpdatePartitionColumnStat.
@Override
public void onUpdatePartitionColumnStat(UpdatePartitionColumnStatEvent updatePartColStatEvent) throws MetaException {
UpdatePartitionColumnStatMessage msg = MessageBuilder.getInstance().buildUpdatePartitionColumnStatMessage(updatePartColStatEvent.getPartColStats(), updatePartColStatEvent.getPartVals(), updatePartColStatEvent.getPartParameters(), updatePartColStatEvent.getTableObj(), updatePartColStatEvent.getWriteId());
NotificationEvent event = new NotificationEvent(0, now(), EventType.UPDATE_PARTITION_COLUMN_STAT.toString(), msgEncoder.getSerializer().serialize(msg));
ColumnStatisticsDesc statDesc = updatePartColStatEvent.getPartColStats().getStatsDesc();
event.setCatName(statDesc.isSetCatName() ? statDesc.getCatName() : DEFAULT_CATALOG_NAME);
event.setDbName(statDesc.getDbName());
event.setTableName(statDesc.getTableName());
process(event, updatePartColStatEvent);
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class DbNotificationListener method onUpdateTableColumnStat.
@Override
public void onUpdateTableColumnStat(UpdateTableColumnStatEvent updateTableColumnStatEvent) throws MetaException {
UpdateTableColumnStatMessage msg = MessageBuilder.getInstance().buildUpdateTableColumnStatMessage(updateTableColumnStatEvent.getColStats(), updateTableColumnStatEvent.getTableObj(), updateTableColumnStatEvent.getTableParameters(), updateTableColumnStatEvent.getWriteId());
NotificationEvent event = new NotificationEvent(0, now(), EventType.UPDATE_TABLE_COLUMN_STAT.toString(), msgEncoder.getSerializer().serialize(msg));
ColumnStatisticsDesc statDesc = updateTableColumnStatEvent.getColStats().getStatsDesc();
event.setCatName(statDesc.isSetCatName() ? statDesc.getCatName() : DEFAULT_CATALOG_NAME);
event.setDbName(statDesc.getDbName());
event.setTableName(statDesc.getTableName());
process(event, updateTableColumnStatEvent);
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class ColStatsProcessor method constructColumnStatsFromPackedRows.
private boolean constructColumnStatsFromPackedRows(Table tbl, List<ColumnStatistics> stats, long maxNumStats) throws HiveException, MetaException, IOException {
String partName = null;
List<String> colName = colStatDesc.getColName();
List<String> colType = colStatDesc.getColType();
boolean isTblLevel = colStatDesc.isTblLevel();
InspectableObject packedRow;
long numStats = 0;
while ((packedRow = ftOp.getNextRow()) != null) {
if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
throw new HiveException("Unexpected object type encountered while unpacking row");
}
final List<ColumnStatisticsObj> statsObjs = new ArrayList<>();
final StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
final List<? extends StructField> fields = soi.getAllStructFieldRefs();
final List<Object> values = soi.getStructFieldsDataAsList(packedRow.o);
// Partition columns are appended at end, we only care about stats column
int pos = 0;
for (int i = 0; i < colName.size(); i++) {
String columnName = colName.get(i);
String columnType = colType.get(i);
PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(columnType);
List<ColumnStatsField> columnStatsFields = ColumnStatsType.getColumnStats(typeInfo);
try {
ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveColumnStatistics(columnName, columnType, columnStatsFields, pos, fields, values);
statsObjs.add(statObj);
numStats++;
} catch (Exception e) {
if (isStatsReliable) {
throw new HiveException("Statistics collection failed while (hive.stats.reliable)", e);
} else {
LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e);
}
}
pos += columnStatsFields.size();
}
if (!statsObjs.isEmpty()) {
if (!isTblLevel) {
List<FieldSchema> partColSchema = tbl.getPartCols();
List<String> partVals = new ArrayList<>();
// Iterate over partition columns to figure out partition name
for (int i = pos; i < pos + partColSchema.size(); i++) {
Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(i));
partVals.add(// could be null for default partition
partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
}
partName = Warehouse.makePartName(partColSchema, partVals);
}
ColumnStatisticsDesc statsDesc = buildColumnStatsDesc(tbl, partName, isTblLevel);
ColumnStatistics colStats = new ColumnStatistics();
colStats.setStatsDesc(statsDesc);
colStats.setStatsObj(statsObjs);
colStats.setEngine(Constants.HIVE_ENGINE);
stats.add(colStats);
if (numStats >= maxNumStats) {
return false;
}
}
}
ftOp.clearFetchContext();
return true;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class ColumnStatsUpdateTask method getColumnStatsDesc.
private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName, String partName, boolean isTblLevel) {
ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
statsDesc.setDbName(dbName);
statsDesc.setTableName(tableName);
statsDesc.setIsTblLevel(isTblLevel);
if (!isTblLevel) {
statsDesc.setPartName(partName);
} else {
statsDesc.setPartName(null);
}
return statsDesc;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class ColumnStatsUpdateTask method constructColumnStatsFromInput.
private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
// If we are replicating the stats, we don't need to construct those again.
if (work.getColStats() != null) {
ColumnStatistics colStats = work.getColStats();
LOG.debug("Got stats through replication for " + colStats.getStatsDesc().getDbName() + "." + colStats.getStatsDesc().getTableName());
return colStats;
}
String dbName = work.dbName();
String tableName = work.getTableName();
String partName = work.getPartName();
String colName = work.getColName();
String columnType = work.getColType();
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
// grammar prohibits more than 1 column so we are guaranteed to have only 1
// element in this lists.
statsObj.setColName(colName);
statsObj.setColType(columnType);
ColumnStatisticsData statsData = new ColumnStatisticsData();
if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint")) {
LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setNumNullsIsSet(false);
longStats.setNumDVsIsSet(false);
longStats.setLowValueIsSet(false);
longStats.setHighValueIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
longStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
longStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
longStats.setLowValue(Long.parseLong(value));
} else if (fName.equals("highValue")) {
longStats.setHighValue(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setLongStats(longStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
doubleStats.setNumNullsIsSet(false);
doubleStats.setNumDVsIsSet(false);
doubleStats.setLowValueIsSet(false);
doubleStats.setHighValueIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
doubleStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
doubleStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
doubleStats.setLowValue(Double.parseDouble(value));
} else if (fName.equals("highValue")) {
doubleStats.setHighValue(Double.parseDouble(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setDoubleStats(doubleStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char") || columnType.toLowerCase().startsWith("varchar")) {
// char(x),varchar(x) types
StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
stringStats.setMaxColLenIsSet(false);
stringStats.setAvgColLenIsSet(false);
stringStats.setNumNullsIsSet(false);
stringStats.setNumDVsIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
stringStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
stringStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("avgColLen")) {
stringStats.setAvgColLen(Double.parseDouble(value));
} else if (fName.equals("maxColLen")) {
stringStats.setMaxColLen(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setStringStats(stringStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("boolean")) {
BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
booleanStats.setNumNullsIsSet(false);
booleanStats.setNumTruesIsSet(false);
booleanStats.setNumFalsesIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
booleanStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numTrues")) {
booleanStats.setNumTrues(Long.parseLong(value));
} else if (fName.equals("numFalses")) {
booleanStats.setNumFalses(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setBooleanStats(booleanStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
binaryStats.setNumNullsIsSet(false);
binaryStats.setAvgColLenIsSet(false);
binaryStats.setMaxColLenIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
binaryStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("avgColLen")) {
binaryStats.setAvgColLen(Double.parseDouble(value));
} else if (fName.equals("maxColLen")) {
binaryStats.setMaxColLen(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setBinaryStats(binaryStats);
statsObj.setStatsData(statsData);
} else if (columnType.toLowerCase().startsWith("decimal")) {
// decimal(a,b) type
DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
decimalStats.setNumNullsIsSet(false);
decimalStats.setNumDVsIsSet(false);
decimalStats.setLowValueIsSet(false);
decimalStats.setHighValueIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
decimalStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
decimalStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
BigDecimal d = new BigDecimal(value);
decimalStats.setLowValue(DecimalUtils.getDecimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
} else if (fName.equals("highValue")) {
BigDecimal d = new BigDecimal(value);
decimalStats.setHighValue(DecimalUtils.getDecimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setDecimalStats(decimalStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("date")) {
DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
dateStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
dateStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
// Date high/low value is stored as long in stats DB, but allow users to set high/low
// value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
dateStats.setLowValue(readDateValue(value));
} else if (fName.equals("highValue")) {
dateStats.setHighValue(readDateValue(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setDateStats(dateStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("timestamp")) {
TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
timestampStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
timestampStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
timestampStats.setLowValue(readTimestampValue(value));
} else if (fName.equals("highValue")) {
timestampStats.setHighValue(readTimestampValue(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setTimestampStats(timestampStats);
statsObj.setStatsData(statsData);
} else {
throw new SemanticException("Unsupported type");
}
ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, partName, partName == null);
ColumnStatistics colStat = new ColumnStatistics();
colStat.setStatsDesc(statsDesc);
colStat.addToStatsObj(statsObj);
colStat.setEngine(Constants.HIVE_ENGINE);
return colStat;
}
Aggregations