Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.
From the class ColumnStatsMergerFactory, method newColumnStaticsObj.
/**
 * Creates a {@link ColumnStatisticsObj} for a column and attaches a freshly constructed
 * statistics structure of the requested kind to it.
 *
 * @param colName column name stored on the returned object
 * @param colType column type string stored on the returned object
 * @param type which kind of statistics structure to instantiate
 * @return a new object carrying the name, the type and the new statistics structure
 * @throws RuntimeException if {@code type} is not one of the kinds handled below
 */
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
  // Pick the statistics structure first; an unsupported kind aborts before anything is built.
  ColumnStatisticsData data = new ColumnStatisticsData();
  switch (type) {
    case BOOLEAN_STATS: {
      data.setBooleanStats(new BooleanColumnStatsData());
      break;
    }
    case LONG_STATS: {
      data.setLongStats(new LongColumnStatsData());
      break;
    }
    case DOUBLE_STATS: {
      data.setDoubleStats(new DoubleColumnStatsData());
      break;
    }
    case STRING_STATS: {
      data.setStringStats(new StringColumnStatsData());
      break;
    }
    case BINARY_STATS: {
      data.setBinaryStats(new BinaryColumnStatsData());
      break;
    }
    case DECIMAL_STATS: {
      data.setDecimalStats(new DecimalColumnStatsData());
      break;
    }
    default: {
      throw new RuntimeException("Woh, bad. Unknown stats type!");
    }
  }

  ColumnStatisticsObj result = new ColumnStatisticsObj();
  result.setColName(colName);
  result.setColType(colType);
  result.setStatsData(data);
  return result;
}
Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.
From the class ColumnStatsTask, method unpackPrimitiveObject.
/**
 * Routes one primitive field of a statistics row into {@code statsObj}.
 * <p>
 * The {@code "columntype"} field decides which kind of statistics structure gets attached to
 * {@code statsObj}. Every other field is handed to the unpack method that matches the structure
 * already attached. A {@code null} value is ignored.
 *
 * @param oi inspector used to read {@code o}
 * @param o the field value; nothing happens when it is {@code null}
 * @param fieldName name of the field being unpacked
 * @param statsObj statistics object being filled in
 */
private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) {
  if (o == null) {
    return;
  }

  if (!fieldName.equals("columntype")) {
    // Regular statistic: dispatch on whichever statistics kind is currently attached.
    ColumnStatisticsData attached = statsObj.getStatsData();
    if (attached.isSetBooleanStats()) {
      unpackBooleanStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetLongStats()) {
      unpackLongStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetDoubleStats()) {
      unpackDoubleStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetStringStats()) {
      unpackStringStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetBinaryStats()) {
      unpackBinaryStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetDecimalStats()) {
      unpackDecimalStats(oi, o, fieldName, statsObj);
    } else if (attached.isSetDateStats()) {
      unpackDateStats(oi, o, fieldName, statsObj);
    }
    return;
  }

  // The type field: read its string value and attach an empty structure of the matching kind.
  PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
  String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
  ColumnStatisticsData container = new ColumnStatisticsData();
  if (typeName.equalsIgnoreCase("long")) {
    container.setLongStats(new LongColumnStatsData());
  } else if (typeName.equalsIgnoreCase("double")) {
    container.setDoubleStats(new DoubleColumnStatsData());
  } else if (typeName.equalsIgnoreCase("string")) {
    container.setStringStats(new StringColumnStatsData());
  } else if (typeName.equalsIgnoreCase("boolean")) {
    container.setBooleanStats(new BooleanColumnStatsData());
  } else if (typeName.equalsIgnoreCase("binary")) {
    container.setBinaryStats(new BinaryColumnStatsData());
  } else if (typeName.equalsIgnoreCase("decimal")) {
    container.setDecimalStats(new DecimalColumnStatsData());
  } else if (typeName.equalsIgnoreCase("date")) {
    container.setDateStats(new DateColumnStatsData());
  } else {
    // Unrecognized type name: statsObj is left exactly as it was.
    return;
  }
  statsObj.setStatsData(container);
}
Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.
From the class TestHBaseStoreBitVector, method stringPartitionStatistics.
/**
 * Stores partition-level string column statistics, reads them back from the store and checks
 * that the descriptor and every string statistic survive the round trip.
 */
@Test
public void stringPartitionStatistics() throws Exception {
  createMockTableAndPartition(STRING_TYPE, STRING_VAL);
  // NOTE(review): the earlier comment here said the table must NOT be created before statistics
  // are set on it because of how the mock works, yet the mock table and partition are created
  // first -- confirm which is intended.

  // Partition-level descriptor for partition {PART_KEY, STRING_VAL}.
  ColumnStatisticsDesc partDesc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
  // One of the pre-created statistics objects, and the values we expect to read back.
  ColumnStatisticsObj written = stringColStatsObjs.get(0);
  StringColumnStatsData expected = written.getStatsData().getStringStats();

  // Write to the store.
  ColumnStatistics toStore = new ColumnStatistics();
  toStore.setStatsDesc(partDesc);
  toStore.addToStatsObj(written);
  List<String> partVals = new ArrayList<String>();
  partVals.add(STRING_VAL);
  store.updatePartitionColumnStatistics(toStore, partVals);

  // Read back from the store.
  List<String> partNames = new ArrayList<String>();
  partNames.add(partDesc.getPartName());
  List<String> colNames = new ArrayList<String>();
  colNames.add(written.getColName());
  List<ColumnStatistics> fetched = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);

  // Descriptor checks.
  Assert.assertEquals(1, fetched.size());
  ColumnStatistics only = fetched.get(0);
  ColumnStatisticsDesc fetchedDesc = only.getStatsDesc();
  Assert.assertEquals(partDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
  Assert.assertEquals(DB, fetchedDesc.getDbName());
  Assert.assertEquals(TBL, fetchedDesc.getTableName());
  Assert.assertFalse(fetchedDesc.isIsTblLevel());

  // Exactly one statistics object, and it must carry string statistics.
  Assert.assertEquals(1, only.getStatsObjSize());
  ColumnStatisticsData fetchedData = only.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, fetchedData.getSetField());

  // Field-by-field comparison of the string statistics.
  StringColumnStatsData actual = fetchedData.getStringStats();
  Assert.assertEquals(expected.getMaxColLen(), actual.getMaxColLen());
  Assert.assertEquals(expected.getAvgColLen(), actual.getAvgColLen(), 0.01);
  Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
  Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
  Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.
From the class ColumnStatsUpdateTask, method constructColumnStatsFromInput.
/**
 * Builds the {@link ColumnStatistics} to persist from the user-supplied values held by
 * {@code work}.
 * <p>
 * The column type selects the kind of statistics structure to create; each entry of
 * {@code work.getMapProp()} is then parsed and stored in the field of that name. Only the
 * statistic names valid for the chosen kind are accepted.
 *
 * @return statistics for the single column named in the descriptor; the descriptor is built as
 *         table-level when {@code work.getPartName()} is {@code null}
 * @throws SemanticException if a statistic name is not valid for the column type, or the column
 *         type is not one of the supported types
 * @throws MetaException declared by this method; presumably raised by one of the helpers called
 *         here (not visible in this block)
 */
private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
  String dbName = SessionState.get().getCurrentDatabase();
  ColumnStatsDesc desc = work.getColStats();
  String tableName = desc.getTableName();
  String partName = work.getPartName();
  List<String> colName = desc.getColName();
  List<String> colType = desc.getColType();
  ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
  // grammar prohibits more than 1 column so we are guaranteed to have only 1
  // element in these lists.
  statsObj.setColName(colName.get(0));
  statsObj.setColType(colType.get(0));
  ColumnStatisticsData statsData = new ColumnStatisticsData();
  String columnType = colType.get(0);
  // Lower-case once with a fixed locale for the prefix checks below. The default-locale
  // toLowerCase() turns 'I' into a dotless 'i' under e.g. a Turkish locale, so an upper-case
  // "DECIMAL(10,2)" would no longer start with "decimal" and be rejected as unsupported.
  String lowerType = columnType.toLowerCase(java.util.Locale.ROOT);
  // User-supplied statistic name -> textual value.
  Map<String, String> mapProp = work.getMapProp();
  if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint")
      || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int")
      || columnType.equalsIgnoreCase("bigint")) {
    LongColumnStatsData longStats = new LongColumnStatsData();
    // Mark every field as unset so that only the statistics the user supplied are flagged.
    longStats.setNumNullsIsSet(false);
    longStats.setNumDVsIsSet(false);
    longStats.setLowValueIsSet(false);
    longStats.setHighValueIsSet(false);
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        longStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        longStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        longStats.setLowValue(Long.parseLong(value));
      } else if (fName.equals("highValue")) {
        longStats.setHighValue(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat: " + fName);
      }
    }
    statsData.setLongStats(longStats);
    statsObj.setStatsData(statsData);
  } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
    DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
    doubleStats.setNumNullsIsSet(false);
    doubleStats.setNumDVsIsSet(false);
    doubleStats.setLowValueIsSet(false);
    doubleStats.setHighValueIsSet(false);
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        doubleStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        doubleStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        doubleStats.setLowValue(Double.parseDouble(value));
      } else if (fName.equals("highValue")) {
        doubleStats.setHighValue(Double.parseDouble(value));
      } else {
        throw new SemanticException("Unknown stat: " + fName);
      }
    }
    statsData.setDoubleStats(doubleStats);
    statsObj.setStatsData(statsData);
  } else if (columnType.equalsIgnoreCase("string") || lowerType.startsWith("char")
      || lowerType.startsWith("varchar")) {
    // char(x), varchar(x) types
    StringColumnStatsData stringStats = new StringColumnStatsData();
    stringStats.setMaxColLenIsSet(false);
    stringStats.setAvgColLenIsSet(false);
    stringStats.setNumNullsIsSet(false);
    stringStats.setNumDVsIsSet(false);
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        stringStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        stringStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("avgColLen")) {
        stringStats.setAvgColLen(Double.parseDouble(value));
      } else if (fName.equals("maxColLen")) {
        stringStats.setMaxColLen(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat: " + fName);
      }
    }
    statsData.setStringStats(stringStats);
    statsObj.setStatsData(statsData);
  } else if (columnType.equalsIgnoreCase("boolean")) {
    BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
    booleanStats.setNumNullsIsSet(false);
    booleanStats.setNumTruesIsSet(false);
    booleanStats.setNumFalsesIsSet(false);
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        booleanStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numTrues")) {
        booleanStats.setNumTrues(Long.parseLong(value));
      } else if (fName.equals("numFalses")) {
        booleanStats.setNumFalses(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat: " + fName);
      }
    }
    statsData.setBooleanStats(booleanStats);
    statsObj.setStatsData(statsData);
  } else if (columnType.equalsIgnoreCase("binary")) {
    BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
    binaryStats.setNumNullsIsSet(false);
    binaryStats.setAvgColLenIsSet(false);
    binaryStats.setMaxColLenIsSet(false);
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        binaryStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("avgColLen")) {
        binaryStats.setAvgColLen(Double.parseDouble(value));
      } else if (fName.equals("maxColLen")) {
        binaryStats.setMaxColLen(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat: " + fName);
      }
    }
    statsData.setBinaryStats(binaryStats);
    statsObj.setStatsData(statsData);
  } else if (lowerType.startsWith("decimal")) {
    // decimal(a,b) type
    DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
    decimalStats.setNumNullsIsSet(false);
    decimalStats.setNumDVsIsSet(false);
    decimalStats.setLowValueIsSet(false);
    decimalStats.setHighValueIsSet(false);
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        decimalStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        decimalStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        // Stored as the unscaled value's bytes plus the scale.
        BigDecimal d = new BigDecimal(value);
        decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
      } else if (fName.equals("highValue")) {
        BigDecimal d = new BigDecimal(value);
        decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
      } else {
        throw new SemanticException("Unknown stat: " + fName);
      }
    }
    statsData.setDecimalStats(decimalStats);
    statsObj.setStatsData(statsData);
  } else if (columnType.equalsIgnoreCase("date") || columnType.equalsIgnoreCase("timestamp")) {
    DateColumnStatsData dateStats = new DateColumnStatsData();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        dateStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        dateStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        // Date high/low value is stored as long in stats DB, but allow users to set high/low
        // value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
        dateStats.setLowValue(readDateValue(value));
      } else if (fName.equals("highValue")) {
        dateStats.setHighValue(readDateValue(value));
      } else {
        throw new SemanticException("Unknown stat: " + fName);
      }
    }
    statsData.setDateStats(dateStats);
    statsObj.setStatsData(statsData);
  } else {
    throw new SemanticException("Unsupported type: " + columnType);
  }
  String[] names = Utilities.getDbTableName(dbName, tableName);
  // The last argument flags table-level statistics: true when no partition name was given.
  ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, partName == null);
  ColumnStatistics colStat = new ColumnStatistics();
  colStat.setStatsDesc(statsDesc);
  colStat.addToStatsObj(statsObj);
  return colStat;
}
Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.
From the class TestHiveMetaStore, method testColumnStatistics.
/**
 * Exercises column statistics through the metastore client at table level and at partition
 * level: write, read back and compare, delete, and query after deletion.
 * <p>
 * Any exception is printed to stderr and rethrown; the test database and table are cleaned up
 * in all cases.
 */
public void testColumnStatistics() throws Throwable {
  String dbName = "columnstatstestdb";
  String tblName = "tbl";
  String typeName = "Person";
  String tblOwner = "testowner";
  int lastAccessed = 6796;
  try {
    cleanUp(dbName, tblName, typeName);
    Database db = new Database();
    db.setName(dbName);
    client.createDatabase(db);
    createTableForTestFilter(dbName, tblName, tblOwner, lastAccessed, true);
    // Create a ColumnStatistics Obj
    String[] colName = new String[] { "income", "name" };
    double lowValue = 50000.21;
    double highValue = 1200000.4525;
    long numNulls = 3;
    long numDVs = 22;
    double avgColLen = 50.30;
    long maxColLen = 102;
    String[] colType = new String[] { "double", "string" };
    boolean isTblLevel = true;
    String partName = null;
    List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
    statsDesc.setDbName(dbName);
    statsDesc.setTableName(tblName);
    statsDesc.setIsTblLevel(isTblLevel);
    statsDesc.setPartName(partName);
    // Double statistics for the first column.
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    statsObj.setColName(colName[0]);
    statsObj.setColType(colType[0]);
    ColumnStatisticsData statsData = new ColumnStatisticsData();
    DoubleColumnStatsData numericStats = new DoubleColumnStatsData();
    statsData.setDoubleStats(numericStats);
    statsData.getDoubleStats().setHighValue(highValue);
    statsData.getDoubleStats().setLowValue(lowValue);
    statsData.getDoubleStats().setNumDVs(numDVs);
    statsData.getDoubleStats().setNumNulls(numNulls);
    statsObj.setStatsData(statsData);
    statsObjs.add(statsObj);
    // String statistics for the second column.
    statsObj = new ColumnStatisticsObj();
    statsObj.setColName(colName[1]);
    statsObj.setColType(colType[1]);
    statsData = new ColumnStatisticsData();
    StringColumnStatsData stringStats = new StringColumnStatsData();
    statsData.setStringStats(stringStats);
    statsData.getStringStats().setAvgColLen(avgColLen);
    statsData.getStringStats().setMaxColLen(maxColLen);
    statsData.getStringStats().setNumDVs(numDVs);
    statsData.getStringStats().setNumNulls(numNulls);
    statsObj.setStatsData(statsData);
    statsObjs.add(statsObj);
    ColumnStatistics colStats = new ColumnStatistics();
    colStats.setStatsDesc(statsDesc);
    colStats.setStatsObj(statsObjs);
    // write stats objs persistently
    client.updateTableColumnStatistics(colStats);
    // retrieve the stats obj that was just written
    ColumnStatisticsObj colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0])).get(0);
    // compare stats obj to ensure what we get is what we wrote
    // (expected value first, so a failure message reports the two sides correctly)
    assertNotNull(colStats2);
    assertEquals(colName[0], colStats2.getColName());
    assertEquals(lowValue, colStats2.getStatsData().getDoubleStats().getLowValue());
    assertEquals(highValue, colStats2.getStatsData().getDoubleStats().getHighValue());
    assertEquals(numNulls, colStats2.getStatsData().getDoubleStats().getNumNulls());
    assertEquals(numDVs, colStats2.getStatsData().getDoubleStats().getNumDVs());
    // test delete column stats; if no col name is passed all column stats associated with the
    // table is deleted
    boolean status = client.deleteTableColumnStatistics(dbName, tblName, null);
    assertTrue(status);
    // try to query stats for a column for which stats doesn't exist
    assertTrue(client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[1])).isEmpty());
    colStats.setStatsDesc(statsDesc);
    colStats.setStatsObj(statsObjs);
    // update table level column stats
    client.updateTableColumnStatistics(colStats);
    // query column stats for column whose stats were updated in the previous call; the result is
    // not inspected further, but get(0) requires at least one entry to come back
    colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0])).get(0);
    // partition level column statistics test
    // create a table with multiple partitions
    cleanUp(dbName, tblName, typeName);
    List<List<String>> values = new ArrayList<List<String>>();
    values.add(makeVals("2008-07-01 14:13:12", "14"));
    values.add(makeVals("2008-07-01 14:13:12", "15"));
    values.add(makeVals("2008-07-02 14:13:12", "15"));
    values.add(makeVals("2008-07-03 14:13:12", "151"));
    createMultiPartitionTableSchema(dbName, tblName, typeName, values);
    List<String> partitions = client.listPartitionNames(dbName, tblName, (short) -1);
    partName = partitions.get(0);
    isTblLevel = false;
    // create a new columnstatistics desc to represent partition level column stats
    statsDesc = new ColumnStatisticsDesc();
    statsDesc.setDbName(dbName);
    statsDesc.setTableName(tblName);
    statsDesc.setPartName(partName);
    statsDesc.setIsTblLevel(isTblLevel);
    colStats = new ColumnStatistics();
    colStats.setStatsDesc(statsDesc);
    colStats.setStatsObj(statsObjs);
    client.updatePartitionColumnStatistics(colStats);
    colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1])).get(partName).get(0);
    // compare stats obj to ensure what we get is what we wrote
    assertNotNull(colStats2);
    assertEquals(partName, colStats.getStatsDesc().getPartName());
    assertEquals(colName[1], colStats2.getColName());
    assertEquals(maxColLen, colStats2.getStatsData().getStringStats().getMaxColLen());
    assertEquals(avgColLen, colStats2.getStatsData().getStringStats().getAvgColLen());
    assertEquals(numNulls, colStats2.getStatsData().getStringStats().getNumNulls());
    assertEquals(numDVs, colStats2.getStatsData().getStringStats().getNumDVs());
    // test stats deletion at partition level
    client.deletePartitionColumnStatistics(dbName, tblName, partName, colName[1]);
    // stats of the other column are fetched after the deletion; the result is not inspected
    // further, but the chained get calls require an entry for this partition
    colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[0])).get(partName).get(0);
    // test get stats on a column for which stats doesn't exist
    assertTrue(client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1])).isEmpty());
  } catch (Exception e) {
    System.err.println(StringUtils.stringifyException(e));
    System.err.println("testColumnStatistics() failed.");
    throw e;
  } finally {
    cleanUp(dbName, tblName, typeName);
  }
}
Aggregations