Search in sources :

Example 51 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class TestHBaseStoreCached method booleanTableStatistics.

// Due to the way our mock stuff works, we can only insert one column at a time, so we'll test
// each stat type separately.  We'll test them together in hte integration tests.
@Test
public void booleanTableStatistics() throws Exception {
    long now = System.currentTimeMillis();
    String dbname = "default";
    String tableName = "statstable";
    String boolcol = "boolcol";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema(boolcol, "boolean", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, emptyParameters);
    Table table = new Table(tableName, dbname, "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    store.createTable(table);
    long trues = 37;
    long falses = 12;
    long booleanNulls = 2;
    ColumnStatistics stats = new ColumnStatistics();
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
    desc.setLastAnalyzed(now);
    desc.setDbName(dbname);
    desc.setTableName(tableName);
    desc.setIsTblLevel(true);
    stats.setStatsDesc(desc);
    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName(boolcol);
    obj.setColType("boolean");
    ColumnStatisticsData data = new ColumnStatisticsData();
    BooleanColumnStatsData boolData = new BooleanColumnStatsData();
    boolData.setNumTrues(trues);
    boolData.setNumFalses(falses);
    boolData.setNumNulls(booleanNulls);
    data.setBooleanStats(boolData);
    obj.setStatsData(data);
    stats.addToStatsObj(obj);
    store.updateTableColumnStatistics(stats);
    stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol));
    Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(dbname, stats.getStatsDesc().getDbName());
    Assert.assertEquals(tableName, stats.getStatsDesc().getTableName());
    Assert.assertTrue(stats.getStatsDesc().isIsTblLevel());
    Assert.assertEquals(1, stats.getStatsObjSize());
    ColumnStatisticsData colData = obj.getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField());
    boolData = colData.getBooleanStats();
    Assert.assertEquals(trues, boolData.getNumTrues());
    Assert.assertEquals(falses, boolData.getNumFalses());
    Assert.assertEquals(booleanNulls, boolData.getNumNulls());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 52 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class TestHBaseStoreBitVector method longPartitionStatistics.

@Test
public void longPartitionStatistics() throws Exception {
    createMockTableAndPartition(INT_TYPE, INT_VAL);
    // Add partition stats for: LONG_COL and partition: {PART_KEY, INT_VAL} to DB
    // Because of the way our mock implementation works we actually need to not create the table
    // before we set statistics on it.
    ColumnStatistics stats = new ColumnStatistics();
    // Get a default ColumnStatisticsDesc for partition level stats
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, INT_VAL);
    stats.setStatsDesc(desc);
    // Get one of the pre-created ColumnStatisticsObj
    ColumnStatisticsObj obj = longColStatsObjs.get(0);
    LongColumnStatsData longData = obj.getStatsData().getLongStats();
    // Add to DB
    stats.addToStatsObj(obj);
    List<String> parVals = new ArrayList<String>();
    parVals.add(INT_VAL);
    store.updatePartitionColumnStatistics(stats, parVals);
    // Get from DB
    List<String> partNames = new ArrayList<String>();
    partNames.add(desc.getPartName());
    List<String> colNames = new ArrayList<String>();
    colNames.add(obj.getColName());
    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
    // Compare ColumnStatisticsDesc
    Assert.assertEquals(1, statsFromDB.size());
    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
    // Compare ColumnStatisticsObj
    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
    // Compare ColumnStatisticsData
    Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField());
    // Compare LongColumnStatsData
    LongColumnStatsData longDataFromDB = dataFromDB.getLongStats();
    Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue());
    Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue());
    Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls());
    Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs());
    Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 53 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class TestHBaseStore method booleanPartitionStatistics.

@Test
public void booleanPartitionStatistics() throws Exception {
    createMockTableAndPartition(BOOLEAN_TYPE, BOOLEAN_VAL);
    // Add partition stats for: BOOLEAN_COL and partition: {PART_KEY, BOOLEAN_VAL} to DB
    // Because of the way our mock implementation works we actually need to not create the table
    // before we set statistics on it.
    ColumnStatistics stats = new ColumnStatistics();
    // Get a default ColumnStatisticsDesc for partition level stats
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, BOOLEAN_VAL);
    stats.setStatsDesc(desc);
    // Get one of the pre-created ColumnStatisticsObj
    ColumnStatisticsObj obj = booleanColStatsObjs.get(0);
    BooleanColumnStatsData boolData = obj.getStatsData().getBooleanStats();
    // Add to DB
    stats.addToStatsObj(obj);
    List<String> parVals = new ArrayList<String>();
    parVals.add(BOOLEAN_VAL);
    store.updatePartitionColumnStatistics(stats, parVals);
    // Get from DB
    List<String> partNames = new ArrayList<String>();
    partNames.add(desc.getPartName());
    List<String> colNames = new ArrayList<String>();
    colNames.add(obj.getColName());
    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
    // Compare ColumnStatisticsDesc
    Assert.assertEquals(1, statsFromDB.size());
    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
    // Compare ColumnStatisticsObj
    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
    // Compare ColumnStatisticsData
    Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, dataFromDB.getSetField());
    // Compare BooleanColumnStatsData
    BooleanColumnStatsData boolDataFromDB = dataFromDB.getBooleanStats();
    Assert.assertEquals(boolData.getNumTrues(), boolDataFromDB.getNumTrues());
    Assert.assertEquals(boolData.getNumFalses(), boolDataFromDB.getNumFalses());
    Assert.assertEquals(boolData.getNumNulls(), boolDataFromDB.getNumNulls());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 54 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class TestHBaseStore method decimalTableStatistics.

@Test
public void decimalTableStatistics() throws Exception {
    createMockTable(DECIMAL_TYPE);
    // Add a decimal table stats for DECIMAL_COL to DB
    // Because of the way our mock implementation works we actually need to not create the table
    // before we set statistics on it.
    ColumnStatistics stats = new ColumnStatistics();
    // Get a default ColumnStatisticsDesc for table level stats
    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
    stats.setStatsDesc(desc);
    // Get one of the pre-created ColumnStatisticsObj
    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
    // Add to DB
    stats.addToStatsObj(obj);
    store.updateTableColumnStatistics(stats);
    // Get from DB
    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL));
    // Compare ColumnStatisticsDesc
    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
    // Compare ColumnStatisticsObj
    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
    // Compare ColumnStatisticsData
    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
    // Compare DecimalColumnStatsData
    DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
    Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
    Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
    Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
    Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 55 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class TestHBaseStore method stringPartitionStatistics.

@Test
public void stringPartitionStatistics() throws Exception {
    createMockTableAndPartition(STRING_TYPE, STRING_VAL);
    // Add partition stats for: STRING_COL and partition: {PART_KEY, STRING_VAL} to DB
    // Because of the way our mock implementation works we actually need to not create the table
    // before we set statistics on it.
    ColumnStatistics stats = new ColumnStatistics();
    // Get a default ColumnStatisticsDesc for partition level stats
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
    stats.setStatsDesc(desc);
    // Get one of the pre-created ColumnStatisticsObj
    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
    StringColumnStatsData stringData = obj.getStatsData().getStringStats();
    // Add to DB
    stats.addToStatsObj(obj);
    List<String> parVals = new ArrayList<String>();
    parVals.add(STRING_VAL);
    store.updatePartitionColumnStatistics(stats, parVals);
    // Get from DB
    List<String> partNames = new ArrayList<String>();
    partNames.add(desc.getPartName());
    List<String> colNames = new ArrayList<String>();
    colNames.add(obj.getColName());
    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
    // Compare ColumnStatisticsDesc
    Assert.assertEquals(1, statsFromDB.size());
    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
    // Compare ColumnStatisticsObj
    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
    // Compare ColumnStatisticsData
    Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
    // Compare StringColumnStatsData
    StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
    Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
    Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
    Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
    Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Aggregations

ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)90 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)75 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)67 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)62 ArrayList (java.util.ArrayList)61 Test (org.junit.Test)53 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)40 Table (org.apache.hadoop.hive.metastore.api.Table)38 Partition (org.apache.hadoop.hive.metastore.api.Partition)33 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)32 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)31 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 List (java.util.List)26 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)19 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)14 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)13 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)12 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)12 HashMap (java.util.HashMap)11 Database (org.apache.hadoop.hive.metastore.api.Database)9