Search in sources :

Example 66 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class TestHBaseStore method booleanPartitionStatistics.

@Test
public void booleanPartitionStatistics() throws Exception {
    createMockTableAndPartition(BOOLEAN_TYPE, BOOLEAN_VAL);
    // Add partition stats for: BOOLEAN_COL and partition: {PART_KEY, BOOLEAN_VAL} to DB
    // Because of the way our mock implementation works we actually need to not create the table
    // before we set statistics on it.
    ColumnStatistics stats = new ColumnStatistics();
    // Get a default ColumnStatisticsDesc for partition level stats
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, BOOLEAN_VAL);
    stats.setStatsDesc(desc);
    // Get one of the pre-created ColumnStatisticsObj
    ColumnStatisticsObj obj = booleanColStatsObjs.get(0);
    BooleanColumnStatsData boolData = obj.getStatsData().getBooleanStats();
    // Add to DB
    stats.addToStatsObj(obj);
    List<String> parVals = new ArrayList<String>();
    parVals.add(BOOLEAN_VAL);
    store.updatePartitionColumnStatistics(stats, parVals);
    // Get from DB
    List<String> partNames = new ArrayList<String>();
    partNames.add(desc.getPartName());
    List<String> colNames = new ArrayList<String>();
    colNames.add(obj.getColName());
    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
    // Compare ColumnStatisticsDesc
    Assert.assertEquals(1, statsFromDB.size());
    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
    // Compare ColumnStatisticsObj
    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
    // Compare ColumnStatisticsData
    Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, dataFromDB.getSetField());
    // Compare BooleanColumnStatsData
    BooleanColumnStatsData boolDataFromDB = dataFromDB.getBooleanStats();
    Assert.assertEquals(boolData.getNumTrues(), boolDataFromDB.getNumTrues());
    Assert.assertEquals(boolData.getNumFalses(), boolDataFromDB.getNumFalses());
    Assert.assertEquals(boolData.getNumNulls(), boolDataFromDB.getNumNulls());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 67 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class TestHBaseStore method decimalTableStatistics.

@Test
public void decimalTableStatistics() throws Exception {
    createMockTable(DECIMAL_TYPE);
    // Add a decimal table stats for DECIMAL_COL to DB
    // Because of the way our mock implementation works we actually need to not create the table
    // before we set statistics on it.
    ColumnStatistics stats = new ColumnStatistics();
    // Get a default ColumnStatisticsDesc for table level stats
    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
    stats.setStatsDesc(desc);
    // Get one of the pre-created ColumnStatisticsObj
    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
    // Add to DB
    stats.addToStatsObj(obj);
    store.updateTableColumnStatistics(stats);
    // Get from DB
    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL));
    // Compare ColumnStatisticsDesc
    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
    // Compare ColumnStatisticsObj
    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
    // Compare ColumnStatisticsData
    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
    // Compare DecimalColumnStatsData
    DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
    Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
    Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
    Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
    Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 68 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class TestHBaseStore method stringPartitionStatistics.

@Test
public void stringPartitionStatistics() throws Exception {
    createMockTableAndPartition(STRING_TYPE, STRING_VAL);
    // Add partition stats for: STRING_COL and partition: {PART_KEY, STRING_VAL} to DB
    // Because of the way our mock implementation works we actually need to not create the table
    // before we set statistics on it.
    ColumnStatistics stats = new ColumnStatistics();
    // Get a default ColumnStatisticsDesc for partition level stats
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
    stats.setStatsDesc(desc);
    // Get one of the pre-created ColumnStatisticsObj
    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
    StringColumnStatsData stringData = obj.getStatsData().getStringStats();
    // Add to DB
    stats.addToStatsObj(obj);
    List<String> parVals = new ArrayList<String>();
    parVals.add(STRING_VAL);
    store.updatePartitionColumnStatistics(stats, parVals);
    // Get from DB
    List<String> partNames = new ArrayList<String>();
    partNames.add(desc.getPartName());
    List<String> colNames = new ArrayList<String>();
    colNames.add(obj.getColName());
    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
    // Compare ColumnStatisticsDesc
    Assert.assertEquals(1, statsFromDB.size());
    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
    // Compare ColumnStatisticsObj
    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
    // Compare ColumnStatisticsData
    Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
    // Compare StringColumnStatsData
    StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
    Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
    Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
    Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
    Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 69 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class MetaDataFormatUtils method formatWithIndentation.

private static void formatWithIndentation(String colName, String colType, String colComment, StringBuilder tableInfo, List<ColumnStatisticsObj> colStats) {
    tableInfo.append(String.format("%-" + ALIGNMENT + "s", colName)).append(FIELD_DELIM);
    tableInfo.append(String.format("%-" + ALIGNMENT + "s", colType)).append(FIELD_DELIM);
    if (colStats != null) {
        ColumnStatisticsObj cso = getColumnStatisticsObject(colName, colType, colStats);
        if (cso != null) {
            ColumnStatisticsData csd = cso.getStatsData();
            if (csd.isSetBinaryStats()) {
                BinaryColumnStatsData bcsd = csd.getBinaryStats();
                appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(), bcsd.getMaxColLen(), "", "");
            } else if (csd.isSetStringStats()) {
                StringColumnStatsData scsd = csd.getStringStats();
                appendColumnStats(tableInfo, "", "", scsd.getNumNulls(), scsd.getNumDVs(), scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
            } else if (csd.isSetBooleanStats()) {
                BooleanColumnStatsData bcsd = csd.getBooleanStats();
                appendColumnStats(tableInfo, "", "", bcsd.getNumNulls(), "", "", "", bcsd.getNumTrues(), bcsd.getNumFalses());
            } else if (csd.isSetDecimalStats()) {
                DecimalColumnStatsData dcsd = csd.getDecimalStats();
                appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
            } else if (csd.isSetDoubleStats()) {
                DoubleColumnStatsData dcsd = csd.getDoubleStats();
                appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
            } else if (csd.isSetLongStats()) {
                LongColumnStatsData lcsd = csd.getLongStats();
                appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(), lcsd.getNumDVs(), "", "", "", "");
            } else if (csd.isSetDateStats()) {
                DateColumnStatsData dcsd = csd.getDateStats();
                appendColumnStats(tableInfo, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
            }
        } else {
            appendColumnStats(tableInfo, "", "", "", "", "", "", "", "");
        }
    }
    int colNameLength = ALIGNMENT > colName.length() ? ALIGNMENT : colName.length();
    int colTypeLength = ALIGNMENT > colType.length() ? ALIGNMENT : colType.length();
    indentMultilineValue(colComment, tableInfo, new int[] { colNameLength, colTypeLength }, false);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 70 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class MetaDataFormatUtils method formatWithoutIndentation.

private static void formatWithoutIndentation(String name, String type, String comment, StringBuilder colBuffer, List<ColumnStatisticsObj> colStats) {
    colBuffer.append(name);
    colBuffer.append(FIELD_DELIM);
    colBuffer.append(type);
    colBuffer.append(FIELD_DELIM);
    if (colStats != null) {
        ColumnStatisticsObj cso = getColumnStatisticsObject(name, type, colStats);
        if (cso != null) {
            ColumnStatisticsData csd = cso.getStatsData();
            if (csd.isSetBinaryStats()) {
                BinaryColumnStatsData bcsd = csd.getBinaryStats();
                appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "", bcsd.getAvgColLen(), bcsd.getMaxColLen(), "", "");
            } else if (csd.isSetStringStats()) {
                StringColumnStatsData scsd = csd.getStringStats();
                appendColumnStatsNoFormatting(colBuffer, "", "", scsd.getNumNulls(), scsd.getNumDVs(), scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
            } else if (csd.isSetBooleanStats()) {
                BooleanColumnStatsData bcsd = csd.getBooleanStats();
                appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "", "", "", bcsd.getNumTrues(), bcsd.getNumFalses());
            } else if (csd.isSetDecimalStats()) {
                DecimalColumnStatsData dcsd = csd.getDecimalStats();
                appendColumnStatsNoFormatting(colBuffer, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
            } else if (csd.isSetDoubleStats()) {
                DoubleColumnStatsData dcsd = csd.getDoubleStats();
                appendColumnStatsNoFormatting(colBuffer, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
            } else if (csd.isSetLongStats()) {
                LongColumnStatsData lcsd = csd.getLongStats();
                appendColumnStatsNoFormatting(colBuffer, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(), lcsd.getNumDVs(), "", "", "", "");
            } else if (csd.isSetDateStats()) {
                DateColumnStatsData dcsd = csd.getDateStats();
                appendColumnStatsNoFormatting(colBuffer, convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
            }
        } else {
            appendColumnStatsNoFormatting(colBuffer, "", "", "", "", "", "", "", "");
        }
    }
    colBuffer.append(comment == null ? "" : HiveStringUtils.escapeJava(comment));
    colBuffer.append(LINE_DELIM);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Aggregations

ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)108 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)95 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)62 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)56 Test (org.junit.Test)53 ArrayList (java.util.ArrayList)47 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)35 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)34 Table (org.apache.hadoop.hive.metastore.api.Table)33 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)32 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)31 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)30 Partition (org.apache.hadoop.hive.metastore.api.Partition)30 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)27 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)25 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)23 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)22 HashMap (java.util.HashMap)20 List (java.util.List)18