Search in sources:

Example 41 with LongColumnStatsData

use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.

the class TestHBaseAggregateStatsExtrapolation method noPartitionsHaveBitVectorStatus.

/**
 * Creates a table partitioned on {@code ds} with ten partitions ({@code ds=0} .. {@code ds=9}),
 * stores long column statistics for {@code col2} on each of them without setting any bit
 * vectors, and then checks the aggregate statistics returned for all ten partitions.
 *
 * <p>Partition {@code i} is given high value {@code 1000 + i}, low value {@code -1000 - i},
 * {@code i} nulls and {@code 10 * i} distinct values. The aggregate is expected to report
 * high {@code 1009}, low {@code -1009}, {@code 45} nulls and {@code 90} distinct values.
 *
 * @throws Exception if any store operation or assertion fails
 */
@Test
public void noPartitionsHaveBitVectorStatus() throws Exception {
    String dbName = "default";
    String tableName = "snp";
    long now = System.currentTimeMillis();
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema("col2", "long", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, serde, null, null, Collections.<String, String>emptyMap());
    List<FieldSchema> partCols = new ArrayList<>();
    partCols.add(new FieldSchema("ds", "string", ""));
    Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, Collections.<String, String>emptyMap(), null, null, null);
    store.createTable(table);
    for (int i = 0; i < 10; i++) {
        List<String> partVal = Arrays.asList("" + i);
        // Build the name from the scalar value; concatenating the List itself would
        // produce "ds=[0]" instead of "ds=0", which is the form queried further below.
        String partName = "ds=" + i;
        StorageDescriptor psd = new StorageDescriptor(sd);
        psd.setLocation("file:/tmp/default/hit/" + partName);
        Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, Collections.<String, String>emptyMap());
        store.addPartition(part);
        ColumnStatistics cs = new ColumnStatistics();
        ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
        desc.setLastAnalyzed(now);
        desc.setPartName(partName);
        cs.setStatsDesc(desc);
        ColumnStatisticsObj obj = new ColumnStatisticsObj();
        obj.setColName("col2");
        obj.setColType("long");
        ColumnStatisticsData data = new ColumnStatisticsData();
        LongColumnStatsData dcsd = new LongColumnStatsData();
        dcsd.setHighValue(1000 + i);
        dcsd.setLowValue(-1000 - i);
        dcsd.setNumNulls(i);
        dcsd.setNumDVs(10 * i);
        // No bit vectors are set on purpose: this test covers the case where none of the
        // partitions carries one.
        data.setLongStats(dcsd);
        obj.setStatsData(data);
        cs.addToStatsObj(obj);
        store.updatePartitionColumnStatistics(cs, partVal);
    }
    Checker statChecker = new Checker() {

        @Override
        public void checkStats(AggrStats aggrStats) throws Exception {
            Assert.assertEquals(10, aggrStats.getPartsFound());
            Assert.assertEquals(1, aggrStats.getColStatsSize());
            ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
            Assert.assertEquals("col2", cso.getColName());
            Assert.assertEquals("long", cso.getColType());
            LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
            // High/low values are longs, so compare them exactly rather than as
            // doubles with a tolerance.
            Assert.assertEquals(1009, lcsd.getHighValue());
            Assert.assertEquals(-1009, lcsd.getLowValue());
            Assert.assertEquals(45, lcsd.getNumNulls());
            Assert.assertEquals(90, lcsd.getNumDVs());
        }
    };
    List<String> partNames = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, Arrays.asList("col2"));
    statChecker.checkStats(aggrStats);
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) List(java.util.List) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 42 with LongColumnStatsData

use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.

the class MetaDataFormatUtils method extractColumnValues.

/**
 * Builds the row of display values for one column: name, type, optionally nine
 * statistics cells, and finally the column comment.
 *
 * <p>When {@code isColStatsAvailable} is {@code true} exactly nine statistics cells are always
 * emitted. They are filled from whichever statistics kind is set on the stats object and stay
 * empty strings when the stats object is {@code null}, carries no stats data, or carries a
 * kind not handled here, so every row has the same number of cells.
 *
 * @param col the column whose name, type and comment are reported
 * @param isColStatsAvailable whether the statistics cells should be part of the row
 * @param columnStatisticsObj the statistics for the column; may be {@code null}
 * @return the row values in display order
 */
public static String[] extractColumnValues(FieldSchema col, boolean isColStatsAvailable, ColumnStatisticsObj columnStatisticsObj) {
    List<String> ret = new ArrayList<>();
    ret.add(col.getName());
    ret.add(col.getType());
    if (isColStatsAvailable) {
        // Start from empty placeholders so the comment always lands in the same cell,
        // even when no recognised statistics kind is present.
        List<String> statValues = Lists.newArrayList("", "", "", "", "", "", "", "", "");
        ColumnStatisticsData csd = columnStatisticsObj == null ? null : columnStatisticsObj.getStatsData();
        if (csd != null) {
            // @formatter:off
            if (csd.isSetBinaryStats()) {
                BinaryColumnStatsData bcsd = csd.getBinaryStats();
                statValues = Lists.newArrayList("", "", "" + bcsd.getNumNulls(), "", "" + bcsd.getAvgColLen(), "" + bcsd.getMaxColLen(), "", "", convertToString(bcsd.getBitVectors()));
            } else if (csd.isSetStringStats()) {
                StringColumnStatsData scsd = csd.getStringStats();
                statValues = Lists.newArrayList("", "", "" + scsd.getNumNulls(), "" + scsd.getNumDVs(), "" + scsd.getAvgColLen(), "" + scsd.getMaxColLen(), "", "", convertToString(scsd.getBitVectors()));
            } else if (csd.isSetBooleanStats()) {
                BooleanColumnStatsData bcsd = csd.getBooleanStats();
                statValues = Lists.newArrayList("", "", "" + bcsd.getNumNulls(), "", "", "", "" + bcsd.getNumTrues(), "" + bcsd.getNumFalses(), convertToString(bcsd.getBitVectors()));
            } else if (csd.isSetDecimalStats()) {
                DecimalColumnStatsData dcsd = csd.getDecimalStats();
                statValues = Lists.newArrayList(convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), "" + dcsd.getNumNulls(), "" + dcsd.getNumDVs(), "", "", "", "", convertToString(dcsd.getBitVectors()));
            } else if (csd.isSetDoubleStats()) {
                DoubleColumnStatsData dcsd = csd.getDoubleStats();
                statValues = Lists.newArrayList("" + dcsd.getLowValue(), "" + dcsd.getHighValue(), "" + dcsd.getNumNulls(), "" + dcsd.getNumDVs(), "", "", "", "", convertToString(dcsd.getBitVectors()));
            } else if (csd.isSetLongStats()) {
                LongColumnStatsData lcsd = csd.getLongStats();
                statValues = Lists.newArrayList("" + lcsd.getLowValue(), "" + lcsd.getHighValue(), "" + lcsd.getNumNulls(), "" + lcsd.getNumDVs(), "", "", "", "", convertToString(lcsd.getBitVectors()));
            } else if (csd.isSetDateStats()) {
                DateColumnStatsData dcsd = csd.getDateStats();
                statValues = Lists.newArrayList(convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), "" + dcsd.getNumNulls(), "" + dcsd.getNumDVs(), "", "", "", "", convertToString(dcsd.getBitVectors()));
            }
            // @formatter:on
        }
        ret.addAll(statValues);
    }
    ret.add(getComment(col));
    return ret.toArray(new String[0]);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 43 with LongColumnStatsData

use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.

the class StatObjectConverter method convertToMTableColumnStatistics.

// JDO
/**
 * Copies table-level column statistics into a new {@link MTableColumnStatistics} model object.
 *
 * <p>The descriptor supplies database name, table name and last-analyzed time; the stats
 * object supplies column name, column type and the typed statistics. For each typed statistic,
 * a field that is not set on the source is passed on as {@code null}.
 *
 * @param table the model table the statistics are attached to
 * @param statsDesc descriptor of the statistics; must not be {@code null}
 * @param statsObj the column statistics to convert; must not be {@code null}
 * @return the populated model object
 * @throws InvalidObjectException if {@code statsObj} or {@code statsDesc} is {@code null}
 */
public static MTableColumnStatistics convertToMTableColumnStatistics(MTable table, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj) throws NoSuchObjectException, MetaException, InvalidObjectException {
    if (statsDesc == null || statsObj == null) {
        throw new InvalidObjectException("Invalid column stats object");
    }

    final MTableColumnStatistics converted = new MTableColumnStatistics();
    converted.setTable(table);
    converted.setDbName(statsDesc.getDbName());
    converted.setTableName(statsDesc.getTableName());
    converted.setLastAnalyzed(statsDesc.getLastAnalyzed());
    converted.setColName(statsObj.getColName());
    converted.setColType(statsObj.getColType());

    // Look the typed payload up once and dispatch on whichever kind is set.
    final ColumnStatisticsData payload = statsObj.getStatsData();
    if (payload.isSetBooleanStats()) {
        BooleanColumnStatsData bools = payload.getBooleanStats();
        converted.setBooleanStats(
                bools.isSetNumTrues() ? bools.getNumTrues() : null,
                bools.isSetNumFalses() ? bools.getNumFalses() : null,
                bools.isSetNumNulls() ? bools.getNumNulls() : null);
    } else if (payload.isSetLongStats()) {
        LongColumnStatsData longs = payload.getLongStats();
        converted.setLongStats(
                longs.isSetNumNulls() ? longs.getNumNulls() : null,
                longs.isSetNumDVs() ? longs.getNumDVs() : null,
                longs.isSetBitVectors() ? longs.getBitVectors() : null,
                longs.isSetLowValue() ? longs.getLowValue() : null,
                longs.isSetHighValue() ? longs.getHighValue() : null);
    } else if (payload.isSetDoubleStats()) {
        DoubleColumnStatsData doubles = payload.getDoubleStats();
        converted.setDoubleStats(
                doubles.isSetNumNulls() ? doubles.getNumNulls() : null,
                doubles.isSetNumDVs() ? doubles.getNumDVs() : null,
                doubles.isSetBitVectors() ? doubles.getBitVectors() : null,
                doubles.isSetLowValue() ? doubles.getLowValue() : null,
                doubles.isSetHighValue() ? doubles.getHighValue() : null);
    } else if (payload.isSetDecimalStats()) {
        DecimalColumnStatsData decimals = payload.getDecimalStats();
        // Decimal bounds are passed to the model as strings.
        String lowText = decimals.isSetLowValue() ? createJdoDecimalString(decimals.getLowValue()) : null;
        String highText = decimals.isSetHighValue() ? createJdoDecimalString(decimals.getHighValue()) : null;
        converted.setDecimalStats(
                decimals.isSetNumNulls() ? decimals.getNumNulls() : null,
                decimals.isSetNumDVs() ? decimals.getNumDVs() : null,
                decimals.isSetBitVectors() ? decimals.getBitVectors() : null,
                lowText,
                highText);
    } else if (payload.isSetStringStats()) {
        StringColumnStatsData strings = payload.getStringStats();
        converted.setStringStats(
                strings.isSetNumNulls() ? strings.getNumNulls() : null,
                strings.isSetNumDVs() ? strings.getNumDVs() : null,
                strings.isSetBitVectors() ? strings.getBitVectors() : null,
                strings.isSetMaxColLen() ? strings.getMaxColLen() : null,
                strings.isSetAvgColLen() ? strings.getAvgColLen() : null);
    } else if (payload.isSetBinaryStats()) {
        BinaryColumnStatsData binaries = payload.getBinaryStats();
        converted.setBinaryStats(
                binaries.isSetNumNulls() ? binaries.getNumNulls() : null,
                binaries.isSetMaxColLen() ? binaries.getMaxColLen() : null,
                binaries.isSetAvgColLen() ? binaries.getAvgColLen() : null);
    } else if (payload.isSetDateStats()) {
        DateColumnStatsData dates = payload.getDateStats();
        // Date bounds are passed to the model as days since the epoch.
        converted.setDateStats(
                dates.isSetNumNulls() ? dates.getNumNulls() : null,
                dates.isSetNumDVs() ? dates.getNumDVs() : null,
                dates.isSetBitVectors() ? dates.getBitVectors() : null,
                dates.isSetLowValue() ? dates.getLowValue().getDaysSinceEpoch() : null,
                dates.isSetHighValue() ? dates.getHighValue().getDaysSinceEpoch() : null);
    }
    return converted;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 44 with LongColumnStatsData

use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.

the class TestTxnCommands method verifyLongStats.

/**
 * Asserts that {@code stats} holds exactly one entry and that its long statistics match the
 * expected low value, high value and distinct-value count.
 *
 * @param dvCount expected number of distinct values
 * @param min expected low value
 * @param max expected high value
 * @param stats the column statistics to check
 */
private void verifyLongStats(int dvCount, int min, int max, List<ColumnStatisticsObj> stats) {
    Assert.assertEquals(1, stats.size());
    ColumnStatisticsObj onlyColumn = stats.get(0);
    LongColumnStatsData longStats = onlyColumn.getStatsData().getLongStats();
    Assert.assertEquals(min, longStats.getLowValue());
    Assert.assertEquals(max, longStats.getHighValue());
    Assert.assertEquals(dvCount, longStats.getNumDVs());
}
Also used : LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData)

Example 45 with LongColumnStatsData

use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.

the class ShowUtils method extractColumnValues.

/**
 * Builds the row of display values for one column: name, type, optionally nine
 * statistics cells, and finally the column comment (empty string when there is none).
 *
 * <p>When {@code isColumnStatsAvailable} is {@code true} exactly nine statistics cells are
 * always emitted. They are filled from whichever statistics kind is set on the stats object and
 * stay empty strings when the stats object is {@code null}, carries no stats data, or carries a
 * kind not handled here, so every row has the same number of cells.
 *
 * @param column the column whose name, type and comment are reported
 * @param isColumnStatsAvailable whether the statistics cells should be part of the row
 * @param columnStatisticsObj the statistics for the column; may be {@code null}
 * @return the row values in display order
 */
public static String[] extractColumnValues(FieldSchema column, boolean isColumnStatsAvailable, ColumnStatisticsObj columnStatisticsObj) {
    List<String> values = new ArrayList<>();
    values.add(column.getName());
    values.add(column.getType());
    if (isColumnStatsAvailable) {
        // Start from empty placeholders so the comment always lands in the same cell,
        // even when no recognised statistics kind is present.
        List<String> statValues = Lists.newArrayList("", "", "", "", "", "", "", "", "");
        ColumnStatisticsData statsData = columnStatisticsObj == null ? null : columnStatisticsObj.getStatsData();
        if (statsData != null) {
            if (statsData.isSetBinaryStats()) {
                BinaryColumnStatsData binaryStats = statsData.getBinaryStats();
                statValues = Lists.newArrayList("", "", "" + binaryStats.getNumNulls(), "", "" + binaryStats.getAvgColLen(), "" + binaryStats.getMaxColLen(), "", "", convertToString(binaryStats.getBitVectors()));
            } else if (statsData.isSetStringStats()) {
                StringColumnStatsData stringStats = statsData.getStringStats();
                statValues = Lists.newArrayList("", "", "" + stringStats.getNumNulls(), "" + stringStats.getNumDVs(), "" + stringStats.getAvgColLen(), "" + stringStats.getMaxColLen(), "", "", convertToString(stringStats.getBitVectors()));
            } else if (statsData.isSetBooleanStats()) {
                BooleanColumnStatsData booleanStats = statsData.getBooleanStats();
                statValues = Lists.newArrayList("", "", "" + booleanStats.getNumNulls(), "", "", "", "" + booleanStats.getNumTrues(), "" + booleanStats.getNumFalses(), convertToString(booleanStats.getBitVectors()));
            } else if (statsData.isSetDecimalStats()) {
                DecimalColumnStatsData decimalStats = statsData.getDecimalStats();
                statValues = Lists.newArrayList(convertToString(decimalStats.getLowValue()), convertToString(decimalStats.getHighValue()), "" + decimalStats.getNumNulls(), "" + decimalStats.getNumDVs(), "", "", "", "", convertToString(decimalStats.getBitVectors()));
            } else if (statsData.isSetDoubleStats()) {
                DoubleColumnStatsData doubleStats = statsData.getDoubleStats();
                statValues = Lists.newArrayList("" + doubleStats.getLowValue(), "" + doubleStats.getHighValue(), "" + doubleStats.getNumNulls(), "" + doubleStats.getNumDVs(), "", "", "", "", convertToString(doubleStats.getBitVectors()));
            } else if (statsData.isSetLongStats()) {
                LongColumnStatsData longStats = statsData.getLongStats();
                statValues = Lists.newArrayList("" + longStats.getLowValue(), "" + longStats.getHighValue(), "" + longStats.getNumNulls(), "" + longStats.getNumDVs(), "", "", "", "", convertToString(longStats.getBitVectors()));
            } else if (statsData.isSetDateStats()) {
                DateColumnStatsData dateStats = statsData.getDateStats();
                statValues = Lists.newArrayList(convertToString(dateStats.getLowValue()), convertToString(dateStats.getHighValue()), "" + dateStats.getNumNulls(), "" + dateStats.getNumDVs(), "", "", "", "", convertToString(dateStats.getBitVectors()));
            } else if (statsData.isSetTimestampStats()) {
                TimestampColumnStatsData timestampStats = statsData.getTimestampStats();
                statValues = Lists.newArrayList(convertToString(timestampStats.getLowValue()), convertToString(timestampStats.getHighValue()), "" + timestampStats.getNumNulls(), "" + timestampStats.getNumDVs(), "", "", "", "", convertToString(timestampStats.getBitVectors()));
            }
        }
        values.addAll(statValues);
    }
    values.add(column.getComment() != null ? column.getComment() : "");
    return values.toArray(new String[0]);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) TimestampColumnStatsData(org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Aggregations

LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 54; ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 39; ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 35; StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData): 23; BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData): 22; BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 22; DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData): 22; DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData): 22; ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 20; ArrayList (java.util.ArrayList): 19; ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 19; Test (org.junit.Test): 19; DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData): 15; Table (org.apache.hadoop.hive.metastore.api.Table): 15; FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 14; Partition (org.apache.hadoop.hive.metastore.api.Partition): 14; StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 14; SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 13; AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 12; List (java.util.List): 11