Search in sources :

Example 36 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class DbNotificationListener method onUpdatePartitionColumnStat.

@Override
public void onUpdatePartitionColumnStat(UpdatePartitionColumnStatEvent updatePartColStatEvent) throws MetaException {
    UpdatePartitionColumnStatMessage msg = MessageBuilder.getInstance().buildUpdatePartitionColumnStatMessage(updatePartColStatEvent.getPartColStats(), updatePartColStatEvent.getPartVals(), updatePartColStatEvent.getPartParameters(), updatePartColStatEvent.getTableObj(), updatePartColStatEvent.getWriteId());
    NotificationEvent event = new NotificationEvent(0, now(), EventType.UPDATE_PARTITION_COLUMN_STAT.toString(), msgEncoder.getSerializer().serialize(msg));
    ColumnStatisticsDesc statDesc = updatePartColStatEvent.getPartColStats().getStatsDesc();
    event.setCatName(statDesc.isSetCatName() ? statDesc.getCatName() : DEFAULT_CATALOG_NAME);
    event.setDbName(statDesc.getDbName());
    event.setTableName(statDesc.getTableName());
    process(event, updatePartColStatEvent);
}
Also used : UpdatePartitionColumnStatMessage(org.apache.hadoop.hive.metastore.messaging.UpdatePartitionColumnStatMessage) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent)

Example 37 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class DbNotificationListener method onUpdateTableColumnStat.

@Override
public void onUpdateTableColumnStat(UpdateTableColumnStatEvent updateTableColumnStatEvent) throws MetaException {
    UpdateTableColumnStatMessage msg = MessageBuilder.getInstance().buildUpdateTableColumnStatMessage(updateTableColumnStatEvent.getColStats(), updateTableColumnStatEvent.getTableObj(), updateTableColumnStatEvent.getTableParameters(), updateTableColumnStatEvent.getWriteId());
    NotificationEvent event = new NotificationEvent(0, now(), EventType.UPDATE_TABLE_COLUMN_STAT.toString(), msgEncoder.getSerializer().serialize(msg));
    ColumnStatisticsDesc statDesc = updateTableColumnStatEvent.getColStats().getStatsDesc();
    event.setCatName(statDesc.isSetCatName() ? statDesc.getCatName() : DEFAULT_CATALOG_NAME);
    event.setDbName(statDesc.getDbName());
    event.setTableName(statDesc.getTableName());
    process(event, updateTableColumnStatEvent);
}
Also used : UpdateTableColumnStatMessage(org.apache.hadoop.hive.metastore.messaging.UpdateTableColumnStatMessage) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) NotificationEvent(org.apache.hadoop.hive.metastore.api.NotificationEvent)

Example 38 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class ColStatsProcessor method constructColumnStatsFromPackedRows.

private boolean constructColumnStatsFromPackedRows(Table tbl, List<ColumnStatistics> stats, long maxNumStats) throws HiveException, MetaException, IOException {
    String partName = null;
    List<String> colName = colStatDesc.getColName();
    List<String> colType = colStatDesc.getColType();
    boolean isTblLevel = colStatDesc.isTblLevel();
    InspectableObject packedRow;
    long numStats = 0;
    while ((packedRow = ftOp.getNextRow()) != null) {
        if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
            throw new HiveException("Unexpected object type encountered while unpacking row");
        }
        final List<ColumnStatisticsObj> statsObjs = new ArrayList<>();
        final StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
        final List<? extends StructField> fields = soi.getAllStructFieldRefs();
        final List<Object> values = soi.getStructFieldsDataAsList(packedRow.o);
        // Partition columns are appended at end, we only care about stats column
        int pos = 0;
        for (int i = 0; i < colName.size(); i++) {
            String columnName = colName.get(i);
            String columnType = colType.get(i);
            PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(columnType);
            List<ColumnStatsField> columnStatsFields = ColumnStatsType.getColumnStats(typeInfo);
            try {
                ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveColumnStatistics(columnName, columnType, columnStatsFields, pos, fields, values);
                statsObjs.add(statObj);
                numStats++;
            } catch (Exception e) {
                if (isStatsReliable) {
                    throw new HiveException("Statistics collection failed while (hive.stats.reliable)", e);
                } else {
                    LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e);
                }
            }
            pos += columnStatsFields.size();
        }
        if (!statsObjs.isEmpty()) {
            if (!isTblLevel) {
                List<FieldSchema> partColSchema = tbl.getPartCols();
                List<String> partVals = new ArrayList<>();
                // Iterate over partition columns to figure out partition name
                for (int i = pos; i < pos + partColSchema.size(); i++) {
                    Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(values.get(i));
                    partVals.add(// could be null for default partition
                    partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
                }
                partName = Warehouse.makePartName(partColSchema, partVals);
            }
            ColumnStatisticsDesc statsDesc = buildColumnStatsDesc(tbl, partName, isTblLevel);
            ColumnStatistics colStats = new ColumnStatistics();
            colStats.setStatsDesc(statsDesc);
            colStats.setStatsObj(statsObjs);
            colStats.setEngine(Constants.HIVE_ENGINE);
            stats.add(colStats);
            if (numStats >= maxNumStats) {
                return false;
            }
        }
    }
    ftOp.clearFetchContext();
    return true;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 39 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class ColumnStatsUpdateTask method getColumnStatsDesc.

private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName, String partName, boolean isTblLevel) {
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
    statsDesc.setDbName(dbName);
    statsDesc.setTableName(tableName);
    statsDesc.setIsTblLevel(isTblLevel);
    if (!isTblLevel) {
        statsDesc.setPartName(partName);
    } else {
        statsDesc.setPartName(null);
    }
    return statsDesc;
}
Also used : ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 40 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class ColumnStatsUpdateTask method constructColumnStatsFromInput.

private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
    // If we are replicating the stats, we don't need to construct those again.
    if (work.getColStats() != null) {
        ColumnStatistics colStats = work.getColStats();
        LOG.debug("Got stats through replication for " + colStats.getStatsDesc().getDbName() + "." + colStats.getStatsDesc().getTableName());
        return colStats;
    }
    String dbName = work.dbName();
    String tableName = work.getTableName();
    String partName = work.getPartName();
    String colName = work.getColName();
    String columnType = work.getColType();
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    // grammar prohibits more than 1 column so we are guaranteed to have only 1
    // element in this lists.
    statsObj.setColName(colName);
    statsObj.setColType(columnType);
    ColumnStatisticsData statsData = new ColumnStatisticsData();
    if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNullsIsSet(false);
        longStats.setNumDVsIsSet(false);
        longStats.setLowValueIsSet(false);
        longStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                longStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                longStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                longStats.setLowValue(Long.parseLong(value));
            } else if (fName.equals("highValue")) {
                longStats.setHighValue(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setLongStats(longStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNullsIsSet(false);
        doubleStats.setNumDVsIsSet(false);
        doubleStats.setLowValueIsSet(false);
        doubleStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                doubleStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                doubleStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                doubleStats.setLowValue(Double.parseDouble(value));
            } else if (fName.equals("highValue")) {
                doubleStats.setHighValue(Double.parseDouble(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDoubleStats(doubleStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char") || columnType.toLowerCase().startsWith("varchar")) {
        // char(x),varchar(x) types
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setMaxColLenIsSet(false);
        stringStats.setAvgColLenIsSet(false);
        stringStats.setNumNullsIsSet(false);
        stringStats.setNumDVsIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                stringStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                stringStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                stringStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                stringStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setStringStats(stringStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("boolean")) {
        BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
        booleanStats.setNumNullsIsSet(false);
        booleanStats.setNumTruesIsSet(false);
        booleanStats.setNumFalsesIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                booleanStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numTrues")) {
                booleanStats.setNumTrues(Long.parseLong(value));
            } else if (fName.equals("numFalses")) {
                booleanStats.setNumFalses(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBooleanStats(booleanStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNullsIsSet(false);
        binaryStats.setAvgColLenIsSet(false);
        binaryStats.setMaxColLenIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                binaryStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                binaryStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                binaryStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBinaryStats(binaryStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.toLowerCase().startsWith("decimal")) {
        // decimal(a,b) type
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNullsIsSet(false);
        decimalStats.setNumDVsIsSet(false);
        decimalStats.setLowValueIsSet(false);
        decimalStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                decimalStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                decimalStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                BigDecimal d = new BigDecimal(value);
                decimalStats.setLowValue(DecimalUtils.getDecimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else if (fName.equals("highValue")) {
                BigDecimal d = new BigDecimal(value);
                decimalStats.setHighValue(DecimalUtils.getDecimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDecimalStats(decimalStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                dateStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                dateStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                // Date high/low value is stored as long in stats DB, but allow users to set high/low
                // value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
                dateStats.setLowValue(readDateValue(value));
            } else if (fName.equals("highValue")) {
                dateStats.setHighValue(readDateValue(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDateStats(dateStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("timestamp")) {
        TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                timestampStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                timestampStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                timestampStats.setLowValue(readTimestampValue(value));
            } else if (fName.equals("highValue")) {
                timestampStats.setHighValue(readTimestampValue(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setTimestampStats(timestampStats);
        statsObj.setStatsData(statsData);
    } else {
        throw new SemanticException("Unsupported type");
    }
    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, partName, partName == null);
    ColumnStatistics colStat = new ColumnStatistics();
    colStat.setStatsDesc(statsDesc);
    colStat.addToStatsObj(statsObj);
    colStat.setEngine(Constants.HIVE_ENGINE);
    return colStat;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) BigDecimal(java.math.BigDecimal) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Entry(java.util.Map.Entry) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) Map(java.util.Map) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Aggregations

ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)95 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)77 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)68 ArrayList (java.util.ArrayList)60 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)54 Test (org.junit.Test)50 Table (org.apache.hadoop.hive.metastore.api.Table)38 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)35 Partition (org.apache.hadoop.hive.metastore.api.Partition)30 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)30 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)28 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)25 List (java.util.List)24 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)19 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)12 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)11 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)11 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)9 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)7 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)7