Search in sources :

Example 71 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class SessionHiveMetaStoreClient method setPartitionColumnStatistics.

/**
 * {@inheritDoc}
 *
 * <p>A request that carries exactly one {@link ColumnStatistics} entry whose descriptor
 * resolves (via {@code getTempTable}) to a temp table is applied through
 * {@code updateTempTableColumnStats}. Every other request is forwarded to the superclass.
 */
@Override
public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException {
    // Only single-entry requests are candidates for the temp-table path.
    if (request.getColStatsSize() != 1) {
        return super.setPartitionColumnStatistics(request);
    }
    ColumnStatistics onlyEntry = request.getColStatsIterator().next();
    ColumnStatisticsDesc entryDesc = onlyEntry.getStatsDesc();
    // Names are lower-cased before the temp-table lookup.
    String lowerDb = entryDesc.getDbName().toLowerCase();
    String lowerTable = entryDesc.getTableName().toLowerCase();
    boolean targetsTempTable = getTempTable(lowerDb, lowerTable) != null;
    return targetsTempTable
        ? updateTempTableColumnStats(lowerDb, lowerTable, onlyEntry)
        : super.setPartitionColumnStatistics(request);
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 72 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class ColStatsProcessor method constructColumnStatsFromPackedRows.

/**
 * Drains {@code ftOp} and turns every fetched struct row into one {@link ColumnStatistics} entry.
 *
 * <p>The leading fields of a row are read as per-column statistics, using the column names
 * and types from {@code colStatDesc}. For partition-level collection the trailing fields are
 * read as partition column values and combined into the partition name; a {@code null} value
 * is replaced by the configured default partition name. Rows for which no column statistic
 * could be read are dropped.
 *
 * @param tbl1 table supplying the partition columns and the identity recorded in each descriptor
 * @return one entry per fetched row that produced at least one column statistic
 * @throws HiveException if a fetched row is not a struct, or if reading a column fails while
 *         {@code isStatsReliable} is set
 */
private List<ColumnStatistics> constructColumnStatsFromPackedRows(Table tbl1) throws HiveException, MetaException, IOException {
    final List<String> statColNames = colStatDesc.getColName();
    final List<String> statColTypes = colStatDesc.getColType();
    final boolean tableLevel = colStatDesc.isTblLevel();
    final List<ColumnStatistics> collected = new ArrayList<>();

    for (InspectableObject row = ftOp.getNextRow(); row != null; row = ftOp.getNextRow()) {
        if (row.oi.getCategory() != ObjectInspector.Category.STRUCT) {
            throw new HiveException("Unexpected object type encountered while unpacking row");
        }
        final StructObjectInspector rowInspector = (StructObjectInspector) row.oi;
        final List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();
        final List<Object> fieldValues = rowInspector.getStructFieldsDataAsList(row.o);
        final List<FieldSchema> partCols = tbl1.getPartCols();
        final int fieldCount = fieldRefs.size();

        // Partition columns are appended at the end; only the leading fields are stats columns.
        int statColCount = fieldCount;
        if (!tableLevel) {
            statColCount -= partCols.size();
        }
        assert fieldValues != null;

        final List<ColumnStatisticsObj> columnStats = new ArrayList<>();
        for (int col = 0; col < statColCount; col++) {
            final StructField fieldRef = fieldRefs.get(col);
            final String name = statColNames.get(col);
            final String type = statColTypes.get(col);
            final Object value = fieldValues.get(col);
            try {
                columnStats.add(ColumnStatisticsObjTranslator.readHiveStruct(name, type, fieldRef, value));
            } catch (Exception e) {
                if (isStatsReliable) {
                    throw new HiveException("Statistics collection failed while (hive.stats.reliable)", e);
                }
                // Best effort when reliability is not required: skip this column and keep going.
                LOG.debug("Because {} is infinite or NaN, we skip stats.", name, e);
            }
        }
        if (columnStats.isEmpty()) {
            continue;
        }

        String partName = null;
        if (!tableLevel) {
            // Walk the trailing partition fields to assemble the partition name.
            final List<String> partVals = new ArrayList<>();
            for (int idx = fieldCount - partCols.size(); idx < fieldCount; idx++) {
                final PrimitiveObjectInspector partInspector = (PrimitiveObjectInspector) fieldRefs.get(idx).getFieldObjectInspector();
                final Object partVal = partInspector.getPrimitiveJavaObject(fieldValues.get(idx));
                if (partVal == null) {
                    // could be null for default partition
                    partVals.add(this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME));
                } else {
                    partVals.add(partVal.toString());
                }
            }
            partName = Warehouse.makePartName(partCols, partVals);
        }

        final ColumnStatistics rowStats = new ColumnStatistics();
        rowStats.setStatsDesc(buildColumnStatsDesc(tbl1, partName, tableLevel));
        rowStats.setStatsObj(columnStats);
        collected.add(rowStats);
    }
    ftOp.clearFetchContext();
    return collected;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 73 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class ColStatsProcessor method buildColumnStatsDesc.

/**
 * Creates the statistics descriptor identifying {@code table} and, for partition-level
 * statistics, the partition.
 *
 * @param table      table whose database and table names are copied into the descriptor
 * @param partName   partition name to record; only used when {@code isTblLevel} is false
 * @param isTblLevel whether the descriptor is for table-level statistics
 * @return the populated descriptor; its partition name is {@code null} for table-level statistics
 */
private ColumnStatisticsDesc buildColumnStatsDesc(Table table, String partName, boolean isTblLevel) {
    final String db = table.getDbName();
    assert db != null;
    final ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
    desc.setDbName(db);
    desc.setTableName(table.getTableName());
    desc.setIsTblLevel(isTblLevel);
    // Table-level statistics carry no partition name.
    desc.setPartName(isTblLevel ? null : partName);
    return desc;
}
Also used : ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 74 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class MetaStoreDirectSql method getPartitionStats.

/**
 * Fetches partition-level column statistics with direct SQL and groups them by partition.
 *
 * <p>The lookup is batched twice: the outer batch splits {@code colNames}, the inner batch
 * splits {@code partNames}. Rows are consumed in the order returned and consecutive rows
 * sharing a partition name are folded into one {@link ColumnStatistics}.
 *
 * <p>All JDO queries opened here are closed before the method returns, including when it
 * exits with an exception.
 *
 * @param dbName          database name bound to the {@code DB_NAME} predicate
 * @param tableName       table name bound to the {@code TABLE_NAME} predicate
 * @param partNames       partition names to look up
 * @param colNames        column names to look up
 * @param enableBitVector forwarded to {@code getStatsList} when building the select list
 * @return one entry per run of rows with the same partition name; an empty list when either
 *         {@code colNames} or {@code partNames} is empty
 * @throws MetaException propagated from the batched query execution
 */
public List<ColumnStatistics> getPartitionStats(final String dbName, final String tableName, final List<String> partNames, List<String> colNames, boolean enableBitVector) throws MetaException {
    if (colNames.isEmpty() || partNames.isEmpty()) {
        return Collections.emptyList();
    }
    final boolean doTrace = LOG.isDebugEnabled();
    final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from " + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\"" + "  in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
    Batchable<String, Object[]> b = new Batchable<String, Object[]>() {

        @Override
        public List<Object[]> run(final List<String> inputColNames) throws MetaException {
            Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {

                @Override
                public List<Object[]> run(List<String> inputPartNames) throws MetaException {
                    String queryText = String.format(queryText0, makeParams(inputColNames.size()), makeParams(inputPartNames.size()));
                    long start = doTrace ? System.nanoTime() : 0;
                    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
                    // Set once the query has been registered for deferred closing; until then
                    // this method owns the query and must close it on every exit path.
                    boolean handedOff = false;
                    try {
                        Object qResult = executeWithArray(query, prepareParams(dbName, tableName, inputPartNames, inputColNames), queryText);
                        timingTrace(doTrace, queryText0, start, (doTrace ? System.nanoTime() : 0));
                        if (qResult == null) {
                            return Collections.emptyList();
                        }
                        // The result is still read by the caller, so closing is deferred.
                        addQueryAfterUse(query);
                        handedOff = true;
                        return ensureList(qResult);
                    } finally {
                        if (!handedOff) {
                            query.closeAll();
                        }
                    }
                }
            };
            try {
                return runBatched(partNames, b2);
            } finally {
                // Pass the inner batch's open queries to the outer batch so they are closed together.
                addQueryAfterUse(b2);
            }
        }
    };
    try {
        List<Object[]> list = runBatched(colNames, b);
        List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(Math.min(list.size(), partNames.size()));
        String lastPartName = null;
        int from = 0;
        // Iterate one step past the end so the final run of rows is flushed as well.
        for (int i = 0; i <= list.size(); ++i) {
            boolean isLast = i == list.size();
            // The partition name is the first selected column of each row.
            String partName = isLast ? null : (String) list.get(i)[0];
            if (!isLast && partName.equals(lastPartName)) {
                continue;
            } else if (from != i) {
                ColumnStatisticsDesc csd = new ColumnStatisticsDesc(false, dbName, tableName);
                csd.setPartName(lastPartName);
                // Offset 1 is passed so makeColumnStats starts after the partition-name column
                // (presumed meaning of the argument; confirm against makeColumnStats).
                result.add(makeColumnStats(list.subList(from, i), csd, 1));
            }
            lastPartName = partName;
            from = i;
            Deadline.checkTimeout();
        }
        return result;
    } finally {
        // Release the registered queries even when batching or post-processing fails.
        b.closeAllQueries();
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) Query(javax.jdo.Query) ArrayList(java.util.ArrayList) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList)

Example 75 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class StatObjectConverter method getTableColumnStatisticsDesc.

/**
 * Builds a table-level statistics descriptor from a stored table column statistics object.
 *
 * @param mStatsObj model object supplying the database name, table name and last-analyzed value
 * @return a new descriptor flagged as table level and carrying those three values
 */
public static ColumnStatisticsDesc getTableColumnStatisticsDesc(MTableColumnStatistics mStatsObj) {
    final ColumnStatisticsDesc tableDesc = new ColumnStatisticsDesc();
    // Identity of the table the statistics belong to.
    tableDesc.setDbName(mStatsObj.getDbName());
    tableDesc.setTableName(mStatsObj.getTableName());
    // Always table level for this converter.
    tableDesc.setIsTblLevel(true);
    tableDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed());
    return tableDesc;
}
Also used : ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Aggregations

ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)81 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)69 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)63 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)56 ArrayList (java.util.ArrayList)54 Test (org.junit.Test)53 Table (org.apache.hadoop.hive.metastore.api.Table)37 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)36 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)32 Partition (org.apache.hadoop.hive.metastore.api.Partition)31 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)31 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)28 List (java.util.List)22 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)18 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)12 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)11 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)11 Database (org.apache.hadoop.hive.metastore.api.Database)7 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)7 LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)5