Search in sources :

Example 41 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class HiveAlterHandler method alterPartition.

@Override
public Partition alterPartition(final RawStore msdb, Warehouse wh, final String dbname, final String name, final List<String> part_vals, final Partition new_part, EnvironmentContext environmentContext, IHMSHandler handler) throws InvalidOperationException, InvalidObjectException, AlreadyExistsException, MetaException {
    boolean success = false;
    Partition oldPart;
    List<TransactionalMetaStoreEventListener> transactionalListeners = null;
    if (handler != null) {
        transactionalListeners = handler.getTransactionalListeners();
    }
    // Set DDL time to now if not specified
    if (new_part.getParameters() == null || new_part.getParameters().get(hive_metastoreConstants.DDL_TIME) == null || Integer.parseInt(new_part.getParameters().get(hive_metastoreConstants.DDL_TIME)) == 0) {
        new_part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(System.currentTimeMillis() / 1000));
    }
    // alter partition
    if (part_vals == null || part_vals.size() == 0) {
        try {
            msdb.openTransaction();
            Table tbl = msdb.getTable(dbname, name);
            if (tbl == null) {
                throw new InvalidObjectException("Unable to alter partition because table or database does not exist.");
            }
            oldPart = msdb.getPartition(dbname, name, new_part.getValues());
            if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) {
                // if stats are same, no need to update
                if (MetaStoreUtils.isFastStatsSame(oldPart, new_part)) {
                    MetaStoreUtils.updateBasicState(environmentContext, new_part.getParameters());
                } else {
                    MetaStoreUtils.updatePartitionStatsFast(new_part, tbl, wh, false, true, environmentContext, false);
                }
            }
            // PartitionView does not have SD. We do not need update its column stats
            if (oldPart.getSd() != null) {
                updateOrGetPartitionColumnStats(msdb, dbname, name, new_part.getValues(), oldPart.getSd().getCols(), tbl, new_part, null);
            }
            msdb.alterPartition(dbname, name, new_part.getValues(), new_part);
            if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
                MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ALTER_PARTITION, new AlterPartitionEvent(oldPart, new_part, tbl, false, true, handler), environmentContext);
            }
            success = msdb.commitTransaction();
        } catch (InvalidObjectException e) {
            throw new InvalidOperationException("alter is not possible");
        } catch (NoSuchObjectException e) {
            // old partition does not exist
            throw new InvalidOperationException("alter is not possible");
        } finally {
            if (!success) {
                msdb.rollbackTransaction();
            }
        }
        return oldPart;
    }
    // rename partition
    String oldPartLoc;
    String newPartLoc;
    Path srcPath = null;
    Path destPath = null;
    FileSystem srcFs;
    FileSystem destFs = null;
    boolean dataWasMoved = false;
    try {
        msdb.openTransaction();
        Table tbl = msdb.getTable(dbname, name);
        if (tbl == null) {
            throw new InvalidObjectException("Unable to alter partition because table or database does not exist.");
        }
        try {
            oldPart = msdb.getPartition(dbname, name, part_vals);
        } catch (NoSuchObjectException e) {
            // this means there is no existing partition
            throw new InvalidObjectException("Unable to rename partition because old partition does not exist");
        }
        Partition check_part;
        try {
            check_part = msdb.getPartition(dbname, name, new_part.getValues());
        } catch (NoSuchObjectException e) {
            // this means there is no existing partition
            check_part = null;
        }
        if (check_part != null) {
            throw new AlreadyExistsException("Partition already exists:" + dbname + "." + name + "." + new_part.getValues());
        }
        // 3) rename the partition directory if it is not an external table
        if (!tbl.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) {
            try {
                // if tbl location is available use it
                // else derive the tbl location from database location
                destPath = wh.getPartitionPath(msdb.getDatabase(dbname), tbl, new_part.getValues());
                destPath = constructRenamedPath(destPath, new Path(new_part.getSd().getLocation()));
            } catch (NoSuchObjectException e) {
                LOG.debug("Didn't find object in metastore ", e);
                throw new InvalidOperationException("Unable to change partition or table. Database " + dbname + " does not exist" + " Check metastore logs for detailed stack." + e.getMessage());
            }
            if (destPath != null) {
                newPartLoc = destPath.toString();
                oldPartLoc = oldPart.getSd().getLocation();
                LOG.info("srcPath:" + oldPartLoc);
                LOG.info("descPath:" + newPartLoc);
                srcPath = new Path(oldPartLoc);
                srcFs = wh.getFs(srcPath);
                destFs = wh.getFs(destPath);
                // check that src and dest are on the same file system
                if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
                    throw new InvalidOperationException("New table location " + destPath + " is on a different file system than the old location " + srcPath + ". This operation is not supported.");
                }
                try {
                    if (srcFs.exists(srcPath)) {
                        if (newPartLoc.compareTo(oldPartLoc) != 0 && destFs.exists(destPath)) {
                            throw new InvalidOperationException("New location for this table " + tbl.getDbName() + "." + tbl.getTableName() + " already exists : " + destPath);
                        }
                        // if destPath's parent path doesn't exist, we should mkdir it
                        Path destParentPath = destPath.getParent();
                        if (!wh.mkdirs(destParentPath)) {
                            throw new MetaException("Unable to create path " + destParentPath);
                        }
                        // rename the data directory
                        wh.renameDir(srcPath, destPath, true);
                        LOG.info("Partition directory rename from " + srcPath + " to " + destPath + " done.");
                        dataWasMoved = true;
                    }
                } catch (IOException e) {
                    LOG.error("Cannot rename partition directory from " + srcPath + " to " + destPath, e);
                    throw new InvalidOperationException("Unable to access src or dest location for partition " + tbl.getDbName() + "." + tbl.getTableName() + " " + new_part.getValues());
                } catch (MetaException me) {
                    LOG.error("Cannot rename partition directory from " + srcPath + " to " + destPath, me);
                    throw me;
                }
                new_part.getSd().setLocation(newPartLoc);
            }
        } else {
            new_part.getSd().setLocation(oldPart.getSd().getLocation());
        }
        if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) {
            MetaStoreUtils.updatePartitionStatsFast(new_part, tbl, wh, false, true, environmentContext, false);
        }
        String newPartName = Warehouse.makePartName(tbl.getPartitionKeys(), new_part.getValues());
        ColumnStatistics cs = updateOrGetPartitionColumnStats(msdb, dbname, name, oldPart.getValues(), oldPart.getSd().getCols(), tbl, new_part, null);
        msdb.alterPartition(dbname, name, part_vals, new_part);
        if (cs != null) {
            cs.getStatsDesc().setPartName(newPartName);
            try {
                msdb.updatePartitionColumnStatistics(cs, new_part.getValues());
            } catch (InvalidInputException iie) {
                throw new InvalidOperationException("Unable to update partition stats in table rename." + iie);
            } catch (NoSuchObjectException nsoe) {
            // It is ok, ignore
            }
        }
        if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
            MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ALTER_PARTITION, new AlterPartitionEvent(oldPart, new_part, tbl, false, true, handler), environmentContext);
        }
        success = msdb.commitTransaction();
    } finally {
        if (!success) {
            LOG.error("Failed to rename a partition. Rollback transaction");
            msdb.rollbackTransaction();
            if (dataWasMoved) {
                LOG.error("Revert the data move in renaming a partition.");
                try {
                    if (destFs.exists(destPath)) {
                        wh.renameDir(destPath, srcPath, false);
                    }
                } catch (MetaException me) {
                    LOG.error("Failed to restore partition data from " + destPath + " to " + srcPath + " in alter partition failure. Manual restore is needed.");
                } catch (IOException ioe) {
                    LOG.error("Failed to restore partition data from " + destPath + " to " + srcPath + " in alter partition failure. Manual restore is needed.");
                }
            }
        }
    }
    return oldPart;
}
Also used : Path(org.apache.hadoop.fs.Path) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Partition(org.apache.hadoop.hive.metastore.api.Partition) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) Table(org.apache.hadoop.hive.metastore.api.Table) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) AlterPartitionEvent(org.apache.hadoop.hive.metastore.events.AlterPartitionEvent) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Example 42 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class HiveAlterHandler method alterTableUpdateTableColumnStats.

@VisibleForTesting
void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable) throws MetaException, InvalidObjectException {
    String dbName = oldTable.getDbName().toLowerCase();
    String tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier(oldTable.getTableName());
    String newDbName = newTable.getDbName().toLowerCase();
    String newTableName = org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier(newTable.getTableName());
    try {
        List<FieldSchema> oldCols = oldTable.getSd().getCols();
        List<FieldSchema> newCols = newTable.getSd().getCols();
        List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
        ColumnStatistics colStats = null;
        boolean updateColumnStats = true;
        // Nothing to update if everything is the same
        if (newDbName.equals(dbName) && newTableName.equals(tableName) && MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols)) {
            updateColumnStats = false;
        }
        if (updateColumnStats) {
            List<String> oldColNames = new ArrayList<>(oldCols.size());
            for (FieldSchema oldCol : oldCols) {
                oldColNames.add(oldCol.getName());
            }
            // Collect column stats which need to be rewritten and remove old stats
            colStats = msdb.getTableColumnStatistics(dbName, tableName, oldColNames);
            if (colStats == null) {
                updateColumnStats = false;
            } else {
                List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
                if (statsObjs != null) {
                    List<String> deletedCols = new ArrayList<>();
                    for (ColumnStatisticsObj statsObj : statsObjs) {
                        boolean found = false;
                        for (FieldSchema newCol : newCols) {
                            if (statsObj.getColName().equalsIgnoreCase(newCol.getName()) && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
                                found = true;
                                break;
                            }
                        }
                        if (found) {
                            if (!newDbName.equals(dbName) || !newTableName.equals(tableName)) {
                                msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName());
                                newStatsObjs.add(statsObj);
                                deletedCols.add(statsObj.getColName());
                            }
                        } else {
                            msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName());
                            deletedCols.add(statsObj.getColName());
                        }
                    }
                    StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols);
                }
            }
        }
        // Change to new table and append stats for the new table
        msdb.alterTable(dbName, tableName, newTable);
        if (updateColumnStats && !newStatsObjs.isEmpty()) {
            ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
            statsDesc.setDbName(newDbName);
            statsDesc.setTableName(newTableName);
            colStats.setStatsObj(newStatsObjs);
            msdb.updateTableColumnStatistics(colStats);
        }
    } catch (NoSuchObjectException nsoe) {
        LOG.debug("Could not find db entry." + nsoe);
    } catch (InvalidInputException e) {
        // should not happen since the input were verified before passed in
        throw new InvalidObjectException("Invalid inputs to update table column stats: " + e);
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 43 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class ColStatsProcessor method persistColumnStats.

public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaException, IOException {
    // Construct a column statistics object from the result
    List<ColumnStatistics> colStats = constructColumnStatsFromPackedRows(tbl);
    // Note, this function is shared for both table and partition column stats.
    if (colStats.isEmpty()) {
        return 0;
    }
    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
    request.setNeedMerge(colStatDesc.isNeedMerge());
    db.setPartitionColumnStatistics(request);
    return 0;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) SetPartitionsStatsRequest(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest)

Example 44 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class ColumnStatsUpdateTask method constructColumnStatsFromInput.

private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
    String dbName = work.dbName();
    String tableName = work.getTableName();
    String partName = work.getPartName();
    String colName = work.getColName();
    String columnType = work.getColType();
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    // grammar prohibits more than 1 column so we are guaranteed to have only 1
    // element in this lists.
    statsObj.setColName(colName);
    statsObj.setColType(columnType);
    ColumnStatisticsData statsData = new ColumnStatisticsData();
    if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint") || columnType.equalsIgnoreCase("timestamp")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNullsIsSet(false);
        longStats.setNumDVsIsSet(false);
        longStats.setLowValueIsSet(false);
        longStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                longStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                longStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                longStats.setLowValue(Long.parseLong(value));
            } else if (fName.equals("highValue")) {
                longStats.setHighValue(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setLongStats(longStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNullsIsSet(false);
        doubleStats.setNumDVsIsSet(false);
        doubleStats.setLowValueIsSet(false);
        doubleStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                doubleStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                doubleStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                doubleStats.setLowValue(Double.parseDouble(value));
            } else if (fName.equals("highValue")) {
                doubleStats.setHighValue(Double.parseDouble(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDoubleStats(doubleStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char") || columnType.toLowerCase().startsWith("varchar")) {
        // char(x),varchar(x) types
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setMaxColLenIsSet(false);
        stringStats.setAvgColLenIsSet(false);
        stringStats.setNumNullsIsSet(false);
        stringStats.setNumDVsIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                stringStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                stringStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                stringStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                stringStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setStringStats(stringStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("boolean")) {
        BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
        booleanStats.setNumNullsIsSet(false);
        booleanStats.setNumTruesIsSet(false);
        booleanStats.setNumFalsesIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                booleanStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numTrues")) {
                booleanStats.setNumTrues(Long.parseLong(value));
            } else if (fName.equals("numFalses")) {
                booleanStats.setNumFalses(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBooleanStats(booleanStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNullsIsSet(false);
        binaryStats.setAvgColLenIsSet(false);
        binaryStats.setMaxColLenIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                binaryStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                binaryStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                binaryStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBinaryStats(binaryStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.toLowerCase().startsWith("decimal")) {
        // decimal(a,b) type
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNullsIsSet(false);
        decimalStats.setNumDVsIsSet(false);
        decimalStats.setLowValueIsSet(false);
        decimalStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                decimalStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                decimalStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                BigDecimal d = new BigDecimal(value);
                decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else if (fName.equals("highValue")) {
                BigDecimal d = new BigDecimal(value);
                decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDecimalStats(decimalStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                dateStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                dateStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                // Date high/low value is stored as long in stats DB, but allow users to set high/low
                // value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
                dateStats.setLowValue(readDateValue(value));
            } else if (fName.equals("highValue")) {
                dateStats.setHighValue(readDateValue(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDateStats(dateStats);
        statsObj.setStatsData(statsData);
    } else {
        throw new SemanticException("Unsupported type");
    }
    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, partName, partName == null);
    ColumnStatistics colStat = new ColumnStatistics();
    colStat.setStatsDesc(statsDesc);
    colStat.addToStatsObj(statsObj);
    return colStat;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) BigDecimal(java.math.BigDecimal) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Entry(java.util.Map.Entry) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) BigDecimal(java.math.BigDecimal) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) Map(java.util.Map) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 45 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class MetaStoreDirectSql method getTableStats.

/**
   * Retrieve the column statistics for the specified columns of the table. NULL
   * is returned if the columns are not provided.
   * @param dbName      the database name of the table
   * @param tableName   the table name
   * @param colNames    the list of the column names
   * @return            the column statistics for the specified columns
   * @throws MetaException
   */
public ColumnStatistics getTableStats(final String dbName, final String tableName, List<String> colNames) throws MetaException {
    if (colNames == null || colNames.isEmpty()) {
        return null;
    }
    final boolean doTrace = LOG.isDebugEnabled();
    final String queryText0 = "select " + STATS_COLLIST + " from \"TAB_COL_STATS\" " + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in (";
    Batchable<String, Object[]> b = new Batchable<String, Object[]>() {

        public List<Object[]> run(List<String> input) throws MetaException {
            String queryText = queryText0 + makeParams(input.size()) + ")";
            Object[] params = new Object[input.size() + 2];
            params[0] = dbName;
            params[1] = tableName;
            for (int i = 0; i < input.size(); ++i) {
                params[i + 2] = input.get(i);
            }
            long start = doTrace ? System.nanoTime() : 0;
            Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
            Object qResult = executeWithArray(query, params, queryText);
            timingTrace(doTrace, queryText0 + "...)", start, (doTrace ? System.nanoTime() : 0));
            if (qResult == null) {
                query.closeAll();
                return null;
            }
            addQueryAfterUse(query);
            return ensureList(qResult);
        }
    };
    List<Object[]> list = runBatched(colNames, b);
    if (list.isEmpty())
        return null;
    ColumnStatisticsDesc csd = new ColumnStatisticsDesc(true, dbName, tableName);
    ColumnStatistics result = makeColumnStats(list, csd, 0);
    b.closeAllQueries();
    return result;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) Query(javax.jdo.Query) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) List(java.util.List) ArrayList(java.util.ArrayList)

Aggregations

ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)90 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)75 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)67 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)62 ArrayList (java.util.ArrayList)61 Test (org.junit.Test)53 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)40 Table (org.apache.hadoop.hive.metastore.api.Table)38 Partition (org.apache.hadoop.hive.metastore.api.Partition)33 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)32 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)31 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 List (java.util.List)26 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)19 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)14 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)13 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)12 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)12 HashMap (java.util.HashMap)11 Database (org.apache.hadoop.hive.metastore.api.Database)9