Example 81 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class HiveAlterHandler method updateOrGetPartitionColumnStats.

private ColumnStatistics updateOrGetPartitionColumnStats(RawStore msdb, String dbname, String tblname, List<String> partVals, List<FieldSchema> oldCols, Table table, Partition part, List<FieldSchema> newCols) throws MetaException, InvalidObjectException {
    ColumnStatistics newPartsColStats = null;
    try {
        // if newCols are not specified, fall back to the partition's current columns
        if (newCols == null) {
            newCols = part.getSd() == null ? new ArrayList<>() : part.getSd().getCols();
        }
        String oldPartName = Warehouse.makePartName(table.getPartitionKeys(), partVals);
        String newPartName = Warehouse.makePartName(table.getPartitionKeys(), part.getValues());
        boolean rename = !part.getDbName().equals(dbname) || !part.getTableName().equals(tblname) || !oldPartName.equals(newPartName);
        // no need to update column stats if the alter is neither a rename nor a change to existing columns
        if (!rename && MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols)) {
            return newPartsColStats;
        }
        List<String> oldColNames = new ArrayList<>(oldCols.size());
        for (FieldSchema oldCol : oldCols) {
            oldColNames.add(oldCol.getName());
        }
        List<String> oldPartNames = Lists.newArrayList(oldPartName);
        List<ColumnStatistics> partsColStats = msdb.getPartitionColumnStatistics(dbname, tblname, oldPartNames, oldColNames);
        assert (partsColStats.size() <= 1);
        for (ColumnStatistics partColStats : partsColStats) {
            // at most one iteration, since we queried a single partition name
            List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
            List<ColumnStatisticsObj> statsObjs = partColStats.getStatsObj();
            List<String> deletedCols = new ArrayList<>();
            for (ColumnStatisticsObj statsObj : statsObjs) {
                boolean found = false;
                for (FieldSchema newCol : newCols) {
                    if (statsObj.getColName().equalsIgnoreCase(newCol.getName()) && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
                        found = true;
                        break;
                    }
                }
                if (found) {
                    if (rename) {
                        // delete the stats under the old partition name; the retained objects are
                        // re-written under the new name by the caller (alterTable)
                        msdb.deletePartitionColumnStatistics(dbname, tblname, partColStats.getStatsDesc().getPartName(), partVals, statsObj.getColName());
                        newStatsObjs.add(statsObj);
                    }
                } else {
                    // the column was removed or its type changed, so its stats are stale; drop them
                    msdb.deletePartitionColumnStatistics(dbname, tblname, partColStats.getStatsDesc().getPartName(), partVals, statsObj.getColName());
                    deletedCols.add(statsObj.getColName());
                }
            }
            StatsSetupConst.removeColumnStatsState(part.getParameters(), deletedCols);
            if (!newStatsObjs.isEmpty()) {
                partColStats.setStatsObj(newStatsObjs);
                newPartsColStats = partColStats;
            }
        }
    } catch (NoSuchObjectException nsoe) {
    // ignore: getPartitionColumnStatistics does not actually throw NoSuchObjectException
    } catch (InvalidInputException iie) {
        throw new InvalidObjectException("Invalid input to delete partition column stats: " + iie);
    }
    return newPartsColStats;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException)
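
The heart of the method above is matching each existing stats object against the new column list by name and type. A minimal, self-contained sketch of just that matching step (the class and helper names StatsMatchSketch and retainMatchingStats are ours for illustration; the metastore API types are the real ones):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class StatsMatchSketch {

    // Keep only the stats objects whose column name and type still exist in newCols;
    // this mirrors the found/not-found split in updateOrGetPartitionColumnStats.
    static List<ColumnStatisticsObj> retainMatchingStats(List<ColumnStatisticsObj> statsObjs, List<FieldSchema> newCols) {
        List<ColumnStatisticsObj> retained = new ArrayList<>();
        for (ColumnStatisticsObj statsObj : statsObjs) {
            for (FieldSchema newCol : newCols) {
                if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
                        && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
                    retained.add(statsObj);
                    break;
                }
            }
        }
        return retained;
    }
}

Stats for columns not retained by this check are the ones the method deletes and records in deletedCols.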

Example 82 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class HiveAlterHandler method alterTable.

@Override
public void alterTable(RawStore msdb, Warehouse wh, String dbname, String name, Table newt, EnvironmentContext environmentContext, IHMSHandler handler) throws InvalidOperationException, MetaException {
    name = name.toLowerCase();
    dbname = dbname.toLowerCase();
    final boolean cascade = environmentContext != null && environmentContext.isSetProperties() && StatsSetupConst.TRUE.equals(environmentContext.getProperties().get(StatsSetupConst.CASCADE));
    if (newt == null) {
        throw new InvalidOperationException("New table is invalid: " + newt);
    }
    String newTblName = newt.getTableName().toLowerCase();
    String newDbName = newt.getDbName().toLowerCase();
    if (!MetaStoreUtils.validateName(newTblName, handler.getConf())) {
        throw new InvalidOperationException(newTblName + " is not a valid object name");
    }
    String validate = MetaStoreUtils.validateTblColumns(newt.getSd().getCols());
    if (validate != null) {
        throw new InvalidOperationException("Invalid column " + validate);
    }
    Path srcPath = null;
    FileSystem srcFs;
    Path destPath = null;
    FileSystem destFs = null;
    boolean success = false;
    boolean dataWasMoved = false;
    Table oldt;
    List<TransactionalMetaStoreEventListener> transactionalListeners = null;
    if (handler != null) {
        transactionalListeners = handler.getTransactionalListeners();
    }
    try {
        boolean rename = false;
        boolean isPartitionedTable = false;
        List<Partition> parts;
        // check if table with the new name already exists
        if (!newTblName.equals(name) || !newDbName.equals(dbname)) {
            if (msdb.getTable(newDbName, newTblName) != null) {
                throw new InvalidOperationException("new table " + newDbName + "." + newTblName + " already exists");
            }
            rename = true;
        }
        msdb.openTransaction();
        // get old table
        oldt = msdb.getTable(dbname, name);
        if (oldt == null) {
            throw new InvalidOperationException("table " + dbname + "." + name + " doesn't exist");
        }
        if (oldt.getPartitionKeysSize() != 0) {
            isPartitionedTable = true;
        }
        // column type compatibility checks should be done only for non-views
        if (MetastoreConf.getBoolVar(handler.getConf(), MetastoreConf.ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES) && !oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString())) {
            // Throws InvalidOperationException if the new column types are not
            // compatible with the current column types.
            checkColTypeChangeCompatible(oldt.getSd().getCols(), newt.getSd().getCols());
        }
        // check that partition keys have not changed, except for virtual views
        // however, allow the partition comments to change
        boolean partKeysPartiallyEqual = checkPartialPartKeysEqual(oldt.getPartitionKeys(), newt.getPartitionKeys());
        if (!oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString())) {
            if (!partKeysPartiallyEqual) {
                throw new InvalidOperationException("partition keys can not be changed.");
            }
        }
        // a rename needs to change the data location and move the data to the new location
        // corresponding to the new name if:
        // 1) the table is not a virtual view, and
        // 2) the table is not an external table, and
        // 3) the user didn't change the default location (or the new location is empty), and
        // 4) the table was not initially created with a specified location
        if (rename && !oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString()) && (oldt.getSd().getLocation().compareTo(newt.getSd().getLocation()) == 0 || StringUtils.isEmpty(newt.getSd().getLocation())) && !MetaStoreUtils.isExternalTable(oldt)) {
            Database olddb = msdb.getDatabase(dbname);
            // if a table was created in a user specified location using the DDL like
            // create table tbl ... location ...., it should be treated like an external table
            // in the table rename, its data location should not be changed. We can check
            // if the table directory was created directly under its database directory to tell
            // if it is such a table
            srcPath = new Path(oldt.getSd().getLocation());
            String oldtRelativePath = (new Path(olddb.getLocationUri()).toUri()).relativize(srcPath.toUri()).toString();
            boolean tableInSpecifiedLoc = !oldtRelativePath.equalsIgnoreCase(name) && !oldtRelativePath.equalsIgnoreCase(name + Path.SEPARATOR);
            if (!tableInSpecifiedLoc) {
                srcFs = wh.getFs(srcPath);
                // get new location
                Database db = msdb.getDatabase(newDbName);
                Path databasePath = constructRenamedPath(wh.getDatabasePath(db), srcPath);
                destPath = new Path(databasePath, newTblName);
                destFs = wh.getFs(destPath);
                newt.getSd().setLocation(destPath.toString());
                // check that src and dest are on the same file system
                if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
                    throw new InvalidOperationException("table new location " + destPath + " is on a different file system than the old location " + srcPath + ". This operation is not supported");
                }
                try {
                    if (destFs.exists(destPath)) {
                        throw new InvalidOperationException("New location for this table " + newDbName + "." + newTblName + " already exists : " + destPath);
                    }
                    // check that src exists and also checks permissions necessary, rename src to dest
                    if (srcFs.exists(srcPath) && wh.renameDir(srcPath, destPath, true)) {
                        dataWasMoved = true;
                    }
                } catch (IOException | MetaException e) {
                    LOG.error("Alter Table operation for " + dbname + "." + name + " failed.", e);
                    throw new InvalidOperationException("Alter Table operation for " + dbname + "." + name + " failed to move data due to: '" + getSimpleMessage(e) + "' See hive log file for details.");
                }
            }
            if (isPartitionedTable) {
                String oldTblLocPath = srcPath.toUri().getPath();
                String newTblLocPath = dataWasMoved ? destPath.toUri().getPath() : null;
                // update each partition's db name and table name and, if the data was
                // moved, also the location field in the partition
                parts = msdb.getPartitions(dbname, name, -1);
                Map<Partition, ColumnStatistics> columnStatsNeedUpdated = new HashMap<>();
                for (Partition part : parts) {
                    String oldPartLoc = part.getSd().getLocation();
                    if (dataWasMoved && oldPartLoc.contains(oldTblLocPath)) {
                        URI oldUri = new Path(oldPartLoc).toUri();
                        String newPath = oldUri.getPath().replace(oldTblLocPath, newTblLocPath);
                        Path newPartLocPath = new Path(oldUri.getScheme(), oldUri.getAuthority(), newPath);
                        part.getSd().setLocation(newPartLocPath.toString());
                    }
                    part.setDbName(newDbName);
                    part.setTableName(newTblName);
                    ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name, part.getValues(), part.getSd().getCols(), oldt, part, null);
                    if (colStats != null) {
                        columnStatsNeedUpdated.put(part, colStats);
                    }
                }
                msdb.alterTable(dbname, name, newt);
                // alterPartitions is only needed here to persist the new partition locations after the table rename
                if (dataWasMoved) {
                    int partsToProcess = parts.size();
                    int partitionBatchSize = MetastoreConf.getIntVar(handler.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
                    int batchStart = 0;
                    while (partsToProcess > 0) {
                        int batchEnd = Math.min(batchStart + partitionBatchSize, parts.size());
                        List<Partition> partBatch = parts.subList(batchStart, batchEnd);
                        int partBatchSize = partBatch.size();
                        partsToProcess -= partBatchSize;
                        batchStart += partBatchSize;
                        List<List<String>> partValues = new ArrayList<>(partBatchSize);
                        for (Partition part : partBatch) {
                            partValues.add(part.getValues());
                        }
                        msdb.alterPartitions(newDbName, newTblName, partValues, partBatch);
                    }
                }
                for (Entry<Partition, ColumnStatistics> partColStats : columnStatsNeedUpdated.entrySet()) {
                    ColumnStatistics newPartColStats = partColStats.getValue();
                    newPartColStats.getStatsDesc().setDbName(newDbName);
                    newPartColStats.getStatsDesc().setTableName(newTblName);
                    msdb.updatePartitionColumnStatistics(newPartColStats, partColStats.getKey().getValues());
                }
            } else {
                alterTableUpdateTableColumnStats(msdb, oldt, newt);
            }
        } else {
            // operations other than table rename
            if (MetaStoreUtils.requireCalStats(null, null, newt, environmentContext) && !isPartitionedTable) {
                Database db = msdb.getDatabase(newDbName);
                // Update table stats. For partitioned table, we update stats in alterPartition()
                MetaStoreUtils.updateTableStatsFast(db, newt, wh, false, true, environmentContext, false);
            }
            if (isPartitionedTable) {
                // Currently only column related changes can be cascaded in alter table
                if (!MetaStoreUtils.areSameColumns(oldt.getSd().getCols(), newt.getSd().getCols())) {
                    parts = msdb.getPartitions(dbname, name, -1);
                    for (Partition part : parts) {
                        Partition oldPart = new Partition(part);
                        List<FieldSchema> oldCols = part.getSd().getCols();
                        part.getSd().setCols(newt.getSd().getCols());
                        ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name, part.getValues(), oldCols, oldt, part, null);
                        assert (colStats == null);
                        if (cascade) {
                            msdb.alterPartition(dbname, name, part.getValues(), part);
                        } else {
                            // update changed properties (stats)
                            oldPart.setParameters(part.getParameters());
                            msdb.alterPartition(dbname, name, part.getValues(), oldPart);
                        }
                    }
                    msdb.alterTable(dbname, name, newt);
                } else {
                    LOG.warn("Alter table not cascaded to partitions.");
                    alterTableUpdateTableColumnStats(msdb, oldt, newt);
                }
            } else {
                alterTableUpdateTableColumnStats(msdb, oldt, newt);
            }
        }
        if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
            if (oldt.getDbName().equalsIgnoreCase(newt.getDbName())) {
                MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ALTER_TABLE, new AlterTableEvent(oldt, newt, false, true, handler), environmentContext);
            } else {
                MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.DROP_TABLE, new DropTableEvent(oldt, true, false, handler), environmentContext);
                MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.CREATE_TABLE, new CreateTableEvent(newt, true, handler), environmentContext);
                if (isPartitionedTable) {
                    parts = msdb.getPartitions(newt.getDbName(), newt.getTableName(), -1);
                    MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ADD_PARTITION, new AddPartitionEvent(newt, parts, true, handler), environmentContext);
                }
            }
        }
        // commit the changes
        success = msdb.commitTransaction();
    } catch (InvalidObjectException e) {
        LOG.debug("Failed to get object from Metastore ", e);
        throw new InvalidOperationException("Unable to change partition or table. Check metastore logs for detailed stack. " + e.getMessage());
    } catch (InvalidInputException e) {
        LOG.debug("Accessing Metastore failed due to invalid input ", e);
        throw new InvalidOperationException("Unable to change partition or table. Check metastore logs for detailed stack. " + e.getMessage());
    } catch (NoSuchObjectException e) {
        LOG.debug("Object not found in metastore ", e);
        throw new InvalidOperationException("Unable to change partition or table. Database " + dbname + " does not exist. Check metastore logs for detailed stack. " + e.getMessage());
    } finally {
        if (!success) {
            LOG.error("Failed to alter table " + dbname + "." + name);
            msdb.rollbackTransaction();
            if (dataWasMoved) {
                try {
                    if (destFs.exists(destPath)) {
                        if (!destFs.rename(destPath, srcPath)) {
                            LOG.error("Failed to restore data from " + destPath + " to " + srcPath + " in alter table failure. Manual restore is needed.");
                        }
                    }
                } catch (IOException e) {
                    LOG.error("Failed to restore data from " + destPath + " to " + srcPath + " in alter table failure. Manual restore is needed.");
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) URI(java.net.URI) AlterTableEvent(org.apache.hadoop.hive.metastore.events.AlterTableEvent) CreateTableEvent(org.apache.hadoop.hive.metastore.events.CreateTableEvent) DropTableEvent(org.apache.hadoop.hive.metastore.events.DropTableEvent) FileSystem(org.apache.hadoop.fs.FileSystem) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) Database(org.apache.hadoop.hive.metastore.api.Database) ArrayList(java.util.ArrayList) List(java.util.List) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Path(org.apache.hadoop.fs.Path) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Partition(org.apache.hadoop.hive.metastore.api.Partition) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) Table(org.apache.hadoop.hive.metastore.api.Table) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) AddPartitionEvent(org.apache.hadoop.hive.metastore.events.AddPartitionEvent)
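
The rename path above persists the updated partitions in bounded batches (the batchStart/partsToProcess loop) so a table with many partitions does not produce one huge metastore call. A generic sketch of the same batching pattern, detached from the metastore API (the class and method names are ours; the processBatch callback stands in for msdb.alterPartitions):

import java.util.List;
import java.util.function.Consumer;

public class BatchSketch {

    // Walk a list in fixed-size slices, mirroring the batch loop in alterTable.
    static <T> void inBatches(List<T> items, int batchSize, Consumer<List<T>> processBatch) {
        int batchStart = 0;
        while (batchStart < items.size()) {
            int batchEnd = Math.min(batchStart + batchSize, items.size());
            processBatch.accept(items.subList(batchStart, batchEnd));
            batchStart = batchEnd;
        }
    }

    public static void main(String[] args) {
        inBatches(List.of(1, 2, 3, 4, 5), 2, batch -> System.out.println(batch));
        // prints [1, 2], then [3, 4], then [5]
    }
}

In the real method the batch size comes from MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX, so the same knob bounds both retrieval and update traffic.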

Example 83 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class MetaStoreDirectSql method getPartitionStats.

public List<ColumnStatistics> getPartitionStats(final String dbName, final String tableName, final List<String> partNames, List<String> colNames, boolean enableBitVector) throws MetaException {
    if (colNames.isEmpty() || partNames.isEmpty()) {
        return Collections.emptyList();
    }
    final boolean doTrace = LOG.isDebugEnabled();
    final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from " + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\"" + "  in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
    Batchable<String, Object[]> b = new Batchable<String, Object[]>() {

        @Override
        public List<Object[]> run(final List<String> inputColNames) throws MetaException {
            Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {

                @Override
                public List<Object[]> run(List<String> inputPartNames) throws MetaException {
                    String queryText = String.format(queryText0, makeParams(inputColNames.size()), makeParams(inputPartNames.size()));
                    long start = doTrace ? System.nanoTime() : 0;
                    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
                    Object qResult = executeWithArray(query, prepareParams(dbName, tableName, inputPartNames, inputColNames), queryText);
                    timingTrace(doTrace, queryText0, start, (doTrace ? System.nanoTime() : 0));
                    if (qResult == null) {
                        query.closeAll();
                        return Collections.emptyList();
                    }
                    addQueryAfterUse(query);
                    return ensureList(qResult);
                }
            };
            try {
                return runBatched(partNames, b2);
            } finally {
                addQueryAfterUse(b2);
            }
        }
    };
    List<Object[]> list = runBatched(colNames, b);
    List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(Math.min(list.size(), partNames.size()));
    String lastPartName = null;
    int from = 0;
    for (int i = 0; i <= list.size(); ++i) {
        boolean isLast = i == list.size();
        String partName = isLast ? null : (String) list.get(i)[0];
        if (!isLast && partName.equals(lastPartName)) {
            continue;
        } else if (from != i) {
            ColumnStatisticsDesc csd = new ColumnStatisticsDesc(false, dbName, tableName);
            csd.setPartName(lastPartName);
            result.add(makeColumnStats(list.subList(from, i), csd, 1));
        }
        lastPartName = partName;
        from = i;
        Deadline.checkTimeout();
    }
    b.closeAllQueries();
    return result;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) Query(javax.jdo.Query) ArrayList(java.util.ArrayList) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList)
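
getPartitionStats relies on the ORDER BY "PARTITION_NAME" in the query: the from/i loop at the end detects runs of equal partition names and emits one ColumnStatistics per run, using i == list.size() as an end sentinel so the final run is flushed. A stripped-down sketch of that run-detection idiom over plain strings (assuming the input is already sorted, as the query guarantees; names are ours):

import java.util.List;

public class RunGroupSketch {

    // Emit one line per run of equal keys in a sorted list, using the same
    // sentinel-at-end trick (i == list.size()) as getPartitionStats.
    static void printRuns(List<String> sorted) {
        String lastKey = null;
        int from = 0;
        for (int i = 0; i <= sorted.size(); ++i) {
            boolean isLast = i == sorted.size();
            String key = isLast ? null : sorted.get(i);
            if (!isLast && key.equals(lastKey)) {
                continue;
            } else if (from != i) {
                System.out.println(lastKey + " -> rows [" + from + ", " + i + ")");
            }
            lastKey = key;
            from = i;
        }
    }

    public static void main(String[] args) {
        // prints p=1 -> rows [0, 2), p=2 -> rows [2, 3), p=3 -> rows [3, 5)
        printRuns(List.of("p=1", "p=1", "p=2", "p=3", "p=3"));
    }
}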

Example 84 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class CachedStore method getTableColumnStatistics.

@Override
public ColumnStatistics getTableColumnStatistics(String dbName, String tblName, List<String> colNames) throws MetaException, NoSuchObjectException {
    dbName = StringUtils.normalizeIdentifier(dbName);
    tblName = StringUtils.normalizeIdentifier(tblName);
    if (!shouldCacheTable(dbName, tblName)) {
        return rawStore.getTableColumnStatistics(dbName, tblName, colNames);
    }
    Table table = sharedCache.getTableFromCache(dbName, tblName);
    if (table == null) {
        // The table is not yet loaded in cache
        return rawStore.getTableColumnStatistics(dbName, tblName, colNames);
    }
    ColumnStatisticsDesc csd = new ColumnStatisticsDesc(true, dbName, tblName);
    List<ColumnStatisticsObj> colStatObjs = sharedCache.getTableColStatsFromCache(dbName, tblName, colNames);
    return new ColumnStatistics(csd, colStatObjs);
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Table(org.apache.hadoop.hive.metastore.api.Table) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)
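
For context, the ColumnStatistics assembled here is an ordinary Thrift object that can also be built by hand. A hedged sketch constructing table-level stats for a single long column (the metastore API classes and constructors are the real ones; the database, table, column names and numbers are made up):

import java.util.Collections;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class BuildStatsSketch {

    static ColumnStatistics longColumnStats() {
        // required fields: numNulls = 0, numDVs (distinct values) = 42
        LongColumnStatsData longData = new LongColumnStatsData(0, 42);
        longData.setLowValue(1);
        longData.setHighValue(1000);
        // ColumnStatisticsData is a Thrift union; longStats(...) selects the long variant
        ColumnStatisticsObj obj =
                new ColumnStatisticsObj("id", "bigint", ColumnStatisticsData.longStats(longData));
        // true = table-level stats (not partition-level), as in getTableColumnStatistics above
        ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, "default", "sample_table");
        return new ColumnStatistics(desc, Collections.singletonList(obj));
    }
}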

Example 85 with ColumnStatistics

use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.

the class CachedStore method prewarm.

/**
 * Initializes the caches in SharedCache by getting the objects from the Metastore DB via
 * ObjectStore and populating the respective caches.
 *
 * @param rawStore the backing store to read databases, tables and partitions from
 */
@VisibleForTesting
static void prewarm(RawStore rawStore) {
    if (isCachePrewarmed.get()) {
        return;
    }
    long startTime = System.nanoTime();
    LOG.info("Prewarming CachedStore");
    while (!isCachePrewarmed.get()) {
        // Prevents throwing exceptions in our raw store calls since we're not using RawStoreProxy
        Deadline.registerIfNot(1000000);
        List<String> dbNames;
        try {
            dbNames = rawStore.getAllDatabases();
        } catch (MetaException e) {
            // Try again
            continue;
        }
        LOG.info("Number of databases to prewarm: {}", dbNames.size());
        List<Database> databases = new ArrayList<>(dbNames.size());
        for (String dbName : dbNames) {
            try {
                databases.add(rawStore.getDatabase(dbName));
            } catch (NoSuchObjectException e) {
                // Continue with next database
                continue;
            }
        }
        sharedCache.populateDatabasesInCache(databases);
        LOG.debug("Databases cache is now prewarmed. Now adding tables, partitions and statistics to the cache");
        int numberOfDatabasesCachedSoFar = 0;
        for (String dbName : dbNames) {
            dbName = StringUtils.normalizeIdentifier(dbName);
            List<String> tblNames;
            try {
                tblNames = rawStore.getAllTables(dbName);
            } catch (MetaException e) {
                // Continue with next database
                continue;
            }
            int numberOfTablesCachedSoFar = 0;
            for (String tblName : tblNames) {
                tblName = StringUtils.normalizeIdentifier(tblName);
                if (!shouldCacheTable(dbName, tblName)) {
                    continue;
                }
                Table table;
                try {
                    table = rawStore.getTable(dbName, tblName);
                } catch (MetaException e) {
                    // the table may have been dropped concurrently; in that case, continue with the next table
                    continue;
                }
                List<String> colNames = MetaStoreUtils.getColumnNamesForTable(table);
                try {
                    ColumnStatistics tableColStats = null;
                    List<Partition> partitions = null;
                    List<ColumnStatistics> partitionColStats = null;
                    AggrStats aggrStatsAllPartitions = null;
                    AggrStats aggrStatsAllButDefaultPartition = null;
                    if (table.isSetPartitionKeys()) {
                        Deadline.startTimer("getPartitions");
                        partitions = rawStore.getPartitions(dbName, tblName, Integer.MAX_VALUE);
                        Deadline.stopTimer();
                        List<String> partNames = new ArrayList<>(partitions.size());
                        for (Partition p : partitions) {
                            partNames.add(Warehouse.makePartName(table.getPartitionKeys(), p.getValues()));
                        }
                        if (!partNames.isEmpty()) {
                            // Get partition column stats for this table
                            Deadline.startTimer("getPartitionColumnStatistics");
                            partitionColStats = rawStore.getPartitionColumnStatistics(dbName, tblName, partNames, colNames);
                            Deadline.stopTimer();
                            // Get aggregate stats for all partitions of a table and for all but default
                            // partition
                            Deadline.startTimer("getAggrPartitionColumnStatistics");
                            aggrStatsAllPartitions = rawStore.get_aggr_stats_for(dbName, tblName, partNames, colNames);
                            Deadline.stopTimer();
                            // Remove default partition from partition names and get aggregate
                            // stats again
                            List<FieldSchema> partKeys = table.getPartitionKeys();
                            String defaultPartitionValue = MetastoreConf.getVar(rawStore.getConf(), ConfVars.DEFAULTPARTITIONNAME);
                            List<String> partCols = new ArrayList<>();
                            List<String> partVals = new ArrayList<>();
                            for (FieldSchema fs : partKeys) {
                                partCols.add(fs.getName());
                                partVals.add(defaultPartitionValue);
                            }
                            String defaultPartitionName = FileUtils.makePartName(partCols, partVals);
                            partNames.remove(defaultPartitionName);
                            Deadline.startTimer("getAggrPartitionColumnStatistics");
                            aggrStatsAllButDefaultPartition = rawStore.get_aggr_stats_for(dbName, tblName, partNames, colNames);
                            Deadline.stopTimer();
                        }
                    } else {
                        Deadline.startTimer("getTableColumnStatistics");
                        tableColStats = rawStore.getTableColumnStatistics(dbName, tblName, colNames);
                        Deadline.stopTimer();
                    }
                    sharedCache.populateTableInCache(table, tableColStats, partitions, partitionColStats, aggrStatsAllPartitions, aggrStatsAllButDefaultPartition);
                } catch (MetaException | NoSuchObjectException e) {
                    // Continue with next table
                    continue;
                }
                LOG.debug("Processed database: {}'s table: {}. Cached {} / {}  tables so far.", dbName, tblName, ++numberOfTablesCachedSoFar, tblNames.size());
            }
            LOG.debug("Processed database: {}. Cached {} / {} databases so far.", dbName, ++numberOfDatabasesCachedSoFar, dbNames.size());
        }
        isCachePrewarmed.set(true);
    }
    LOG.info("CachedStore initialized");
    long endTime = System.nanoTime();
    LOG.info("Time taken in prewarming = " + (endTime - startTime) / 1000000 + "ms");
    sharedCache.completeTableCachePrewarm();
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) Database(org.apache.hadoop.hive.metastore.api.Database) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
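
prewarm's control flow is a retry-until-done outer loop that tolerates per-item failures by skipping them, so one bad database or table never aborts the warm-up. A stripped-down sketch of that pattern (the fetch/load method names are placeholders of ours, not metastore API):

import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

public class PrewarmSketch {

    private final AtomicBoolean prewarmed = new AtomicBoolean(false);

    // Keep retrying the full pass until it completes; within a pass, a failure
    // on one item only skips that item, as in CachedStore.prewarm.
    void prewarm() {
        while (!prewarmed.get()) {
            List<String> names;
            try {
                names = fetchAllNames();     // placeholder for rawStore.getAllDatabases()
            } catch (Exception e) {
                continue;                    // transient failure: retry the whole pass
            }
            for (String name : names) {
                try {
                    loadIntoCache(name);     // placeholder for the per-table caching work
                } catch (Exception e) {
                    // skip this item, keep warming the rest
                }
            }
            prewarmed.set(true);
        }
    }

    List<String> fetchAllNames() { return List.of("db1", "db2"); }
    void loadIntoCache(String name) { /* no-op in this sketch */ }
}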

Aggregations

ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics) 90
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) 75
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) 67
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) 62
ArrayList (java.util.ArrayList) 61
Test (org.junit.Test) 53
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 40
Table (org.apache.hadoop.hive.metastore.api.Table) 38
Partition (org.apache.hadoop.hive.metastore.api.Partition) 33
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 32
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo) 31
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats) 29
List (java.util.List) 26
LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData) 19
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 14
BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) 13
DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) 12
StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData) 12
HashMap (java.util.HashMap) 11
Database (org.apache.hadoop.hive.metastore.api.Database) 9