Example 11 with InvalidInputException

use of org.apache.hadoop.hive.metastore.api.InvalidInputException in project hive by apache.

the class HiveAlterHandler method alterTableUpdateTableColumnStats.

@VisibleForTesting
public static List<ColumnStatistics> alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable, EnvironmentContext ec, String validWriteIds, Configuration conf, List<String> deletedCols) throws MetaException, InvalidObjectException {
    String catName = normalizeIdentifier(oldTable.isSetCatName() ? oldTable.getCatName() : getDefaultCatalog(conf));
    String dbName = oldTable.getDbName().toLowerCase();
    String tableName = normalizeIdentifier(oldTable.getTableName());
    String newDbName = newTable.getDbName().toLowerCase();
    String newTableName = normalizeIdentifier(newTable.getTableName());
    // if it's not called from the cached store, then update the table
    boolean doAlterTable = deletedCols == null;
    List<ColumnStatistics> newMultiColStats = new ArrayList<>();
    try {
        List<FieldSchema> oldCols = oldTable.getSd().getCols();
        List<FieldSchema> newCols = newTable.getSd().getCols();
        List<ColumnStatistics> multiColStats = null;
        boolean updateColumnStats = !newDbName.equals(dbName) || !newTableName.equals(tableName) || !MetaStoreServerUtils.columnsIncludedByNameType(oldCols, newCols);
        // Don't bother in the case of ACID conversion.
        updateColumnStats = updateColumnStats && (TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable));
        if (updateColumnStats) {
            List<String> oldColNames = new ArrayList<>(oldCols.size());
            for (FieldSchema oldCol : oldCols) {
                oldColNames.add(oldCol.getName());
            }
            // NOTE: this doesn't check stats being compliant, but the alterTable call below does.
            // The worst we can do is delete the stats.
            // Collect column stats which need to be rewritten and remove old stats.
            multiColStats = msdb.getTableColumnStatistics(catName, dbName, tableName, oldColNames);
            if (multiColStats.isEmpty()) {
                updateColumnStats = false;
            } else {
                for (ColumnStatistics colStats : multiColStats) {
                    List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
                    if (statsObjs != null) {
                        // for the out parameter, this value is initialized by the caller.
                        if (deletedCols == null) {
                            deletedCols = new ArrayList<>();
                        }
                        List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
                        for (ColumnStatisticsObj statsObj : statsObjs) {
                            boolean found = false;
                            for (FieldSchema newCol : newCols) {
                                if (statsObj.getColName().equalsIgnoreCase(newCol.getName()) && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
                                    found = true;
                                    break;
                                }
                            }
                            if (found) {
                                if (!newDbName.equals(dbName) || !newTableName.equals(tableName)) {
                                    if (doAlterTable) {
                                        msdb.deleteTableColumnStatistics(catName, dbName, tableName, statsObj.getColName(), colStats.getEngine());
                                    }
                                    newStatsObjs.add(statsObj);
                                    deletedCols.add(statsObj.getColName());
                                }
                            } else {
                                if (doAlterTable) {
                                    msdb.deleteTableColumnStatistics(catName, dbName, tableName, statsObj.getColName(), colStats.getEngine());
                                }
                                deletedCols.add(statsObj.getColName());
                            }
                        }
                        if (doAlterTable) {
                            StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols);
                            // Point the stats descriptor at the new db and table name
                            ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
                            statsDesc.setDbName(newDbName);
                            statsDesc.setTableName(newTableName);
                            colStats.setStatsObj(newStatsObjs);
                            newMultiColStats.add(colStats);
                        }
                    }
                }
            }
        }
        if (doAlterTable) {
            Deadline.checkTimeout();
            // Alter the table itself, then write the retained stats back under the new table
            msdb.alterTable(catName, dbName, tableName, newTable, validWriteIds);
            if (updateColumnStats) {
                for (ColumnStatistics colStats : newMultiColStats) {
                    msdb.updateTableColumnStatistics(colStats, validWriteIds, newTable.getWriteId());
                }
            }
        }
    } catch (NoSuchObjectException nsoe) {
        LOG.debug("Could not find db entry." + nsoe);
    } catch (InvalidInputException e) {
        // should not happen, since the inputs were verified before being passed in
        throw new InvalidObjectException("Invalid inputs to update table column stats: " + e);
    }
    return newMultiColStats;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
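
The heart of the method above is the name-and-type check that decides whether an existing statistics object survives the alter: stats are kept only for columns that still exist with the same name and type. Below is a minimal, self-contained sketch of that check; NamedTyped and survivesAlter are hypothetical stand-ins for the FieldSchema/ColumnStatisticsObj comparison, not metastore API.

// Minimal sketch of the survival check used above. NamedTyped is a
// hypothetical stand-in for FieldSchema/ColumnStatisticsObj; the real
// code compares statsObj.getColName()/getColType() against each
// FieldSchema in the new column list, case-insensitively.
import java.util.List;

final class StatsRetention {

    record NamedTyped(String name, String type) {
    }

    // A stats object is kept only if some new column has the same name
    // and the same type (both compared case-insensitively); otherwise
    // its statistics are deleted.
    static boolean survivesAlter(NamedTyped statsCol, List<NamedTyped> newCols) {
        for (NamedTyped newCol : newCols) {
            if (statsCol.name().equalsIgnoreCase(newCol.name())
                    && statsCol.type().equalsIgnoreCase(newCol.type())) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        List<NamedTyped> newCols = List.of(
                new NamedTyped("id", "bigint"),
                new NamedTyped("name", "string"));
        // kept: same name and type, case differences ignored
        System.out.println(survivesAlter(new NamedTyped("ID", "BIGINT"), newCols));
        // dropped: type changed from int to bigint, so the old stats are stale
        System.out.println(survivesAlter(new NamedTyped("id", "int"), newCols));
    }
}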

Example 12 with InvalidInputException

use of org.apache.hadoop.hive.metastore.api.InvalidInputException in project hive by apache.

the class HiveAlterHandler method updateOrGetPartitionColumnStats.

public static List<ColumnStatistics> updateOrGetPartitionColumnStats(RawStore msdb, String catName, String dbname, String tblname, List<String> partVals, List<FieldSchema> oldCols, Table table, Partition part, List<FieldSchema> newCols, List<String> deletedCols) throws MetaException, InvalidObjectException {
    List<ColumnStatistics> newPartsColStats = new ArrayList<>();
    boolean updateColumnStats = true;
    try {
        // if newCols is not specified, default to the partition's current columns.
        if (newCols == null) {
            newCols = part.getSd() == null ? new ArrayList<>() : part.getSd().getCols();
        }
        String oldPartName = Warehouse.makePartName(table.getPartitionKeys(), partVals);
        String newPartName = Warehouse.makePartName(table.getPartitionKeys(), part.getValues());
        boolean rename = !part.getDbName().equals(dbname) || !part.getTableName().equals(tblname) || !oldPartName.equals(newPartName);
        // no need to update column stats unless the alter renames the partition or changes existing columns
        if (!rename && MetaStoreServerUtils.columnsIncludedByNameType(oldCols, newCols)) {
            return newPartsColStats;
        }
        List<String> oldColNames = new ArrayList<>(oldCols.size());
        for (FieldSchema oldCol : oldCols) {
            oldColNames.add(oldCol.getName());
        }
        List<String> oldPartNames = Lists.newArrayList(oldPartName);
        // TODO: doesn't take txn stats into account. This method can only remove stats.
        List<List<ColumnStatistics>> multiPartsColStats = msdb.getPartitionColumnStatistics(catName, dbname, tblname, oldPartNames, oldColNames);
        for (List<ColumnStatistics> partsColStats : multiPartsColStats) {
            assert (partsColStats.size() <= 1);
            // for the out parameter, this value is initialized by the caller.
            if (deletedCols == null) {
                deletedCols = new ArrayList<>();
            } else {
                // if deletedCols is provided by the caller, the stats will be updated by the caller.
                updateColumnStats = false;
            }
            for (ColumnStatistics partColStats : partsColStats) {
                // at most one iteration here
                List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
                List<ColumnStatisticsObj> statsObjs = partColStats.getStatsObj();
                for (ColumnStatisticsObj statsObj : statsObjs) {
                    boolean found = false;
                    for (FieldSchema newCol : newCols) {
                        if (statsObj.getColName().equalsIgnoreCase(newCol.getName()) && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
                            found = true;
                            break;
                        }
                    }
                    Deadline.checkTimeout();
                    if (found) {
                        if (rename) {
                            if (updateColumnStats) {
                                msdb.deletePartitionColumnStatistics(catName, dbname, tblname, partColStats.getStatsDesc().getPartName(), partVals, statsObj.getColName(), partColStats.getEngine());
                            } else {
                                deletedCols.add(statsObj.getColName());
                            }
                            newStatsObjs.add(statsObj);
                        }
                    } else {
                        if (updateColumnStats) {
                            msdb.deletePartitionColumnStatistics(catName, dbname, tblname, partColStats.getStatsDesc().getPartName(), partVals, statsObj.getColName(), partColStats.getEngine());
                        }
                        deletedCols.add(statsObj.getColName());
                    }
                }
                if (updateColumnStats) {
                    StatsSetupConst.removeColumnStatsState(part.getParameters(), deletedCols);
                }
                if (!newStatsObjs.isEmpty()) {
                    partColStats.setStatsObj(newStatsObjs);
                    newPartsColStats.add(partColStats);
                }
            }
        }
    } catch (NoSuchObjectException nsoe) {
        // ignore; this exception is not actually thrown by getPartitionColumnStatistics
    } catch (InvalidInputException iie) {
        throw new InvalidObjectException("Invalid input to delete partition column stats." + iie);
    }
    return newPartsColStats;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException)
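
The rename detection above hinges on Warehouse.makePartName, which renders partition keys and values into the familiar key=value/key=value path form; two partitions differ exactly when their rendered names differ. The sketch below illustrates that convention under the simplifying assumption that no character escaping is needed; the real Hive implementation also escapes special path characters, and PartNameSketch is illustrative, not the actual Warehouse code.

// Simplified illustration of the key1=val1/key2=val2 partition-name
// convention that Warehouse.makePartName produces. This sketch skips
// the escaping of special path characters that the real method does.
import java.util.List;
import java.util.StringJoiner;

final class PartNameSketch {

    static String makePartName(List<String> partKeys, List<String> partVals) {
        if (partKeys.size() != partVals.size()) {
            throw new IllegalArgumentException("keys and values must align");
        }
        StringJoiner joiner = new StringJoiner("/");
        for (int i = 0; i < partKeys.size(); i++) {
            joiner.add(partKeys.get(i) + "=" + partVals.get(i));
        }
        return joiner.toString();
    }

    public static void main(String[] args) {
        // Comparing oldPartName to newPartName like this is how
        // updateOrGetPartitionColumnStats detects a partition rename.
        System.out.println(makePartName(List.of("ds", "hr"), List.of("2024-01-01", "12")));
        // prints: ds=2024-01-01/hr=12
    }
}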

Example 13 with InvalidInputException

use of org.apache.hadoop.hive.metastore.api.InvalidInputException in project hive by apache.

the class ObjectStore method computeNextExecutionTime.

private Integer computeNextExecutionTime(String schedule, ZonedDateTime time) throws InvalidInputException {
    CronType cronType = CronType.QUARTZ;
    CronDefinition cronDefinition = CronDefinitionBuilder.instanceDefinitionFor(cronType);
    CronParser parser = new CronParser(cronDefinition);
    // Compute the next execution time from the cron schedule
    try {
        ExecutionTime executionTime = ExecutionTime.forCron(parser.parse(schedule));
        Optional<ZonedDateTime> nextExecution = executionTime.nextExecution(time);
        if (!nextExecution.isPresent()) {
            // no valid next execution time.
            return null;
        }
        return (int) nextExecution.get().toEpochSecond();
    } catch (IllegalArgumentException iae) {
        String message = "Invalid " + cronType + " schedule expression: '" + schedule + "'";
        LOG.error(message, iae);
        throw new InvalidInputException(message);
    }
}
Also used : ExecutionTime(com.cronutils.model.time.ExecutionTime) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) ZonedDateTime(java.time.ZonedDateTime) CronDefinition(com.cronutils.model.definition.CronDefinition) CronType(com.cronutils.model.CronType) CronParser(com.cronutils.parser.CronParser)
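
The method above is built on the cron-utils library. Here is a short standalone sketch of the same parse-then-nextExecution flow; the "0 0 12 * * ?" expression (daily at noon) and the NextRun class are illustrative assumptions, not taken from Hive.

// Standalone sketch of the cron-utils calls used above: parse a Quartz
// cron expression and ask for the next execution after a given instant.
import com.cronutils.model.CronType;
import com.cronutils.model.definition.CronDefinition;
import com.cronutils.model.definition.CronDefinitionBuilder;
import com.cronutils.model.time.ExecutionTime;
import com.cronutils.parser.CronParser;
import java.time.ZonedDateTime;
import java.util.Optional;

final class NextRun {
    public static void main(String[] args) {
        CronDefinition quartz = CronDefinitionBuilder.instanceDefinitionFor(CronType.QUARTZ);
        CronParser parser = new CronParser(quartz);
        // "0 0 12 * * ?" = every day at 12:00 (an illustrative schedule)
        ExecutionTime execTime = ExecutionTime.forCron(parser.parse("0 0 12 * * ?"));
        Optional<ZonedDateTime> next = execTime.nextExecution(ZonedDateTime.now());
        // Empty when the schedule has no future firing; the metastore code
        // returns null in that case, and epoch seconds otherwise.
        next.ifPresent(t -> System.out.println("next run at epoch second " + t.toEpochSecond()));
        // An unparseable expression throws IllegalArgumentException, which
        // the metastore wraps in InvalidInputException.
    }
}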

Aggregations

InvalidInputException (org.apache.hadoop.hive.metastore.api.InvalidInputException): 13
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 11
InvalidObjectException (org.apache.hadoop.hive.metastore.api.InvalidObjectException): 9
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 7
ArrayList (java.util.ArrayList): 6
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 6
Partition (org.apache.hadoop.hive.metastore.api.Partition): 5
Table (org.apache.hadoop.hive.metastore.api.Table): 5
IOException (java.io.IOException): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 4
Path (org.apache.hadoop.fs.Path): 4
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 4
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 4
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 4
LinkedList (java.util.LinkedList): 3
List (java.util.List): 3
Query (javax.jdo.Query): 3
Database (org.apache.hadoop.hive.metastore.api.Database): 3
ScheduledQuery (org.apache.hadoop.hive.metastore.api.ScheduledQuery): 3
MScheduledQuery (org.apache.hadoop.hive.metastore.model.MScheduledQuery): 3