
Example 1 with HoodieHiveSyncException

Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.

Class HiveSchemaUtil, method getSchemaDifference:

public static SchemaDifference getSchemaDifference(MessageType storageSchema, Map<String, String> tableSchema, List<String> partitionKeys, boolean supportTimestamp) {
    Map<String, String> newTableSchema;
    try {
        newTableSchema = convertParquetSchemaToHiveSchema(storageSchema, supportTimestamp);
    } catch (IOException e) {
        throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema", e);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
    }
    SchemaDifference.Builder schemaDiffBuilder = SchemaDifference.newBuilder(storageSchema, tableSchema);
    Set<String> tableColumns = new HashSet<>();
    for (Map.Entry<String, String> field : tableSchema.entrySet()) {
        String fieldName = field.getKey().toLowerCase();
        String tickSurroundedFieldName = tickSurround(fieldName);
        if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys.contains(fieldName)) {
            schemaDiffBuilder.deleteTableColumn(fieldName);
        } else {
            // check type
            String tableColumnType = field.getValue();
            if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) {
                if (partitionKeys.contains(fieldName)) {
                    // Partition key does not have to be part of the storage schema
                    continue;
                }
                // We will log this and continue. Hive schema is a superset of all parquet schemas
                LOG.warn("Ignoring table column " + fieldName + " as its not present in the parquet schema");
                continue;
            }
            tableColumnType = tableColumnType.replaceAll("\\s+", "");
            String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName);
            expectedType = expectedType.replaceAll("\\s+", "");
            expectedType = expectedType.replaceAll("`", "");
            if (!tableColumnType.equalsIgnoreCase(expectedType)) {
                // rules
                if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) {
                    throw new HoodieHiveSyncException("Could not convert field Type from " + tableColumnType + " to " + expectedType + " for field " + fieldName);
                }
                schemaDiffBuilder.updateTableColumn(fieldName, getExpectedType(newTableSchema, tickSurroundedFieldName));
            }
        }
        tableColumns.add(tickSurroundedFieldName);
    }
    for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
        if (!tableColumns.contains(entry.getKey().toLowerCase())) {
            schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue());
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Difference between schemas: " + schemaDiffBuilder.build().toString());
    }
    return schemaDiffBuilder.build();
}
Also used : IOException(java.io.IOException) HoodieHiveSyncException(org.apache.hudi.hive.HoodieHiveSyncException) SchemaDifference(org.apache.hudi.hive.SchemaDifference) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HashSet(java.util.HashSet)
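
The try/catch at the top of the method shows the pattern this page is about: a checked IOException from schema conversion is rethrown as the unchecked HoodieHiveSyncException, so callers are not forced to declare it. A minimal, self-contained sketch of the same wrap-and-rethrow pattern; SchemaConversionSketch and its convert() method are hypothetical stand-ins for convertParquetSchemaToHiveSchema(), not Hudi code:

import java.io.IOException;

import org.apache.hudi.hive.HoodieHiveSyncException;

public class SchemaConversionSketch {

    // Hypothetical stand-in for convertParquetSchemaToHiveSchema(); it throws the
    // same checked IOException that the real converter declares.
    static String convert(String parquetType) throws IOException {
        if (parquetType == null) {
            throw new IOException("no type to convert");
        }
        return parquetType.toLowerCase();
    }

    static String convertOrFail(String parquetType) {
        try {
            return convert(parquetType);
        } catch (IOException e) {
            // Wrap the checked exception in the unchecked HoodieHiveSyncException,
            // keeping the original cause so stack traces stay useful.
            throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema", e);
        }
    }
}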

Example 2 with HoodieHiveSyncException

Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.

Class HoodieDLAClient, method getLastCommitTimeSynced:

@Override
public Option<String> getLastCommitTimeSynced(String tableName) {
    String sql = constructShowCreateTableSQL(tableName);
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = connection.createStatement();
        rs = stmt.executeQuery(sql);
        if (rs.next()) {
            String table = rs.getString(2);
            Map<String, String> attr = new HashMap<>();
            int index = table.indexOf(TBL_PROPERTIES_STR);
            if (index != -1) {
                // Strip the parentheses and single quotes around the TBLPROPERTIES
                // clause, leaving comma-separated key = value pairs.
                String sub = table.substring(index + TBL_PROPERTIES_STR.length());
                sub = sub.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("'", "");
                for (String pair : sub.split(",")) {
                    String[] kv = pair.split("=");
                    attr.put(kv[0].trim(), kv[1].trim());
                }
            }
            return Option.ofNullable(attr.getOrDefault(HOODIE_LAST_COMMIT_TIME_SYNC, null));
        }
        return Option.empty();
    } catch (Exception e) {
        throw new HoodieHiveSyncException("Failed to get the last commit time synced from the table", e);
    } finally {
        closeQuietly(rs, stmt);
    }
}
Also used : HashMap(java.util.HashMap) Statement(java.sql.Statement) ResultSet(java.sql.ResultSet) HoodieHiveSyncException(org.apache.hudi.hive.HoodieHiveSyncException) HoodieException(org.apache.hudi.exception.HoodieException) SQLException(java.sql.SQLException) IOException(java.io.IOException)
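
The parsing above recovers the table properties from the SHOW CREATE TABLE output by locating the TBLPROPERTIES clause, stripping parentheses and quotes, and splitting on commas and equals signs. A self-contained sketch of that string handling, runnable as-is; the sample DDL fragment is invented for illustration:

import java.util.HashMap;
import java.util.Map;

public class TblPropertiesParseSketch {

    static final String TBL_PROPERTIES_STR = "TBLPROPERTIES";

    public static void main(String[] args) {
        // Invented tail of a SHOW CREATE TABLE result, for illustration only.
        String table = "... TBLPROPERTIES ('hoodie.last.commit.time.sync' = '20220101000000', 'type' = 'cow')";
        Map<String, String> attr = new HashMap<>();
        int index = table.indexOf(TBL_PROPERTIES_STR);
        if (index != -1) {
            // Drop the clause keyword, then strip parentheses and single quotes.
            String sub = table.substring(index + TBL_PROPERTIES_STR.length());
            sub = sub.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("'", "");
            for (String pair : sub.split(",")) {
                String[] kv = pair.split("=");
                attr.put(kv[0].trim(), kv[1].trim());
            }
        }
        // Prints both properties (HashMap iteration order is unspecified).
        System.out.println(attr);
    }
}

On Java 7+, the Statement and ResultSet cleanup handled by closeQuietly in the finally block could equally use try-with-resources, since both are AutoCloseable.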

Example 3 with HoodieHiveSyncException

Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.

Class HMSDDLExecutor, method updatePartitionsToTable:

@Override
public void updatePartitionsToTable(String tableName, List<String> changedPartitions) {
    if (changedPartitions.isEmpty()) {
        LOG.info("No partitions to change for " + tableName);
        return;
    }
    LOG.info("Changing partitions " + changedPartitions.size() + " on " + tableName);
    try {
        StorageDescriptor sd = client.getTable(syncConfig.databaseName, tableName).getSd();
        List<Partition> partitionList = changedPartitions.stream().map(partition -> {
            Path partitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition);
            String partitionScheme = partitionPath.toUri().getScheme();
            // Only HDFS locations need resolving to a fully qualified DFS path.
            String fullPartitionPath = StorageSchemes.HDFS.getScheme().equals(partitionScheme) ? FSUtils.getDFSFullPartitionPath(fs, partitionPath) : partitionPath.toString();
            List<String> partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition);
            // Copy the table-level storage descriptor for each partition; mutating
            // the shared instance would leave every partition with the last location set.
            StorageDescriptor partitionSd = new StorageDescriptor(sd);
            partitionSd.setLocation(fullPartitionPath);
            return new Partition(partitionValues, syncConfig.databaseName, tableName, 0, 0, partitionSd, null);
        }).collect(Collectors.toList());
        client.alter_partitions(syncConfig.databaseName, tableName, partitionList, null);
    } catch (TException e) {
        LOG.error(syncConfig.databaseName + "." + tableName + " update partition failed", e);
        throw new HoodieHiveSyncException(syncConfig.databaseName + "." + tableName + " update partition failed", e);
    }
}
Also used : ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) FileSystem(org.apache.hadoop.fs.FileSystem) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) Partition(org.apache.hadoop.hive.metastore.api.Partition) LinkedHashMap(java.util.LinkedHashMap) HiveSchemaUtil(org.apache.hudi.hive.util.HiveSchemaUtil) Logger(org.apache.log4j.Logger) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HivePartitionUtil(org.apache.hudi.hive.util.HivePartitionUtil) PartitionValueExtractor(org.apache.hudi.hive.PartitionValueExtractor) Hive(org.apache.hadoop.hive.ql.metadata.Hive) HiveSyncConfig(org.apache.hudi.hive.HiveSyncConfig) HiveConf(org.apache.hadoop.hive.conf.HiveConf) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) StorageSchemes(org.apache.hudi.common.fs.StorageSchemes) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) MessageType(org.apache.parquet.schema.MessageType) List(java.util.List) HoodieHiveSyncException(org.apache.hudi.hive.HoodieHiveSyncException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) TableType(org.apache.hadoop.hive.metastore.TableType) LogManager(org.apache.log4j.LogManager) Database(org.apache.hadoop.hive.metastore.api.Database) FSUtils(org.apache.hudi.common.fs.FSUtils) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
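
One subtlety in this method: client.getTable(...).getSd() returns a single mutable StorageDescriptor, and the Thrift-generated Partition constructor stores the reference it is given rather than copying it. Reusing the shared descriptor for every partition would therefore leave all of them pointing at the last location set, which is why the loop above copies it per partition. A stripped-down illustration of the aliasing, assuming only the Thrift-generated no-arg and copy constructors of StorageDescriptor; the paths are invented:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

public class SdAliasingSketch {

    public static void main(String[] args) {
        StorageDescriptor shared = new StorageDescriptor();
        List<StorageDescriptor> perPartition = new ArrayList<>();
        for (String location : new String[] { "/tbl/dt=2022-01-01", "/tbl/dt=2022-01-02" }) {
            // Without this copy, both list entries would share one descriptor and
            // both would report the last location set ("/tbl/dt=2022-01-02").
            StorageDescriptor copy = new StorageDescriptor(shared);
            copy.setLocation(location);
            perPartition.add(copy);
        }
        // Prints the two distinct locations.
        perPartition.forEach(sd -> System.out.println(sd.getLocation()));
    }
}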

Example 4 with HoodieHiveSyncException

Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.

Class HMSDDLExecutor, method getTableSchema:

@Override
public Map<String, String> getTableSchema(String tableName) {
    try {
        // HiveMetastoreClient returns partition keys separate from Columns, hence get both and merge to
        // get the Schema of the table.
        final long start = System.currentTimeMillis();
        Table table = this.client.getTable(syncConfig.databaseName, tableName);
        Map<String, String> partitionKeysMap = table.getPartitionKeys().stream().collect(Collectors.toMap(FieldSchema::getName, f -> f.getType().toUpperCase()));
        Map<String, String> columnsMap = table.getSd().getCols().stream().collect(Collectors.toMap(FieldSchema::getName, f -> f.getType().toUpperCase()));
        Map<String, String> schema = new HashMap<>();
        schema.putAll(columnsMap);
        schema.putAll(partitionKeysMap);
        final long end = System.currentTimeMillis();
        LOG.info(String.format("Time taken to getTableSchema: %s ms", (end - start)));
        return schema;
    } catch (Exception e) {
        throw new HoodieHiveSyncException("Failed to get table schema for : " + tableName, e);
    }
}
Also used : ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) FileSystem(org.apache.hadoop.fs.FileSystem) HashMap(java.util.HashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) Partition(org.apache.hadoop.hive.metastore.api.Partition) LinkedHashMap(java.util.LinkedHashMap) HiveSchemaUtil(org.apache.hudi.hive.util.HiveSchemaUtil) Logger(org.apache.log4j.Logger) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HivePartitionUtil(org.apache.hudi.hive.util.HivePartitionUtil) PartitionValueExtractor(org.apache.hudi.hive.PartitionValueExtractor) Hive(org.apache.hadoop.hive.ql.metadata.Hive) HiveSyncConfig(org.apache.hudi.hive.HiveSyncConfig) HiveConf(org.apache.hadoop.hive.conf.HiveConf) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) StorageSchemes(org.apache.hudi.common.fs.StorageSchemes) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) MessageType(org.apache.parquet.schema.MessageType) List(java.util.List) HoodieHiveSyncException(org.apache.hudi.hive.HoodieHiveSyncException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) TableType(org.apache.hadoop.hive.metastore.TableType) LogManager(org.apache.log4j.LogManager) Database(org.apache.hadoop.hive.metastore.api.Database) FSUtils(org.apache.hudi.common.fs.FSUtils) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
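
Note that the merge order above matters: columnsMap is inserted first and partitionKeysMap second, so when the same field name appears in both, the partition key's type wins. A small pure-JDK demonstration of that Map.putAll precedence; the field names are invented:

import java.util.HashMap;
import java.util.Map;

public class MergeOrderSketch {

    public static void main(String[] args) {
        Map<String, String> columnsMap = new HashMap<>();
        columnsMap.put("dt", "STRING");    // invented field, also a partition key below
        columnsMap.put("price", "DOUBLE");

        Map<String, String> partitionKeysMap = new HashMap<>();
        partitionKeysMap.put("dt", "DATE");

        Map<String, String> schema = new HashMap<>();
        schema.putAll(columnsMap);
        schema.putAll(partitionKeysMap);   // the later putAll wins on key collisions

        System.out.println(schema.get("dt")); // DATE: the partition key type took precedence
    }
}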

Example 5 with HoodieHiveSyncException

Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.

Class HMSDDLExecutor, method updateTableDefinition:

@Override
public void updateTableDefinition(String tableName, MessageType newSchema) {
    try {
        boolean cascade = !syncConfig.partitionFields.isEmpty();
        List<FieldSchema> fieldSchema = HiveSchemaUtil.convertParquetSchemaToHiveFieldSchema(newSchema, syncConfig);
        Table table = client.getTable(syncConfig.databaseName, tableName);
        StorageDescriptor sd = table.getSd();
        sd.setCols(fieldSchema);
        table.setSd(sd);
        EnvironmentContext environmentContext = new EnvironmentContext();
        if (cascade) {
            LOG.info("partition table,need cascade");
            environmentContext.putToProperties(StatsSetupConst.CASCADE, StatsSetupConst.TRUE);
        }
        client.alter_table_with_environmentContext(syncConfig.databaseName, tableName, table, environmentContext);
    } catch (Exception e) {
        LOG.error("Failed to update table for " + tableName, e);
        throw new HoodieHiveSyncException("Failed to update table for " + tableName, e);
    }
}
Also used : EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) HoodieHiveSyncException(org.apache.hudi.hive.HoodieHiveSyncException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) TException(org.apache.thrift.TException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
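
The EnvironmentContext is the piece that makes the schema change safe for partitioned tables: setting CASCADE asks the metastore to propagate the ALTER TABLE column changes to the schemas of existing partitions, not just the table-level schema. A minimal sketch of just that flag handling, using only the calls already shown above:

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;

public class CascadeContextSketch {

    // Builds the context passed to alter_table_with_environmentContext: for a
    // partitioned table, CASCADE=true propagates the column changes to the
    // schemas of existing partitions as well.
    static EnvironmentContext cascadeContext(boolean partitioned) {
        EnvironmentContext ctx = new EnvironmentContext();
        if (partitioned) {
            ctx.putToProperties(StatsSetupConst.CASCADE, StatsSetupConst.TRUE);
        }
        return ctx;
    }
}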

Aggregations

HoodieHiveSyncException (org.apache.hudi.hive.HoodieHiveSyncException): 18
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 10
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 10
TException (org.apache.thrift.TException): 8
IOException (java.io.IOException): 7
HashMap (java.util.HashMap): 7
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 7
Table (org.apache.hadoop.hive.metastore.api.Table): 7
Map (java.util.Map): 6
EnvironmentContext (org.apache.hadoop.hive.metastore.api.EnvironmentContext): 6
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 6
LinkedHashMap (java.util.LinkedHashMap): 5
List (java.util.List): 5
Collectors (java.util.stream.Collectors): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 5
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 5
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 5
Database (org.apache.hadoop.hive.metastore.api.Database): 5
Hive (org.apache.hadoop.hive.ql.metadata.Hive): 5
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation): 5