Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.
The class HiveSchemaUtil, method getSchemaDifference.
public static SchemaDifference getSchemaDifference(MessageType storageSchema, Map<String, String> tableSchema, List<String> partitionKeys, boolean supportTimestamp) {
  Map<String, String> newTableSchema;
  try {
    newTableSchema = convertParquetSchemaToHiveSchema(storageSchema, supportTimestamp);
  } catch (IOException e) {
    throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema", e);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
  }
  SchemaDifference.Builder schemaDiffBuilder = SchemaDifference.newBuilder(storageSchema, tableSchema);
  Set<String> tableColumns = new HashSet<>();
  for (Map.Entry<String, String> field : tableSchema.entrySet()) {
    String fieldName = field.getKey().toLowerCase();
    String tickSurroundedFieldName = tickSurround(fieldName);
    if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys.contains(fieldName)) {
      schemaDiffBuilder.deleteTableColumn(fieldName);
    } else {
      // check type
      String tableColumnType = field.getValue();
      if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) {
        if (partitionKeys.contains(fieldName)) {
          // Partition key does not have to be part of the storage schema
          continue;
        }
        // We will log this and continue. Hive schema is a superset of all parquet schemas
        LOG.warn("Ignoring table column " + fieldName + " as its not present in the parquet schema");
        continue;
      }
      tableColumnType = tableColumnType.replaceAll("\\s+", "");
      String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName);
      expectedType = expectedType.replaceAll("\\s+", "");
      expectedType = expectedType.replaceAll("`", "");
      if (!tableColumnType.equalsIgnoreCase(expectedType)) {
        // rules
        if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) {
          throw new HoodieHiveSyncException("Could not convert field Type from " + tableColumnType + " to " + expectedType + " for field " + fieldName);
        }
        schemaDiffBuilder.updateTableColumn(fieldName, getExpectedType(newTableSchema, tickSurroundedFieldName));
      }
    }
    tableColumns.add(tickSurroundedFieldName);
  }
  for (Map.Entry<String, String> entry : newTableSchema.entrySet()) {
    if (!tableColumns.contains(entry.getKey().toLowerCase())) {
      schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue());
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Difference between schemas: " + schemaDiffBuilder.build().toString());
  }
  return schemaDiffBuilder.build();
}
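For context, a minimal caller-side sketch of how getSchemaDifference can be invoked and how the HoodieHiveSyncException surfaces. The Parquet schema string, table-schema map, partition key and class name below are illustrative placeholders, and the import paths assume the hudi-hive-sync module layout of Hudi 0.x.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hudi.hive.HoodieHiveSyncException;
import org.apache.hudi.hive.SchemaDifference;
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class SchemaDifferenceExample {
  public static void main(String[] args) {
    // Storage (Parquet) schema as written by Hudi; the fields are made up for illustration.
    MessageType storageSchema = MessageTypeParser.parseMessageType(
        "message hudi_record { required int64 id; optional binary name (UTF8); }");
    // Current Hive table schema: column name -> Hive type.
    Map<String, String> tableSchema = new HashMap<>();
    tableSchema.put("id", "bigint");
    tableSchema.put("name", "string");
    List<String> partitionKeys = Arrays.asList("datestr");
    try {
      SchemaDifference diff =
          HiveSchemaUtil.getSchemaDifference(storageSchema, tableSchema, partitionKeys, false);
      System.out.println("Schema difference: " + diff);
    } catch (HoodieHiveSyncException e) {
      // Thrown when the Parquet schema cannot be converted or a type change is not allowed.
      System.err.println("Incompatible schema evolution: " + e.getMessage());
    }
  }
}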
Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.
The class HoodieDLAClient, method getLastCommitTimeSynced.
@Override
public Option<String> getLastCommitTimeSynced(String tableName) {
  String sql = consutructShowCreateTableSQL(tableName);
  Statement stmt = null;
  ResultSet rs = null;
  try {
    stmt = connection.createStatement();
    rs = stmt.executeQuery(sql);
    if (rs.next()) {
      String table = rs.getString(2);
      Map<String, String> attr = new HashMap<>();
      int index = table.indexOf(TBL_PROPERTIES_STR);
      if (index != -1) {
        String sub = table.substring(index + TBL_PROPERTIES_STR.length());
        sub = sub.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("'", "");
        String[] str = sub.split(",");
        for (int i = 0; i < str.length; i++) {
          String key = str[i].split("=")[0].trim();
          String value = str[i].split("=")[1].trim();
          attr.put(key, value);
        }
      }
      return Option.ofNullable(attr.getOrDefault(HOODIE_LAST_COMMIT_TIME_SYNC, null));
    }
    return Option.empty();
  } catch (Exception e) {
    throw new HoodieHiveSyncException("Failed to get the last commit time synced from the table", e);
  } finally {
    closeQuietly(rs, stmt);
  }
}
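A hedged sketch of the calling side: HoodieHiveSyncException is unchecked (it extends HoodieException), so callers only catch it where a failed lookup should be tolerated rather than abort the sync. The client type and its import path are assumptions based on the hudi-dla-sync module; construction of the client is omitted.

import org.apache.hudi.common.util.Option;
import org.apache.hudi.dla.HoodieDLAClient; // assumed package; adjust for your Hudi version
import org.apache.hudi.hive.HoodieHiveSyncException;

public class LastCommitTimeSyncedExample {

  // Returns the last synced commit time, or null if the table was never synced or the lookup fails.
  public static String lastSyncedCommitOrNull(HoodieDLAClient dlaClient, String tableName) {
    try {
      Option<String> lastCommitTime = dlaClient.getLastCommitTimeSynced(tableName);
      // An empty Option means TBLPROPERTIES carries no last-commit-time entry yet (first sync).
      return lastCommitTime.isPresent() ? lastCommitTime.get() : null;
    } catch (HoodieHiveSyncException e) {
      // The SHOW CREATE TABLE query itself failed; treat it the same as "never synced" here.
      return null;
    }
  }
}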
Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.
The class HMSDDLExecutor, method updatePartitionsToTable.
@Override
public void updatePartitionsToTable(String tableName, List<String> changedPartitions) {
  if (changedPartitions.isEmpty()) {
    LOG.info("No partitions to change for " + tableName);
    return;
  }
  LOG.info("Changing partitions " + changedPartitions.size() + " on " + tableName);
  try {
    StorageDescriptor sd = client.getTable(syncConfig.databaseName, tableName).getSd();
    List<Partition> partitionList = changedPartitions.stream().map(partition -> {
      Path partitionPath = FSUtils.getPartitionPath(syncConfig.basePath, partition);
      String partitionScheme = partitionPath.toUri().getScheme();
      String fullPartitionPath = StorageSchemes.HDFS.getScheme().equals(partitionScheme)
          ? FSUtils.getDFSFullPartitionPath(fs, partitionPath) : partitionPath.toString();
      List<String> partitionValues = partitionValueExtractor.extractPartitionValuesInPath(partition);
      sd.setLocation(fullPartitionPath);
      return new Partition(partitionValues, syncConfig.databaseName, tableName, 0, 0, sd, null);
    }).collect(Collectors.toList());
    client.alter_partitions(syncConfig.databaseName, tableName, partitionList, null);
  } catch (TException e) {
    LOG.error(syncConfig.databaseName + "." + tableName + " update partition failed", e);
    throw new HoodieHiveSyncException(syncConfig.databaseName + "." + tableName + " update partition failed", e);
  }
}
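The partitionValueExtractor used above maps a relative partition path to the Hive partition values. A small sketch with the slash-encoded day extractor that ships with the Hive sync module; the class name, its package and the exact output shape are assumptions based on Hudi 0.x.

import java.util.List;

import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor; // assumed class; adjust for your Hudi version

public class PartitionValueExtractorExample {
  public static void main(String[] args) {
    // Relative partition path as laid out under the Hudi base path.
    String partition = "2021/03/15";
    List<String> values =
        new SlashEncodedDayPartitionValueExtractor().extractPartitionValuesInPath(partition);
    // For this extractor the yyyy/mm/dd segments collapse into a single value such as "2021-03-15".
    System.out.println(values);
  }
}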
Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.
The class HMSDDLExecutor, method getTableSchema.
@Override
public Map<String, String> getTableSchema(String tableName) {
  try {
    // HiveMetastoreClient returns partition keys separate from Columns, hence get both and merge to
    // get the Schema of the table.
    final long start = System.currentTimeMillis();
    Table table = this.client.getTable(syncConfig.databaseName, tableName);
    Map<String, String> partitionKeysMap =
        table.getPartitionKeys().stream().collect(Collectors.toMap(FieldSchema::getName, f -> f.getType().toUpperCase()));
    Map<String, String> columnsMap =
        table.getSd().getCols().stream().collect(Collectors.toMap(FieldSchema::getName, f -> f.getType().toUpperCase()));
    Map<String, String> schema = new HashMap<>();
    schema.putAll(columnsMap);
    schema.putAll(partitionKeysMap);
    final long end = System.currentTimeMillis();
    LOG.info(String.format("Time taken to getTableSchema: %s ms", (end - start)));
    return schema;
  } catch (Exception e) {
    throw new HoodieHiveSyncException("Failed to get table schema for : " + tableName, e);
  }
}
Use of org.apache.hudi.hive.HoodieHiveSyncException in project hudi by apache.
The class HMSDDLExecutor, method updateTableDefinition.
@Override
public void updateTableDefinition(String tableName, MessageType newSchema) {
  try {
    boolean cascade = syncConfig.partitionFields.size() > 0;
    List<FieldSchema> fieldSchema = HiveSchemaUtil.convertParquetSchemaToHiveFieldSchema(newSchema, syncConfig);
    Table table = client.getTable(syncConfig.databaseName, tableName);
    StorageDescriptor sd = table.getSd();
    sd.setCols(fieldSchema);
    table.setSd(sd);
    EnvironmentContext environmentContext = new EnvironmentContext();
    if (cascade) {
      LOG.info("partition table,need cascade");
      environmentContext.putToProperties(StatsSetupConst.CASCADE, StatsSetupConst.TRUE);
    }
    client.alter_table_with_environmentContext(syncConfig.databaseName, tableName, table, environmentContext);
  } catch (Exception e) {
    LOG.error("Failed to update table for " + tableName, e);
    throw new HoodieHiveSyncException("Failed to update table for " + tableName, e);
  }
}
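Putting the pieces together, a rough sketch of how a sync driver might guard this update with the schema difference computed from getTableSchema and getSchemaDifference above. It assumes SchemaDifference exposes an isEmpty() check, that HMSDDLExecutor lives in org.apache.hudi.hive.ddl, and that the executor, storage schema and partition fields are supplied by the caller.

import java.util.List;

import org.apache.hudi.hive.SchemaDifference;
import org.apache.hudi.hive.ddl.HMSDDLExecutor; // assumed package; adjust for your Hudi version
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.parquet.schema.MessageType;

public class UpdateTableDefinitionExample {

  // Issues the ALTER TABLE only when the Hive table no longer matches the storage schema.
  public static void syncSchemaIfChanged(HMSDDLExecutor ddlExecutor, String tableName,
                                         MessageType storageSchema, List<String> partitionFields) {
    // supportTimestamp is hard-coded to false here purely for brevity.
    SchemaDifference diff = HiveSchemaUtil.getSchemaDifference(
        storageSchema, ddlExecutor.getTableSchema(tableName), partitionFields, false);
    if (!diff.isEmpty()) {
      // The CASCADE environment context set above propagates column changes to existing partitions.
      ddlExecutor.updateTableDefinition(tableName, storageSchema);
    }
  }
}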