Use of org.apache.hive.iceberg.org.apache.orc.TypeDescription in project hive by apache.
The example below is the handleIcebergProjection method of the class VectorizedReadUtils.
/**
 * Adjusts the jobConf so that column reorders and renames that might have happened since this ORC file was written
 * are properly mapped to the schema of the original file.
 * @param task - Iceberg task - required for the partition spec and the residual filter expression
 * @param job - JobConf instance to adjust
 * @param fileSchema - ORC file schema of the input file
 * @throws IOException - errors relating to accessing the ORC file
 */
public static void handleIcebergProjection(FileScanTask task, JobConf job, TypeDescription fileSchema)
    throws IOException {
  // We need to map with the current (i.e. current Hive table columns) full schema (without projections),
  // as OrcInputFormat will take care of the projections by the use of an include boolean array
  PartitionSpec spec = task.spec();
  Schema currentSchema = spec.schema();

  TypeDescription readOrcSchema;
  if (ORCSchemaUtil.hasIds(fileSchema)) {
    // The file already carries Iceberg field IDs, so the projection can be built directly
    readOrcSchema = ORCSchemaUtil.buildOrcProjection(currentSchema, fileSchema);
  } else {
    Schema readSchemaForOriginalFile = currentSchema;
    // In case of migrated, originally partitioned tables, partition values are not present in the file
    if (spec.isPartitioned()) {
      readSchemaForOriginalFile = currentSchema.select(currentSchema.columns().stream()
          .filter(c -> !spec.identitySourceIds().contains(c.fieldId()))
          .map(c -> c.name())
          .collect(Collectors.toList()));
    }
    // Attach field IDs to the legacy file schema by column name before building the projection
    TypeDescription typeWithIds = ORCSchemaUtil.applyNameMapping(fileSchema, MappingUtil.create(currentSchema));
    readOrcSchema = ORCSchemaUtil.buildOrcProjection(readSchemaForOriginalFile, typeWithIds);
  }

  job.set(ColumnProjectionUtils.ORC_SCHEMA_STRING, readOrcSchema.toString());

  // Predicate pushdown needs to be adjusted too in case of column renames; we let Iceberg generate this into the job
  if (task.residual() != null) {
    Expression boundFilter = Binder.bind(currentSchema.asStruct(), task.residual(), false);

    // Note the use of the unshaded version of this class here (required for SARG deserialization later)
    org.apache.hadoop.hive.ql.io.sarg.SearchArgument sarg =
        ExpressionToOrcSearchArgument.convert(boundFilter, readOrcSchema);
    if (sarg != null) {
      job.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
      job.unset(ConvertAstToSearchArg.SARG_PUSHDOWN);
      job.set(ConvertAstToSearchArg.SARG_PUSHDOWN, ConvertAstToSearchArg.sargToKryo(sarg));
    }
  }
}
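The else-branch above handles files written before the table carried Iceberg field IDs. Below is a minimal, standalone sketch of that mapping step in isolation; it is not taken from the Hive source. The class name NameMappingSketch and the two-column schema are invented for illustration, and the plain org.apache.iceberg and org.apache.orc packages are used instead of the shaded org.apache.hive.iceberg relocation for readability.

import org.apache.iceberg.Schema;
import org.apache.iceberg.mapping.MappingUtil;
import org.apache.iceberg.orc.ORCSchemaUtil;
import org.apache.iceberg.types.Types;
import org.apache.orc.TypeDescription;

public class NameMappingSketch {
  public static void main(String[] args) {
    // Current Iceberg table schema (field IDs 1 and 2) - invented for this example
    Schema currentSchema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "name", Types.StringType.get()));

    // Schema of a legacy ORC file written before the Iceberg migration: no field IDs
    TypeDescription fileSchema = TypeDescription.fromString("struct<id:bigint,name:string>");

    // hasIds() is false for such a file, so IDs are assigned by matching column names...
    TypeDescription typeWithIds =
        ORCSchemaUtil.applyNameMapping(fileSchema, MappingUtil.create(currentSchema));

    // ...after which the read projection can be built just like in the ID-carrying case
    TypeDescription readOrcSchema = ORCSchemaUtil.buildOrcProjection(currentSchema, typeWithIds);
    System.out.println(readOrcSchema);
  }
}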
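A hypothetical call site might read the file schema from the ORC footer and let handleIcebergProjection rewrite the job configuration before the vectorized reader is opened. The helper name prepareJob and the way the path is derived from the task are illustrative assumptions, not taken from the Hive source:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.iceberg.FileScanTask;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.TypeDescription;

public class ProjectionCallerSketch {
  // Hypothetical helper: wire the Iceberg task's schema and filter info into the JobConf
  static void prepareJob(FileScanTask task, JobConf job) throws IOException {
    Path path = new Path(task.file().path().toString());
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(job));
    TypeDescription fileSchema = reader.getSchema();

    // Rewrites ORC_SCHEMA_STRING and the pushed-down SARG on the job as shown above
    VectorizedReadUtils.handleIcebergProjection(task, job, fileSchema);
  }
}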