Use of org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo in project hudi by apache.
The class HoodieRealtimeInputFormatUtils, method addRequiredProjectionFields:
public static void addRequiredProjectionFields(Configuration configuration, Option<HoodieVirtualKeyInfo> hoodieVirtualKeyInfo) {
  // Need this to do merge records in HoodieRealtimeRecordReader
  if (!hoodieVirtualKeyInfo.isPresent()) {
    // Meta fields are populated in the data files: project the standard Hudi meta columns.
    addProjectionField(configuration, HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS);
    addProjectionField(configuration, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieInputFormatUtils.HOODIE_COMMIT_TIME_COL_POS);
    addProjectionField(configuration, HoodieRecord.PARTITION_PATH_METADATA_FIELD, HoodieInputFormatUtils.HOODIE_PARTITION_PATH_COL_POS);
  } else {
    // Virtual keys: project the table's configured record-key and partition-path fields instead.
    HoodieVirtualKeyInfo hoodieVirtualKey = hoodieVirtualKeyInfo.get();
    addProjectionField(configuration, hoodieVirtualKey.getRecordKeyField(), hoodieVirtualKey.getRecordKeyFieldIndex());
    addProjectionField(configuration, hoodieVirtualKey.getPartitionPathField(), hoodieVirtualKey.getPartitionPathFieldIndex());
  }
}
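A hedged usage sketch (not part of the Hudi sources): addRequiredProjectionFields is meant to be called on the job configuration before records are read, so the projected columns include what HoodieRealtimeRecordReader needs for merging. Here a plain Hadoop Configuration stands in for the job conf, and Option.empty() models a table whose meta fields are populated; class and package names are assumed to match recent Hudi releases.

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo;
import org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils;

public class ProjectionFieldsExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Empty virtual key info: the standard Hudi meta columns are added to the
    // projected read columns so HoodieRealtimeRecordReader can merge records.
    Option<HoodieVirtualKeyInfo> noVirtualKeys = Option.empty();
    HoodieRealtimeInputFormatUtils.addRequiredProjectionFields(conf, noVirtualKeys);
  }
}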
Use of org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo in project hudi by apache.
The class HoodieCopyOnWriteTableInputFormat, method getHoodieVirtualKeyInfo:
protected static Option<HoodieVirtualKeyInfo> getHoodieVirtualKeyInfo(HoodieTableMetaClient metaClient) {
  HoodieTableConfig tableConfig = metaClient.getTableConfig();
  if (tableConfig.populateMetaFields()) {
    // Meta fields are written into the data files, so no virtual key info is needed.
    return Option.empty();
  }
  TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient);
  try {
    Schema schema = tableSchemaResolver.getTableAvroSchema();
    return Option.of(new HoodieVirtualKeyInfo(
        tableConfig.getRecordKeyFieldProp(),
        tableConfig.getPartitionFieldProp(),
        schema.getField(tableConfig.getRecordKeyFieldProp()).pos(),
        schema.getField(tableConfig.getPartitionFieldProp()).pos()));
  } catch (Exception exception) {
    throw new HoodieException("Fetching table schema failed with exception ", exception);
  }
}
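A hedged calling sketch: getHoodieVirtualKeyInfo is protected, so the hypothetical helper below is assumed to live in HoodieCopyOnWriteTableInputFormat or a subclass. The meta client is built with the standard HoodieTableMetaClient builder, and the base path argument is a placeholder.

// Hypothetical helper for illustration only.
static Option<HoodieVirtualKeyInfo> resolveVirtualKeys(Configuration hadoopConf, String basePath) {
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
      .setConf(hadoopConf)
      .setBasePath(basePath)  // e.g. the table's base path on DFS
      .build();
  // Empty when the table populates meta fields; otherwise carries the configured
  // record-key / partition-path field names and their positions in the Avro schema.
  return getHoodieVirtualKeyInfo(metaClient);
}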
Use of org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo in project hudi by apache.
The class HoodieCopyOnWriteTableInputFormat, method listStatusForSnapshotMode:
@Nonnull
private List<FileStatus> listStatusForSnapshotMode(JobConf job,
                                                   Map<String, HoodieTableMetaClient> tableMetaClientMap,
                                                   List<Path> snapshotPaths) throws IOException {
  HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(job);
  List<FileStatus> targetFiles = new ArrayList<>();
  TypedProperties props = new TypedProperties(new Properties());
  Map<HoodieTableMetaClient, List<Path>> groupedPaths =
      HoodieInputFormatUtils.groupSnapshotPathsByMetaClient(tableMetaClientMap.values(), snapshotPaths);
  for (Map.Entry<HoodieTableMetaClient, List<Path>> entry : groupedPaths.entrySet()) {
    HoodieTableMetaClient tableMetaClient = entry.getKey();
    List<Path> partitionPaths = entry.getValue();
    // The Hive job might specify a max commit instant up to which the table's state
    // should be examined. We simply pass it as the query's instant to the file index.
    Option<String> queryCommitInstant = HoodieHiveUtils.getMaxCommit(job, tableMetaClient.getTableConfig().getTableName());
    boolean shouldIncludePendingCommits = HoodieHiveUtils.shouldIncludePendingCommits(job, tableMetaClient.getTableConfig().getTableName());
    HiveHoodieTableFileIndex fileIndex = new HiveHoodieTableFileIndex(
        engineContext, tableMetaClient, props, HoodieTableQueryType.SNAPSHOT,
        partitionPaths, queryCommitInstant, shouldIncludePendingCommits);
    Map<String, List<FileSlice>> partitionedFileSlices = fileIndex.listFileSlices();
    Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt = getHoodieVirtualKeyInfo(tableMetaClient);
    targetFiles.addAll(partitionedFileSlices.values().stream()
        .flatMap(Collection::stream)
        .map(fileSlice -> createFileStatusUnchecked(fileSlice, fileIndex, virtualKeyInfoOpt))
        .collect(Collectors.toList()));
  }
  return targetFiles;
}
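For context, the grouping step near the top of this snippet can be pictured as below. This is a simplified, illustrative re-implementation of what HoodieInputFormatUtils.groupSnapshotPathsByMetaClient does, i.e. attributing each input path to the table whose base path is its prefix; it is not the actual Hudi code, and the helper name is made up for this sketch.

// Illustration only: simplified grouping of snapshot paths by owning table.
static Map<HoodieTableMetaClient, List<Path>> groupByMetaClient(
    Collection<HoodieTableMetaClient> metaClients, List<Path> snapshotPaths) {
  Map<HoodieTableMetaClient, List<Path>> grouped = new HashMap<>();
  for (Path path : snapshotPaths) {
    for (HoodieTableMetaClient client : metaClients) {
      // Simplification: a path belongs to the table whose base path prefixes it.
      if (path.toString().startsWith(client.getBasePath())) {
        grouped.computeIfAbsent(client, c -> new ArrayList<>()).add(path);
        break;
      }
    }
  }
  return grouped;
}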