use of org.apache.hudi.client.bootstrap.BootstrapWriteStatus in project hudi by apache.
the class BaseBootstrapMetadataHandler method runMetadataBootstrap.
public BootstrapWriteStatus runMetadataBootstrap(String srcPartitionPath, String partitionPath, KeyGeneratorInterface keyGenerator) {
Path sourceFilePath = FileStatusUtils.toPath(srcFileStatus.getPath());
HoodieBootstrapHandle<?, ?, ?, ?> bootstrapHandle = new HoodieBootstrapHandle(config, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, table, partitionPath, FSUtils.createNewFileIdPfx(), table.getTaskContextSupplier());
try {
Schema avroSchema = getAvroSchema(sourceFilePath);
Schema recordKeySchema = HoodieAvroUtils.generateProjectionSchema(avroSchema, keyGenerator.getRecordKeyFieldNames());
LOG.info("Schema to be used for reading record Keys :" + recordKeySchema);
AvroReadSupport.setAvroReadSchema(table.getHadoopConf(), recordKeySchema);
AvroReadSupport.setRequestedProjection(table.getHadoopConf(), recordKeySchema);
executeBootstrap(bootstrapHandle, sourceFilePath, keyGenerator, partitionPath, avroSchema);
} catch (Exception e) {
throw new HoodieException(e.getMessage(), e);
}
BootstrapWriteStatus writeStatus = (BootstrapWriteStatus) bootstrapHandle.writeStatuses().get(0);
BootstrapFileMapping bootstrapFileMapping = new BootstrapFileMapping(config.getBootstrapSourceBasePath(), srcPartitionPath, partitionPath, srcFileStatus, writeStatus.getFileId());
writeStatus.setBootstrapSourceFileMapping(bootstrapFileMapping);
return writeStatus;
}
use of org.apache.hudi.client.bootstrap.BootstrapWriteStatus in project hudi by apache.
the class SparkBootstrapCommitActionExecutor method runMetadataBootstrap.
private HoodieData<BootstrapWriteStatus> runMetadataBootstrap(List<Pair<String, List<HoodieFileStatus>>> partitions) {
if (null == partitions || partitions.isEmpty()) {
return context.emptyHoodieData();
}
TypedProperties properties = new TypedProperties();
properties.putAll(config.getProps());
KeyGeneratorInterface keyGenerator;
try {
keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(properties);
} catch (IOException e) {
throw new HoodieKeyGeneratorException("Init keyGenerator failed ", e);
}
BootstrapPartitionPathTranslator translator = (BootstrapPartitionPathTranslator) ReflectionUtils.loadClass(config.getBootstrapPartitionPathTranslatorClass(), properties);
List<Pair<String, Pair<String, HoodieFileStatus>>> bootstrapPaths = partitions.stream().flatMap(p -> {
String translatedPartitionPath = translator.getBootstrapTranslatedPath(p.getKey());
return p.getValue().stream().map(f -> Pair.of(p.getKey(), Pair.of(translatedPartitionPath, f)));
}).collect(Collectors.toList());
context.setJobStatus(this.getClass().getSimpleName(), "Bootstrap metadata table.");
return context.parallelize(bootstrapPaths, config.getBootstrapParallelism()).map(partitionFsPair -> getMetadataHandler(config, table, partitionFsPair.getRight().getRight()).runMetadataBootstrap(partitionFsPair.getLeft(), partitionFsPair.getRight().getLeft(), keyGenerator));
}
use of org.apache.hudi.client.bootstrap.BootstrapWriteStatus in project hudi by apache.
the class SparkBootstrapCommitActionExecutor method metadataBootstrap.
/**
* Perform Metadata Bootstrap.
* @param partitionFilesList List of partitions and files within that partitions
*/
protected Option<HoodieWriteMetadata<HoodieData<WriteStatus>>> metadataBootstrap(List<Pair<String, List<HoodieFileStatus>>> partitionFilesList) {
if (null == partitionFilesList || partitionFilesList.isEmpty()) {
return Option.empty();
}
HoodieTableMetaClient metaClient = table.getMetaClient();
metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(State.REQUESTED, metaClient.getCommitActionType(), HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS));
table.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED, metaClient.getCommitActionType(), HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS), Option.empty());
HoodieData<BootstrapWriteStatus> bootstrapWriteStatuses = runMetadataBootstrap(partitionFilesList);
HoodieWriteMetadata<HoodieData<WriteStatus>> result = new HoodieWriteMetadata<>();
updateIndexAndCommitIfNeeded(bootstrapWriteStatuses.map(w -> w), result);
return Option.of(result);
}
use of org.apache.hudi.client.bootstrap.BootstrapWriteStatus in project hudi by apache.
the class SparkBootstrapCommitActionExecutor method commit.
@Override
protected void commit(Option<Map<String, String>> extraMetadata, HoodieWriteMetadata<HoodieData<WriteStatus>> result) {
// Perform bootstrap index write and then commit. Make sure both record-key and bootstrap-index
// is all done in a single job DAG.
Map<String, List<Pair<BootstrapFileMapping, HoodieWriteStat>>> bootstrapSourceAndStats = result.getWriteStatuses().collectAsList().stream().map(w -> {
BootstrapWriteStatus ws = (BootstrapWriteStatus) w;
return Pair.of(ws.getBootstrapSourceFileMapping(), ws.getStat());
}).collect(Collectors.groupingBy(w -> w.getKey().getPartitionPath()));
HoodieTableMetaClient metaClient = table.getMetaClient();
try (BootstrapIndex.IndexWriter indexWriter = BootstrapIndex.getBootstrapIndex(metaClient).createWriter(metaClient.getTableConfig().getBootstrapBasePath().get())) {
LOG.info("Starting to write bootstrap index for source " + config.getBootstrapSourceBasePath() + " in table " + config.getBasePath());
indexWriter.begin();
bootstrapSourceAndStats.forEach((key, value) -> indexWriter.appendNextPartition(key, value.stream().map(Pair::getKey).collect(Collectors.toList())));
indexWriter.finish();
LOG.info("Finished writing bootstrap index for source " + config.getBootstrapSourceBasePath() + " in table " + config.getBasePath());
}
commit(extraMetadata, result, bootstrapSourceAndStats.values().stream().flatMap(f -> f.stream().map(Pair::getValue)).collect(Collectors.toList()));
LOG.info("Committing metadata bootstrap !!");
}
Aggregations