Use of org.apache.hudi.client.bootstrap.BootstrapMode in the Apache Hudi project.
Class SparkBootstrapCommitActionExecutor, method execute.
/**
 * Runs the bootstrap for this instant: a metadata-only bootstrap followed by a full-record
 * bootstrap, each over the source partitions that were selected for that mode.
 *
 * <p>The commits timeline must not contain any completed instant; otherwise the argument check
 * fails before any partition is listed. Once both bootstraps have run, the marker directory of
 * this instant is deleted.
 *
 * @return the results of the metadata-only bootstrap and of the full-record bootstrap
 * @throws HoodieIOException if an {@link IOException} is raised by one of the bootstrap steps;
 *     the original exception is kept as the cause
 */
@Override
public HoodieBootstrapWriteMetadata<HoodieData<WriteStatus>> execute() {
  validate();
  try {
    HoodieTableMetaClient metaClient = table.getMetaClient();
    Option<HoodieInstant> completedInstant =
        metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
    ValidationUtils.checkArgument(!completedInstant.isPresent(),
        "Active Timeline is expected to be empty for bootstrap to be performed. "
            + "If you want to re-bootstrap, please rollback bootstrap first !!");

    Map<BootstrapMode, List<Pair<String, List<HoodieFileStatus>>>> partitionSelections =
        listAndProcessSourcePartitions();

    // NOTE(review): get(...) returns null for a mode that received no partitions; presumably
    // metadataBootstrap/fullBootstrap tolerate a null list - confirm against their bodies.

    // First run metadata bootstrap which will auto commit
    Option<HoodieWriteMetadata<HoodieData<WriteStatus>>> metadataResult =
        metadataBootstrap(partitionSelections.get(BootstrapMode.METADATA_ONLY));

    // if there are full bootstrap to be performed, perform that too
    Option<HoodieWriteMetadata<HoodieData<WriteStatus>>> fullBootstrapResult =
        fullBootstrap(partitionSelections.get(BootstrapMode.FULL_RECORD));

    // Delete the marker directory for the instant
    WriteMarkersFactory.get(config.getMarkersType(), table, instantTime)
        .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());

    // Diamond instead of the raw type, so the result is checked against the declared return type.
    return new HoodieBootstrapWriteMetadata<>(metadataResult, fullBootstrapResult);
  } catch (IOException ioe) {
    throw new HoodieIOException(ioe.getMessage(), ioe);
  }
}
Use of org.apache.hudi.client.bootstrap.BootstrapMode in the Apache Hudi project.
Class SparkBootstrapCommitActionExecutor, method listAndProcessSourcePartitions.
/**
 * Lists the leaf folders of the bootstrap source, resolves the bootstrap schema from them and
 * groups the folders by the bootstrap mode that the configured mode selector assigns to each.
 *
 * <p>As a side effect, the {@code bootstrapSchema} field is set to the string form of the
 * resolved schema.
 *
 * @return for every bootstrap mode returned by the selector, the partitions assigned to that
 *     mode, each paired with the source files listed for it
 * @throws IOException if one of the underlying calls fails with an I/O error
 */
private Map<BootstrapMode, List<Pair<String, List<HoodieFileStatus>>>> listAndProcessSourcePartitions() throws IOException {
  List<Pair<String, List<HoodieFileStatus>>> folders = BootstrapUtils.getAllLeafFoldersWithFiles(
      table.getMetaClient(), bootstrapSourceFileSystem, config.getBootstrapSourceBasePath(), context);

  LOG.info("Fetching Bootstrap Schema !!");
  HoodieBootstrapSchemaProvider sourceSchemaProvider = new HoodieSparkBootstrapSchemaProvider(config);
  bootstrapSchema = sourceSchemaProvider.getBootstrapSchema(context, folders).toString();
  LOG.info("Bootstrap Schema :" + bootstrapSchema);

  // The selector implementation is configurable and is instantiated reflectively from the config.
  BootstrapModeSelector selector =
      (BootstrapModeSelector) ReflectionUtils.loadClass(config.getBootstrapModeSelectorClass(), config);
  Map<BootstrapMode, List<String>> result = selector.select(folders);

  Map<String, List<HoodieFileStatus>> partitionToFiles =
      folders.stream().collect(Collectors.toMap(Pair::getKey, Pair::getValue));

  // Ensure all partitions are accounted for: the union of the selected partitions must be exactly
  // the set of listed partitions (nothing missing, nothing unknown).
  ValidationUtils.checkArgument(
      partitionToFiles.keySet().equals(
          result.values().stream().flatMap(Collection::stream).collect(Collectors.toSet())),
      "Partitions chosen by the bootstrap mode selector do not match the partitions listed "
          + "from the bootstrap source");

  // Re-attach the listed files to every selected partition, keeping the grouping by mode.
  return result.entrySet().stream()
      .collect(Collectors.toMap(
          Map.Entry::getKey,
          e -> e.getValue().stream()
              .map(p -> Pair.of(p, partitionToFiles.get(p)))
              .collect(Collectors.toList())));
}
Aggregations