Search in sources :

Example 1 with BootstrapExecutor

Use of org.apache.hudi.utilities.deltastreamer.BootstrapExecutor in the Apache Hudi project.

The method doBootstrap of the class SparkMain.

/**
 * Assembles the bootstrap properties and delta-streamer config from the given
 * arguments, then runs a {@link BootstrapExecutor} with them.
 *
 * <p>Properties are built from {@code configs} alone when {@code propsFilePath}
 * is {@code null}; otherwise they are read from the props file together with
 * {@code configs}. The explicit arguments are then written into the properties
 * after loading.
 *
 * @param jsc                        Spark context; also supplies the Hadoop configuration
 * @param tableName                  stored as the config's target table name
 * @param tableType                  stored as the config's table type
 * @param basePath                   stored as the config's target base path and used to obtain the file system
 * @param sourcePath                 written to the bootstrap base-path property
 * @param recordKeyCols              written to the record-key field property
 * @param partitionFields            written to the partition-path field property
 * @param parallelism                written to the bootstrap parallelism property
 * @param schemaProviderClass        stored as the config's schema provider class name
 * @param bootstrapIndexClass        stored as the config's bootstrap index class
 * @param selectorClass              written to the bootstrap mode-selector class property
 * @param keyGenerator               either a name listed by {@code KeyGeneratorType.getNames()}
 *                                   (matched after upper-casing) or a value used as the key generator class name
 * @param fullBootstrapInputProvider written to the full-bootstrap input provider class property
 * @param payloadClassName           stored as the config's payload class name
 * @param enableHiveSync             parsed with {@code Boolean.valueOf} into the config's hive-sync flag
 * @param propsFilePath              path of a properties file, or {@code null} to use {@code configs} only
 * @param configs                    additional configuration entries passed to the properties loader
 * @return always {@code 0} once the executor has returned
 * @throws IOException declared by this method; presumably raised while reading configuration or
 *                     running the bootstrap (not determinable from this method alone)
 */
private static int doBootstrap(JavaSparkContext jsc, String tableName, String tableType, String basePath, String sourcePath, String recordKeyCols, String partitionFields, String parallelism, String schemaProviderClass, String bootstrapIndexClass, String selectorClass, String keyGenerator, String fullBootstrapInputProvider, String payloadClassName, String enableHiveSync, String propsFilePath, List<String> configs) throws IOException {
    // Load the starting properties: inline configs only, or the props file plus configs.
    final TypedProperties properties;
    if (propsFilePath == null) {
        properties = UtilHelpers.buildProperties(configs);
    } else {
        properties = UtilHelpers.readConfig(jsc.hadoopConfiguration(), new Path(propsFilePath), configs).getProps(true);
    }

    properties.setProperty(HoodieBootstrapConfig.BASE_PATH.key(), sourcePath);

    // A keyGenerator that is not one of the known type names is passed through as a class name.
    // NOTE(review): a null or empty keyGenerator also takes this first branch - confirm callers never pass null.
    if (StringUtils.isNullOrEmpty(keyGenerator) || !KeyGeneratorType.getNames().contains(keyGenerator.toUpperCase(Locale.ROOT))) {
        properties.setProperty(HoodieBootstrapConfig.KEYGEN_CLASS_NAME.key(), keyGenerator);
    } else {
        properties.setProperty(HoodieBootstrapConfig.KEYGEN_TYPE.key(), keyGenerator.toUpperCase(Locale.ROOT));
    }

    properties.setProperty(HoodieBootstrapConfig.FULL_BOOTSTRAP_INPUT_PROVIDER_CLASS_NAME.key(), fullBootstrapInputProvider);
    properties.setProperty(HoodieBootstrapConfig.PARALLELISM_VALUE.key(), parallelism);
    properties.setProperty(HoodieBootstrapConfig.MODE_SELECTOR_CLASS_NAME.key(), selectorClass);
    properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), recordKeyCols);
    properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), partitionFields);

    // Delta-streamer config carrying the table-level settings for the executor.
    HoodieDeltaStreamer.Config streamerConfig = new HoodieDeltaStreamer.Config();
    streamerConfig.targetTableName = tableName;
    streamerConfig.targetBasePath = basePath;
    streamerConfig.tableType = tableType;
    streamerConfig.schemaProviderClassName = schemaProviderClass;
    streamerConfig.bootstrapIndexClass = bootstrapIndexClass;
    streamerConfig.payloadClassName = payloadClassName;
    streamerConfig.enableHiveSync = Boolean.valueOf(enableHiveSync);

    BootstrapExecutor bootstrapExecutor = new BootstrapExecutor(
        streamerConfig,
        jsc,
        FSUtils.getFs(basePath, jsc.hadoopConfiguration()),
        jsc.hadoopConfiguration(),
        properties);
    bootstrapExecutor.execute();
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieDeltaStreamer(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer) BootstrapExecutor(org.apache.hudi.utilities.deltastreamer.BootstrapExecutor) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) Config(org.apache.hudi.utilities.HDFSParquetImporter.Config) HoodieBootstrapConfig(org.apache.hudi.config.HoodieBootstrapConfig) TypedProperties(org.apache.hudi.common.config.TypedProperties)

Aggregations

Path (org.apache.hadoop.fs.Path)1 TypedProperties (org.apache.hudi.common.config.TypedProperties)1 HoodieBootstrapConfig (org.apache.hudi.config.HoodieBootstrapConfig)1 HoodieIndexConfig (org.apache.hudi.config.HoodieIndexConfig)1 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)1 Config (org.apache.hudi.utilities.HDFSParquetImporter.Config)1 BootstrapExecutor (org.apache.hudi.utilities.deltastreamer.BootstrapExecutor)1 HoodieDeltaStreamer (org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer)1