Search in sources:

Example 51 with HoodieSparkEngineContext

Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.

From the class TestTimelineServerBasedWriteMarkers, the setup() method:

@BeforeEach
public void setup() throws IOException {
    initPath();
    initMetaClient();
    this.jsc = new JavaSparkContext(
        HoodieClientTestUtils.getSparkConfForTest(TestTimelineServerBasedWriteMarkers.class.getName()));
    this.context = new HoodieSparkEngineContext(jsc);
    this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
    this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000"));
    FileSystemViewStorageConfig storageConf = FileSystemViewStorageConfig.newBuilder()
        .withStorageType(FileSystemViewStorageType.SPILLABLE_DISK).build();
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    try {
        timelineService = new TimelineService(
            localEngineContext,
            new Configuration(),
            TimelineService.Config.builder().serverPort(0).enableMarkerRequests(true).build(),
            FileSystem.get(new Configuration()),
            FileSystemViewManager.createViewManager(
                localEngineContext, metadataConfig, storageConf, HoodieCommonConfig.newBuilder().build()));
        timelineService.startService();
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    this.writeMarkers = new TimelineServerBasedWriteMarkers(
        metaClient.getBasePath(), markerFolderPath.toString(), "000",
        "localhost", timelineService.getServerPort(), 300);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystemViewStorageConfig (org.apache.hudi.common.table.view.FileSystemViewStorageConfig), HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext), HoodieMetadataConfig (org.apache.hudi.common.config.HoodieMetadataConfig), Configuration (org.apache.hadoop.conf.Configuration), TimelineService (org.apache.hudi.timeline.service.TimelineService), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext), IOException (java.io.IOException), BeforeEach (org.junit.jupiter.api.BeforeEach)
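
As a hedged illustration (not from the Hudi source), a test built on this setup might exercise the markers through the embedded timeline server as below. The partition and file names are hypothetical, and the calls assume the WriteMarkers API (create, doesMarkerDirExist, allMarkerFilePaths) plus org.apache.hudi.common.model.IOType and JUnit's Assertions.

@Test
public void testCreateAndListMarkers() throws IOException {
    // Writes a MERGE marker via HTTP to the embedded timeline server
    // rather than creating the marker file directly on the file system.
    writeMarkers.create("2020/06/01", "file1.parquet", IOType.MERGE);
    // The marker directory should now exist and hold exactly one marker path.
    assertTrue(writeMarkers.doesMarkerDirExist());
    assertEquals(1, writeMarkers.allMarkerFilePaths().size());
}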

Example 52 with HoodieSparkEngineContext

Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.

From the class SparkClientFunctionalTestHarness, the runBeforeEach() method:

@BeforeEach
public synchronized void runBeforeEach() {
    initialized = spark != null;
    if (!initialized) {
        SparkConf sparkConf = conf();
        SparkRDDWriteClient.registerClasses(sparkConf);
        HoodieReadClient.addHoodieSupport(sparkConf);
        spark = SparkSession.builder().config(sparkConf).getOrCreate();
        sqlContext = spark.sqlContext();
        jsc = new JavaSparkContext(spark.sparkContext());
        context = new HoodieSparkEngineContext(jsc);
        timelineService = HoodieClientTestUtils.initTimelineService(context, basePath(), incrementTimelineServicePortToUse());
        timelineServicePort = timelineService.getServerPort();
    }
}
Also used: HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), SparkConf (org.apache.spark.SparkConf), BeforeEach (org.junit.jupiter.api.BeforeEach)
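
The guard above lazily initializes one shared SparkSession per JVM. A teardown counterpart usually releases those resources after the last test; the following is only a minimal sketch of such a hook (the method name, the null checks, and the assumption that TimelineService exposes close() are mine, not the harness's actual code).

@AfterAll
public static synchronized void cleanUp() {
    if (timelineService != null) {
        // Shut down the embedded timeline server started in runBeforeEach().
        timelineService.close();
    }
    if (spark != null) {
        // Stop the shared SparkSession so the next test class starts clean.
        spark.stop();
        spark = null;
    }
}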

Example 53 with HoodieSparkEngineContext

Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.

From the class MetadataCommand, the init() method:

@CliCommand(value = "metadata init", help = "Update the metadata table from commits since the creation")
public String init(
        @CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master,
        @CliOption(key = {"readonly"}, unspecifiedDefaultValue = "false", help = "Open in read-only mode") final boolean readOnly) throws Exception {
    HoodieCLI.getTableMetaClient();
    Path metadataPath = new Path(getMetadataTableBasePath(HoodieCLI.basePath));
    try {
        HoodieCLI.fs.listStatus(metadataPath);
    } catch (FileNotFoundException e) {
        // Metadata directory does not exist
        throw new RuntimeException("Metadata directory (" + metadataPath.toString() + ") does not exist.");
    }
    HoodieTimer timer = new HoodieTimer().startTimer();
    if (!readOnly) {
        HoodieWriteConfig writeConfig = getWriteConfig();
        initJavaSparkContext(Option.of(master));
        SparkHoodieBackedTableMetadataWriter.create(HoodieCLI.conf, writeConfig, new HoodieSparkEngineContext(jsc));
    }
    String action = readOnly ? "Opened" : "Initialized";
    return String.format(action + " Metadata Table in %s (duration=%.2fsec)", metadataPath, (timer.endTimer()) / 1000.0);
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext), FileNotFoundException (java.io.FileNotFoundException), HoodieTimer (org.apache.hudi.common.util.HoodieTimer), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), CliCommand (org.springframework.shell.core.annotation.CliCommand)
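
For context, here is a hedged sketch of the non-CLI equivalent of the write path above: bootstrapping the metadata table directly with a Spark engine context. The Spark master, base path, and table name are assumptions, not values from the source.

// Hypothetical local Spark context and write config for an existing table.
JavaSparkContext jsc = new JavaSparkContext("local[2]", "metadata-init");
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hudi_trips") // hypothetical base path
    .forTable("hudi_trips")      // hypothetical table name
    .build();
// Same call the CLI command issues when readOnly is false.
SparkHoodieBackedTableMetadataWriter.create(
    jsc.hadoopConfiguration(), writeConfig, new HoodieSparkEngineContext(jsc));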

Example 54 with HoodieSparkEngineContext

Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.

From the class SparkMain, the upgradeOrDowngradeTable() method:

/**
 * Upgrade or downgrade a table.
 *
 * @param jsc instance of {@link JavaSparkContext} to use.
 * @param basePath base path of the dataset.
 * @param toVersion the {@link HoodieTableVersion} to upgrade or downgrade to.
 * @return 0 on success, -1 otherwise.
 */
protected static int upgradeOrDowngradeTable(JavaSparkContext jsc, String basePath, String toVersion) {
    HoodieWriteConfig config = getWriteConfig(basePath,
        Boolean.parseBoolean(HoodieWriteConfig.ROLLBACK_USING_MARKERS_ENABLE.defaultValue()));
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(jsc.hadoopConfiguration())
        .setBasePath(config.getBasePath())
        .setLoadActiveTimelineOnLoad(false)
        .setConsistencyGuardConfig(config.getConsistencyGuardConfig())
        .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion())))
        .setFileSystemRetryConfig(config.getFileSystemRetryConfig())
        .build();
    try {
        new UpgradeDowngrade(metaClient, config, new HoodieSparkEngineContext(jsc), SparkUpgradeDowngradeHelper.getInstance())
            .run(HoodieTableVersion.valueOf(toVersion), null);
        LOG.info(String.format("Table at \"%s\" upgraded / downgraded to version \"%s\".", basePath, toVersion));
        return 0;
    } catch (Exception e) {
        LOG.warn(String.format("Failed: Could not upgrade/downgrade table at \"%s\" to version \"%s\".", basePath, toVersion), e);
        return -1;
    }
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext), UpgradeDowngrade (org.apache.hudi.table.upgrade.UpgradeDowngrade), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), TimelineLayoutVersion (org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion), HoodieSavepointException (org.apache.hudi.exception.HoodieSavepointException), IOException (java.io.IOException)
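
A minimal usage sketch follows; the Spark master, table path, and version string are assumptions. The helper returns 0 on success and -1 on failure, and toVersion must name a HoodieTableVersion constant for the running Hudi release.

JavaSparkContext jsc = new JavaSparkContext("local[2]", "upgrade-downgrade"); // hypothetical
// "FOUR" must match a HoodieTableVersion enum constant in the running release.
int exitCode = upgradeOrDowngradeTable(jsc, "/tmp/hudi_trips", "FOUR");
if (exitCode != 0) {
    throw new IllegalStateException("upgrade/downgrade failed");
}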

Example 55 with HoodieSparkEngineContext

Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.

From the class CLIFunctionalTestHarness, the runBeforeEach() method:

@BeforeEach
public synchronized void runBeforeEach() {
    initialized = spark != null && shell != null;
    if (!initialized) {
        SparkConf sparkConf = conf();
        SparkRDDWriteClient.registerClasses(sparkConf);
        HoodieReadClient.addHoodieSupport(sparkConf);
        spark = SparkSession.builder().config(sparkConf).getOrCreate();
        sqlContext = spark.sqlContext();
        jsc = new JavaSparkContext(spark.sparkContext());
        context = new HoodieSparkEngineContext(jsc);
        shell = new Bootstrap().getJLineShellComponent();
        timelineService = HoodieClientTestUtils.initTimelineService(context, basePath(), incrementTimelineServicePortToUse());
        timelineServicePort = timelineService.getServerPort();
    }
}
Also used: HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext), Bootstrap (org.springframework.shell.Bootstrap), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), SparkConf (org.apache.spark.SparkConf), BeforeEach (org.junit.jupiter.api.BeforeEach)
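
A hedged sketch of how a CLI functional test typically drives the shell bootstrapped above; the command string and table path are hypothetical, and CommandResult is assumed from org.springframework.shell.core.

// Execute a CLI command through the JLine shell created in runBeforeEach().
CommandResult cr = shell.executeCommand("connect --path /tmp/hudi_trips");
// isSuccess() reports whether the command parsed and ran without error.
assertTrue(cr.isSuccess());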

Aggregations

HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext): 58
Path (org.apache.hadoop.fs.Path): 25
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 24
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 23
ArrayList (java.util.ArrayList): 19
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 19
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 17
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 17
WriteStatus (org.apache.hudi.client.WriteStatus): 15
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 15
IOException (java.io.IOException): 14
List (java.util.List): 14
Option (org.apache.hudi.common.util.Option): 14
LogManager (org.apache.log4j.LogManager): 14
Logger (org.apache.log4j.Logger): 14
Test (org.junit.jupiter.api.Test): 14
Collectors (java.util.stream.Collectors): 12
FileStatus (org.apache.hadoop.fs.FileStatus): 12
FileSystem (org.apache.hadoop.fs.FileSystem): 12
HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext): 11