Search in sources :

Example 1 with RemoteHoodieTableFileSystemView

use of org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView in project hudi by apache.

The method getFileSystemView of the class TestRemoteHoodieTableFileSystemView.

/**
 * Starts a {@link TimelineService} and returns a {@link RemoteHoodieTableFileSystemView}
 * that talks to it over {@code localhost}.
 *
 * <p>The service is created with a spillable-disk view storage config, default metadata and
 * common configs, and a server port setting of {@code 0}; the port actually used is read back
 * from the started service. The created service and view are also stored in {@code server}
 * and {@code view}, which are declared outside this method.
 *
 * @param timeline not referenced by this implementation
 * @return the remote file-system view connected to the freshly started service
 * @throws RuntimeException wrapping any exception raised while creating or starting the service
 */
protected SyncableFileSystemView getFileSystemView(HoodieTimeline timeline) {
    FileSystemViewStorageConfig storageConfig = FileSystemViewStorageConfig.newBuilder()
        .withStorageType(FileSystemViewStorageType.SPILLABLE_DISK)
        .build();
    HoodieMetadataConfig metadataCfg = HoodieMetadataConfig.newBuilder().build();
    HoodieCommonConfig commonCfg = HoodieCommonConfig.newBuilder().build();
    HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());

    try {
        Configuration serviceConf = new Configuration();
        server = new TimelineService(
            engineContext,
            serviceConf,
            TimelineService.Config.builder().serverPort(0).build(),
            FileSystem.get(new Configuration()),
            FileSystemViewManager.createViewManager(engineContext, metadataCfg, storageConfig, commonCfg));
        server.startService();
    } catch (Exception startupFailure) {
        // Surface any startup problem as an unchecked exception, keeping the cause.
        throw new RuntimeException(startupFailure);
    }

    LOG.info("Connecting to Timeline Server :" + server.getServerPort());
    view = new RemoteHoodieTableFileSystemView("localhost", server.getServerPort(), metaClient);
    return view;
}
Also used : FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) Configuration(org.apache.hadoop.conf.Configuration) TimelineService(org.apache.hudi.timeline.service.TimelineService) RemoteHoodieTableFileSystemView(org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext)

Example 2 with RemoteHoodieTableFileSystemView

use of org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView in project hudi by apache.

The method run of the class TimelineServerPerf.

/**
 * Runs the timeline-server lookup benchmark and writes two CSV reports.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>List all partition paths under {@code cfg.basePath}, shuffle them, drop those whose
 *       path contains {@code "error"}, and keep at most {@code cfg.maxPartitions}.</li>
 *   <li>Unless an external timeline server is used, start the embedded one and derive the host
 *       address from the Spark conf; otherwise use {@code cfg.serverHost}.</li>
 *   <li>Build a {@link RemoteHoodieTableFileSystemView} and run the lookups twice: a first
 *       single iteration, then {@code cfg.numIterations} iterations. Each run is dumped to its
 *       own CSV file under {@code cfg.reportDir}.</li>
 *   <li>Print every file slice of the selected partitions.</li>
 *   <li>If the embedded server is in use and {@code cfg.waitForManualQueries} is set, block so
 *       that manual queries can be issued, until the thread is interrupted.</li>
 * </ol>
 *
 * @throws IOException if a file-system or dump operation fails
 */
public void run() throws IOException {
    JavaSparkContext jsc = UtilHelpers.buildSparkContext("hudi-view-perf-" + cfg.basePath, cfg.sparkMaster);
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    List<String> allPartitionPaths = FSUtils.getAllPartitionPaths(engineContext, cfg.basePath, cfg.useFileListingFromMetadata, true);
    Collections.shuffle(allPartitionPaths);
    List<String> selected = allPartitionPaths.stream()
        .filter(p -> !p.contains("error"))
        .limit(cfg.maxPartitions)
        .collect(Collectors.toList());
    if (!useExternalTimelineServer) {
        this.timelineServer.startService();
        setHostAddrFromSparkConf(jsc.getConf());
    } else {
        this.hostAddr = cfg.serverHost;
    }
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(timelineServer.getConf())
        .setBasePath(cfg.basePath)
        .setLoadActiveTimelineOnLoad(true)
        .build();
    SyncableFileSystemView fsView = new RemoteHoodieTableFileSystemView(this.hostAddr, cfg.serverPort, metaClient);
    String reportDir = cfg.reportDir;
    metaClient.getFs().mkdirs(new Path(reportDir));
    String dumpPrefix = UUID.randomUUID().toString();

    System.out.println("First Iteration to load all partitions");
    Dumper d = new Dumper(metaClient.getFs(), new Path(reportDir, String.format("1_%s.csv", dumpPrefix)));
    d.init();
    try {
        d.dump(runLookups(jsc, selected, fsView, 1, 0));
    } finally {
        // Close even when the lookups or the dump fail, so the report file is not leaked.
        d.close();
    }
    System.out.println("\n\n\n First Iteration is done");

    Dumper d2 = new Dumper(metaClient.getFs(), new Path(reportDir, String.format("2_%s.csv", dumpPrefix)));
    d2.init();
    try {
        d2.dump(runLookups(jsc, selected, fsView, cfg.numIterations, cfg.numCoresPerExecutor));
    } finally {
        d2.close();
    }

    System.out.println("\n\n\nDumping all File Slices");
    selected.forEach(p -> fsView.getAllFileSlices(p).forEach(s -> System.out.println("\tMyFileSlice=" + s)));

    // Waiting for curl queries
    if (!useExternalTimelineServer && cfg.waitForManualQueries) {
        System.out.println("Timeline Server Host Address=" + hostAddr + ", port=" + timelineServer.getServerPort());
        while (true) {
            try {
                Thread.sleep(60000);
            } catch (InterruptedException e) {
                // Interruption is the only way to end this wait: restore the flag for
                // callers and stop, instead of swallowing it and sleeping forever.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Path(org.apache.hadoop.fs.Path) IntStream(java.util.stream.IntStream) Histogram(com.codahale.metrics.Histogram) ScheduledFuture(java.util.concurrent.ScheduledFuture) Parameter(com.beust.jcommander.Parameter) FileSystem(org.apache.hadoop.fs.FileSystem) FileSlice(org.apache.hudi.common.model.FileSlice) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) Random(java.util.Random) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) ArrayList(java.util.ArrayList) TimelineService(org.apache.hudi.timeline.service.TimelineService) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) UniformReservoir(com.codahale.metrics.UniformReservoir) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) FileSystemViewStorageType(org.apache.hudi.common.table.view.FileSystemViewStorageType) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) UtilHelpers(org.apache.hudi.utilities.UtilHelpers) JCommander(com.beust.jcommander.JCommander) SparkConf(org.apache.spark.SparkConf) IOException(java.io.IOException) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) RemoteHoodieTableFileSystemView(org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Snapshot(com.codahale.metrics.Snapshot) Serializable(java.io.Serializable) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) 
FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) RemoteHoodieTableFileSystemView(org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Aggregations

Configuration (org.apache.hadoop.conf.Configuration)2 HoodieMetadataConfig (org.apache.hudi.common.config.HoodieMetadataConfig)2 HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext)2 FileSystemViewStorageConfig (org.apache.hudi.common.table.view.FileSystemViewStorageConfig)2 RemoteHoodieTableFileSystemView (org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView)2 TimelineService (org.apache.hudi.timeline.service.TimelineService)2 JCommander (com.beust.jcommander.JCommander)1 Parameter (com.beust.jcommander.Parameter)1 Histogram (com.codahale.metrics.Histogram)1 Snapshot (com.codahale.metrics.Snapshot)1 UniformReservoir (com.codahale.metrics.UniformReservoir)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 List (java.util.List)1 Random (java.util.Random)1 UUID (java.util.UUID)1 ExecutionException (java.util.concurrent.ExecutionException)1 ScheduledFuture (java.util.concurrent.ScheduledFuture)1