Search in sources :

Example 1 with HoodieCommonConfig

Use of org.apache.hudi.common.config.HoodieCommonConfig in the Apache Hudi project.

From the class TestRemoteHoodieTableFileSystemView, method getFileSystemView.

/**
 * Starts a local {@link TimelineService} on an ephemeral port (port 0) whose view manager uses
 * spillable-disk storage, then returns a remote file-system view connected to that server on
 * {@code localhost}.
 *
 * <p>The started server and the created view are kept in the {@code server} and {@code view} fields.
 *
 * @param timeline timeline supplied by the caller; this implementation does not read it
 * @return the remote view that was just created
 * @throws RuntimeException wrapping any exception raised while creating or starting the server
 */
protected SyncableFileSystemView getFileSystemView(HoodieTimeline timeline) {
    FileSystemViewStorageConfig storageConfig = FileSystemViewStorageConfig.newBuilder()
            .withStorageType(FileSystemViewStorageType.SPILLABLE_DISK)
            .build();
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
    HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().build();
    HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());

    try {
        // Each collaborator is built inside the try so that any failure is wrapped the same way.
        Configuration serverConf = new Configuration();
        TimelineService.Config serviceConfig = TimelineService.Config.builder().serverPort(0).build();
        FileSystem fileSystem = FileSystem.get(new Configuration());
        FileSystemViewManager viewManager =
                FileSystemViewManager.createViewManager(engineContext, metadataConfig, storageConfig, commonConfig);

        server = new TimelineService(engineContext, serverConf, serviceConfig, fileSystem, viewManager);
        server.startService();
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }

    LOG.info("Connecting to Timeline Server :" + server.getServerPort());
    view = new RemoteHoodieTableFileSystemView("localhost", server.getServerPort(), metaClient);
    return view;
}
Also used : FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) Configuration(org.apache.hadoop.conf.Configuration) TimelineService(org.apache.hudi.timeline.service.TimelineService) RemoteHoodieTableFileSystemView(org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext)

Example 2 with HoodieCommonConfig

Use of org.apache.hudi.common.config.HoodieCommonConfig in the Apache Hudi project.

From the class TimelineService, method buildFileSystemViewManager.

/**
 * Builds the {@link FileSystemViewManager} that matches {@code config.viewStorageType}.
 *
 * <p>Metadata and common configs are created with builder defaults. The storage config is
 * populated per storage type:
 * <ul>
 *   <li>{@code MEMORY}: storage type only;</li>
 *   <li>{@code SPILLABLE_DISK}: base store dir, max view memory (converted from MB to bytes) and
 *       the memory fraction for pending compaction, all taken from {@code config};</li>
 *   <li>{@code EMBEDDED_KV_STORE}: RocksDB path taken from {@code config}.</li>
 * </ul>
 *
 * @param config timeline service configuration selecting and parameterizing the view storage
 * @param conf   serializable configuration whose wrapped value is used to create the local engine context
 * @return a view manager configured for the selected storage type
 * @throws IllegalArgumentException if the storage type is not one of the three handled above
 */
public static FileSystemViewManager buildFileSystemViewManager(Config config, SerializableConfiguration conf) {
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(conf.get());
    // Just use defaults for now
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
    HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().build();
    switch (config.viewStorageType) {
        case MEMORY:
            {
                FileSystemViewStorageConfig.Builder inMemConfBuilder = FileSystemViewStorageConfig.newBuilder();
                inMemConfBuilder.withStorageType(FileSystemViewStorageType.MEMORY);
                return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, inMemConfBuilder.build(), commonConfig);
            }
        case SPILLABLE_DISK:
            {
                FileSystemViewStorageConfig.Builder spillableConfBuilder = FileSystemViewStorageConfig.newBuilder();
                // Both factors are long literals so the MB -> bytes conversion is done entirely in
                // 64-bit arithmetic; with "x * 1024 * 1024L" the first product would be evaluated in
                // int arithmetic (and could overflow) if the MB field is an int/Integer.
                spillableConfBuilder.withStorageType(FileSystemViewStorageType.SPILLABLE_DISK)
                        .withBaseStoreDir(config.baseStorePathForFileGroups)
                        .withMaxMemoryForView(config.maxViewMemPerTableInMB * 1024L * 1024L)
                        .withMemFractionForPendingCompaction(config.memFractionForCompactionPerTable);
                return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, spillableConfBuilder.build(), commonConfig);
            }
        case EMBEDDED_KV_STORE:
            {
                FileSystemViewStorageConfig.Builder rocksDBConfBuilder = FileSystemViewStorageConfig.newBuilder();
                rocksDBConfBuilder.withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).withRocksDBPath(config.rocksDBPath);
                return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, rocksDBConfBuilder.build(), commonConfig);
            }
        default:
            throw new IllegalArgumentException("Invalid view manager storage type :" + config.viewStorageType);
    }
}
Also used : FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext)

Example 3 with HoodieCommonConfig

Use of org.apache.hudi.common.config.HoodieCommonConfig in the Apache Hudi project.

From the class HoodieBackedTableMetadata, method getLogRecordScanner.

/**
 * Opens a merged log record reader over the given metadata log files for one partition and
 * reports how long opening it took.
 *
 * @param logFiles      log files to read; their paths are ordered with
 *                      {@code HoodieLogFile.getLogFileComparator()} before being handed to the reader
 * @param partitionName partition passed to the reader builder
 * @return a pair of the opened reader and the elapsed time returned by the timer
 *         (logged as milliseconds)
 */
public Pair<HoodieMetadataMergedLogRecordReader, Long> getLogRecordScanner(List<HoodieLogFile> logFiles, String partitionName) {
    HoodieTimer openTimer = new HoodieTimer().startTimer();

    List<String> orderedLogPaths = logFiles.stream()
            .sorted(HoodieLogFile.getLogFileComparator())
            .map(logFile -> logFile.getPath().toString())
            .collect(Collectors.toList());

    // Restrict reading to log blocks whose instant has completed on the dataset: the metadata
    // table is updated before the dataset instants are committed.
    Set<String> completedInstantTimes = getValidInstantTimestamps();

    // Fall back to SOLO_COMMIT_TIMESTAMP when the metadata timeline has no completed instant.
    Option<HoodieInstant> lastCompletedMetadataInstant = metadataMetaClient.getActiveTimeline()
            .filterCompletedInstants()
            .lastInstant();
    String latestInstantTime = lastCompletedMetadataInstant
            .map(HoodieInstant::getTimestamp)
            .orElse(SOLO_COMMIT_TIMESTAMP);

    // Reader schema: the metadata record schema with the metadata fields added.
    Schema readerSchema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());

    // Disk-map settings are resolved from the metadata config's properties.
    HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder()
            .fromProperties(metadataConfig.getProps())
            .build();

    HoodieMetadataMergedLogRecordReader mergedReader = HoodieMetadataMergedLogRecordReader.newBuilder()
            .withFileSystem(metadataMetaClient.getFs())
            .withBasePath(metadataBasePath)
            .withLogFilePaths(orderedLogPaths)
            .withReaderSchema(readerSchema)
            .withLatestInstantTime(latestInstantTime)
            .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES)
            .withBufferSize(BUFFER_SIZE)
            .withSpillableMapBasePath(spillableMapDirectory)
            .withDiskMapType(commonConfig.getSpillableDiskMapType())
            .withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
            .withLogBlockTimestamps(completedInstantTimes)
            .enableFullScan(metadataConfig.enableFullScan())
            .withPartition(partitionName)
            .build();

    Long elapsedMs = openTimer.endTimer();
    String openedMessage = String.format("Opened %d metadata log files (dataset instant=%s, metadata instant=%s) in %d ms",
            orderedLogPaths.size(), getLatestDataInstantTime(), latestInstantTime, elapsedMs);
    LOG.info(openedMessage);

    return Pair.of(mergedReader, elapsedMs);
}
Also used : HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) LinkedList(java.util.LinkedList) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) TableNotFoundException(org.apache.hudi.exception.TableNotFoundException) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) 
HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieMetadataRecord(org.apache.hudi.avro.model.HoodieMetadataRecord) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) SpillableMapUtils(org.apache.hudi.common.util.SpillableMapUtils) Collections(java.util.Collections) Pair(org.apache.hudi.common.util.collection.Pair) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) Schema(org.apache.avro.Schema) HoodieTimer(org.apache.hudi.common.util.HoodieTimer)

Aggregations

HoodieCommonConfig (org.apache.hudi.common.config.HoodieCommonConfig): 3; HoodieMetadataConfig (org.apache.hudi.common.config.HoodieMetadataConfig): 3; HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext): 2; FileSystemViewStorageConfig (org.apache.hudi.common.table.view.FileSystemViewStorageConfig): 2; IOException (java.io.IOException): 1; ArrayList (java.util.ArrayList): 1; Collections (java.util.Collections): 1; HashMap (java.util.HashMap): 1; LinkedList (java.util.LinkedList): 1; List (java.util.List): 1; Map (java.util.Map): 1; Set (java.util.Set): 1; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1; AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1; Collectors (java.util.stream.Collectors): 1; Schema (org.apache.avro.Schema): 1; GenericRecord (org.apache.avro.generic.GenericRecord): 1; Configuration (org.apache.hadoop.conf.Configuration): 1; Path (org.apache.hadoop.fs.Path): 1; HoodieAvroUtils (org.apache.hudi.avro.HoodieAvroUtils): 1