
Example 11 with HoodieLocalEngineContext

Use of org.apache.hudi.common.engine.HoodieLocalEngineContext in the hudi project by apache.

From the class TestFSUtils, the method testParallelizeSubPathProcessWithExistingDir:

@Test
public void testParallelizeSubPathProcessWithExistingDir() throws IOException {
    String rootDir = basePath + "/.hoodie/.temp";
    FileSystem fileSystem = metaClient.getFs();
    prepareTestDirectory(fileSystem, rootDir);
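    // Fan out over the subpaths of rootDir with parallelism 2; the filter below
    // skips entries whose name contains "1", leaving subdir2 and file3.txt.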
    Map<String, List<String>> result = FSUtils.parallelizeSubPathProcess(
        new HoodieLocalEngineContext(fileSystem.getConf()),
        fileSystem,
        new Path(rootDir),
        2,
        fileStatus -> !fileStatus.getPath().getName().contains("1"),
        pairOfSubPathAndConf -> {
        Path subPath = new Path(pairOfSubPathAndConf.getKey());
        List<String> listFiles = new ArrayList<>();
        try {
            FileSystem fs = subPath.getFileSystem(pairOfSubPathAndConf.getValue().get());
            FileStatus[] fileStatuses = fs.listStatus(subPath);
            listFiles = Arrays.stream(fileStatuses)
                .map(fileStatus -> fileStatus.getPath().getName())
                .collect(Collectors.toList());
        } catch (IOException e) {
            e.printStackTrace();
        }
        return listFiles;
    });
    assertEquals(2, result.size());
    for (String subPath : result.keySet()) {
        if (subPath.contains("subdir2")) {
            assertEquals(Collections.singletonList("file2.txt"), result.get(subPath));
        } else if (subPath.contains("file3")) {
            assertEquals(Collections.singletonList("file3.txt"), result.get(subPath));
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), List (java.util.List), HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext), IOException (java.io.IOException), HoodieIOException (org.apache.hudi.exception.HoodieIOException), Test (org.junit.jupiter.api.Test)
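The engine context is what lets FSUtils.parallelizeSubPathProcess fan out over local threads instead of a Spark or Flink cluster. Below is a minimal standalone sketch of driving the same context directly, assuming only the generic map(list, function, parallelism) API that HoodieEngineContext exposes; the class name LocalEngineContextSketch is ours:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;

public class LocalEngineContextSketch {
    public static void main(String[] args) {
        // HoodieLocalEngineContext only needs a Hadoop Configuration.
        HoodieLocalEngineContext ctx = new HoodieLocalEngineContext(new Configuration());
        // map(...) applies the function to each element with the given
        // parallelism, backed by local parallel streams rather than a cluster.
        List<Integer> lengths = ctx.map(
            Arrays.asList("subdir1", "subdir2", "file3.txt"), String::length, 2);
        System.out.println(lengths); // [7, 7, 9]
    }
}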

Example 12 with HoodieLocalEngineContext

Use of org.apache.hudi.common.engine.HoodieLocalEngineContext in the hudi project by apache.

From the class TestFileSystemBackedTableMetadata, the method testOneLevelPartitionedTable:

@Test
public void testOneLevelPartitionedTable() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Generate 10 files under each partition
    ONE_LEVEL_PARTITIONS.forEach(p -> {
        try {
            hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p)
                .withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
        new FileSystemBackedTableMetadata(localEngineContext,
            new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
    Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
    Assertions.assertEquals(10, fileSystemBackedTableMetadata
        .getAllFilesInPartition(new Path(basePath + "/" + ONE_LEVEL_PARTITIONS.get(0))).length);
    List<String> fullPartitionPaths = ONE_LEVEL_PARTITIONS.stream()
        .map(p -> basePath + "/" + p).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    for (String p : fullPartitionPaths) {
        Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), IntStream (java.util.stream.IntStream), BeforeEach (org.junit.jupiter.api.BeforeEach), Arrays (java.util.Arrays), HoodieTestTable (org.apache.hudi.common.testutils.HoodieTestTable), IOException (java.io.IOException), FileStatus (org.apache.hadoop.fs.FileStatus), HoodieCommonTestHarness (org.apache.hudi.common.testutils.HoodieCommonTestHarness), Collectors (java.util.stream.Collectors), Test (org.junit.jupiter.api.Test), AfterEach (org.junit.jupiter.api.AfterEach), List (java.util.List), Map (java.util.Map), SerializableConfiguration (org.apache.hudi.common.config.SerializableConfiguration), Assertions (org.junit.jupiter.api.Assertions), HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext), Collections (java.util.Collections)
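Outside of a test harness, the same two-argument recipe (a local engine context plus a SerializableConfiguration) can point the metadata reader at any existing table. A hedged sketch, using only the constructor and listing calls exercised by the test above; the table path is a placeholder:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.metadata.FileSystemBackedTableMetadata;

public class ListPartitionsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String tablePath = "/tmp/hoodie_table"; // placeholder base path
        FileSystemBackedTableMetadata metadata = new FileSystemBackedTableMetadata(
            new HoodieLocalEngineContext(conf), new SerializableConfiguration(conf),
            tablePath, false); // false = do not assume date partitioning
        // Walks the table's directory tree to discover partition paths.
        List<String> partitions = metadata.getAllPartitionPaths();
        partitions.forEach(System.out::println);
    }
}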

Example 13 with HoodieLocalEngineContext

Use of org.apache.hudi.common.engine.HoodieLocalEngineContext in the hudi project by apache.

From the class TestFileSystemBackedTableMetadata, the method testMultiLevelPartitionedTable:

@Test
public void testMultiLevelPartitionedTable() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Generate 10 files under each partition
    MULTI_LEVEL_PARTITIONS.forEach(p -> {
        try {
            hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p)
                .withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
        new FileSystemBackedTableMetadata(localEngineContext,
            new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
    Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
    Assertions.assertEquals(10, fileSystemBackedTableMetadata
        .getAllFilesInPartition(new Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length);
    List<String> fullPartitionPaths = MULTI_LEVEL_PARTITIONS.stream()
        .map(p -> basePath + "/" + p).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    for (String p : fullPartitionPaths) {
        Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), IntStream (java.util.stream.IntStream), BeforeEach (org.junit.jupiter.api.BeforeEach), Arrays (java.util.Arrays), HoodieTestTable (org.apache.hudi.common.testutils.HoodieTestTable), IOException (java.io.IOException), FileStatus (org.apache.hadoop.fs.FileStatus), HoodieCommonTestHarness (org.apache.hudi.common.testutils.HoodieCommonTestHarness), Collectors (java.util.stream.Collectors), Test (org.junit.jupiter.api.Test), AfterEach (org.junit.jupiter.api.AfterEach), List (java.util.List), Map (java.util.Map), SerializableConfiguration (org.apache.hudi.common.config.SerializableConfiguration), Assertions (org.junit.jupiter.api.Assertions), HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext), Collections (java.util.Collections)
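The partition fixtures themselves are defined elsewhere in TestFileSystemBackedTableMetadata. Hypothetical definitions, consistent with the assertions above (three partitions per list, with the multi-level paths nested three segments deep); the real constants may differ:

private static final List<String> ONE_LEVEL_PARTITIONS =
    Arrays.asList("2020", "2021", "2022");
private static final List<String> MULTI_LEVEL_PARTITIONS =
    Arrays.asList("2020/01/01", "2021/02/02", "2022/03/03");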

Example 14 with HoodieLocalEngineContext

Use of org.apache.hudi.common.engine.HoodieLocalEngineContext in the hudi project by apache.

From the class TimelineService, the method buildFileSystemViewManager:

public static FileSystemViewManager buildFileSystemViewManager(Config config, SerializableConfiguration conf) {
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(conf.get());
    // Just use defaults for now
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
    HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().build();
    switch (config.viewStorageType) {
        case MEMORY:
            FileSystemViewStorageConfig.Builder inMemConfBuilder = FileSystemViewStorageConfig.newBuilder();
            inMemConfBuilder.withStorageType(FileSystemViewStorageType.MEMORY);
            return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, inMemConfBuilder.build(), commonConfig);
        case SPILLABLE_DISK:
            {
                FileSystemViewStorageConfig.Builder spillableConfBuilder = FileSystemViewStorageConfig.newBuilder();
                spillableConfBuilder.withStorageType(FileSystemViewStorageType.SPILLABLE_DISK)
                    .withBaseStoreDir(config.baseStorePathForFileGroups)
                    .withMaxMemoryForView(config.maxViewMemPerTableInMB * 1024 * 1024L)
                    .withMemFractionForPendingCompaction(config.memFractionForCompactionPerTable);
                return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, spillableConfBuilder.build(), commonConfig);
            }
        case EMBEDDED_KV_STORE:
            {
                FileSystemViewStorageConfig.Builder rocksDBConfBuilder = FileSystemViewStorageConfig.newBuilder();
                rocksDBConfBuilder.withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE)
                    .withRocksDBPath(config.rocksDBPath);
                return FileSystemViewManager.createViewManager(localEngineContext, metadataConfig, rocksDBConfBuilder.build(), commonConfig);
            }
        default:
            throw new IllegalArgumentException("Invalid view manager storage type :" + config.viewStorageType);
    }
}
Also used: FileSystemViewStorageConfig (org.apache.hudi.common.table.view.FileSystemViewStorageConfig), HoodieMetadataConfig (org.apache.hudi.common.config.HoodieMetadataConfig), HoodieCommonConfig (org.apache.hudi.common.config.HoodieCommonConfig), HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext)
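The returned manager hands out a file-system view per table, which is how the timeline service answers view requests. A hedged consumption sketch; we assume the getFileSystemView(String) and getLatestBaseFiles() accessors, and the table path is a placeholder:

import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.SyncableFileSystemView;
import org.apache.hudi.timeline.service.TimelineService;

public class ViewManagerSketch {
    static void printLatestBaseFiles(TimelineService.Config config, SerializableConfiguration conf) {
        FileSystemViewManager viewManager =
            TimelineService.buildFileSystemViewManager(config, conf);
        // One view per table base path; backing storage follows config.viewStorageType.
        SyncableFileSystemView view = viewManager.getFileSystemView("/tmp/hoodie_table");
        view.getLatestBaseFiles().forEach(baseFile -> System.out.println(baseFile.getPath()));
    }
}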

Example 15 with HoodieLocalEngineContext

Use of org.apache.hudi.common.engine.HoodieLocalEngineContext in the hudi project by apache.

From the class TestTimelineServerBasedWriteMarkers, the method setup:

@BeforeEach
public void setup() throws IOException {
    initPath();
    initMetaClient();
    this.jsc = new JavaSparkContext(
        HoodieClientTestUtils.getSparkConfForTest(TestTimelineServerBasedWriteMarkers.class.getName()));
    this.context = new HoodieSparkEngineContext(jsc);
    this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
    this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000"));
    FileSystemViewStorageConfig storageConf = FileSystemViewStorageConfig.newBuilder()
        .withStorageType(FileSystemViewStorageType.SPILLABLE_DISK).build();
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    try {
        timelineService = new TimelineService(
            localEngineContext,
            new Configuration(),
            TimelineService.Config.builder().serverPort(0).enableMarkerRequests(true).build(),
            FileSystem.get(new Configuration()),
            FileSystemViewManager.createViewManager(
                localEngineContext, metadataConfig, storageConf, HoodieCommonConfig.newBuilder().build()));
        timelineService.startService();
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    this.writeMarkers = new TimelineServerBasedWriteMarkers(
        metaClient.getBasePath(), markerFolderPath.toString(), "000",
        "localhost", timelineService.getServerPort(), 300);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystemViewStorageConfig (org.apache.hudi.common.table.view.FileSystemViewStorageConfig), HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext), HoodieMetadataConfig (org.apache.hudi.common.config.HoodieMetadataConfig), Configuration (org.apache.hadoop.conf.Configuration), TimelineService (org.apache.hudi.timeline.service.TimelineService), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext), IOException (java.io.IOException), BeforeEach (org.junit.jupiter.api.BeforeEach)
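With the server up, tests create markers over HTTP via writeMarkers (e.g. writeMarkers.create(partitionPath, fileName, IOType.MERGE)), and the server and Spark context must be released afterwards. A hedged sketch of the matching teardown, assuming TimelineService exposes close(); the real @AfterEach in the test may differ:

@AfterEach
public void tearDown() {
    // Stop the embedded timeline server started in setup(), then the local
    // Spark context; both would otherwise leak across tests.
    if (timelineService != null) {
        timelineService.close();
    }
    jsc.stop();
}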

Aggregations

HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext): 20 usages
Path (org.apache.hadoop.fs.Path): 15 usages
Map (java.util.Map): 10 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 10 usages
Test (org.junit.jupiter.api.Test): 10 usages
IOException (java.io.IOException): 9 usages
List (java.util.List): 9 usages
SerializableConfiguration (org.apache.hudi.common.config.SerializableConfiguration): 8 usages
Arrays (java.util.Arrays): 7 usages
Collections (java.util.Collections): 7 usages
HoodieMetadataConfig (org.apache.hudi.common.config.HoodieMetadataConfig): 7 usages
BeforeEach (org.junit.jupiter.api.BeforeEach): 7 usages
ArrayList (java.util.ArrayList): 6 usages
Collectors (java.util.stream.Collectors): 6 usages
HoodieCommonTestHarness (org.apache.hudi.common.testutils.HoodieCommonTestHarness): 6 usages
IntStream (java.util.stream.IntStream): 5 usages
HoodieTestTable (org.apache.hudi.common.testutils.HoodieTestTable): 5 usages
AfterEach (org.junit.jupiter.api.AfterEach): 5 usages
Assertions (org.junit.jupiter.api.Assertions): 5 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages