Search in sources :

Example 6 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in the Apache Hudi project.

From the class TestFileSystemBackedTableMetadata, method testDatePartitionedTableWithAssumeDateIsFalse:

/**
 * Test listing of partitions for date based partitions with assumeDatePartitioning = false.
 *
 * @throws Exception if table setup or metadata listing fails
 */
@Test
public void testDatePartitionedTableWithAssumeDateIsFalse() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Generate 10 base files under each date partition; partition meta files are written
    // so the listing can succeed without date-based path assumptions.
    DATE_PARTITIONS.forEach(p -> {
        try {
            hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p).withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
        } catch (Exception e) {
            // Lambdas cannot throw checked exceptions; rethrow unchecked to fail the test.
            throw new RuntimeException(e);
        }
    });
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    // Last argument (false) disables assumeDatePartitioning; discovery presumably relies on the
    // partition meta files written above — note the withPartitionMetaFiles setup.
    FileSystemBackedTableMetadata fileSystemBackedTableMetadata = new FileSystemBackedTableMetadata(localEngineContext, new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
    Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
    List<String> fullPartitionPaths = DATE_PARTITIONS.stream().map(p -> basePath + "/" + p).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    // Every partition should report exactly the 10 files generated above.
    for (String p : fullPartitionPaths) {
        Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
    }
}
Also used : IntStream(java.util.stream.IntStream) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) IOException(java.io.IOException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Map(java.util.Map) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Assertions(org.junit.jupiter.api.Assertions) Path(org.apache.hadoop.fs.Path) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) Collections(java.util.Collections) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) IOException(java.io.IOException) Test(org.junit.jupiter.api.Test)

Example 7 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in the Apache Hudi project.

From the class TestFileSystemBackedTableMetadata, method testDatePartitionedTable:

/**
 * Verifies partition listing for date based partitions (assumeDatePartitioning enabled).
 *
 * @throws Exception if table setup or metadata listing fails
 */
@Test
public void testDatePartitionedTable() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Seed every date partition with 10 base files.
    for (String partition : DATE_PARTITIONS) {
        try {
            hoodieTestTable = hoodieTestTable.withBaseFilesInPartition(partition, IntStream.range(0, 10).toArray());
        } catch (Exception e) {
            // Preserve original behavior: any setup failure surfaces as a RuntimeException.
            throw new RuntimeException(e);
        }
    }
    HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    SerializableConfiguration serializableConf = new SerializableConfiguration(metaClient.getHadoopConf());
    // Last argument (true) enables assumeDatePartitioning.
    FileSystemBackedTableMetadata tableMetadata = new FileSystemBackedTableMetadata(engineContext, serializableConf, basePath, true);
    Assertions.assertEquals(3, tableMetadata.getAllPartitionPaths().size());
    Path firstPartition = new Path(basePath + "/" + DATE_PARTITIONS.get(0));
    Assertions.assertEquals(10, tableMetadata.getAllFilesInPartition(firstPartition).length);
    List<String> fullPartitionPaths = DATE_PARTITIONS.stream().map(partition -> basePath + "/" + partition).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = tableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    // Each partition must report the 10 files created during setup.
    for (String fullPath : fullPartitionPaths) {
        Assertions.assertEquals(10, partitionToFilesMap.get(fullPath).length);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IntStream(java.util.stream.IntStream) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) IOException(java.io.IOException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Map(java.util.Map) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Assertions(org.junit.jupiter.api.Assertions) Path(org.apache.hadoop.fs.Path) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) Collections(java.util.Collections) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) IOException(java.io.IOException) Test(org.junit.jupiter.api.Test)

Example 8 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in the Apache Hudi project.

From the class TestFileSystemBackedTableMetadata, method testMultiLevelEmptyPartitionTable:

/**
 * Verifies listing of multi-level partitions that contain no data files: the partitions
 * must still be discovered (via their meta files) and report zero files each.
 *
 * @throws Exception if table setup or metadata listing fails
 */
@Test
public void testMultiLevelEmptyPartitionTable() throws Exception {
    String instant = "100";
    hoodieTestTable = hoodieTestTable.addCommit(instant);
    // Create the partition meta files only — no base files are written.
    for (String partition : MULTI_LEVEL_PARTITIONS) {
        try {
            hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(partition);
        } catch (Exception e) {
            // Preserve original behavior: any setup failure surfaces as a RuntimeException.
            throw new RuntimeException(e);
        }
    }
    HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
    SerializableConfiguration serializableConf = new SerializableConfiguration(metaClient.getHadoopConf());
    FileSystemBackedTableMetadata tableMetadata = new FileSystemBackedTableMetadata(engineContext, serializableConf, basePath, false);
    Assertions.assertEquals(3, tableMetadata.getAllPartitionPaths().size());
    Path firstPartition = new Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0));
    Assertions.assertEquals(0, tableMetadata.getAllFilesInPartition(firstPartition).length);
    List<String> fullPartitionPaths = MULTI_LEVEL_PARTITIONS.stream().map(partition -> basePath + "/" + partition).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = tableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    // Empty partitions are present in the map but hold no files.
    for (String fullPath : fullPartitionPaths) {
        Assertions.assertEquals(0, partitionToFilesMap.get(fullPath).length);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IntStream(java.util.stream.IntStream) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) IOException(java.io.IOException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Map(java.util.Map) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Assertions(org.junit.jupiter.api.Assertions) Path(org.apache.hadoop.fs.Path) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) Collections(java.util.Collections) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) IOException(java.io.IOException) Test(org.junit.jupiter.api.Test)

Example 9 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in the Apache Hudi project.

From the class BucketAssignFunction, method open:

/**
 * Initializes the function: builds the write config, the Flink engine context, the
 * bucket assigner for this subtask, and the payload creation helper.
 */
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    HoodieWriteConfig writeConfig = StreamerUtil.getHoodieClientConfig(this.conf, true);
    SerializableConfiguration hadoopConf = new SerializableConfiguration(StreamerUtil.getHadoopConf());
    FlinkTaskContextSupplier contextSupplier = new FlinkTaskContextSupplier(getRuntimeContext());
    HoodieFlinkEngineContext engineContext = new HoodieFlinkEngineContext(hadoopConf, contextSupplier);
    int taskId = getRuntimeContext().getIndexOfThisSubtask();
    int maxParallelism = getRuntimeContext().getMaxNumberOfParallelSubtasks();
    int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
    // ignoreSmallFiles() and the table-type lookup are evaluated at the call, matching
    // the original argument-evaluation order.
    this.bucketAssigner = BucketAssigners.create(taskId, maxParallelism, parallelism, ignoreSmallFiles(), HoodieTableType.valueOf(conf.getString(FlinkOptions.TABLE_TYPE)), engineContext, writeConfig);
    this.payloadCreation = PayloadCreation.instance(this.conf);
}
Also used : SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieFlinkEngineContext(org.apache.hudi.client.common.HoodieFlinkEngineContext) FlinkTaskContextSupplier(org.apache.hudi.client.FlinkTaskContextSupplier)

Example 10 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in the Apache Hudi project.

From the class TimelineService, method main:

/**
 * Command-line entry point: parses arguments, builds the file-system view manager,
 * and starts the timeline service.
 *
 * @param args command-line arguments parsed into {@link Config} by JCommander
 * @throws Exception if service construction or startup fails
 */
public static void main(String[] args) throws Exception {
    final Config cfg = new Config();
    JCommander cmd = new JCommander(cfg, null, args);
    if (cfg.help) {
        cmd.usage();
        // NOTE(review): exits with status 1 even for --help; confirm this is intended.
        System.exit(1);
    }
    Configuration hadoopConf = FSUtils.prepareHadoopConf(new Configuration());
    SerializableConfiguration serializableConf = new SerializableConfiguration(hadoopConf);
    FileSystemViewManager viewManager = buildFileSystemViewManager(cfg, serializableConf);
    // NOTE(review): several fresh Configuration instances are created below, matching the
    // original code; confirm whether a single shared instance would be acceptable.
    HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(FSUtils.prepareHadoopConf(new Configuration()));
    TimelineService service = new TimelineService(engineContext, new Configuration(), cfg, FileSystem.get(new Configuration()), viewManager);
    service.run();
}
Also used : FileSystemViewManager(org.apache.hudi.common.table.view.FileSystemViewManager) Configuration(org.apache.hadoop.conf.Configuration) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) JCommander(com.beust.jcommander.JCommander) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext)

Aggregations

SerializableConfiguration (org.apache.hudi.common.config.SerializableConfiguration)32 Path (org.apache.hadoop.fs.Path)20 FileSystem (org.apache.hadoop.fs.FileSystem)16 FileStatus (org.apache.hadoop.fs.FileStatus)15 List (java.util.List)14 IOException (java.io.IOException)13 Collectors (java.util.stream.Collectors)13 Map (java.util.Map)12 Test (org.junit.jupiter.api.Test)12 ArrayList (java.util.ArrayList)11 LogManager (org.apache.log4j.LogManager)10 Logger (org.apache.log4j.Logger)10 HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext)9 Option (org.apache.hudi.common.util.Option)9 Arrays (java.util.Arrays)8 HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext)8 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)8 HoodieIOException (org.apache.hudi.exception.HoodieIOException)7 Collections (java.util.Collections)6 Configuration (org.apache.hadoop.conf.Configuration)6