
Example 41 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestHoodieMultiTableDeltaStreamer method populateCommonPropsAndWriteToFile.

private String populateCommonPropsAndWriteToFile() throws IOException {
    TypedProperties commonProps = new TypedProperties();
    populateCommonProps(commonProps, dfsBasePath);
    UtilitiesTestBase.Helpers.savePropsToDFS(commonProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_PARQUET);
    return PROPS_FILENAME_TEST_PARQUET;
}
Also used : TypedProperties(org.apache.hudi.common.config.TypedProperties)

Example 42 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestHoodieMultiTableDeltaStreamer method ingestPerParquetSourceProps.

private void ingestPerParquetSourceProps(List<TableExecutionContext> executionContexts, List<String> parquetSourceRoots) {
    for (int i = 0; i < parquetSourceRoots.size(); i++) {
        TypedProperties properties = executionContexts.get(i).getProperties();
        TypedProperties parquetProps = getParquetProps(parquetSourceRoots.get(i));
        // copy every parquet source property into this table's execution context
        parquetProps.forEach((k, v) -> properties.setProperty(k.toString(), v.toString()));
        executionContexts.get(i).setProperties(properties);
    }
}
Also used : TypedProperties(org.apache.hudi.common.config.TypedProperties)

Example 43 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestKafkaConnectHdfsProvider method testMissingPartition.

@Test
public void testMissingPartition() throws Exception {
    Path topicPath = tempDir.resolve("topic2");
    Files.createDirectories(topicPath);
    // create regular kafka connect hdfs dirs
    new File(topicPath + "/year=2016/month=05/day=01/").mkdirs();
    new File(topicPath + "/year=2016/month=05/day=02/").mkdirs();
    // base files exist for partitions 0 and 2 but not 1, leaving a gap in the partition numbers
    new File(topicPath + "/year=2016/month=05/day=01/" + "topic1+0+100+200" + BASE_FILE_EXTENSION).createNewFile();
    new File(topicPath + "/year=2016/month=05/day=01/" + "topic1+2+100+200" + BASE_FILE_EXTENSION).createNewFile();
    new File(topicPath + "/year=2016/month=05/day=02/" + "topic1+0+201+300" + BASE_FILE_EXTENSION).createNewFile();
    final TypedProperties props = new TypedProperties();
    props.put("hoodie.deltastreamer.checkpoint.provider.path", topicPath.toString());
    final InitialCheckPointProvider provider = new KafkaConnectHdfsProvider(props);
    provider.init(HoodieTestUtils.getDefaultHadoopConf());
    assertThrows(HoodieException.class, provider::getCheckpoint);
}
Also used : Path(java.nio.file.Path) TypedProperties(org.apache.hudi.common.config.TypedProperties) File(java.io.File) Test(org.junit.jupiter.api.Test)
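The failure above comes from the gap in the partition numbers: base files exist for partitions 0 and 2 but not 1. A minimal sketch of such a contiguity check, assuming a simple set-based validation rather than the provider's actual logic (the class name and exception type here are illustrative):

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class PartitionGapSketch {
    public static void main(String[] args) {
        // partition numbers recovered from the file names above: 0 and 2, but no 1
        Set<Integer> partitions = new TreeSet<>(Arrays.asList(0, 2));
        int expected = 0;
        for (int partition : partitions) {
            if (partition != expected) {
                // the real provider surfaces this case as a HoodieException from getCheckpoint()
                throw new IllegalStateException("missing Kafka partition " + expected);
            }
            expected++;
        }
    }
}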

Example 44 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestKafkaConnectHdfsProvider method testValidKafkaConnectPath.

@Test
public void testValidKafkaConnectPath() throws Exception {
    // the standard layout (time-based partitioning) of files managed by Kafka Connect HDFS is:
    // topic/year=xxx/month=xxx/day=xxx/topic+partition+lowerOffset+upperOffset.file
    Path topicPath = tempDir.resolve("topic1");
    Files.createDirectories(topicPath);
    // create regular kafka connect hdfs dirs
    new File(topicPath + "/year=2016/month=05/day=01/").mkdirs();
    new File(topicPath + "/year=2016/month=05/day=02/").mkdirs();
    // kafka connect tmp folder
    new File(topicPath + "/TMP").mkdirs();
    // a tmp file that is still being written
    new File(topicPath + "/TMP/" + "topic1+0+301+400" + BASE_FILE_EXTENSION).createNewFile();
    // regular base files
    new File(topicPath + "/year=2016/month=05/day=01/" + "topic1+0+100+200" + BASE_FILE_EXTENSION).createNewFile();
    new File(topicPath + "/year=2016/month=05/day=01/" + "topic1+1+100+200" + BASE_FILE_EXTENSION).createNewFile();
    new File(topicPath + "/year=2016/month=05/day=02/" + "topic1+0+201+300" + BASE_FILE_EXTENSION).createNewFile();
    // noise base files that do not match the expected naming pattern
    new File(topicPath + "/year=2016/month=05/day=01/" + "random_snappy_1" + BASE_FILE_EXTENSION).createNewFile();
    new File(topicPath + "/year=2016/month=05/day=02/" + "random_snappy_2" + BASE_FILE_EXTENSION).createNewFile();
    final TypedProperties props = new TypedProperties();
    props.put("hoodie.deltastreamer.checkpoint.provider.path", topicPath.toString());
    final InitialCheckPointProvider provider = new KafkaConnectHdfsProvider(props);
    provider.init(HoodieTestUtils.getDefaultHadoopConf());
    // expected checkpoint: topic name followed by partition:latestUpperOffset pairs
    assertEquals("topic1,0:300,1:200", provider.getCheckpoint());
}
Also used : Path(java.nio.file.Path) TypedProperties(org.apache.hudi.common.config.TypedProperties) File(java.io.File) Test(org.junit.jupiter.api.Test)
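For context, the expected checkpoint "topic1,0:300,1:200" is the topic name followed by partition:maxUpperOffset pairs recovered from the committed file names. The following self-contained sketch shows one way to derive it; the class name, the regex, and the bare file names (without the base-file extension) are illustrative assumptions, not the actual KafkaConnectHdfsProvider implementation:

import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class KafkaConnectCheckpointSketch {
    // hypothetical pattern for "topic+partition+lowerOffset+upperOffset"
    private static final Pattern FILE_NAME = Pattern.compile("([\\w-]+)\\+(\\d+)\\+(\\d+)\\+(\\d+)");

    public static void main(String[] args) {
        String[] names = {"topic1+0+100+200", "topic1+1+100+200", "topic1+0+201+300", "random_snappy_1"};
        // partition -> highest upper offset seen so far, kept sorted by partition
        Map<Integer, Long> maxUpperOffsets = new TreeMap<>();
        String topic = null;
        for (String name : names) {
            Matcher m = FILE_NAME.matcher(name);
            if (!m.matches()) {
                continue; // noise files that do not match the pattern are ignored
            }
            topic = m.group(1);
            maxUpperOffsets.merge(Integer.parseInt(m.group(2)), Long.parseLong(m.group(4)), Math::max);
        }
        // assemble "topic,partition:offset,partition:offset"
        StringBuilder checkpoint = new StringBuilder(topic);
        maxUpperOffsets.forEach((p, o) -> checkpoint.append(',').append(p).append(':').append(o));
        System.out.println(checkpoint); // topic1,0:300,1:200
    }
}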

Example 45 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class HoodieDeltaStreamerTestBase method prepareInitialConfigs.

protected static void prepareInitialConfigs(FileSystem dfs, String dfsBasePath, String brokerAddress) throws IOException {
    // prepare the configs.
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/base.properties", dfs, dfsBasePath + "/base.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/base.properties", dfs, dfsBasePath + "/config/base.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/sql-transformer.properties", dfs, dfsBasePath + "/sql-transformer.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source.avsc", dfs, dfsBasePath + "/source.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source_evolved.avsc", dfs, dfsBasePath + "/source_evolved.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source-flattened.avsc", dfs, dfsBasePath + "/source-flattened.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/target.avsc", dfs, dfsBasePath + "/target.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/target-flattened.avsc", dfs, dfsBasePath + "/target-flattened.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source_short_trip_uber.avsc", dfs, dfsBasePath + "/source_short_trip_uber.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source_uber.avsc", dfs, dfsBasePath + "/source_uber.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/target_short_trip_uber.avsc", dfs, dfsBasePath + "/target_short_trip_uber.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/target_uber.avsc", dfs, dfsBasePath + "/target_uber.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/invalid_hive_sync_uber_config.properties", dfs, dfsBasePath + "/config/invalid_hive_sync_uber_config.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/uber_config.properties", dfs, dfsBasePath + "/config/uber_config.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/short_trip_uber_config.properties", dfs, dfsBasePath + "/config/short_trip_uber_config.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/clusteringjob.properties", dfs, dfsBasePath + "/clusteringjob.properties");
    writeCommonPropsToFile(dfs, dfsBasePath);
    // Properties used for the delta-streamer, which incrementally pulls from an upstream Hudi source table
    // and writes to a downstream Hudi table
    TypedProperties downstreamProps = new TypedProperties();
    downstreamProps.setProperty("include", "base.properties");
    downstreamProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
    downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
    // the source schema of the downstream job is the target schema of the upstream table
    downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/target.avsc");
    downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
    UtilitiesTestBase.Helpers.savePropsToDFS(downstreamProps, dfs, dfsBasePath + "/test-downstream-source.properties");
    // Properties used for testing invalid key generator
    TypedProperties invalidProps = new TypedProperties();
    invalidProps.setProperty("include", "sql-transformer.properties");
    invalidProps.setProperty("hoodie.datasource.write.keygenerator.class", "invalid");
    invalidProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
    invalidProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
    invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
    invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
    UtilitiesTestBase.Helpers.savePropsToDFS(invalidProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_INVALID);
    TypedProperties props1 = new TypedProperties();
    populateAllCommonProps(props1, dfsBasePath, brokerAddress);
    UtilitiesTestBase.Helpers.savePropsToDFS(props1, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE1);
    TypedProperties properties = new TypedProperties();
    populateInvalidTableConfigFilePathProps(properties, dfsBasePath);
    UtilitiesTestBase.Helpers.savePropsToDFS(properties, dfs, dfsBasePath + "/" + PROPS_INVALID_TABLE_CONFIG_FILE);
    TypedProperties invalidHiveSyncProps = new TypedProperties();
    invalidHiveSyncProps.setProperty("hoodie.deltastreamer.ingestion.tablesToBeIngested", "uber_db.dummy_table_uber");
    invalidHiveSyncProps.setProperty("hoodie.deltastreamer.ingestion.uber_db.dummy_table_uber.configFile", dfsBasePath + "/config/invalid_hive_sync_uber_config.properties");
    UtilitiesTestBase.Helpers.savePropsToDFS(invalidHiveSyncProps, dfs, dfsBasePath + "/" + PROPS_INVALID_HIVE_SYNC_TEST_SOURCE1);
}
Also used : TypedProperties(org.apache.hudi.common.config.TypedProperties)
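A common thread across these examples is that TypedProperties is essentially java.util.Properties plus typed accessors. As a minimal sketch of how values written above would typically be read back, assuming the getString and getBoolean(key, default) accessors present in recent Hudi releases:

import org.apache.hudi.common.config.TypedProperties;

public class TypedPropertiesUsageSketch {
    public static void main(String[] args) {
        TypedProperties props = new TypedProperties();
        props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
        props.setProperty("hoodie.datasource.hive_sync.enable", "false");
        // typed getters spare callers from parsing raw string values themselves;
        // these accessor signatures are assumed, not verified against every Hudi version
        String recordKeyField = props.getString("hoodie.datasource.write.recordkey.field");
        boolean hiveSyncEnabled = props.getBoolean("hoodie.datasource.hive_sync.enable", true);
        System.out.println(recordKeyField + ", hiveSyncEnabled=" + hiveSyncEnabled);
    }
}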

Aggregations

TypedProperties (org.apache.hudi.common.config.TypedProperties): 143
Test (org.junit.jupiter.api.Test): 47
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 22
JavaRDD (org.apache.spark.api.java.JavaRDD): 16
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 15
IOException (java.io.IOException): 14
Path (org.apache.hadoop.fs.Path): 14
Properties (java.util.Properties): 13
GenericRecord (org.apache.avro.generic.GenericRecord): 13
SourceFormatAdapter (org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter): 12
Row (org.apache.spark.sql.Row): 12
BeforeEach (org.junit.jupiter.api.BeforeEach): 11
ArrayList (java.util.ArrayList): 10
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 10
HoodieKey (org.apache.hudi.common.model.HoodieKey): 9
DFSPropertiesConfiguration (org.apache.hudi.common.config.DFSPropertiesConfiguration): 8
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 8
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 8
Dataset (org.apache.spark.sql.Dataset): 8
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 7