Example 31 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From class TestHoodieDeltaStreamer, method testDistributedTestDataSource.

@Test
public void testDistributedTestDataSource() {
    TypedProperties props = new TypedProperties();
    props.setProperty(SourceConfigs.MAX_UNIQUE_RECORDS_PROP, "1000");
    props.setProperty(SourceConfigs.NUM_SOURCE_PARTITIONS_PROP, "1");
    props.setProperty(SourceConfigs.USE_ROCKSDB_FOR_TEST_DATAGEN_KEYS, "true");
    DistributedTestDataSource distributedTestDataSource = new DistributedTestDataSource(props, jsc, sparkSession, null);
    InputBatch<JavaRDD<GenericRecord>> batch = distributedTestDataSource.fetchNext(Option.empty(), 10000000);
    batch.getBatch().get().cache();
    long c = batch.getBatch().get().count();
    assertEquals(1000, c);
}
Also used : TypedProperties(org.apache.hudi.common.config.TypedProperties) DistributedTestDataSource(org.apache.hudi.utilities.testutils.sources.DistributedTestDataSource) JavaRDD(org.apache.spark.api.java.JavaRDD) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
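
TypedProperties stores every value as a string (note the quoted numbers above) and layers typed getters on top. Below is a minimal sketch of that round trip, assuming the getInteger/getBoolean overloads with default values that recent Hudi releases expose; the property keys here are made up for illustration:

import org.apache.hudi.common.config.TypedProperties;

public class TypedPropertiesSketch {
    public static void main(String[] args) {
        TypedProperties props = new TypedProperties();
        // Values go in as strings, like the test above...
        props.setProperty("example.max.records", "1000");
        props.setProperty("example.num.partitions", "1");
        // ...and come back out with the type the caller needs, falling back
        // to a default when the key is absent (getter signatures assumed
        // from recent Hudi releases).
        int maxRecords = props.getInteger("example.max.records", 100);
        int partitions = props.getInteger("example.num.partitions", 4);
        boolean useRocksDb = props.getBoolean("example.use.rocksdb", false);
        System.out.printf("maxRecords=%d partitions=%d rocksdb=%b%n", maxRecords, partitions, useRocksDb);
    }
}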

Example 32 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From class TestHoodieDeltaStreamer, method testFetchingCheckpointFromPreviousCommits.

@Test
public void testFetchingCheckpointFromPreviousCommits() throws IOException {
    HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(dfsBasePath + "/testFetchPreviousCheckpoint", WriteOperationType.BULK_INSERT);
    TypedProperties properties = new TypedProperties();
    properties.setProperty("hoodie.datasource.write.recordkey.field", "key");
    properties.setProperty("hoodie.datasource.write.partitionpath.field", "pp");
    TestDeltaSync testDeltaSync = new TestDeltaSync(cfg, sparkSession, null, properties, jsc, dfs, jsc.hadoopConfiguration(), null);
    properties.put(HoodieTableConfig.NAME.key(), "sample_tbl");
    HoodieTableMetaClient metaClient = HoodieTestUtils.init(jsc.hadoopConfiguration(), dfsBasePath, HoodieTableType.COPY_ON_WRITE, properties);
    Map<String, String> extraMetadata = new HashMap<>();
    extraMetadata.put(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY, "abc");
    addCommitToTimeline(metaClient, extraMetadata);
    metaClient.reloadActiveTimeline();
    assertEquals("abc", testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline().getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY));
    extraMetadata.put(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY, "def");
    addCommitToTimeline(metaClient, extraMetadata);
    metaClient.reloadActiveTimeline();
    assertEquals("def", testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline().getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY));
    // add a replace commit which does not have CHECKPOINT_KEY. DeltaStreamer should be able to go back and pick the right checkpoint.
    addReplaceCommitToTimeline(metaClient, Collections.emptyMap());
    metaClient.reloadActiveTimeline();
    assertEquals("def", testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline().getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY));
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieDeltaStreamer(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer) HashMap(java.util.HashMap) TypedProperties(org.apache.hudi.common.config.TypedProperties) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
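
The part worth noticing is the final assertion: the replace commit carries no checkpoint metadata, so the lookup has to skip it and fall back to the newest commit that does, still returning "def". Here is a minimal sketch of that fallback using plain collections rather than the actual Hudi timeline API (the method and types below are illustrative only):

import java.util.List;
import java.util.Map;
import java.util.Optional;

public class CheckpointFallbackSketch {
    // Each commit's extra metadata is modeled as a Map; the newest commit is last.
    static Optional<Map<String, String>> latestWithCheckpoint(List<Map<String, String>> commits, String checkpointKey) {
        // Walk the timeline from newest to oldest and stop at the first
        // commit that actually carries a checkpoint entry.
        for (int i = commits.size() - 1; i >= 0; i--) {
            if (commits.get(i).containsKey(checkpointKey)) {
                return Optional.of(commits.get(i));
            }
        }
        return Optional.empty();
    }
}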

Example 33 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From class TestHoodieDeltaStreamer, method prepareSqlSource.

private void prepareSqlSource() throws IOException {
    String sourceRoot = dfsBasePath + "/sqlSourceFiles";
    TypedProperties sqlSourceProps = new TypedProperties();
    sqlSourceProps.setProperty("include", "base.properties");
    sqlSourceProps.setProperty("hoodie.embed.timeline.server", "false");
    sqlSourceProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
    sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
    sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query", "select * from test_sql_table");
    UtilitiesTestBase.Helpers.savePropsToDFS(sqlSourceProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SQL_SOURCE);
    // Data generation
    HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
    generateSqlSourceTestTable(sourceRoot, "1", "1000", SQL_SOURCE_NUM_RECORDS, dataGenerator);
}
Also used : TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator)
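
UtilitiesTestBase.Helpers.savePropsToDFS persists the properties file that a later DeltaStreamer run reads back. A plausible stand-in, assuming nothing beyond the standard Hadoop FileSystem API and java.util.Properties serialization (the real helper may differ in details):

import java.io.IOException;
import java.io.OutputStream;
import java.util.Properties;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SavePropsSketch {
    // Write the properties in the standard key=value text format onto DFS.
    static void savePropsToDfs(Properties props, FileSystem fs, String path) throws IOException {
        try (OutputStream out = fs.create(new Path(path), true)) { // true = overwrite
            props.store(out, null);
        }
    }
}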

Example 34 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From class TestHoodieDeltaStreamer, method testKafkaConnectCheckpointProvider.

@Test
public void testKafkaConnectCheckpointProvider() throws IOException {
    String tableBasePath = dfsBasePath + "/test_table";
    String bootstrapPath = dfsBasePath + "/kafka_topic1";
    String partitionPath = bootstrapPath + "/year=2016/month=05/day=01";
    String filePath = partitionPath + "/kafka_topic1+0+100+200.parquet";
    String checkpointProviderClass = "org.apache.hudi.utilities.checkpointing.KafkaConnectHdfsProvider";
    HoodieDeltaStreamer.Config cfg = TestHelpers.makeDropAllConfig(tableBasePath, WriteOperationType.UPSERT);
    TypedProperties props = new DFSPropertiesConfiguration(dfs.getConf(), new Path(dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE)).getProps();
    props.put("hoodie.deltastreamer.checkpoint.provider.path", bootstrapPath);
    cfg.initialCheckpointProvider = checkpointProviderClass;
    // create regular kafka connect hdfs dirs
    dfs.mkdirs(new Path(bootstrapPath));
    dfs.mkdirs(new Path(partitionPath));
    // generate parquet files using kafka connect naming convention
    HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
    Helpers.saveParquetToDFS(Helpers.toGenericRecords(dataGenerator.generateInserts("000", 100)), new Path(filePath));
    HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(cfg, jsc, dfs, hdfsTestService.getHadoopConf(), Option.ofNullable(props));
    assertEquals("kafka_topic1,0:200", deltaStreamer.getConfig().checkpoint);
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieDeltaStreamer(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer) TypedProperties(org.apache.hudi.common.config.TypedProperties) DFSPropertiesConfiguration(org.apache.hudi.common.config.DFSPropertiesConfiguration) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
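
The bootstrap file name follows the Kafka Connect HDFS convention <topic>+<partition>+<startOffset>+<endOffset>.parquet, and the provider turns it into the "<topic>,<partition>:<endOffset>" checkpoint string the assertion expects. A small illustrative parse of that convention (not the KafkaConnectHdfsProvider implementation itself):

public class KafkaConnectNameSketch {
    // "kafka_topic1+0+100+200.parquet" -> "kafka_topic1,0:200"
    static String toCheckpoint(String fileName) {
        String base = fileName.substring(0, fileName.lastIndexOf('.')); // drop ".parquet"
        String[] parts = base.split("\\+"); // [topic, partition, startOffset, endOffset]
        return parts[0] + "," + parts[1] + ":" + parts[3];
    }
}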

Example 35 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From class TestHoodieDeltaStreamer, method testJdbcSourceIncrementalFetchInContinuousMode.

@Test
public void testJdbcSourceIncrementalFetchInContinuousMode() {
    try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc")) {
        TypedProperties props = new TypedProperties();
        props.setProperty("hoodie.deltastreamer.jdbc.url", "jdbc:h2:mem:test_mem");
        props.setProperty("hoodie.deltastreamer.jdbc.driver.class", "org.h2.Driver");
        props.setProperty("hoodie.deltastreamer.jdbc.user", "test");
        props.setProperty("hoodie.deltastreamer.jdbc.password", "jdbc");
        props.setProperty("hoodie.deltastreamer.jdbc.table.name", "triprec");
        props.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true");
        props.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "id");
        props.setProperty("hoodie.datasource.write.keygenerator.class", SimpleKeyGenerator.class.getName());
        props.setProperty("hoodie.datasource.write.recordkey.field", "ID");
        props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
        UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-jdbc-source.properties");
        int numRecords = 1000;
        int sourceLimit = 100;
        String tableBasePath = dfsBasePath + "/triprec";
        HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, JdbcSource.class.getName(), null, "test-jdbc-source.properties", false, false, sourceLimit, false, null, null, "timestamp", null);
        cfg.continuousMode = true;
        // Add 1000 records
        JdbcTestUtils.clearAndInsert("000", numRecords, connection, new HoodieTestDataGenerator(), props);
        HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(cfg, jsc);
        deltaStreamerTestRunner(deltaStreamer, cfg, (r) -> {
            TestHelpers.assertAtleastNCompactionCommits(numRecords / sourceLimit + ((numRecords % sourceLimit == 0) ? 0 : 1), tableBasePath, dfs);
            TestHelpers.assertRecordCount(numRecords, tableBasePath + "/*/*.parquet", sqlContext);
            return true;
        });
    } catch (Exception e) {
        fail(e.getMessage());
    }
}
Also used : HoodieDeltaStreamer(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer) JdbcSource(org.apache.hudi.utilities.sources.JdbcSource) Connection(java.sql.Connection) SimpleKeyGenerator(org.apache.hudi.keygen.SimpleKeyGenerator) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) TableNotFoundException(org.apache.hudi.exception.TableNotFoundException) TopicExistsException(org.apache.kafka.common.errors.TopicExistsException) AnalysisException(org.apache.spark.sql.AnalysisException) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
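
The expected commit count in the callback is just ceiling division: each continuous-mode round ingests at most sourceLimit rows, so 1000 records at 100 per round needs 10 commits. The test's expression is equivalent to the usual integer idiom:

public class CeilDivSketch {
    // numRecords / sourceLimit + ((numRecords % sourceLimit == 0) ? 0 : 1)
    // equals ceil(numRecords / sourceLimit) for positive ints, same as:
    static int expectedCommits(int numRecords, int sourceLimit) {
        return (numRecords + sourceLimit - 1) / sourceLimit;
    }
}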

Aggregations

TypedProperties (org.apache.hudi.common.config.TypedProperties): 143
Test (org.junit.jupiter.api.Test): 47
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 22
JavaRDD (org.apache.spark.api.java.JavaRDD): 16
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 15
IOException (java.io.IOException): 14
Path (org.apache.hadoop.fs.Path): 14
Properties (java.util.Properties): 13
GenericRecord (org.apache.avro.generic.GenericRecord): 13
SourceFormatAdapter (org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter): 12
Row (org.apache.spark.sql.Row): 12
BeforeEach (org.junit.jupiter.api.BeforeEach): 11
ArrayList (java.util.ArrayList): 10
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 10
HoodieKey (org.apache.hudi.common.model.HoodieKey): 9
DFSPropertiesConfiguration (org.apache.hudi.common.config.DFSPropertiesConfiguration): 8
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 8
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 8
Dataset (org.apache.spark.sql.Dataset): 8
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 7