Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieDeltaStreamer, method testDistributedTestDataSource.
@Test
public void testDistributedTestDataSource() {
  TypedProperties props = new TypedProperties();
  props.setProperty(SourceConfigs.MAX_UNIQUE_RECORDS_PROP, "1000");
  props.setProperty(SourceConfigs.NUM_SOURCE_PARTITIONS_PROP, "1");
  props.setProperty(SourceConfigs.USE_ROCKSDB_FOR_TEST_DATAGEN_KEYS, "true");
  DistributedTestDataSource distributedTestDataSource = new DistributedTestDataSource(props, jsc, sparkSession, null);
  InputBatch<JavaRDD<GenericRecord>> batch = distributedTestDataSource.fetchNext(Option.empty(), 10000000);
  batch.getBatch().get().cache();
  long c = batch.getBatch().get().count();
  assertEquals(1000, c);
}
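Not from the project source: a minimal sketch of why the test above stores every setting as a string. TypedProperties extends java.util.Properties and layers typed getters with defaults on top, which is how a consumer such as the test data source can read the values back as numbers and booleans. The literal property keys below are illustrative stand-ins, not the exact strings behind the SourceConfigs constants.

import org.apache.hudi.common.config.TypedProperties;

public class TypedPropertiesSketch {
  public static void main(String[] args) {
    TypedProperties props = new TypedProperties();
    // Values are set as strings, exactly as in the test above.
    props.setProperty("hoodie.deltastreamer.source.test.max_unique_records", "1000");

    // Typed getters with defaults convert the stored strings back to the expected types.
    long maxUniqueRecords = props.getLong("hoodie.deltastreamer.source.test.max_unique_records", 100L);
    int numPartitions = props.getInteger("hoodie.deltastreamer.source.test.num_partitions", 1);
    boolean useRocksDb = props.getBoolean("hoodie.deltastreamer.source.test.use_rocksdb", false);

    System.out.printf("maxUniqueRecords=%d, numPartitions=%d, useRocksDb=%b%n",
        maxUniqueRecords, numPartitions, useRocksDb);
  }
}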
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieDeltaStreamer, method testFetchingCheckpointFromPreviousCommits.
@Test
public void testFetchingCheckpointFromPreviousCommits() throws IOException {
  HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(dfsBasePath + "/testFetchPreviousCheckpoint", WriteOperationType.BULK_INSERT);
  TypedProperties properties = new TypedProperties();
  properties.setProperty("hoodie.datasource.write.recordkey.field", "key");
  properties.setProperty("hoodie.datasource.write.partitionpath.field", "pp");
  TestDeltaSync testDeltaSync = new TestDeltaSync(cfg, sparkSession, null, properties, jsc, dfs, jsc.hadoopConfiguration(), null);
  properties.put(HoodieTableConfig.NAME.key(), "sample_tbl");
  HoodieTableMetaClient metaClient = HoodieTestUtils.init(jsc.hadoopConfiguration(), dfsBasePath, HoodieTableType.COPY_ON_WRITE, properties);
  Map<String, String> extraMetadata = new HashMap<>();
  extraMetadata.put(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY, "abc");
  addCommitToTimeline(metaClient, extraMetadata);
  metaClient.reloadActiveTimeline();
  assertEquals(testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline().getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY), "abc");
  extraMetadata.put(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY, "def");
  addCommitToTimeline(metaClient, extraMetadata);
  metaClient.reloadActiveTimeline();
  assertEquals(testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline().getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY), "def");
  // Add a replace commit which does not have CHECKPOINT_KEY. DeltaStreamer should be able to go back and pick the right checkpoint.
  addReplaceCommitToTimeline(metaClient, Collections.emptyMap());
  metaClient.reloadActiveTimeline();
  assertEquals(testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline().getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY), "def");
}
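Not part of the test above: a minimal sketch of the metadata round trip those assertions rely on. A commit's extra metadata is a plain string-to-string map, and DeltaStreamer stores its source checkpoint there under HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY; getLatestCommitMetadataWithValidCheckpointInfo walks the timeline backwards until this lookup returns a value.

import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.config.HoodieWriteConfig;

public class CheckpointMetadataSketch {
  public static void main(String[] args) {
    // Store a checkpoint string in the commit's extra metadata, as the write path does.
    HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
    commitMetadata.addMetadata(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY, "def");

    // Reading it back is the same lookup the test's assertions perform via getMetadata(CHECKPOINT_KEY).
    String checkpoint = commitMetadata.getMetadata(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY);
    System.out.println("resumed checkpoint = " + checkpoint);
  }
}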
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieDeltaStreamer, method prepareSqlSource.
private void prepareSqlSource() throws IOException {
  String sourceRoot = dfsBasePath + "sqlSourceFiles";
  TypedProperties sqlSourceProps = new TypedProperties();
  sqlSourceProps.setProperty("include", "base.properties");
  sqlSourceProps.setProperty("hoodie.embed.timeline.server", "false");
  sqlSourceProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
  sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
  sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query", "select * from test_sql_table");
  UtilitiesTestBase.Helpers.savePropsToDFS(sqlSourceProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SQL_SOURCE);
  // Data generation
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
  generateSqlSourceTestTable(sourceRoot, "1", "1000", SQL_SOURCE_NUM_RECORDS, dataGenerator);
}
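For context, a hedged sketch of what a helper like UtilitiesTestBase.Helpers.savePropsToDFS boils down to; this is an approximation, not the actual helper. Since TypedProperties extends java.util.Properties, it can be persisted to any Hadoop-compatible file system with Properties.store. The savePropsToDfs method name and paths below are hypothetical.

import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.config.TypedProperties;

public class SavePropsSketch {
  // Hypothetical stand-in for the test helper: write the properties file to a (distributed) file system.
  static void savePropsToDfs(TypedProperties props, FileSystem fs, String targetPath) throws IOException {
    try (OutputStream out = fs.create(new Path(targetPath), true)) {
      props.store(out, "sql source test properties");
    }
  }

  public static void main(String[] args) throws IOException {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.deltastreamer.source.sql.sql.query", "select * from test_sql_table");
    FileSystem fs = FileSystem.getLocal(new Configuration()); // local FS used here purely for illustration
    savePropsToDfs(props, fs, "/tmp/test_sql_source.properties");
  }
}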
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieDeltaStreamer, method testKafkaConnectCheckpointProvider.
@Test
public void testKafkaConnectCheckpointProvider() throws IOException {
  String tableBasePath = dfsBasePath + "/test_table";
  String bootstrapPath = dfsBasePath + "/kafka_topic1";
  String partitionPath = bootstrapPath + "/year=2016/month=05/day=01";
  String filePath = partitionPath + "/kafka_topic1+0+100+200.parquet";
  String checkpointProviderClass = "org.apache.hudi.utilities.checkpointing.KafkaConnectHdfsProvider";
  HoodieDeltaStreamer.Config cfg = TestHelpers.makeDropAllConfig(tableBasePath, WriteOperationType.UPSERT);
  TypedProperties props = new DFSPropertiesConfiguration(dfs.getConf(), new Path(dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE)).getProps();
  props.put("hoodie.deltastreamer.checkpoint.provider.path", bootstrapPath);
  cfg.initialCheckpointProvider = checkpointProviderClass;
  // create regular kafka connect hdfs dirs
  dfs.mkdirs(new Path(bootstrapPath));
  dfs.mkdirs(new Path(partitionPath));
  // generate parquet files using kafka connect naming convention
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
  Helpers.saveParquetToDFS(Helpers.toGenericRecords(dataGenerator.generateInserts("000", 100)), new Path(filePath));
  HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(cfg, jsc, dfs, hdfsTestService.getHadoopConf(), Option.ofNullable(props));
  assertEquals("kafka_topic1,0:200", deltaStreamer.getConfig().checkpoint);
}
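The bootstrap file name above follows the Kafka Connect HDFS sink convention <topic>+<partition>+<startOffset>+<endOffset>.parquet, and the asserted checkpoint "kafka_topic1,0:200" is the topic, partition, and latest end offset in DeltaStreamer's Kafka checkpoint format. Below is a standalone sketch of that mapping; it only illustrates the naming convention and is not the actual KafkaConnectHdfsProvider implementation.

public class KafkaConnectFilenameSketch {
  public static void main(String[] args) {
    // Kafka Connect HDFS sink file name: <topic>+<partition>+<startOffset>+<endOffset>.parquet
    String fileName = "kafka_topic1+0+100+200.parquet";

    String[] parts = fileName.replace(".parquet", "").split("\\+");
    String topic = parts[0];
    int partition = Integer.parseInt(parts[1]);
    long endOffset = Long.parseLong(parts[3]);

    // Checkpoint format "<topic>,<partition>:<offset>", which the test asserts
    // against deltaStreamer.getConfig().checkpoint.
    String checkpoint = topic + "," + partition + ":" + endOffset;
    System.out.println(checkpoint); // kafka_topic1,0:200
  }
}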
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieDeltaStreamer, method testJdbcSourceIncrementalFetchInContinuousMode.
@Test
public void testJdbcSourceIncrementalFetchInContinuousMode() {
  try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc")) {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.deltastreamer.jdbc.url", "jdbc:h2:mem:test_mem");
    props.setProperty("hoodie.deltastreamer.jdbc.driver.class", "org.h2.Driver");
    props.setProperty("hoodie.deltastreamer.jdbc.user", "test");
    props.setProperty("hoodie.deltastreamer.jdbc.password", "jdbc");
    props.setProperty("hoodie.deltastreamer.jdbc.table.name", "triprec");
    props.setProperty("hoodie.deltastreamer.jdbc.incr.pull", "true");
    props.setProperty("hoodie.deltastreamer.jdbc.table.incr.column.name", "id");
    props.setProperty("hoodie.datasource.write.keygenerator.class", SimpleKeyGenerator.class.getName());
    props.setProperty("hoodie.datasource.write.recordkey.field", "ID");
    props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
    UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-jdbc-source.properties");
    int numRecords = 1000;
    int sourceLimit = 100;
    String tableBasePath = dfsBasePath + "/triprec";
    HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, JdbcSource.class.getName(), null, "test-jdbc-source.properties", false, false, sourceLimit, false, null, null, "timestamp", null);
    cfg.continuousMode = true;
    // Add 1000 records
    JdbcTestUtils.clearAndInsert("000", numRecords, connection, new HoodieTestDataGenerator(), props);
    HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(cfg, jsc);
    deltaStreamerTestRunner(deltaStreamer, cfg, (r) -> {
      TestHelpers.assertAtleastNCompactionCommits(numRecords / sourceLimit + ((numRecords % sourceLimit == 0) ? 0 : 1), tableBasePath, dfs);
      TestHelpers.assertRecordCount(numRecords, tableBasePath + "/*/*.parquet", sqlContext);
      return true;
    });
  } catch (Exception e) {
    fail(e.getMessage());
  }
}
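For orientation only, a sketch of the kind of H2 source table the test reads from: an in-memory triprec table with a monotonically increasing id column, which is what hoodie.deltastreamer.jdbc.table.incr.column.name points at for incremental pulls. The schema here is simplified and hypothetical; the real table created by JdbcTestUtils.clearAndInsert has the full trip record columns.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Statement;

public class JdbcIncrSourceSketch {
  public static void main(String[] args) throws Exception {
    // Same in-memory H2 URL and credentials as the test's source properties.
    try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc")) {
      try (Statement stmt = connection.createStatement()) {
        // Simplified, hypothetical schema for illustration only.
        stmt.execute("CREATE TABLE triprec (id BIGINT AUTO_INCREMENT PRIMARY KEY, rider VARCHAR(128), ts TIMESTAMP)");
      }
      try (PreparedStatement insert =
               connection.prepareStatement("INSERT INTO triprec (rider, ts) VALUES (?, CURRENT_TIMESTAMP)")) {
        for (int i = 0; i < 10; i++) {
          insert.setString(1, "rider-" + i);
          insert.executeUpdate();
        }
      }
      // With hoodie.deltastreamer.jdbc.incr.pull=true and incr.column.name=id, each DeltaStreamer
      // round fetches only rows whose id is greater than the last checkpointed value, up to sourceLimit.
    }
  }
}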