Use of org.apache.hudi.utilities.deltastreamer.TableExecutionContext in the Apache Hudi project.
Class TestHoodieMultiTableDeltaStreamer, method testMultiTableExecutionWithParquetSource.
/**
 * Drives the multi-table delta streamer against two parquet source directories.
 *
 * <p>Each directory is seeded with an initial batch (10 and 5 records), a first
 * {@code syncAndVerify} is run, and then three further rounds each add one randomly sized
 * parquet file per source and hand the running totals to {@code syncAndVerify} again.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testMultiTableExecutionWithParquetSource() throws IOException {
  // Seed both parquet source directories with their first batch.
  final String firstSourceDir = dfsBasePath + "/parquetSrcPath1/";
  prepareParquetDFSFiles(10, firstSourceDir);
  final String secondSourceDir = dfsBasePath + "/parquetSrcPath2/";
  prepareParquetDFSFiles(5, secondSourceDir);

  // The props file carries only the shared settings; per-table settings are attached below.
  final String commonPropsFile = populateCommonPropsAndWriteToFile();
  final String configFolder = dfsBasePath + "/config";
  HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(
      commonPropsFile, configFolder, ParquetDFSSource.class.getName(), false, false, false, "multi_table_parquet", null);
  HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
  List<TableExecutionContext> contexts = streamer.getTableExecutionContexts();

  // Attach the per-source properties to the matching execution contexts.
  ingestPerParquetSourceProps(contexts, Arrays.asList(firstSourceDir, secondSourceDir));
  final String firstTargetPath = contexts.get(0).getConfig().targetBasePath;
  final String secondTargetPath = contexts.get(1).getConfig().targetBasePath;

  // Running totals start at the sizes of the seed batches written above.
  int expectedTable1Total = 10;
  int expectedTable2Total = 5;
  syncAndVerify(streamer, firstTargetPath, secondTargetPath, expectedTable1Total, expectedTable2Total);

  // Three more rounds; the new files are named 2.parquet, 3.parquet and 4.parquet.
  for (int fileIndex = 2; fileIndex < 5; fileIndex++) {
    final int table1Batch = 10 + RANDOM.nextInt(100);
    final int table2Batch = 15 + RANDOM.nextInt(100);
    final String fileName = fileIndex + ".parquet";
    prepareParquetDFSFiles(table1Batch, firstSourceDir, fileName, false, null, null);
    prepareParquetDFSFiles(table2Batch, secondSourceDir, fileName, false, null, null);
    expectedTable1Total += table1Batch;
    expectedTable2Total += table2Batch;
    syncAndVerify(streamer, firstTargetPath, secondTargetPath, expectedTable1Total, expectedTable2Total);
  }
}
Use of org.apache.hudi.utilities.deltastreamer.TableExecutionContext in the Apache Hudi project.
Class TestHoodieMultiTableDeltaStreamer, method testMultiTableExecutionWithKafkaSource.
// 0 corresponds to fg
/**
 * Syncs two Kafka-backed tables through the multi-table delta streamer.
 *
 * <p>Inserts are published to one topic per table and synced; updates for the existing
 * records are then published and synced with a freshly built streamer. After the second
 * sync both tables must be reported as successful and the record counts must still be
 * 5 and 10.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testMultiTableExecutionWithKafkaSource() throws IOException {
  // One topic per table; the names are derived from the testNum counter.
  final String firstTopic = "topic" + testNum++;
  final String secondTopic = "topic" + testNum;
  testUtils.createTopic(firstTopic, 2);
  testUtils.createTopic(secondTopic, 2);

  // Initial inserts: 5 records on the first topic, 10 on the second.
  HoodieTestDataGenerator generator = new HoodieTestDataGenerator();
  testUtils.sendMessages(firstTopic,
      Helpers.jsonifyRecords(generator.generateInsertsAsPerSchema("000", 5, HoodieTestDataGenerator.TRIP_SCHEMA)));
  testUtils.sendMessages(secondTopic,
      Helpers.jsonifyRecords(generator.generateInsertsAsPerSchema("000", 10, HoodieTestDataGenerator.SHORT_TRIP_SCHEMA)));

  HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(
      PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", JsonKafkaSource.class.getName(), false, false, null);
  HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
  List<TableExecutionContext> contexts = streamer.getTableExecutionContexts();

  final String sourceSchemaFileKey = "hoodie.deltastreamer.schemaprovider.source.schema.file";
  final String targetSchemaFileKey = "hoodie.deltastreamer.schemaprovider.target.schema.file";
  final String kafkaTopicKey = "hoodie.deltastreamer.source.kafka.topic";

  // Context at index 1: uber schema files, second topic.
  TableExecutionContext contextAtOne = contexts.get(1);
  TypedProperties propsForContextOne = contextAtOne.getProperties();
  propsForContextOne.setProperty(sourceSchemaFileKey, dfsBasePath + "/source_uber.avsc");
  propsForContextOne.setProperty(targetSchemaFileKey, dfsBasePath + "/target_uber.avsc");
  propsForContextOne.setProperty(kafkaTopicKey, secondTopic);
  contextAtOne.setProperties(propsForContextOne);

  // Context at index 0: short-trip schema files, first topic.
  TableExecutionContext contextAtZero = contexts.get(0);
  TypedProperties propsForContextZero = contextAtZero.getProperties();
  propsForContextZero.setProperty(sourceSchemaFileKey, dfsBasePath + "/source_short_trip_uber.avsc");
  propsForContextZero.setProperty(targetSchemaFileKey, dfsBasePath + "/target_short_trip_uber.avsc");
  propsForContextZero.setProperty(kafkaTopicKey, firstTopic);
  contextAtZero.setProperties(propsForContextZero);

  final String firstTableFiles = contextAtZero.getConfig().targetBasePath + "/*/*.parquet";
  final String secondTableFiles = contextAtOne.getConfig().targetBasePath + "/*/*.parquet";

  // First sync: both tables should hold exactly the inserted records.
  streamer.sync();
  TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(5, firstTableFiles, sqlContext);
  TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(10, secondTableFiles, sqlContext);

  // Publish updates for the records that already exist in each topic.
  testUtils.sendMessages(firstTopic,
      Helpers.jsonifyRecords(generator.generateUpdatesAsPerSchema("001", 5, HoodieTestDataGenerator.TRIP_SCHEMA)));
  testUtils.sendMessages(secondTopic,
      Helpers.jsonifyRecords(generator.generateUpdatesAsPerSchema("001", 10, HoodieTestDataGenerator.SHORT_TRIP_SCHEMA)));

  // A new streamer is built from the same config, so the per-table properties are re-applied.
  streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
  streamer.getTableExecutionContexts().get(1).setProperties(propsForContextOne);
  streamer.getTableExecutionContexts().get(0).setProperties(propsForContextZero);
  streamer.sync();
  assertEquals(2, streamer.getSuccessTables().size());
  assertTrue(streamer.getFailedTables().isEmpty());

  // The expected counts are unchanged after the updates have been synced.
  TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(5, firstTableFiles, sqlContext);
  TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(10, secondTableFiles, sqlContext);

  // NOTE(review): presumably bumped again so the second topic's number is not reused — confirm.
  testNum++;
}
Use of org.apache.hudi.utilities.deltastreamer.TableExecutionContext in the Apache Hudi project.
Class TestHoodieMultiTableDeltaStreamer, method testCustomConfigProps.
/**
 * Checks the per-table configuration exposed by the multi-table delta streamer's execution
 * contexts: target base path and table name, Kafka topic, record key field, key generator
 * class, Hive sync table, and the source schema-registry URL of both tables.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testCustomConfigProps() throws IOException {
  HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false, SchemaRegistryProvider.class);
  HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
  List<TableExecutionContext> executionContexts = streamer.getTableExecutionContexts();

  // Check the table count before indexing, so that a wrong count is reported as an
  // assertion failure rather than as an IndexOutOfBoundsException from get(1).
  assertEquals(2, executionContexts.size());
  TableExecutionContext executionContext = executionContexts.get(1);

  // Settings of the context at index 1.
  assertEquals(dfsBasePath + "/multi_table_dataset/uber_db/dummy_table_uber", executionContext.getConfig().targetBasePath);
  assertEquals("uber_db.dummy_table_uber", executionContext.getConfig().targetTableName);
  assertEquals("topic1", executionContext.getProperties().getString(HoodieMultiTableDeltaStreamer.Constants.KAFKA_TOPIC_PROP));
  assertEquals("_row_key", executionContext.getProperties().getString(DataSourceWriteOptions.RECORDKEY_FIELD().key()));
  assertEquals(TestHoodieDeltaStreamer.TestGenerator.class.getName(), executionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key()));
  assertEquals("uber_hive_dummy_table", executionContext.getProperties().getString(HoodieMultiTableDeltaStreamer.Constants.HIVE_SYNC_TABLE_PROP));

  // Source schema-registry URL of each table (index 1, then index 0).
  assertEquals("http://localhost:8081/subjects/random-value/versions/latest", executionContext.getProperties().getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP));
  assertEquals("http://localhost:8081/subjects/topic2-value/versions/latest", executionContexts.get(0).getProperties().getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP));
}
Use of org.apache.hudi.utilities.deltastreamer.TableExecutionContext in the Apache Hudi project.
Class TestHoodieMultiTableDeltaStreamer, method testTableLevelProperties.
/**
 * Checks the key generator class configured for every table execution context: the
 * {@code dummy_table_short_trip} table is expected to report {@code TestTableLevelGenerator},
 * and every other table {@code TestGenerator}.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testTableLevelProperties() throws IOException {
  HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(
      PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false, null);
  HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);

  for (TableExecutionContext context : streamer.getTableExecutionContexts()) {
    // Only the short-trip table is expected to carry the table-level key generator.
    final boolean isShortTripTable = context.getTableName().equals("dummy_table_short_trip");
    final String configuredKeyGenerator =
        context.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key());
    final String expectedKeyGenerator = isShortTripTable
        ? TestHoodieDeltaStreamer.TestTableLevelGenerator.class.getName()
        : TestHoodieDeltaStreamer.TestGenerator.class.getName();
    assertEquals(expectedKeyGenerator, configuredKeyGenerator);
  }
}
Aggregations