Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestDFSPathSelectorCommonMethods, method setUp.
@BeforeEach
void setUp() {
  initSparkContexts();
  initPath();
  initFileSystem();
  props = new TypedProperties();
  props.setProperty(ROOT_INPUT_PATH_PROP, basePath);
  props.setProperty(PARTITIONS_LIST_PARALLELISM, "1");
  inputPath = new Path(basePath);
}
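TypedProperties stores everything as strings but exposes typed getters with optional defaults, so code built on this setup can read the values back without manual parsing. A minimal sketch of that read path; the default value shown is illustrative, not taken from the test:

// Read back the values set in setUp() via TypedProperties' typed getters.
String rootPath = props.getString(ROOT_INPUT_PATH_PROP);
// getInteger parses the stored "1" and falls back to the supplied
// default when the key is absent (the default here is illustrative).
int listParallelism = props.getInteger(PARTITIONS_LIST_PARALLELISM, 1);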
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestDatePartitionPathSelector, method testPruneDatePartitionPaths.
@ParameterizedTest(name = "[{index}] {0}")
@MethodSource("configParams")
public void testPruneDatePartitionPaths(
    String tableName, String dateFormat, int datePartitionDepth, int numPrevDaysToList,
    String currentDate, boolean isHiveStylePartition, int expectedNumFiles) throws IOException {
  TypedProperties props =
      getProps(basePath + "/" + tableName, dateFormat, datePartitionDepth, numPrevDaysToList, currentDate);
  DatePartitionPathSelector pathSelector = new DatePartitionPathSelector(props, jsc.hadoopConfiguration());
  Path root = new Path(props.getString(ROOT_INPUT_PATH_PROP));
  int totalDepthBeforeDatePartitions = props.getInteger(DATE_PARTITION_DEPTH) - 1;
  // Create the directory tree above the date partitions, then fill each leaf with date-partitioned files.
  List<Path> leafDirs = new ArrayList<>();
  createParentDirsBeforeDatePartitions(root, generateRandomStrings(), totalDepthBeforeDatePartitions, leafDirs);
  createDatePartitionsWithFiles(leafDirs, isHiveStylePartition, dateFormat);
  List<String> paths = pathSelector.pruneDatePartitionPaths(context, fs, root.toString(), LocalDate.parse(currentDate));
  assertEquals(expectedNumFiles, paths.size());
}
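The getProps helper referenced above is not shown in this listing. A plausible sketch of what it assembles, based only on the keys the test reads back; apart from ROOT_INPUT_PATH_PROP and DATE_PARTITION_DEPTH, the constant names below are hypothetical and may not match the real helper:

// Hypothetical reconstruction of the getProps helper. Only ROOT_INPUT_PATH_PROP and
// DATE_PARTITION_DEPTH appear in the test above; the remaining key constants are assumed.
private TypedProperties getProps(String rootPath, String dateFormat, int datePartitionDepth,
    int numPrevDaysToList, String currentDate) {
  TypedProperties props = new TypedProperties();
  props.setProperty(ROOT_INPUT_PATH_PROP, rootPath);
  props.setProperty(DATE_PARTITION_DEPTH, String.valueOf(datePartitionDepth)); // read back via getInteger
  props.setProperty(DATE_FORMAT, dateFormat);                          // assumed constant name
  props.setProperty(LOOKBACK_DAYS, String.valueOf(numPrevDaysToList)); // assumed constant name
  props.setProperty(CURRENT_DATE, currentDate);                        // assumed constant name
  return props;
}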
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestKafkaOffsetGen, method getConsumerConfigs.
private TypedProperties getConsumerConfigs(String autoOffsetReset, String kafkaCheckpointType) {
  TypedProperties props = new TypedProperties();
  // put(...) and setProperty(...) are interchangeable here; both store string values.
  props.put("hoodie.deltastreamer.source.kafka.checkpoint.type", kafkaCheckpointType);
  props.put("auto.offset.reset", autoOffsetReset);
  props.put("hoodie.deltastreamer.source.kafka.topic", TEST_TOPIC_NAME);
  props.setProperty("bootstrap.servers", testUtils.brokerAddress());
  props.setProperty("key.deserializer", StringDeserializer.class.getName());
  props.setProperty("value.deserializer", StringDeserializer.class.getName());
  // A fresh group.id per call keeps consumers in separate tests from sharing committed offsets.
  props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
  return props;
}
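These props feed the class under test, which takes the whole TypedProperties bundle in its constructor. A minimal sketch of how the helper is typically invoked; the argument values here are illustrative, not copied from a specific test:

// Build a KafkaOffsetGen from the generated consumer configs
// ("earliest" / "string" are example argument values).
KafkaOffsetGen kafkaOffsetGen = new KafkaOffsetGen(getConsumerConfigs("earliest", "string"));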
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestS3EventsMetaSelector, method setUp.
@BeforeEach
void setUp() {
  initSparkContexts();
  initPath();
  initFileSystem();
  MockitoAnnotations.initMocks(this);
  props = new TypedProperties();
  sqsUrl = "test-queue";
  props.setProperty(S3_SOURCE_QUEUE_URL, sqsUrl);
  props.setProperty(S3_SOURCE_QUEUE_REGION, REGION_NAME);
}
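A test body built on this setup would hand the props to the selector under test. A minimal sketch, assuming S3EventsMetaSelector exposes a constructor that takes the TypedProperties directly (treat the signature as an assumption):

// Sketch only: the constructor signature is assumed from how the
// queue URL and region props are assembled in setUp() above.
S3EventsMetaSelector selector = new S3EventsMetaSelector(props);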
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class DistributedTestDataSource, method fetchNewData.
@Override
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCkptStr, long sourceLimit) {
  int nextCommitNum = lastCkptStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
  String instantTime = String.format("%05d", nextCommitNum);
  LOG.info("Source Limit is set to " + sourceLimit);
  // No new data.
  if (sourceLimit <= 0) {
    return new InputBatch<>(Option.empty(), instantTime);
  }
  TypedProperties newProps = new TypedProperties();
  newProps.putAll(props);
  // Set the maxUniqueRecords per partition for TestDataSource
  int maxUniqueRecords = props.getInteger(SourceConfigs.MAX_UNIQUE_RECORDS_PROP, SourceConfigs.DEFAULT_MAX_UNIQUE_RECORDS);
  String maxUniqueRecordsPerPartition = String.valueOf(Math.max(1, maxUniqueRecords / numTestSourcePartitions));
  newProps.setProperty(SourceConfigs.MAX_UNIQUE_RECORDS_PROP, maxUniqueRecordsPerPartition);
  int perPartitionSourceLimit = Math.max(1, (int) (sourceLimit / numTestSourcePartitions));
  JavaRDD<GenericRecord> avroRDD = sparkContext
      .parallelize(IntStream.range(0, numTestSourcePartitions).boxed().collect(Collectors.toList()), numTestSourcePartitions)
      .mapPartitionsWithIndex((p, idx) -> {
        LOG.info("Initializing source with newProps=" + newProps);
        if (!dataGeneratorMap.containsKey(p)) {
          initDataGen(newProps, p);
        }
        return fetchNextBatch(newProps, perPartitionSourceLimit, instantTime, p).iterator();
      }, true);
  return new InputBatch<>(Option.of(avroRDD), instantTime);
}
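The copy-then-override pattern above (putAll into a fresh TypedProperties, then setProperty) gives each batch its own per-partition record cap without mutating the shared configuration. A standalone illustration of that pattern, with made-up values:

// Illustration only: the key is real, the numbers are made up.
TypedProperties shared = new TypedProperties();
shared.setProperty(SourceConfigs.MAX_UNIQUE_RECORDS_PROP, "1000");

TypedProperties perBatch = new TypedProperties();
perBatch.putAll(shared);                                            // copy, leaving the original untouched
perBatch.setProperty(SourceConfigs.MAX_UNIQUE_RECORDS_PROP, "250"); // e.g. 1000 spread over 4 partitions

// The shared configuration still reports the original value.
assert shared.getInteger(SourceConfigs.MAX_UNIQUE_RECORDS_PROP, -1) == 1000;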