Use of org.apache.hudi.keygen.KeyGenerator in project hudi by apache.
The class TestCreateAvroKeyGeneratorByTypeWithFactory, method testKeyGeneratorTypes.
@ParameterizedTest
@MethodSource("configParams")
public void testKeyGeneratorTypes(String keyGenType) throws IOException {
  props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), keyGenType);
  KeyGeneratorType keyType = KeyGeneratorType.valueOf(keyGenType);
  KeyGenerator keyGenerator = HoodieAvroKeyGeneratorFactory.createKeyGenerator(props);
  switch (keyType) {
    case SIMPLE:
      Assertions.assertEquals(SimpleAvroKeyGenerator.class.getName(), keyGenerator.getClass().getName());
      return;
    case COMPLEX:
      Assertions.assertEquals(ComplexAvroKeyGenerator.class.getName(), keyGenerator.getClass().getName());
      return;
    case TIMESTAMP:
      Assertions.assertEquals(TimestampBasedAvroKeyGenerator.class.getName(), keyGenerator.getClass().getName());
      return;
    case CUSTOM:
      Assertions.assertEquals(CustomAvroKeyGenerator.class.getName(), keyGenerator.getClass().getName());
      return;
    case NON_PARTITION:
      Assertions.assertEquals(NonpartitionedAvroKeyGenerator.class.getName(), keyGenerator.getClass().getName());
      return;
    case GLOBAL_DELETE:
      Assertions.assertEquals(GlobalAvroDeleteKeyGenerator.class.getName(), keyGenerator.getClass().getName());
      return;
    default:
      throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGenType);
  }
}
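For context, a minimal sketch of driving the factory directly, outside the parameterized test. The record-key and partition-path field names "id" and "ts" are illustrative assumptions, not values from the test:

// Minimal sketch: resolve an Avro key generator from the type property alone.
// The field names "id" and "ts" are assumptions for illustration.
TypedProperties props = new TypedProperties();
props.put("hoodie.datasource.write.recordkey.field", "id");
props.put("hoodie.datasource.write.partitionpath.field", "ts");
props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), KeyGeneratorType.SIMPLE.name());
KeyGenerator keyGenerator = HoodieAvroKeyGeneratorFactory.createKeyGenerator(props);
// With SIMPLE configured, the factory returns a SimpleAvroKeyGenerator;
// keyGenerator.getKey(avroRecord) then extracts the HoodieKey from an Avro record.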
Use of org.apache.hudi.keygen.KeyGenerator in project hudi by apache.
The class TestHoodieSparkKeyGeneratorFactory, method testKeyGeneratorFactory.
@Test
public void testKeyGeneratorFactory() throws IOException {
  TypedProperties props = getCommonProps();
  // set KeyGenerator type only
  props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), KeyGeneratorType.SIMPLE.name());
  KeyGenerator keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
  Assertions.assertEquals(SimpleKeyGenerator.class.getName(), keyGenerator.getClass().getName());
  // set KeyGenerator class only
  props = getCommonProps();
  props.put(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key(), SimpleKeyGenerator.class.getName());
  KeyGenerator keyGenerator2 = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
  Assertions.assertEquals(SimpleKeyGenerator.class.getName(), keyGenerator2.getClass().getName());
  // set both the class name and the KeyGenerator type
  props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), KeyGeneratorType.CUSTOM.name());
  KeyGenerator keyGenerator3 = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
  // the explicit KEYGENERATOR_CLASS_NAME setting takes precedence over KEYGENERATOR_TYPE
  Assertions.assertEquals(SimpleKeyGenerator.class.getName(), keyGenerator3.getClass().getName());
  // set a class that is not a KeyGenerator
  final TypedProperties props2 = getCommonProps();
  props2.put(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key(), TestHoodieSparkKeyGeneratorFactory.class.getName());
  assertThrows(IOException.class, () -> HoodieSparkKeyGeneratorFactory.createKeyGenerator(props2));
  // set an unknown KeyGenerator type
  final TypedProperties props3 = getCommonProps();
  props3.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), "wrong_type");
  assertThrows(HoodieKeyGeneratorException.class, () -> HoodieSparkKeyGeneratorFactory.createKeyGenerator(props3));
}
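The precedence rule the test pins down can be condensed into a short hedged sketch; the field names are illustrative assumptions:

// Hedged sketch of the precedence rule: when both options are set, the
// explicit class name wins over the type enum. Field names are assumptions.
TypedProperties props = new TypedProperties();
props.put("hoodie.datasource.write.recordkey.field", "id");
props.put("hoodie.datasource.write.partitionpath.field", "ts");
props.put(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key(), SimpleKeyGenerator.class.getName());
props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), KeyGeneratorType.CUSTOM.name());
KeyGenerator keyGen = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
// keyGen is a SimpleKeyGenerator, not a custom one, matching keyGenerator3 above.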
Use of org.apache.hudi.keygen.KeyGenerator in project hudi by apache.
The class TestHoodieClientOnCopyOnWriteStorage, method verifyRecordsWritten.
/**
 * Verify data in base files matches expected records and commit time.
 */
private void verifyRecordsWritten(String commitTime, boolean populateMetadataField, List<HoodieRecord> expectedRecords,
    List<WriteStatus> allStatus, HoodieWriteConfig config) throws IOException {
  List<GenericRecord> records = new ArrayList<>();
  Set<String> expectedKeys = verifyRecordKeys(expectedRecords, allStatus, records);
  if (config.populateMetaFields()) {
    for (GenericRecord record : records) {
      String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
      assertEquals(commitTime, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
      assertTrue(expectedKeys.contains(recordKey));
    }
  } else {
    KeyGenerator keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(new TypedProperties(config.getProps()));
    for (GenericRecord record : records) {
      String recordKey = keyGenerator.getKey(record).getRecordKey();
      if (!populateMetadataField) {
        assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
      }
      assertTrue(expectedKeys.contains(recordKey));
    }
  }
}
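The branch structure reflects the two ways a record key can be recovered: read back from the _hoodie_record_key metadata column when meta fields are populated, or recomputed from the payload otherwise. A hedged helper sketch of that pattern; the helper name is an assumption:

// Hedged helper sketch: resolve a record's key from the metadata column when
// meta fields are populated, otherwise recompute it with the configured
// KeyGenerator. The method name resolveRecordKey is illustrative.
static String resolveRecordKey(GenericRecord record, HoodieWriteConfig config) throws IOException {
  if (config.populateMetaFields()) {
    return record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
  }
  KeyGenerator keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(new TypedProperties(config.getProps()));
  return keyGenerator.getKey(record).getRecordKey();
}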
Use of org.apache.hudi.keygen.KeyGenerator in project hudi by apache.
The class TestAbstractConnectWriter, method testAbstractWriterForAllFormats.
@ParameterizedTest
@EnumSource(value = TestInputFormats.class)
public void testAbstractWriterForAllFormats(TestInputFormats inputFormats) throws Exception {
  Schema schema = schemaProvider.getSourceSchema();
  List<?> inputRecords;
  List<HoodieRecord> expectedRecords;
  String formatConverter;
  switch (inputFormats) {
    case JSON_STRING:
      formatConverter = AbstractConnectWriter.KAFKA_STRING_CONVERTER;
      GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(schema, schema);
      inputRecords = SchemaTestUtil.generateTestJsonRecords(0, NUM_RECORDS);
      expectedRecords = ((List<String>) inputRecords).stream().map(s -> {
        try {
          return HoodieAvroUtils.rewriteRecord((GenericRecord) reader.read(null, DecoderFactory.get().jsonDecoder(schema, s)), schema);
        } catch (IOException exception) {
          throw new HoodieException("Error converting JSON records to Avro", exception);
        }
      }).map(p -> convertToHoodieRecords(p, p.get(RECORD_KEY_INDEX).toString(), "000/00/00")).collect(Collectors.toList());
      break;
    case AVRO:
      formatConverter = AbstractConnectWriter.KAFKA_AVRO_CONVERTER;
      inputRecords = SchemaTestUtil.generateTestRecords(0, NUM_RECORDS);
      expectedRecords = inputRecords.stream().map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, schema))
          .map(p -> convertToHoodieRecords(p, p.get(RECORD_KEY_INDEX).toString(), "000/00/00")).collect(Collectors.toList());
      break;
    default:
      throw new HoodieException("Unknown test scenario " + inputFormats);
  }
  configs = KafkaConnectConfigs.newBuilder()
      .withProperties(Collections.singletonMap(KafkaConnectConfigs.KAFKA_VALUE_CONVERTER, formatConverter))
      .build();
  AbstractHudiConnectWriterTestWrapper writer = new AbstractHudiConnectWriterTestWrapper(configs, keyGenerator, schemaProvider);
  for (int i = 0; i < NUM_RECORDS; i++) {
    writer.writeRecord(getNextKafkaRecord(inputRecords.get(i)));
  }
  validateRecords(writer.getWrittenRecords(), expectedRecords);
}
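The JSON_STRING branch leans on Avro's JSON decoder to parse each string payload into a record under the source schema. Isolated from the stream pipeline, the conversion step looks like this; schema and json are assumed inputs:

// Standalone sketch of the JSON -> Avro step used in the JSON_STRING branch.
// `schema` and `json` are assumed inputs; the reader uses the same schema on
// both the writer and reader sides, as the test does.
GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(schema, schema);
Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
GenericRecord avroRecord = (GenericRecord) reader.read(null, decoder);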
Use of org.apache.hudi.keygen.KeyGenerator in project hudi by apache.
The class SparkFullBootstrapDataProviderBase, method generateInputRecords.
@Override
public JavaRDD<HoodieRecord> generateInputRecords(String tableName, String sourceBasePath,
    List<Pair<String, List<HoodieFileStatus>>> partitionPathsWithFiles) {
  String[] filePaths = partitionPathsWithFiles.stream().map(Pair::getValue)
      .flatMap(f -> f.stream().map(fs -> FileStatusUtils.toPath(fs.getPath()).toString()))
      .toArray(String[]::new);
  Dataset inputDataset = sparkSession.read().format(getFormat()).load(filePaths);
  try {
    KeyGenerator keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
    String structName = tableName + "_record";
    String namespace = "hoodie." + tableName;
    RDD<GenericRecord> genericRecords = HoodieSparkUtils.createRdd(inputDataset, structName, namespace, false, Option.empty());
    return genericRecords.toJavaRDD().map(gr -> {
      String orderingVal = HoodieAvroUtils.getNestedFieldValAsString(
          gr, props.getString("hoodie.datasource.write.precombine.field"), false,
          props.getBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(),
              Boolean.parseBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())));
      try {
        return DataSourceUtils.createHoodieRecord(gr, orderingVal, keyGenerator.getKey(gr),
            props.getString("hoodie.datasource.write.payload.class"));
      } catch (IOException ioe) {
        throw new HoodieIOException(ioe.getMessage(), ioe);
      }
    });
  } catch (IOException ioe) {
    throw new HoodieIOException(ioe.getMessage(), ioe);
  }
}
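This provider pulls the precombine field and payload class directly from props, and the key generator it builds needs a record-key field as well. A hedged sketch of the minimal configuration it expects; "id" and "ts" are illustrative field names, and the payload class shown is assumed to be Hudi's stock OverwriteWithLatestAvroPayload rather than a required choice:

// Hedged sketch of the properties this provider reads. Field names "id" and
// "ts" are assumptions; the payload class is illustrative.
TypedProperties props = new TypedProperties();
props.put("hoodie.datasource.write.recordkey.field", "id");
props.put("hoodie.datasource.write.precombine.field", "ts");
props.put("hoodie.datasource.write.payload.class", "org.apache.hudi.common.model.OverwriteWithLatestAvroPayload");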