Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
The class AbstractConnectWriter, method writeRecord.
@Override
public void writeRecord(SinkRecord record) throws IOException {
  AvroConvertor convertor = new AvroConvertor(schemaProvider.getSourceSchema());
  Option<GenericRecord> avroRecord;
  switch (connectConfigs.getKafkaValueConverter()) {
    case KAFKA_AVRO_CONVERTER:
      avroRecord = Option.of((GenericRecord) record.value());
      break;
    case KAFKA_STRING_CONVERTER:
      avroRecord = Option.of(convertor.fromJson((String) record.value()));
      break;
    case KAFKA_JSON_CONVERTER:
      throw new UnsupportedEncodingException("Currently JSON objects are not supported");
    default:
      throw new IOException("Unsupported Kafka Format type (" + connectConfigs.getKafkaValueConverter() + ")");
  }

  // Tag records with a file ID based on kafka partition and hudi partition.
  HoodieRecord<?> hoodieRecord = new HoodieAvroRecord<>(keyGenerator.getKey(avroRecord.get()), new HoodieAvroPayload(avroRecord));
  String fileId = KafkaConnectUtils.hashDigest(String.format("%s-%s", record.kafkaPartition(), hoodieRecord.getPartitionPath()));
  hoodieRecord.unseal();
  hoodieRecord.setCurrentLocation(new HoodieRecordLocation(instantTime, fileId));
  hoodieRecord.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
  hoodieRecord.seal();
  writeHudiRecord(hoodieRecord);
}
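The core pattern above can be exercised on its own: wrap the Avro payload in a HoodieAvroRecord, unseal it, point both the current and the new location at the same instant/fileId pair, and seal it again. A minimal sketch, assuming a caller-supplied GenericRecord; the helper name, key values, and fileId are hypothetical, not part of the Hudi API.

import org.apache.avro.generic.GenericRecord;

import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.util.Option;

public class LocationTaggingSketch {
  // avroRecord, instantTime and fileId are assumed to be supplied by the caller.
  static HoodieRecord<?> tagWithLocation(GenericRecord avroRecord, String instantTime, String fileId) {
    HoodieKey key = new HoodieKey("record-key-1", "partition-path-1"); // hypothetical key/partition values
    HoodieRecord<?> record = new HoodieAvroRecord<>(key, new HoodieAvroPayload(Option.of(avroRecord)));
    record.unseal(); // locations may only be mutated on an unsealed record
    record.setCurrentLocation(new HoodieRecordLocation(instantTime, fileId));
    record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
    record.seal();
    return record;
  }
}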
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
The class TestHoodieHFileReaderWriter, method testWriteReadHFile.
@ParameterizedTest
@MethodSource("populateMetaFieldsAndTestAvroWithMeta")
public void testWriteReadHFile(boolean populateMetaFields, boolean testAvroWithMeta) throws Exception {
  Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithMetaFields.avsc");
  HoodieHFileWriter writer = createHFileWriter(avroSchema, populateMetaFields);
  List<String> keys = new ArrayList<>();
  Map<String, GenericRecord> recordMap = new HashMap<>();
  for (int i = 0; i < 100; i++) {
    GenericRecord record = new GenericData.Record(avroSchema);
    String key = String.format("%s%04d", "key", i);
    record.put("_row_key", key);
    keys.add(key);
    record.put("time", Integer.toString(RANDOM.nextInt()));
    record.put("number", i);
    if (testAvroWithMeta) {
      // The payload does not matter; the GenericRecord passed in is what gets written,
      // and only the HoodieKey is looked up from the 2nd arg (the HoodieRecord).
      writer.writeAvroWithMetadata(record,
          new HoodieAvroRecord(new HoodieKey((String) record.get("_row_key"), Integer.toString((Integer) record.get("number"))),
              new EmptyHoodieRecordPayload()));
    } else {
      writer.writeAvro(key, record);
    }
    recordMap.put(key, record);
  }
  writer.close();

  Configuration conf = new Configuration();
  CacheConfig cacheConfig = new CacheConfig(conf);
  HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(conf, filePath, cacheConfig, filePath.getFileSystem(conf));
  List<Pair<String, IndexedRecord>> records = hoodieHFileReader.readAllRecords();
  records.forEach(entry -> assertEquals(entry.getSecond(), recordMap.get(entry.getFirst())));
  hoodieHFileReader.close();

  for (int i = 0; i < 2; i++) {
    int randomRowstoFetch = 5 + RANDOM.nextInt(10);
    Set<String> rowsToFetch = getRandomKeys(randomRowstoFetch, keys);
    List<String> rowsList = new ArrayList<>(rowsToFetch);
    Collections.sort(rowsList);
    hoodieHFileReader = new HoodieHFileReader(conf, filePath, cacheConfig, filePath.getFileSystem(conf));
    List<Pair<String, GenericRecord>> result = hoodieHFileReader.readRecords(rowsList);
    assertEquals(result.size(), randomRowstoFetch);
    result.forEach(entry -> {
      assertEquals(entry.getSecond(), recordMap.get(entry.getFirst()));
      if (populateMetaFields && testAvroWithMeta) {
        assertNotNull(entry.getSecond().get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
      } else {
        assertNull(entry.getSecond().get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
      }
    });
    hoodieHFileReader.close();
  }
}
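As the inline comment notes, writeAvroWithMetadata only consults the HoodieKey of its HoodieRecord argument; the payload carries no data. A minimal standalone sketch of such a key-only record, with hypothetical key and partition values:

import org.apache.hudi.common.model.EmptyHoodieRecordPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;

public class KeyOnlyRecordSketch {
  public static void main(String[] args) {
    // A key-only HoodieRecord: the EmptyHoodieRecordPayload holds nothing, so the record exists
    // purely to carry the HoodieKey, which is all the writer reads from its second argument above.
    HoodieRecord keyCarrier =
        new HoodieAvroRecord(new HoodieKey("key0001", "2020/01/01"), new EmptyHoodieRecordPayload()); // hypothetical key/partition
    System.out.println(keyCarrier.getRecordKey() + " / " + keyCarrier.getPartitionPath());
  }
}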
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
The class HoodieTestDataGenerator, method generateUniqueUpdatesStream.
/**
 * Generates deduped updates of keys previously inserted, randomly distributed across the keys above.
 *
 * @param instantTime Commit Timestamp
 * @param n           Number of unique records
 * @return stream of hoodie record updates
 */
public Stream<HoodieRecord> generateUniqueUpdatesStream(String instantTime, Integer n, String schemaStr) {
  final Set<KeyPartition> used = new HashSet<>();
  int numExistingKeys = numKeysBySchema.getOrDefault(schemaStr, 0);
  Map<Integer, KeyPartition> existingKeys = existingKeysBySchema.get(schemaStr);
  if (n > numExistingKeys) {
    throw new IllegalArgumentException("Requested unique updates is greater than number of available keys");
  }
  return IntStream.range(0, n).boxed().map(i -> {
    int index = numExistingKeys == 1 ? 0 : rand.nextInt(numExistingKeys - 1);
    KeyPartition kp = existingKeys.get(index);
    // Find the available keyPartition starting from the randomly chosen one.
    while (used.contains(kp)) {
      index = (index + 1) % numExistingKeys;
      kp = existingKeys.get(index);
    }
    logger.debug("key getting updated: " + kp.key.getRecordKey());
    used.add(kp);
    try {
      return new HoodieAvroRecord(kp.key, generateRandomValueAsPerSchema(schemaStr, kp.key, instantTime, false));
    } catch (IOException e) {
      throw new HoodieIOException(e.getMessage(), e);
    }
  });
}
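A minimal usage sketch of the generator, under the assumption that the list-returning helpers generateInserts and generateUniqueUpdates are available alongside the stream variant above, and that the class lives in the hudi-common test utilities package; the instant times are arbitrary.

import java.io.IOException;
import java.util.List;

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator; // assumed import path

public class UniqueUpdatesSketch {
  public static void main(String[] args) throws IOException {
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
    // Inserts must come first so that the generator has existing keys to update.
    List<HoodieRecord> inserts = dataGen.generateInserts("001", 100);
    // Each returned update targets a distinct, previously inserted key.
    List<HoodieRecord> updates = dataGen.generateUniqueUpdates("002", 20);
    System.out.println(inserts.size() + " inserts, " + updates.size() + " unique updates");
  }
}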
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
The class HoodieTestDataGenerator, method generateUniqueDeleteRecordStream.
/**
 * Generates deduped delete records previously inserted, randomly distributed across the keys above.
 *
 * @param instantTime Commit Timestamp
 * @param n           Number of unique records
 * @return stream of hoodie records for delete
 */
public Stream<HoodieRecord> generateUniqueDeleteRecordStream(String instantTime, Integer n) {
  final Set<KeyPartition> used = new HashSet<>();
  Map<Integer, KeyPartition> existingKeys = existingKeysBySchema.get(TRIP_EXAMPLE_SCHEMA);
  Integer numExistingKeys = numKeysBySchema.get(TRIP_EXAMPLE_SCHEMA);
  if (n > numExistingKeys) {
    throw new IllegalArgumentException("Requested unique deletes is greater than number of available keys");
  }
  List<HoodieRecord> result = new ArrayList<>();
  for (int i = 0; i < n; i++) {
    int index = rand.nextInt(numExistingKeys);
    while (!existingKeys.containsKey(index)) {
      index = (index + 1) % numExistingKeys;
    }
    // Swap the chosen index with the last index and remove the last entry.
    KeyPartition kp = existingKeys.remove(index);
    existingKeys.put(index, existingKeys.get(numExistingKeys - 1));
    existingKeys.remove(numExistingKeys - 1);
    numExistingKeys--;
    used.add(kp);
    try {
      result.add(new HoodieAvroRecord(kp.key, generateRandomDeleteValue(kp.key, instantTime)));
    } catch (IOException e) {
      throw new HoodieIOException(e.getMessage(), e);
    }
  }
  numKeysBySchema.put(TRIP_EXAMPLE_SCHEMA, numExistingKeys);
  return result.stream();
}
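The loop above keeps the index space dense with a swap-remove: the chosen entry is removed, the last entry is moved into its slot, and the key count shrinks by one, so the next rand.nextInt(numExistingKeys) still lands on a live index. A standalone sketch of that pattern, with a plain Map<Integer, String> standing in for the KeyPartition map:

import java.util.HashMap;
import java.util.Map;
import java.util.Random;

public class SwapRemoveSketch {
  // Removes and returns a random value while keeping indexes contiguous in [0, size).
  static String removeRandom(Map<Integer, String> byIndex, Random rand) {
    int size = byIndex.size();
    int index = rand.nextInt(size);
    String chosen = byIndex.remove(index);
    // Move the last entry into the vacated slot so the index space stays dense.
    if (index != size - 1) {
      byIndex.put(index, byIndex.remove(size - 1));
    }
    return chosen;
  }

  public static void main(String[] args) {
    Map<Integer, String> byIndex = new HashMap<>();
    for (int i = 0; i < 5; i++) {
      byIndex.put(i, "key-" + i);
    }
    Random rand = new Random();
    while (!byIndex.isEmpty()) {
      System.out.println(removeRandom(byIndex, rand));
    }
  }
}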
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
The class HoodieTestDataGenerator, method generateUpdatesWithTS.
public List<HoodieRecord> generateUpdatesWithTS(String instantTime, List<HoodieRecord> baseRecords, int ts) throws IOException {
  List<HoodieRecord> updates = new ArrayList<>();
  for (HoodieRecord baseRecord : baseRecords) {
    HoodieRecord record = new HoodieAvroRecord(baseRecord.getKey(), generateRandomValue(baseRecord.getKey(), instantTime, false, ts));
    updates.add(record);
  }
  return updates;
}
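A brief usage sketch, assuming (as in the example after generateUniqueUpdatesStream) that base records come from a prior generateInserts call and that the import path is the hudi-common test utilities package; the instant times and timestamp value are arbitrary.

import java.io.IOException;
import java.util.List;

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator; // assumed import path

public class UpdatesWithTsSketch {
  public static void main(String[] args) throws IOException {
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
    List<HoodieRecord> baseRecords = dataGen.generateInserts("001", 10);
    // Every update reuses an existing key but carries the fixed timestamp 42.
    List<HoodieRecord> updates = dataGen.generateUpdatesWithTS("002", baseRecords, 42);
    System.out.println(updates.size() + " updates generated");
  }
}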