use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
the class TestHoodieBackedMetadata method verifyMetadataMergedRecords.
/**
* Verify the metadata table in-memory merged records. Irrespective of key deduplication
* config, the in-memory merged records should always have the key field in the record
* payload fully materialized.
*
* @param metadataMetaClient - Metadata table meta client
* @param logFilePaths - Metadata table log file paths
* @param latestCommitTimestamp - Latest commit timestamp on the metadata table
* @param enableMetaFields - Enable meta fields
*/
private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClient, List<String> logFilePaths,
                                         String latestCommitTimestamp, boolean enableMetaFields) {
  Schema schema = HoodieMetadataRecord.getClassSchema();
  if (enableMetaFields) {
    // Widen the schema with the _hoodie_* meta columns only when they are enabled;
    // adding them unconditionally and again here would duplicate the fields.
    schema = HoodieAvroUtils.addMetadataFields(schema);
  }
  HoodieMetadataMergedLogRecordReader logRecordReader = HoodieMetadataMergedLogRecordReader.newBuilder()
      .withFileSystem(metadataMetaClient.getFs())
      .withBasePath(metadataMetaClient.getBasePath())
      .withLogFilePaths(logFilePaths)
      .withLatestInstantTime(latestCommitTimestamp)
      .withPartition(MetadataPartitionType.FILES.getPartitionPath())
      .withReaderSchema(schema)
      .withMaxMemorySizeInBytes(100000L)
      .withBufferSize(4096)
      .withSpillableMapBasePath(tempDir.toString())
      .withDiskMapType(ExternalSpillableMap.DiskMapType.BITCASK)
      .build();
  assertDoesNotThrow(() -> {
    logRecordReader.scan();
  }, "Metadata log records materialization failed");
  for (Map.Entry<String, HoodieRecord<? extends HoodieRecordPayload>> entry : logRecordReader.getRecords().entrySet()) {
    assertFalse(entry.getKey().isEmpty());
    assertFalse(entry.getValue().getRecordKey().isEmpty());
    assertEquals(entry.getKey(), entry.getValue().getRecordKey());
  }
}
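As an aside, the reader schema handling above hinges on HoodieAvroUtils.addMetadataFields, which prepends the _hoodie_* meta columns to a base schema. A minimal sketch of that widening, reusing only calls already present in the method (the println is purely illustrative):

// Sketch: widen the metadata record schema with the Hoodie meta columns.
Schema base = HoodieMetadataRecord.getClassSchema();
Schema withMeta = HoodieAvroUtils.addMetadataFields(base);
// The widened schema now resolves meta columns such as the record key field.
System.out.println(withMeta.getField(HoodieRecord.RECORD_KEY_METADATA_FIELD).name());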
use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
the class TestBulkInsertInternalPartitioner method testCustomColumnSortPartitioner.
@Test
public void testCustomColumnSortPartitioner() throws Exception {
  String sortColumnString = "rider";
  String[] sortColumns = sortColumnString.split(",");
  Comparator<HoodieRecord<? extends HoodieRecordPayload>> columnComparator =
      getCustomColumnComparator(HoodieTestDataGenerator.AVRO_SCHEMA, sortColumns);
  JavaRDD<HoodieRecord> records1 = generateTestRecordsForBulkInsert(jsc);
  JavaRDD<HoodieRecord> records2 = generateTripleTestRecordsForBulkInsert(jsc);
  testBulkInsertInternalPartitioner(new RDDCustomColumnsSortPartitioner(sortColumns, HoodieTestDataGenerator.AVRO_SCHEMA, false),
      records1, true, true, generateExpectedPartitionNumRecords(records1), Option.of(columnComparator));
  testBulkInsertInternalPartitioner(new RDDCustomColumnsSortPartitioner(sortColumns, HoodieTestDataGenerator.AVRO_SCHEMA, false),
      records2, true, true, generateExpectedPartitionNumRecords(records2), Option.of(columnComparator));
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
      .withPath("/")
      .withSchema(TRIP_EXAMPLE_SCHEMA)
      .withUserDefinedBulkInsertPartitionerClass(RDDCustomColumnsSortPartitioner.class.getName())
      .withUserDefinedBulkInsertPartitionerSortColumns(sortColumnString)
      .build();
  testBulkInsertInternalPartitioner(new RDDCustomColumnsSortPartitioner(config),
      records1, true, true, generateExpectedPartitionNumRecords(records1), Option.of(columnComparator));
  testBulkInsertInternalPartitioner(new RDDCustomColumnsSortPartitioner(config),
      records2, true, true, generateExpectedPartitionNumRecords(records2), Option.of(columnComparator));
}
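The columnComparator above comes from the test's own getCustomColumnComparator helper. A hedged sketch of what such a comparator could look like for the single rider sort column, assuming the payload materializes against HoodieTestDataGenerator.AVRO_SCHEMA (this is not the helper's actual implementation):

// Hedged sketch: order records by the string value of the "rider" field
// materialized from the record payload.
Comparator<HoodieRecord<? extends HoodieRecordPayload>> sketch =
    Comparator.comparing((HoodieRecord<? extends HoodieRecordPayload> record) -> {
      try {
        GenericRecord avro = (GenericRecord) record.getData()
            .getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get();
        return avro.get("rider").toString();
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      }
    });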
use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
the class TestBulkInsertInternalPartitioner method testBulkInsertInternalPartitioner.
private void testBulkInsertInternalPartitioner(BulkInsertPartitioner partitioner, JavaRDD<HoodieRecord> records,
                                               boolean isGloballySorted, boolean isLocallySorted,
                                               Map<String, Long> expectedPartitionNumRecords,
                                               Option<Comparator<HoodieRecord<? extends HoodieRecordPayload>>> comparator) {
  int numPartitions = 2;
  JavaRDD<HoodieRecord<? extends HoodieRecordPayload>> actualRecords =
      (JavaRDD<HoodieRecord<? extends HoodieRecordPayload>>) partitioner.repartitionRecords(records, numPartitions);
  assertEquals(numPartitions, actualRecords.getNumPartitions());
  List<HoodieRecord<? extends HoodieRecordPayload>> collectedActualRecords = actualRecords.collect();
  if (isGloballySorted) {
    // Verify global order
    verifyRecordAscendingOrder(collectedActualRecords, comparator);
  } else if (isLocallySorted) {
    // Verify local order within each partition
    actualRecords.mapPartitions(partition -> {
      List<HoodieRecord<? extends HoodieRecordPayload>> partitionRecords = new ArrayList<>();
      partition.forEachRemaining(partitionRecords::add);
      verifyRecordAscendingOrder(partitionRecords, comparator);
      return Collections.emptyList().iterator();
    }).collect();
  }
  // Verify the number of records per partition path
  Map<String, Long> actualPartitionNumRecords = new HashMap<>();
  for (HoodieRecord record : collectedActualRecords) {
    String partitionPath = record.getPartitionPath();
    actualPartitionNumRecords.put(partitionPath, actualPartitionNumRecords.getOrDefault(partitionPath, 0L) + 1);
  }
  assertEquals(expectedPartitionNumRecords, actualPartitionNumRecords);
}
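The per-partition counting loop above can be expressed equivalently with the streams API; a small sketch:

// Equivalent per-partition-path counting using Collectors.groupingBy/counting.
Map<String, Long> actualPartitionNumRecords = collectedActualRecords.stream()
    .collect(Collectors.groupingBy(HoodieRecord::getPartitionPath, Collectors.counting()));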
use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
the class HoodieWriteableTestTable method appendRecordsToLogFile.
private Pair<String, HoodieLogFile> appendRecordsToLogFile(List<HoodieRecord> groupedRecords) throws Exception {
  String partitionPath = groupedRecords.get(0).getPartitionPath();
  HoodieRecordLocation location = groupedRecords.get(0).getCurrentLocation();
  try (HoodieLogFormat.Writer logWriter = HoodieLogFormat.newWriterBuilder()
      .onParentPath(new Path(basePath, partitionPath))
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
      .withFileId(location.getFileId())
      .overBaseCommit(location.getInstantTime())
      .withFs(fs)
      .build()) {
    Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, location.getInstantTime());
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
    logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> {
      try {
        // Materialize the payload into an Avro record and stamp the Hoodie key fields onto it
        GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get();
        HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
        return (IndexedRecord) val;
      } catch (IOException e) {
        LOG.warn("Failed to convert record " + r, e);
        return null;
      }
    }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
    return Pair.of(partitionPath, logWriter.getLogFile());
  }
}
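Note that a record whose payload fails getInsertValue conversion is mapped to null and still collected into the data block above. A hedged alternative sketch that drops failed conversions instead (assumes java.util.Objects is imported):

// Alternative sketch: filter out records whose payload failed to materialize,
// so no null IndexedRecord enters the data block.
List<IndexedRecord> converted = groupedRecords.stream().map(r -> {
  try {
    GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get();
    HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
    return (IndexedRecord) val;
  } catch (IOException e) {
    LOG.warn("Failed to convert record " + r, e);
    return null;
  }
}).filter(Objects::nonNull).collect(Collectors.toList());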
use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
the class TestExternalSpillableMap method testAllMapOperations.
@ParameterizedTest
@MethodSource("testArguments")
public void testAllMapOperations(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  String payloadClazz = HoodieAvroPayload.class.getName();
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, // 16B max in-memory size
          basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  // insert a bunch of records so that values spill to disk too
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
  IndexedRecord inMemoryRecord = iRecords.get(0);
  String ikey = ((GenericRecord) inMemoryRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
  String iPartitionPath = ((GenericRecord) inMemoryRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
  HoodieRecord inMemoryHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(ikey, iPartitionPath),
      new HoodieAvroPayload(Option.of((GenericRecord) inMemoryRecord)));
  IndexedRecord onDiskRecord = iRecords.get(99);
  String dkey = ((GenericRecord) onDiskRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
  String dPartitionPath = ((GenericRecord) onDiskRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
  HoodieRecord onDiskHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(dkey, dPartitionPath),
      new HoodieAvroPayload(Option.of((GenericRecord) onDiskRecord)));
  // assert size
  assert records.size() == 100;
  // get should return the same HoodieKey, same location and same value
  assert inMemoryHoodieRecord.getKey().equals(records.get(ikey).getKey());
  assert onDiskHoodieRecord.getKey().equals(records.get(dkey).getKey());
  // compare the member variables of HoodieRecord not set by the constructor
  assert records.get(ikey).getCurrentLocation().getFileId().equals(SpillableMapTestUtils.DUMMY_FILE_ID);
  assert records.get(ikey).getCurrentLocation().getInstantTime().equals(SpillableMapTestUtils.DUMMY_COMMIT_TIME);
  // test contains
  assertTrue(records.containsKey(ikey));
  assertTrue(records.containsKey(dkey));
  // test isEmpty
  assertFalse(records.isEmpty());
  // test containsAll
  assertTrue(records.keySet().containsAll(recordKeys));
  // remove (from inMemory and onDisk)
  HoodieRecord removedRecord = records.remove(ikey);
  assertNotNull(removedRecord);
  assertFalse(records.containsKey(ikey));
  removedRecord = records.remove(dkey);
  assertNotNull(removedRecord);
  assertFalse(records.containsKey(dkey));
  // test clear
  records.clear();
  assertTrue(records.size() == 0);
}
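For reference, a minimal standalone sketch of the same ExternalSpillableMap API with a fixed disk map type and compression disabled; the key, partition path, and empty payload are illustrative placeholders, not values from the test:

// Sketch: build a spillable map, insert one record, read it back, and clear.
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> map =
    new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(),
        new HoodieRecordSizeEstimator(schema), ExternalSpillableMap.DiskMapType.BITCASK, false);
HoodieRecord<? extends HoodieRecordPayload> record =
    new HoodieAvroRecord<>(new HoodieKey("key1", "2020/01/01"), new HoodieAvroPayload(Option.empty()));
map.put(record.getRecordKey(), record);
assert map.containsKey("key1");
map.clear();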