Search in sources :

Example 1 with HoodieMetadataMergedLogRecordReader

use of org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader in project hudi by apache.

the class TestHoodieBackedMetadata method verifyMetadataMergedRecords.

/**
 * Scans the given metadata table log files through a merged log record reader and checks
 * the resulting in-memory records: every map key must be non-empty, every record must
 * report a non-empty record key, and the two must be equal. This is expected to hold
 * irrespective of the key deduplication config, i.e. the key field in the record payload
 * should always be fully materialized after merging.
 *
 * @param metadataMetaClient    - Metadata table meta client; supplies the file system and base path
 * @param logFilePaths          - Metadata table log file paths to scan
 * @param latestCommitTimestamp - Instant time handed to the reader as its latest instant time
 * @param enableMetaFields      - Enable meta fields; when true the reader schema is passed
 *                                through {@code addMetadataFields} once more
 */
private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClient, List<String> logFilePaths, String latestCommitTimestamp, boolean enableMetaFields) {
    final Schema baseSchema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
    // NOTE(review): baseSchema has already been through addMetadataFields, so this is a
    // second application of the same helper -- confirm that is intended.
    final Schema readerSchema = enableMetaFields ? HoodieAvroUtils.addMetadataFields(baseSchema) : baseSchema;

    final HoodieMetadataMergedLogRecordReader mergedReader = HoodieMetadataMergedLogRecordReader.newBuilder()
            .withFileSystem(metadataMetaClient.getFs())
            .withBasePath(metadataMetaClient.getBasePath())
            .withLogFilePaths(logFilePaths)
            .withLatestInstantTime(latestCommitTimestamp)
            .withPartition(MetadataPartitionType.FILES.getPartitionPath())
            .withReaderSchema(readerSchema)
            .withMaxMemorySizeInBytes(100000L)
            .withBufferSize(4096)
            .withSpillableMapBasePath(tempDir.toString())
            .withDiskMapType(ExternalSpillableMap.DiskMapType.BITCASK)
            .build();

    // The scan itself must succeed before the merged records can be inspected.
    assertDoesNotThrow(() -> {
        mergedReader.scan();
    }, "Metadata log records materialization failed");

    final Map<String, HoodieRecord<? extends HoodieRecordPayload>> mergedRecords = mergedReader.getRecords();
    for (Map.Entry<String, HoodieRecord<? extends HoodieRecordPayload>> mergedEntry : mergedRecords.entrySet()) {
        final String mapKey = mergedEntry.getKey();
        assertFalse(mapKey.isEmpty());
        final String recordKey = mergedEntry.getValue().getRecordKey();
        assertFalse(recordKey.isEmpty());
        // The key the map is indexed by must agree with the key carried by the record.
        assertEquals(mapKey, recordKey);
    }
}
Also used : HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) HoodieMetadataMergedLogRecordReader(org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader) Map(java.util.Map) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) HashMap(java.util.HashMap) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload)

Example 2 with HoodieMetadataMergedLogRecordReader

use of org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader in project hudi by apache.

the class TestHoodieBackedTableMetadata method verifyMetadataMergedRecords.

/**
 * Scans the given metadata table log files through a merged log record reader and checks
 * the resulting in-memory records: every map key must be non-empty, every record must
 * report a non-empty record key, and the two must be equal. This is expected to hold
 * irrespective of the key deduplication config, i.e. the key field in the record payload
 * should always be fully materialized after merging.
 *
 * @param metadataMetaClient    - Metadata table meta client; supplies the file system and base path
 * @param logFilePaths          - Metadata table log file paths to scan
 * @param latestCommitTimestamp - Latest commit timestamp, handed to the reader as its latest instant time
 */
private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClient, List<String> logFilePaths, String latestCommitTimestamp) {
    final Schema readerSchema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());

    final HoodieMetadataMergedLogRecordReader mergedReader = HoodieMetadataMergedLogRecordReader.newBuilder()
            .withFileSystem(metadataMetaClient.getFs())
            .withBasePath(metadataMetaClient.getBasePath())
            .withLogFilePaths(logFilePaths)
            .withLatestInstantTime(latestCommitTimestamp)
            .withPartition(MetadataPartitionType.FILES.getPartitionPath())
            .withReaderSchema(readerSchema)
            .withMaxMemorySizeInBytes(100000L)
            .withBufferSize(4096)
            .withSpillableMapBasePath(tempDir.toString())
            .withDiskMapType(ExternalSpillableMap.DiskMapType.BITCASK)
            .build();

    // The scan itself must succeed before the merged records can be inspected.
    assertDoesNotThrow(() -> {
        mergedReader.scan();
    }, "Metadata log records materialization failed");

    final Map<String, HoodieRecord<? extends HoodieRecordPayload>> mergedRecords = mergedReader.getRecords();
    for (Map.Entry<String, HoodieRecord<? extends HoodieRecordPayload>> mergedEntry : mergedRecords.entrySet()) {
        final String mapKey = mergedEntry.getKey();
        assertFalse(mapKey.isEmpty());
        final String recordKey = mergedEntry.getValue().getRecordKey();
        assertFalse(recordKey.isEmpty());
        // The key the map is indexed by must agree with the key carried by the record.
        assertEquals(mapKey, recordKey);
    }
}
Also used : HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) HoodieMetadataMergedLogRecordReader(org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader) Map(java.util.Map) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload)

Aggregations

Map (java.util.Map)2 Schema (org.apache.avro.Schema)2 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)2 HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload)2 ExternalSpillableMap (org.apache.hudi.common.util.collection.ExternalSpillableMap)2 HoodieMetadataMergedLogRecordReader (org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader)2 HashMap (java.util.HashMap)1