Use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
From the class TestExternalSpillableMap, method simpleTestWithException:
@ParameterizedTest
@MethodSource("testArguments")
public void simpleTestWithException(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled)
    throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, // 16B
          failureOutputPath, new DefaultSizeEstimator(),
          new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
  assert (recordKeys.size() == 100);
  Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = records.iterator();
  assertThrows(IOException.class, () -> {
    while (itr.hasNext()) {
      throw new IOException("Testing failures...");
    }
  });
}
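For readers new to ExternalSpillableMap, here is a minimal standalone usage sketch, not part of the Hudi test suite: spillPath and record are hypothetical names assumed to be in scope, and the deliberately tiny 16-byte budget forces almost every entry onto the on-disk map, just as in the test above.

Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> map =
    new ExternalSpillableMap<>(
        16L,                                   // in-memory budget in bytes; overflow spills to disk
        spillPath,                             // hypothetical writable directory for spill files
        new DefaultSizeEstimator(),            // estimates the size of each key
        new HoodieRecordSizeEstimator(schema), // estimates the size of each record value
        ExternalSpillableMap.DiskMapType.BITCASK,
        false);                                // compression disabled
map.put(record.getRecordKey(), record);        // record: a hypothetical HoodieRecord in scope
// Lookups are transparent: the entry is served from memory or disk as needed.
HoodieRecord<? extends HoodieRecordPayload> fetched = map.get(record.getRecordKey());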
Use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
From the class TestExternalSpillableMap, method testEstimationWithEmptyMap:
@Test
public void testEstimationWithEmptyMap() throws IOException, URISyntaxException {
  final ExternalSpillableMap.DiskMapType diskMapType = ExternalSpillableMap.DiskMapType.BITCASK;
  final boolean isCompressionEnabled = false;
  final Schema schema = SchemaTestUtil.getSimpleSchema();
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(),
          new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
  List<String> recordKeys = new ArrayList<>();
  // Put a single record. Payload size estimation happens as part of this initial put.
  HoodieRecord seedRecord = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 1).get(0);
  records.put(seedRecord.getRecordKey(), seedRecord);
  // Remove the key immediately to make the map empty again.
  records.remove(seedRecord.getRecordKey());
  // Verify payload size re-estimation does not throw exception
  List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 250);
  hoodieRecords.forEach(hoodieRecord -> {
    assertDoesNotThrow(() -> records.put(hoodieRecord.getRecordKey(), hoodieRecord),
        "ExternalSpillableMap put() should not throw exception!");
    recordKeys.add(hoodieRecord.getRecordKey());
  });
}
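The put-then-remove dance above matters because ExternalSpillableMap estimates the average payload size lazily from entries it has already seen. The sketch below is built around a hypothetical estimateAverageSize helper, not Hudi's internals (it only assumes Hudi's SizeEstimator interface with its sizeEstimate method), and shows the empty-map edge case the test guards against.

// Hypothetical helper, not Hudi's implementation: sampling-based size
// estimation must tolerate an empty map, or re-estimation after put+remove fails.
static long estimateAverageSize(Map<String, HoodieRecord> inMemoryMap,
    SizeEstimator<HoodieRecord> estimator) {
  if (inMemoryMap.isEmpty()) {
    return 0L; // guard: sampling an empty map would otherwise throw (e.g. NoSuchElementException)
  }
  HoodieRecord sample = inMemoryMap.values().iterator().next();
  return estimator.sizeEstimate(sample);
}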
Use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
From the class TestExternalSpillableMap, method testSimpleUpsert:
@ParameterizedTest
@MethodSource("testArguments")
public void testSimpleUpsert(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled)
    throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, // 16B
          basePath, new DefaultSizeEstimator(),
          new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
  assert (recordKeys.size() == 100);
  Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = records.iterator();
  while (itr.hasNext()) {
    HoodieRecord<? extends HoodieRecordPayload> rec = itr.next();
    assert recordKeys.contains(rec.getRecordKey());
  }
  List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(recordKeys,
      SchemaTestUtil.generateHoodieTestRecords(0, 100), HoodieActiveTimeline.createNewInstantTime());
  // Update the records that were already inserted.
  SpillableMapTestUtils.upsertRecords(updatedRecords, records);
  // Make sure some records have spilled to disk.
  assertTrue(records.getDiskBasedMapNumEntries() > 0);
  // Iterate over the updated records and compare each value against the map.
  updatedRecords.forEach(record -> {
    HoodieRecord rec = records.get(((GenericRecord) record).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString());
    try {
      assertEquals(((HoodieAvroRecord) rec).getData().getInsertValue(schema).get(), record);
    } catch (IOException io) {
      throw new UncheckedIOException(io);
    }
  });
}
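All of these tests funnel records through SpillableMapTestUtils.upsertRecords. The snippet below is a plausible reconstruction of such a helper, assuming it keys each record by the _hoodie_record_key metadata field and wraps the Avro record in a HoodieAvroPayload; it illustrates the pattern and is not the utility's actual source.

// Plausible reconstruction (assumption, not the real SpillableMapTestUtils):
public static List<String> upsertRecords(List<IndexedRecord> iRecords,
    Map<String, HoodieRecord<? extends HoodieRecordPayload>> records) {
  List<String> recordKeys = new ArrayList<>();
  for (IndexedRecord ir : iRecords) {
    GenericRecord gr = (GenericRecord) ir;
    String key = gr.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
    String partitionPath = gr.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
    // Re-putting an existing key is what makes this an upsert.
    records.put(key, new HoodieAvroRecord<>(new HoodieKey(key, partitionPath),
        new HoodieAvroPayload(Option.of(gr))));
    recordKeys.add(key);
  }
  return recordKeys;
}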
Use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
From the class TestRocksDbDiskMap, method testSimpleUpsert:
@Test
public void testSimpleUpsert() throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  RocksDbDiskMap<String, HoodieRecord<? extends HoodieRecordPayload>> rocksDBBasedMap = new RocksDbDiskMap<>(basePath);
  List<IndexedRecord> insertedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(insertedRecords, rocksDBBasedMap);
  String oldCommitTime =
      ((GenericRecord) insertedRecords.get(0)).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
  // Generate updates for the first 50 keys (a subset of the inserted keys).
  List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(recordKeys.subList(0, 50),
      SchemaTestUtil.generateHoodieTestRecords(0, 50), HoodieActiveTimeline.createNewInstantTime());
  String newCommitTime =
      ((GenericRecord) updatedRecords.get(0)).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
  // Perform the upserts.
  List<String> updatedRecordKeys = SpillableMapTestUtils.upsertRecords(updatedRecords, rocksDBBasedMap);
  // Upserted records (on disk) should carry the latest commit time.
  Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = rocksDBBasedMap.iterator();
  while (itr.hasNext()) {
    HoodieRecord<? extends HoodieRecordPayload> rec = itr.next();
    try {
      IndexedRecord indexedRecord = (IndexedRecord) rec.getData().getInsertValue(schema).get();
      String latestCommitTime = ((GenericRecord) indexedRecord).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
      assert recordKeys.contains(rec.getRecordKey()) || updatedRecordKeys.contains(rec.getRecordKey());
      assertEquals(latestCommitTime, updatedRecordKeys.contains(rec.getRecordKey()) ? newCommitTime : oldCommitTime);
    } catch (IOException io) {
      throw new UncheckedIOException(io);
    }
  }
}
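Unlike the spillable map, RocksDbDiskMap keeps every entry on disk. A minimal lifecycle sketch follows, assuming basePath is a writable scratch directory and that close() releases the underlying RocksDB resources:

RocksDbDiskMap<String, HoodieRecord<? extends HoodieRecordPayload>> diskMap =
    new RocksDbDiskMap<>(basePath); // basePath: a writable scratch directory
try {
  List<IndexedRecord> inserts = SchemaTestUtil.generateHoodieTestRecords(0, 10);
  List<String> keys = SpillableMapTestUtils.upsertRecords(inserts, diskMap);
  // Every lookup goes through RocksDB; nothing is pinned in heap memory.
  HoodieRecord<? extends HoodieRecordPayload> first = diskMap.get(keys.get(0));
} finally {
  diskMap.close(); // assumption: close() tears down the RocksDB instance for this map
}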
Use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.
From the class HoodieMergedLogRecordScanner, method processNextRecord:
@Override
protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws IOException {
  String key = hoodieRecord.getRecordKey();
  if (records.containsKey(key)) {
    // Merge and store the merged record. The HoodieRecordPayload implementation is free to decide what should be
    // done when a delete (empty payload) is encountered before or after an insert/update.
    HoodieRecord<? extends HoodieRecordPayload> oldRecord = records.get(key);
    HoodieRecordPayload oldValue = oldRecord.getData();
    HoodieRecordPayload combinedValue = hoodieRecord.getData().preCombine(oldValue);
    // If preCombine returned the old value unchanged, the stored record is already current; skip the re-put.
    if (combinedValue != oldValue) {
      HoodieOperation operation = hoodieRecord.getOperation();
      records.put(key, new HoodieAvroRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), combinedValue, operation));
    }
  } else {
    // First occurrence of this key: store the record as is.
    records.put(key, hoodieRecord);
  }
}
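The identity check `combinedValue != oldValue` only pays off when payload implementations return the existing value unchanged if the incoming record loses the merge. The following is an illustrative payload sketched for this page, not one of Hudi's shipped implementations, that follows that convention with a latest-ordering-value-wins rule:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;

// Illustrative payload (not a Hudi built-in): the record with the larger
// ordering value wins, and the loser is returned unchanged so callers like
// processNextRecord can skip the re-put via the identity check.
public class LatestWinsPayload implements HoodieRecordPayload<LatestWinsPayload> {
  private final GenericRecord record;
  private final Comparable<Object> orderingVal;

  public LatestWinsPayload(GenericRecord record, Comparable<Object> orderingVal) {
    this.record = record;
    this.orderingVal = orderingVal;
  }

  @Override
  public LatestWinsPayload preCombine(LatestWinsPayload oldValue) {
    // Return the loser unchanged rather than a copy, enabling the caller's identity check.
    return orderingVal.compareTo(oldValue.orderingVal) >= 0 ? this : oldValue;
  }

  @Override
  public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
    return getInsertValue(schema); // on merge with the stored value, the newer payload simply wins
  }

  @Override
  public Option<IndexedRecord> getInsertValue(Schema schema) throws IOException {
    // An empty Option signals a delete, as noted in the scanner's comment above.
    return record == null ? Option.empty() : Option.of(record);
  }
}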