use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.
the class TestExternalSpillableMap method testAllMapOperations.
/**
 * Exercises the basic map operations of {@link ExternalSpillableMap}: size, get, containsKey,
 * isEmpty, keySet, remove and clear.
 *
 * <p>All checks use JUnit assertions rather than the Java {@code assert} keyword, so they are
 * evaluated even when the JVM runs without {@code -ea}.
 *
 * @param diskMapType          disk map implementation under test (supplied by {@code testArguments})
 * @param isCompressionEnabled whether compression is enabled (supplied by {@code testArguments})
 */
@ParameterizedTest
@MethodSource("testArguments")
public void testAllMapOperations(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  // 16B in-memory budget
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema),
          diskMapType, isCompressionEnabled);
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  // insert a bunch of records so that values spill to disk too
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);

  // First generated record; named "in memory" by the original author (presumably it fits the budget).
  IndexedRecord inMemoryRecord = iRecords.get(0);
  String ikey = ((GenericRecord) inMemoryRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
  String iPartitionPath = ((GenericRecord) inMemoryRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
  HoodieRecord inMemoryHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(ikey, iPartitionPath),
      new HoodieAvroPayload(Option.of((GenericRecord) inMemoryRecord)));

  // Last generated record; named "on disk" by the original author (presumably it was spilled).
  IndexedRecord onDiskRecord = iRecords.get(99);
  String dkey = ((GenericRecord) onDiskRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
  String dPartitionPath = ((GenericRecord) onDiskRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
  HoodieRecord onDiskHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(dkey, dPartitionPath),
      new HoodieAvroPayload(Option.of((GenericRecord) onDiskRecord)));

  // assert size
  assertTrue(records.size() == 100, "map should hold all 100 upserted records");

  // get should return the same HoodieKey, same location and same value
  assertTrue(inMemoryHoodieRecord.getKey().equals(records.get(ikey).getKey()));
  assertTrue(onDiskHoodieRecord.getKey().equals(records.get(dkey).getKey()));

  // compare the member variables of HoodieRecord not set by the constructor
  assertTrue(records.get(ikey).getCurrentLocation().getFileId().equals(SpillableMapTestUtils.DUMMY_FILE_ID));
  assertTrue(records.get(ikey).getCurrentLocation().getInstantTime().equals(SpillableMapTestUtils.DUMMY_COMMIT_TIME));

  // test contains
  assertTrue(records.containsKey(ikey));
  assertTrue(records.containsKey(dkey));

  // test isEmpty
  assertFalse(records.isEmpty());

  // test containsAll
  assertTrue(records.keySet().containsAll(recordKeys));

  // remove (from inMemory and onDisk)
  HoodieRecord removedRecord = records.remove(ikey);
  assertTrue(removedRecord != null);
  assertFalse(records.containsKey(ikey));
  removedRecord = records.remove(dkey);
  assertTrue(removedRecord != null);
  assertFalse(records.containsKey(dkey));

  // test clear
  records.clear();
  assertTrue(records.size() == 0);
}
use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.
the class TestExternalSpillableMap method simpleTestWithException.
/**
 * Upserts 100 records into an {@link ExternalSpillableMap} rooted at {@code failureOutputPath}
 * and checks that an {@link IOException} thrown while iterating is surfaced by
 * {@code assertThrows}.
 *
 * <p>The exception is thrown by the test body itself on the first successful
 * {@code hasNext()}, so the test passes only when the iterator yields at least one element.
 *
 * @param diskMapType          disk map implementation under test (supplied by {@code testArguments})
 * @param isCompressionEnabled whether compression is enabled (supplied by {@code testArguments})
 */
@ParameterizedTest
@MethodSource("testArguments")
public void simpleTestWithException(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  // 16B in-memory budget
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, failureOutputPath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema),
          diskMapType, isCompressionEnabled);
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
  // JUnit assertion instead of the `assert` keyword so the check runs regardless of -ea.
  assertEquals(100, recordKeys.size());
  Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = records.iterator();
  assertThrows(IOException.class, () -> {
    while (itr.hasNext()) {
      throw new IOException("Testing failures...");
    }
  });
}
use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.
the class TestExternalSpillableMap method testEstimationWithEmptyMap.
/**
 * Verifies that {@code put()} keeps working after the map has been emptied again: a single
 * record is inserted and removed, then 250 further records are inserted and none of the puts
 * may throw.
 */
@Test
public void testEstimationWithEmptyMap() throws IOException, URISyntaxException {
  final ExternalSpillableMap.DiskMapType diskMapType = ExternalSpillableMap.DiskMapType.BITCASK;
  final boolean isCompressionEnabled = false;
  final Schema schema = SchemaTestUtil.getSimpleSchema();
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema),
          diskMapType, isCompressionEnabled);

  // Put a single record. Payload size estimation happens as part of this initial put.
  HoodieRecord seedRecord = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 1).get(0);
  records.put(seedRecord.getRecordKey(), seedRecord);

  // Remove the key immediately to make the map empty again.
  records.remove(seedRecord.getRecordKey());

  // Verify payload size re-estimation does not throw exception
  List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 250);
  for (HoodieRecord hoodieRecord : hoodieRecords) {
    // Block-bodied lambda keeps this bound to the Executable overload of assertDoesNotThrow.
    assertDoesNotThrow(() -> {
      records.put(hoodieRecord.getRecordKey(), hoodieRecord);
    }, "ExternalSpillableMap put() should not throw exception!");
  }
}
use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.
the class TestExternalSpillableMap method testSimpleUpsert.
/**
 * Inserts 100 records, verifies that iteration only yields known keys, then upserts updated
 * versions of the same keys and checks that every value read back from the map equals the
 * updated record.
 *
 * @param diskMapType          disk map implementation under test (supplied by {@code testArguments})
 * @param isCompressionEnabled whether compression is enabled (supplied by {@code testArguments})
 */
@ParameterizedTest
@MethodSource("testArguments")
public void testSimpleUpsert(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  // 16B in-memory budget
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema),
          diskMapType, isCompressionEnabled);
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
  // JUnit assertions instead of the `assert` keyword so the checks run regardless of -ea.
  assertEquals(100, recordKeys.size());

  Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = records.iterator();
  while (itr.hasNext()) {
    HoodieRecord<? extends HoodieRecordPayload> rec = itr.next();
    assertTrue(recordKeys.contains(rec.getRecordKey()));
  }

  List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(recordKeys,
      SchemaTestUtil.generateHoodieTestRecords(0, 100), HoodieActiveTimeline.createNewInstantTime());
  // update records already inserted
  SpillableMapTestUtils.upsertRecords(updatedRecords, records);
  // make sure we have records spilled to disk
  assertTrue(records.getDiskBasedMapNumEntries() > 0);

  // iterate over the updated records and compare the value from Map
  updatedRecords.forEach(record -> {
    HoodieRecord rec = records.get(((GenericRecord) record).get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
    try {
      // expected first, actual second, so a failure message reads the right way round
      assertEquals(record, ((HoodieAvroRecord) rec).getData().getInsertValue(schema).get());
    } catch (IOException io) {
      throw new UncheckedIOException(io);
    }
  });
}
use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.
the class HoodieMergeHandle method initializeIncomingRecordsMap.
/**
 * Creates the {@link ExternalSpillableMap} assigned to {@code keyToNewRecords}.
 *
 * <p>The map's memory budget is the value returned by
 * {@code IOUtils.getMaxMemoryPerPartitionMerge}; base path, disk map type and compression
 * flag are read from {@code config}.
 *
 * @throws HoodieIOException if constructing the map fails with an {@link IOException}
 */
protected void initializeIncomingRecordsMap() {
  try {
    // Memory budget for holding the incoming records during this merge
    final long maxMemoryForMerge = IOUtils.getMaxMemoryPerPartitionMerge(taskContextSupplier, config);
    LOG.info("MaxMemoryPerPartitionMerge => " + maxMemoryForMerge);
    this.keyToNewRecords = new ExternalSpillableMap<>(
        maxMemoryForMerge,
        config.getSpillableMapBasePath(),
        new DefaultSizeEstimator(),
        new HoodieRecordSizeEstimator(tableSchema),
        config.getCommonConfig().getSpillableDiskMapType(),
        config.getCommonConfig().isBitCaskDiskMapCompressionEnabled());
  } catch (IOException e) {
    throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", e);
  }
}
Aggregations