Search in sources :

Example 6 with DefaultSizeEstimator

use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.

the class TestExternalSpillableMap method testAllMapOperations.

/**
 * Exercises the basic map operations of {@link ExternalSpillableMap}: {@code size}, {@code get},
 * {@code containsKey}, {@code isEmpty}, {@code keySet().containsAll}, {@code remove} and {@code clear}.
 *
 * <p>All checks use JUnit assertions rather than the {@code assert} keyword, so they are evaluated
 * even when the JVM is started without {@code -ea}.
 *
 * @param diskMapType          disk map implementation handed to the spillable map
 * @param isCompressionEnabled compression flag handed to the spillable map
 */
@ParameterizedTest
@MethodSource("testArguments")
public void testAllMapOperations(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
    // 16B in-memory budget
    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    // insert a bunch of records so that values spill to disk too
    List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);

    // First generated record: treated by this test as the in-memory sample.
    IndexedRecord inMemoryRecord = iRecords.get(0);
    String ikey = ((GenericRecord) inMemoryRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
    String iPartitionPath = ((GenericRecord) inMemoryRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
    HoodieRecord inMemoryHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(ikey, iPartitionPath), new HoodieAvroPayload(Option.of((GenericRecord) inMemoryRecord)));

    // Last generated record: treated by this test as the on-disk sample.
    IndexedRecord onDiskRecord = iRecords.get(99);
    String dkey = ((GenericRecord) onDiskRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
    String dPartitionPath = ((GenericRecord) onDiskRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
    HoodieRecord onDiskHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(dkey, dPartitionPath), new HoodieAvroPayload(Option.of((GenericRecord) onDiskRecord)));

    // assert size
    assertTrue(records.size() == 100, "map should hold all 100 upserted records");

    // get should return the same HoodieKey, same location and same value
    HoodieRecord<? extends HoodieRecordPayload> fetchedInMemory = records.get(ikey);
    assertTrue(inMemoryHoodieRecord.getKey().equals(fetchedInMemory.getKey()), "key mismatch for record " + ikey);
    assertTrue(onDiskHoodieRecord.getKey().equals(records.get(dkey).getKey()), "key mismatch for record " + dkey);
    // compare the member variables of HoodieRecord not set by the constructor
    assertTrue(fetchedInMemory.getCurrentLocation().getFileId().equals(SpillableMapTestUtils.DUMMY_FILE_ID), "unexpected file id");
    assertTrue(fetchedInMemory.getCurrentLocation().getInstantTime().equals(SpillableMapTestUtils.DUMMY_COMMIT_TIME), "unexpected instant time");

    // test contains
    assertTrue(records.containsKey(ikey));
    assertTrue(records.containsKey(dkey));
    // test isEmpty
    assertFalse(records.isEmpty());
    // test containsAll
    assertTrue(records.keySet().containsAll(recordKeys));

    // remove (from inMemory and onDisk)
    HoodieRecord removedRecord = records.remove(ikey);
    assertTrue(removedRecord != null);
    assertFalse(records.containsKey(ikey));
    removedRecord = records.remove(dkey);
    assertTrue(removedRecord != null);
    assertFalse(records.containsKey(dkey));

    // test clear
    records.clear();
    assertTrue(records.size() == 0);
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) HoodieAvroPayload(org.apache.hudi.common.model.HoodieAvroPayload) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 7 with DefaultSizeEstimator

use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.

the class TestExternalSpillableMap method simpleTestWithException.

/**
 * Fills a spillable map rooted at {@code failureOutputPath} with 100 records and then verifies
 * that an {@link IOException} thrown while the map's iterator still has elements is surfaced
 * through {@code assertThrows}.
 *
 * @param diskMapType          disk map implementation handed to the spillable map
 * @param isCompressionEnabled compression flag handed to the spillable map
 */
@ParameterizedTest
@MethodSource("testArguments")
public void simpleTestWithException(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
    final Schema recordSchema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
    // 16B
    final ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> spillableMap =
        new ExternalSpillableMap<>(16L, failureOutputPath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(recordSchema), diskMapType, isCompressionEnabled);

    final List<String> insertedKeys =
        SpillableMapTestUtils.upsertRecords(SchemaTestUtil.generateHoodieTestRecords(0, 100), spillableMap);
    assert insertedKeys.size() == 100;

    final Iterator<HoodieRecord<? extends HoodieRecordPayload>> recordIterator = spillableMap.iterator();
    assertThrows(IOException.class, () -> {
        // Throwing on the first available element is all the original loop ever did.
        if (recordIterator.hasNext()) {
            throw new IOException("Testing failures...");
        }
    });
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 8 with DefaultSizeEstimator

use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.

the class TestExternalSpillableMap method testEstimationWithEmptyMap.

/**
 * Verifies that {@code put()} keeps working after the map has been emptied again: one record is
 * inserted and immediately removed, then 250 further records are inserted and none of those
 * insertions may throw.
 */
@Test
public void testEstimationWithEmptyMap() throws IOException, URISyntaxException {
    final ExternalSpillableMap.DiskMapType diskMapType = ExternalSpillableMap.DiskMapType.BITCASK;
    final boolean isCompressionEnabled = false;
    final Schema schema = SchemaTestUtil.getSimpleSchema();
    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records = new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
    // Put a single record. Payload size estimation happens as part of this initial put.
    HoodieRecord seedRecord = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 1).get(0);
    records.put(seedRecord.getRecordKey(), seedRecord);
    // Remove the key immediately to make the map empty again.
    records.remove(seedRecord.getRecordKey());
    // Verify payload size re-estimation does not throw exception
    List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 250);
    for (HoodieRecord hoodieRecord : hoodieRecords) {
        // Block-bodied lambda on purpose: an expression lambda would be ambiguous between the
        // Executable and ThrowingSupplier overloads of assertDoesNotThrow.
        assertDoesNotThrow(() -> {
            records.put(hoodieRecord.getRecordKey(), hoodieRecord);
        }, "ExternalSpillableMap put() should not throw exception!");
    }
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 9 with DefaultSizeEstimator

use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.

the class TestExternalSpillableMap method testSimpleUpsert.

/**
 * Inserts 100 records, checks that iteration only yields inserted keys, then upserts updated
 * versions of the same keys and verifies that the map returns the updated values and that some
 * entries live in the disk-based part of the map.
 *
 * <p>All checks use JUnit assertions rather than the {@code assert} keyword, so they are evaluated
 * even when the JVM is started without {@code -ea}.
 *
 * @param diskMapType          disk map implementation handed to the spillable map
 * @param isCompressionEnabled compression flag handed to the spillable map
 */
@ParameterizedTest
@MethodSource("testArguments")
public void testSimpleUpsert(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
    // 16B in-memory budget
    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
    assertEquals(100, recordKeys.size());

    // every record reachable through the iterator must be one we inserted
    Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = records.iterator();
    while (itr.hasNext()) {
        HoodieRecord<? extends HoodieRecordPayload> rec = itr.next();
        assertTrue(recordKeys.contains(rec.getRecordKey()));
    }

    List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(recordKeys, SchemaTestUtil.generateHoodieTestRecords(0, 100), HoodieActiveTimeline.createNewInstantTime());
    // update records already inserted
    SpillableMapTestUtils.upsertRecords(updatedRecords, records);
    // make sure we have records spilled to disk
    assertTrue(records.getDiskBasedMapNumEntries() > 0);

    // iterate over the updated records and compare the value from Map
    updatedRecords.forEach(record -> {
        // The map is keyed by String; normalise the Avro field value before the lookup.
        String recordKey = ((GenericRecord) record).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
        HoodieRecord rec = records.get(recordKey);
        try {
            // expected first, actual second, so a failure message reads correctly
            assertEquals(record, ((HoodieAvroRecord) rec).getData().getInsertValue(schema).get());
        } catch (IOException io) {
            throw new UncheckedIOException(io);
        }
    });
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 10 with DefaultSizeEstimator

use of org.apache.hudi.common.util.DefaultSizeEstimator in project hudi by apache.

the class HoodieMergeHandle method initializeIncomingRecordsMap.

/**
 * Creates the {@link ExternalSpillableMap} that holds the incoming (new) records and stores it
 * in {@code keyToNewRecords}. The in-memory budget is taken from
 * {@code IOUtils.getMaxMemoryPerPartitionMerge}; the spill location, disk map type and
 * compression flag are read from the write config.
 *
 * @throws HoodieIOException if creating the map fails with an {@link IOException}
 */
protected void initializeIncomingRecordsMap() {
    try {
        // Memory budget for the map that will hold the new records.
        final long maxMergeMemory = IOUtils.getMaxMemoryPerPartitionMerge(taskContextSupplier, config);
        LOG.info("MaxMemoryPerPartitionMerge => " + maxMergeMemory);
        this.keyToNewRecords = new ExternalSpillableMap<>(
            maxMergeMemory,
            config.getSpillableMapBasePath(),
            new DefaultSizeEstimator(),
            new HoodieRecordSizeEstimator(tableSchema),
            config.getCommonConfig().getSpillableDiskMapType(),
            config.getCommonConfig().isBitCaskDiskMapCompressionEnabled());
    } catch (IOException cause) {
        throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", cause);
    }
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator)

Aggregations

DefaultSizeEstimator (org.apache.hudi.common.util.DefaultSizeEstimator): 15; IOException (java.io.IOException): 10; HoodieRecordSizeEstimator (org.apache.hudi.common.util.HoodieRecordSizeEstimator): 9; HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 8; Schema (org.apache.avro.Schema): 7; IndexedRecord (org.apache.avro.generic.IndexedRecord): 7; HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload): 7; ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 7; MethodSource (org.junit.jupiter.params.provider.MethodSource): 6; File (java.io.File): 5; ExternalSpillableMap (org.apache.hudi.common.util.collection.ExternalSpillableMap): 5; HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 4; UncheckedIOException (java.io.UncheckedIOException): 3; ArrayList (java.util.ArrayList): 3; HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord): 3; Test (org.junit.jupiter.api.Test): 3; GenericRecord (org.apache.avro.generic.GenericRecord): 2; HoodieAvroPayload (org.apache.hudi.common.model.HoodieAvroPayload): 2; HoodieKey (org.apache.hudi.common.model.HoodieKey): 2; HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 2