Search in sources :

Example 21 with HoodieRecordPayload

use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.

the class TestExternalSpillableMap method simpleTestWithException.

/**
 * Loads 100 generated records into a spillable map created over {@code failureOutputPath} and verifies that an
 * {@link IOException} thrown while the map's iterator still has entries is surfaced by {@code assertThrows}.
 *
 * @param diskMapType          disk map type handed to the {@code ExternalSpillableMap} constructor
 * @param isCompressionEnabled compression flag handed to the {@code ExternalSpillableMap} constructor
 */
@ParameterizedTest
@MethodSource("testArguments")
public void simpleTestWithException(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
    Schema schemaWithMetadata = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> spillableMap = new ExternalSpillableMap<>(
        16L, // 16B
        failureOutputPath,
        new DefaultSizeEstimator(),
        new HoodieRecordSizeEstimator(schemaWithMetadata),
        diskMapType,
        isCompressionEnabled);

    List<IndexedRecord> generated = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<String> insertedKeys = SpillableMapTestUtils.upsertRecords(generated, spillableMap);
    assert 100 == insertedKeys.size();

    Iterator<HoodieRecord<? extends HoodieRecordPayload>> recordIterator = spillableMap.iterator();
    assertThrows(IOException.class, () -> {
        // The failure is raised on the first available entry, so a single check is enough.
        if (recordIterator.hasNext()) {
            throw new IOException("Testing failures...");
        }
    });
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 22 with HoodieRecordPayload

use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.

the class TestExternalSpillableMap method testEstimationWithEmptyMap.

/**
 * Checks that {@code put()} keeps working on an {@code ExternalSpillableMap} that has been filled with one record
 * and then emptied again, i.e. that payload size re-estimation on an empty map does not throw.
 */
@Test
public void testEstimationWithEmptyMap() throws IOException, URISyntaxException {
    final Schema simpleSchema = SchemaTestUtil.getSimpleSchema();
    // BITCASK disk map, compression disabled.
    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records = new ExternalSpillableMap<>(
        16L,
        basePath,
        new DefaultSizeEstimator(),
        new HoodieRecordSizeEstimator(simpleSchema),
        ExternalSpillableMap.DiskMapType.BITCASK,
        false);
    List<String> insertedKeys = new ArrayList<>();

    // Insert one record; the initial put is where payload size estimation first happens.
    HoodieRecord firstRecord = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 1).get(0);
    records.put(firstRecord.getRecordKey(), firstRecord);
    // Drop it straight away so the map is empty again.
    records.remove(firstRecord.getRecordKey());

    // Subsequent puts must not throw while the payload size is re-estimated.
    List<HoodieRecord> moreRecords = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 250);
    for (HoodieRecord candidate : moreRecords) {
        assertDoesNotThrow(() -> {
            records.put(candidate.getRecordKey(), candidate);
        }, "ExternalSpillableMap put() should not throw exception!");
        insertedKeys.add(candidate.getRecordKey());
    }
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 23 with HoodieRecordPayload

use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.

the class TestExternalSpillableMap method testSimpleUpsert.

/**
 * Inserts 100 generated records into a spillable map, verifies every iterated record carries one of the inserted
 * keys, then upserts updated versions of the same keys and checks that the map returns the updated values.
 *
 * <p>All checks use JUnit assertions rather than the {@code assert} keyword so they are evaluated even when the
 * JVM runs without {@code -ea}.
 *
 * @param diskMapType          disk map type handed to the {@code ExternalSpillableMap} constructor
 * @param isCompressionEnabled compression flag handed to the {@code ExternalSpillableMap} constructor
 */
@ParameterizedTest
@MethodSource("testArguments")
public void testSimpleUpsert(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records = new ExternalSpillableMap<>(
        16L, // 16B
        basePath,
        new DefaultSizeEstimator(),
        new HoodieRecordSizeEstimator(schema),
        diskMapType,
        isCompressionEnabled);
    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
    assertEquals(100, recordKeys.size());
    Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = records.iterator();
    while (itr.hasNext()) {
        HoodieRecord<? extends HoodieRecordPayload> rec = itr.next();
        assertTrue(recordKeys.contains(rec.getRecordKey()));
    }
    List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(recordKeys, SchemaTestUtil.generateHoodieTestRecords(0, 100), HoodieActiveTimeline.createNewInstantTime());
    // update records already inserted
    SpillableMapTestUtils.upsertRecords(updatedRecords, records);
    // make sure we have records spilled to disk
    assertTrue(records.getDiskBasedMapNumEntries() > 0);
    // iterate over the updated records and compare the value from Map
    updatedRecords.forEach(record -> {
        HoodieRecord rec = records.get(((GenericRecord) record).get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
        try {
            // expected value first, value read back from the map second
            assertEquals(record, ((HoodieAvroRecord) rec).getData().getInsertValue(schema).get());
        } catch (IOException io) {
            // forEach's lambda cannot throw checked exceptions, so rethrow unchecked
            throw new UncheckedIOException(io);
        }
    });
}
Also used : HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 24 with HoodieRecordPayload

use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.

the class TestRocksDbDiskMap method testSimpleUpsert.

/**
 * Inserts 100 generated records into a {@code RocksDbDiskMap}, upserts updated versions of the first 50 keys, and
 * verifies via the map's iterator that each record carries the commit time of the last write applied to its key:
 * the new commit time for upserted keys, the original one otherwise.
 */
@Test
public void testSimpleUpsert() throws IOException, URISyntaxException {
    Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
    RocksDbDiskMap rocksDBBasedMap = new RocksDbDiskMap<>(basePath);
    List<IndexedRecord> insertedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<String> recordKeys = SpillableMapTestUtils.upsertRecords(insertedRecords, rocksDBBasedMap);
    String oldCommitTime = ((GenericRecord) insertedRecords.get(0)).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
    // generate updates from inserts for first 50 keys / subset of keys
    List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(recordKeys.subList(0, 50), SchemaTestUtil.generateHoodieTestRecords(0, 50), HoodieActiveTimeline.createNewInstantTime());
    String newCommitTime = ((GenericRecord) updatedRecords.get(0)).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
    // perform upserts
    List<String> updatedRecordKeys = SpillableMapTestUtils.upsertRecords(updatedRecords, rocksDBBasedMap);
    // Upserted records (on disk) should have the latest commit time
    Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = rocksDBBasedMap.iterator();
    while (itr.hasNext()) {
        HoodieRecord<? extends HoodieRecordPayload> rec = itr.next();
        try {
            IndexedRecord indexedRecord = (IndexedRecord) rec.getData().getInsertValue(schema).get();
            String latestCommitTime = ((GenericRecord) indexedRecord).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
            boolean wasUpdated = updatedRecordKeys.contains(rec.getRecordKey());
            assert recordKeys.contains(rec.getRecordKey()) || wasUpdated;
            // expected value first so a failure message labels the two commit times correctly
            String expectedCommitTime = wasUpdated ? newCommitTime : oldCommitTime;
            assertEquals(expectedCommitTime, latestCommitTime);
        } catch (IOException io) {
            throw new UncheckedIOException(io);
        }
    }
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) SchemaTestUtil.getSimpleSchema(org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema) Schema(org.apache.avro.Schema) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Test(org.junit.jupiter.api.Test)

Example 25 with HoodieRecordPayload

use of org.apache.hudi.common.model.HoodieRecordPayload in project hudi by apache.

the class HoodieMergedLogRecordScanner method processNextRecord.

/**
 * Folds one incoming record into {@code records}.
 *
 * <p>A key that is not yet present is stored as-is. For a key that is already present, the incoming payload is
 * combined with the stored one via {@code preCombine}; the entry is rewritten only when the combined payload is a
 * different object from the stored payload.
 *
 * @param hoodieRecord the record to merge into the map
 * @throws IOException declared by the overridden method
 */
@Override
protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws IOException {
    final String recordKey = hoodieRecord.getRecordKey();

    if (!records.containsKey(recordKey)) {
        // First time this key is seen: store the record unchanged.
        records.put(recordKey, hoodieRecord);
        return;
    }

    // Merge with the stored payload. The HoodieRecordPayload implementation decides what happens when a
    // delete (empty payload) is encountered before or after an insert/update.
    HoodieRecord<? extends HoodieRecordPayload> existingRecord = records.get(recordKey);
    HoodieRecordPayload existingPayload = existingRecord.getData();
    HoodieRecordPayload mergedPayload = hoodieRecord.getData().preCombine(existingPayload);

    if (mergedPayload == existingPayload) {
        // preCombine kept the stored payload, so the existing entry does not need to be re-put.
        return;
    }

    HoodieOperation incomingOperation = hoodieRecord.getOperation();
    HoodieKey mergedKey = new HoodieKey(recordKey, hoodieRecord.getPartitionPath());
    records.put(recordKey, new HoodieAvroRecord<>(mergedKey, mergedPayload, incomingOperation));
}
Also used : HoodieOperation(org.apache.hudi.common.model.HoodieOperation) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload)

Aggregations

HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload)38 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)30 Schema (org.apache.avro.Schema)19 IOException (java.io.IOException)18 GenericRecord (org.apache.avro.generic.GenericRecord)18 IndexedRecord (org.apache.avro.generic.IndexedRecord)14 ArrayList (java.util.ArrayList)12 HashMap (java.util.HashMap)12 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)12 Option (org.apache.hudi.common.util.Option)12 Map (java.util.Map)11 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)11 List (java.util.List)9 Path (org.apache.hadoop.fs.Path)9 HoodieKey (org.apache.hudi.common.model.HoodieKey)9 Collectors (java.util.stream.Collectors)8 HoodieRecordSizeEstimator (org.apache.hudi.common.util.HoodieRecordSizeEstimator)8 Test (org.junit.jupiter.api.Test)8 UncheckedIOException (java.io.UncheckedIOException)7 Arrays (java.util.Arrays)7