Search in sources :

Example 1 with BaseAvroPayload

Use of org.apache.hudi.common.model.BaseAvroPayload in the Apache Hudi project.

The method processRecord of the class BucketAssignFunction.

/**
 * Resolves the write location for a single record and forwards the record downstream.
 *
 * <p>The location is chosen as follows:
 * <ul>
 *   <li>When {@code isChangingRecords} is false, or the index state holds no location,
 *       a new location is requested via {@code getNewRecordLocation}.</li>
 *   <li>When the stored location has the same partition path as the record, the stored
 *       location is reused (converted with instant time {@code "U"}) and reported to the
 *       bucket assigner as an update.</li>
 *   <li>When the stored partition path differs, a new location is requested for the new
 *       partition path; if {@code globalIndex} is set, a delete record keyed on the old
 *       partition path is emitted first.</li>
 * </ul>
 *
 * @param record the incoming record whose current location is set before it is emitted
 * @param out    the collector that receives the optional delete record and then {@code record}
 * @throws Exception propagated from the state access and helper calls made here
 */
@SuppressWarnings("unchecked")
private void processRecord(HoodieRecord<?> record, Collector<O> out) throws Exception {
    final HoodieKey key = record.getKey();
    final String recordKey = key.getRecordKey();
    final String partitionPath = key.getPartitionPath();

    // The stored location is read unconditionally, but it is only consulted for changing records;
    // otherwise the record is always given a fresh location.
    final HoodieRecordGlobalLocation previous = indexState.value();

    final HoodieRecordLocation location;
    if (!isChangingRecords || previous == null) {
        // No usable index entry: ask for a new location.
        location = getNewRecordLocation(partitionPath);
    } else if (Objects.equals(previous.getPartitionPath(), partitionPath)) {
        // Same partition as before: reuse the stored location, with "U" as the instant time
        // to mark the bucket as an update bucket.
        location = previous.toLocal("U");
        this.bucketAssigner.addUpdate(partitionPath, location.getFileId());
    } else {
        // The partition path changed since the record was last indexed.
        if (globalIndex) {
            // Emit a delete record addressed to the old partition path before re-routing.
            final HoodieKey staleKey = new HoodieKey(recordKey, previous.getPartitionPath());
            HoodieRecord<?> tombstone = new HoodieAvroRecord<>(staleKey, payloadCreation.createDeletePayload((BaseAvroPayload) record.getData()));
            tombstone.setCurrentLocation(previous.toLocal("U"));
            tombstone.seal();
            out.collect((O) tombstone);
        }
        location = getNewRecordLocation(partitionPath);
        // NOTE(review): the index state is written here and again below with the same
        // arguments; this looks redundant -- confirm against updateIndexState before removing.
        updateIndexState(partitionPath, location);
    }

    // Changing records always refresh the index with the location that was chosen.
    if (isChangingRecords) {
        updateIndexState(partitionPath, location);
    }

    record.setCurrentLocation(location);
    out.collect((O) record);
}
Also used : BaseAvroPayload(org.apache.hudi.common.model.BaseAvroPayload) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecordGlobalLocation(org.apache.hudi.common.model.HoodieRecordGlobalLocation) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation)

Aggregations

BaseAvroPayload (org.apache.hudi.common.model.BaseAvroPayload): 1, HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord): 1, HoodieKey (org.apache.hudi.common.model.HoodieKey): 1, HoodieRecordGlobalLocation (org.apache.hudi.common.model.HoodieRecordGlobalLocation): 1, HoodieRecordLocation (org.apache.hudi.common.model.HoodieRecordLocation): 1