Use of org.apache.hudi.common.model.HoodieOperation in the Apache Hudi project.
Example from the class HoodieMergedLogRecordScanner, method processNextRecord.
@Override
protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws IOException {
  String key = hoodieRecord.getRecordKey();
  if (!records.containsKey(key)) {
    // First occurrence of this key: keep the record as-is.
    records.put(key, hoodieRecord);
    return;
  }
  // A record with the same key was scanned earlier: merge the two payloads.
  // The HoodieRecordPayload implementation is free to decide what should be
  // done when a delete (empty payload) is encountered before or after an
  // insert/update.
  HoodieRecord<? extends HoodieRecordPayload> previous = records.get(key);
  HoodieRecordPayload previousPayload = previous.getData();
  HoodieRecordPayload mergedPayload = hoodieRecord.getData().preCombine(previousPayload);
  // When preCombine hands back the old payload unchanged there is nothing to
  // re-put, so skip the map update.
  if (mergedPayload != previousPayload) {
    HoodieOperation operation = hoodieRecord.getOperation();
    records.put(key, new HoodieAvroRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), mergedPayload, operation));
  }
}
Use of org.apache.hudi.common.model.HoodieOperation in the Apache Hudi project.
Example from the class SpillableMapUtils, method convertToHoodieRecordPayload.
/**
 * Utility method to convert bytes to HoodieRecord using schema and payload class.
 *
 * NOTE(review): the record-key and partition-path fields are assumed to be
 * non-null in the Avro record — confirm against callers.
 */
public static <R> R convertToHoodieRecordPayload(GenericRecord record, String payloadClazz, String preCombineField, Pair<String, String> recordKeyPartitionPathFieldPair, boolean withOperationField, Option<String> partitionName) {
  final String recordKey = record.get(recordKeyPartitionPathFieldPair.getKey()).toString();
  // An explicit partition name takes precedence over the value embedded in the record.
  final String partitionPath;
  if (partitionName.isPresent()) {
    partitionPath = partitionName.get();
  } else {
    partitionPath = record.get(recordKeyPartitionPathFieldPair.getRight()).toString();
  }
  final Object preCombineVal = getPreCombineVal(record, preCombineField);
  // Only tables written with the operation metadata field carry a HoodieOperation.
  final HoodieOperation operation;
  if (withOperationField) {
    operation = HoodieOperation.fromName(getNullableValAsString(record, HoodieRecord.OPERATION_METADATA_FIELD));
  } else {
    operation = null;
  }
  HoodieRecord<? extends HoodieRecordPayload> hoodieRecord =
      new HoodieAvroRecord<>(
          new HoodieKey(recordKey, partitionPath),
          ReflectionUtils.loadPayload(payloadClazz, new Object[] {record, preCombineVal}, GenericRecord.class, Comparable.class),
          operation);
  return (R) hoodieRecord;
}
Use of org.apache.hudi.common.model.HoodieOperation in the Apache Hudi project.
Example from the class FlinkWriteHelper, method deduplicateRecords.
@Override
public List<HoodieRecord<T>> deduplicateRecords(List<HoodieRecord<T>> records, HoodieIndex<?, ?> index, int parallelism) {
  // Bucket incoming records by record key. If the index in use is global,
  // records sharing a key are expected to differ in their partitionPath.
  Map<Object, List<Pair<Object, HoodieRecord<T>>>> keyedRecords = records.stream()
      .map(record -> {
        final Object key = record.getKey().getRecordKey();
        return Pair.of(key, record);
      })
      .collect(Collectors.groupingBy(Pair::getLeft));
  // Collapse each bucket to a single record by repeatedly applying preCombine.
  return keyedRecords.values().stream()
      .map(bucket -> bucket.stream()
          .map(Pair::getRight)
          .reduce((prev, next) -> {
            final T prevData = prev.getData();
            final T nextData = next.getData();
            @SuppressWarnings("unchecked") final T mergedData = (T) nextData.preCombine(prevData);
            // The key and partitionPath must not change during dedup (that
            // would affect everything downstream), so reuse the key of the
            // record whose payload survived the merge.
            boolean keepPrev = prevData.equals(mergedData);
            HoodieKey mergedKey = keepPrev ? prev.getKey() : next.getKey();
            HoodieOperation operation = keepPrev ? prev.getOperation() : next.getOperation();
            HoodieRecord<T> merged = new HoodieAvroRecord<>(mergedKey, mergedData, operation);
            // The location is always carried over from the first record.
            merged.setCurrentLocation(prev.getCurrentLocation());
            return merged;
          })
          .orElse(null))
      .filter(Objects::nonNull)
      .collect(Collectors.toList());
}
Use of org.apache.hudi.common.model.HoodieOperation in the Apache Hudi project.
Example from the class FormatUtils, method getRowKind.
/**
 * Returns the RowKind of the given record, never null.
 * Returns RowKind.INSERT when the given field value not found.
 *
 * @param record the Avro record carrying the operation metadata field
 * @param index  position of the operation field within the record schema
 * @return the Flink {@code RowKind} mapped from the Hoodie operation
 * @throws AssertionError if the field holds an operation name that maps to
 *                        none of the known insert/update/delete kinds
 */
private static RowKind getRowKind(IndexedRecord record, int index) {
  Object val = record.get(index);
  if (val == null) {
    // Records written without the operation field are treated as inserts.
    return RowKind.INSERT;
  }
  final HoodieOperation operation = HoodieOperation.fromName(val.toString());
  if (HoodieOperation.isInsert(operation)) {
    return RowKind.INSERT;
  } else if (HoodieOperation.isUpdateBefore(operation)) {
    return RowKind.UPDATE_BEFORE;
  } else if (HoodieOperation.isUpdateAfter(operation)) {
    return RowKind.UPDATE_AFTER;
  } else if (HoodieOperation.isDelete(operation)) {
    return RowKind.DELETE;
  } else {
    // Fail loudly with context: a bare AssertionError gives no clue which
    // operation value slipped through, making field corruption undiagnosable.
    throw new AssertionError("Unexpected Hoodie operation: " + operation);
  }
}
Use of org.apache.hudi.common.model.HoodieOperation in the Apache Hudi project.
Example from the class RowDataToHoodieFunction, method toHoodieRecord.
/**
 * Converts the give record to a {@link HoodieRecord}.
 *
 * @param record The input record
 * @return HoodieRecord based on the configuration
 * @throws IOException if error occurs
 */
@SuppressWarnings("rawtypes")
private HoodieRecord toHoodieRecord(I record) throws Exception {
  // Convert the Flink row into its Avro representation first; both the key
  // and the payload are derived from that Avro record.
  GenericRecord avroRecord = (GenericRecord) this.converter.convert(this.avroSchema, record);
  final HoodieKey hoodieKey = keyGenerator.getKey(avroRecord);
  HoodieRecordPayload payload = payloadCreation.createPayload(avroRecord);
  // The Flink RowKind byte maps one-to-one onto a HoodieOperation value.
  return new HoodieAvroRecord<>(hoodieKey, payload, HoodieOperation.fromValue(record.getRowKind().toByteValue()));
}
Aggregations