Search in sources :

Example 1 with HoodieOperation

use of org.apache.hudi.common.model.HoodieOperation in project hudi by apache.

the class HoodieMergedLogRecordScanner method processNextRecord.

/**
 * Folds one incoming record into the {@code records} map, which is keyed by record key.
 *
 * <p>The first record seen for a key is stored unchanged. For any later record with the same key,
 * the incoming payload is combined with the stored payload through {@code preCombine}; the map
 * entry is rewritten only when the combined payload is a different instance than the stored one.
 *
 * @param hoodieRecord the next record to merge into the map
 * @throws IOException declared by the method signature
 */
@Override
protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws IOException {
    final String recordKey = hoodieRecord.getRecordKey();
    if (!records.containsKey(recordKey)) {
        // First occurrence of this key: keep the record exactly as it was handed in.
        records.put(recordKey, hoodieRecord);
        return;
    }
    // The HoodieRecordPayload implementation is free to decide what happens when a delete
    // (empty payload) is encountered before or after an insert/update.
    final HoodieRecordPayload storedPayload = records.get(recordKey).getData();
    final HoodieRecordPayload mergedPayload = hoodieRecord.getData().preCombine(storedPayload);
    if (mergedPayload == storedPayload) {
        // Identity comparison on purpose: the stored payload won, so the existing entry is
        // already correct and there is no need to put it again.
        return;
    }
    // The replacement entry takes its partition path and operation from the incoming record.
    final HoodieOperation incomingOperation = hoodieRecord.getOperation();
    final HoodieKey mergedKey = new HoodieKey(recordKey, hoodieRecord.getPartitionPath());
    records.put(recordKey, new HoodieAvroRecord<>(mergedKey, mergedPayload, incomingOperation));
}
Also used : HoodieOperation(org.apache.hudi.common.model.HoodieOperation) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload)

Example 2 with HoodieOperation

use of org.apache.hudi.common.model.HoodieOperation in project hudi by apache.

the class SpillableMapUtils method convertToHoodieRecordPayload.

/**
 * Builds a {@link HoodieAvroRecord} from an Avro {@link GenericRecord} and returns it cast to the
 * caller's expected type.
 *
 * @param record the Avro record to wrap
 * @param payloadClazz name of the payload class, instantiated reflectively with
 *     {@code (GenericRecord, Comparable)} constructor argument types
 * @param preCombineField field name handed to {@code getPreCombineVal} to obtain the second
 *     payload constructor argument
 * @param recordKeyPartitionPathFieldPair field names: the left/key element names the record-key
 *     field, the right element names the partition-path field
 * @param withOperationField when {@code true}, the operation is read from
 *     {@code HoodieRecord.OPERATION_METADATA_FIELD}; otherwise the operation is {@code null}
 * @param partitionName when present, used as the partition path instead of the record's field
 * @param <R> type the created record is cast to (unchecked)
 * @return the newly created record
 */
public static <R> R convertToHoodieRecordPayload(GenericRecord record, String payloadClazz, String preCombineField, Pair<String, String> recordKeyPartitionPathFieldPair, boolean withOperationField, Option<String> partitionName) {
    final String recordKey = record.get(recordKeyPartitionPathFieldPair.getKey()).toString();

    // An explicitly supplied partition name takes precedence over the value stored in the record.
    final String partitionPath;
    if (partitionName.isPresent()) {
        partitionPath = partitionName.get();
    } else {
        partitionPath = record.get(recordKeyPartitionPathFieldPair.getRight()).toString();
    }

    final Object orderingValue = getPreCombineVal(record, preCombineField);

    // The operation stays null unless the caller asked for the operation metadata field.
    HoodieOperation operation = null;
    if (withOperationField) {
        operation = HoodieOperation.fromName(getNullableValAsString(record, HoodieRecord.OPERATION_METADATA_FIELD));
    }

    final HoodieKey hoodieKey = new HoodieKey(recordKey, partitionPath);
    final Object[] payloadArgs = new Object[] { record, orderingValue };
    HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieAvroRecord<>(hoodieKey, ReflectionUtils.loadPayload(payloadClazz, payloadArgs, GenericRecord.class, Comparable.class), operation);
    return (R) hoodieRecord;
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieOperation(org.apache.hudi.common.model.HoodieOperation) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieAvroUtils.getNullableValAsString(org.apache.hudi.avro.HoodieAvroUtils.getNullableValAsString) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 3 with HoodieOperation

use of org.apache.hudi.common.model.HoodieOperation in project hudi by apache.

the class FlinkWriteHelper method deduplicateRecords.

/**
 * Collapses records that share a record key into a single record per key.
 *
 * <p>Records are grouped by record key only, and each group is reduced pairwise: the later
 * record's payload is pre-combined with the earlier one's. The key and operation are taken from
 * whichever input record's payload equals the combined payload (the later record when neither
 * does), and the current location is always taken from the earlier record of the pair.
 *
 * @param records the records to deduplicate
 * @param index not read by this implementation
 * @param parallelism not read by this implementation
 * @return one record per distinct record key
 */
@Override
public List<HoodieRecord<T>> deduplicateRecords(List<HoodieRecord<T>> records, HoodieIndex<?, ?> index, int parallelism) {
    // Group on the record key alone: if the index used is global, records are expected to
    // differ in their partitionPath.
    Map<Object, List<HoodieRecord<T>>> recordsByKey = records.stream()
        .collect(Collectors.groupingBy(record -> record.getKey().getRecordKey()));

    return recordsByKey.values().stream()
        .map(sameKeyRecords -> sameKeyRecords.stream().reduce((earlier, later) -> {
            final T earlierData = earlier.getData();
            final T laterData = later.getData();
            @SuppressWarnings("unchecked") final T combinedData = (T) laterData.preCombine(earlierData);
            // The user must not be allowed to change the key or partitionPath, since that would
            // affect everything, so both key and operation come from one of the input records.
            final boolean keepEarlier = earlierData.equals(combinedData);
            final HoodieRecord<T> winner = keepEarlier ? earlier : later;
            HoodieRecord<T> combinedRecord = new HoodieAvroRecord<>(winner.getKey(), combinedData, winner.getOperation());
            // Reuse the location from the first record of the pair.
            combinedRecord.setCurrentLocation(earlier.getCurrentLocation());
            return combinedRecord;
        }).orElse(null))
        .filter(Objects::nonNull)
        .collect(Collectors.toList());
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieList(org.apache.hudi.common.data.HoodieList) HoodieOperation(org.apache.hudi.common.model.HoodieOperation) Objects(java.util.Objects) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) Duration(java.time.Duration) Map(java.util.Map) HoodieKey(org.apache.hudi.common.model.HoodieKey) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) Pair(org.apache.hudi.common.util.collection.Pair) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieOperation(org.apache.hudi.common.model.HoodieOperation) HoodieList(org.apache.hudi.common.data.HoodieList) List(java.util.List)

Example 4 with HoodieOperation

use of org.apache.hudi.common.model.HoodieOperation in project hudi by apache.

the class FormatUtils method getRowKind.

/**
 * Maps the operation stored at the given field position of a record to a {@code RowKind}.
 *
 * @param record the record to inspect
 * @param index position of the field holding the operation name
 * @return the matching {@code RowKind}, never null; {@code RowKind.INSERT} when the field
 *     value is null
 * @throws AssertionError if the parsed operation matches none of the four checked kinds
 */
private static RowKind getRowKind(IndexedRecord record, int index) {
    final Object rawValue = record.get(index);
    if (rawValue == null) {
        // No operation recorded for this row: treat it as an insert.
        return RowKind.INSERT;
    }
    final HoodieOperation operation = HoodieOperation.fromName(rawValue.toString());
    if (HoodieOperation.isInsert(operation)) {
        return RowKind.INSERT;
    }
    if (HoodieOperation.isUpdateBefore(operation)) {
        return RowKind.UPDATE_BEFORE;
    }
    if (HoodieOperation.isUpdateAfter(operation)) {
        return RowKind.UPDATE_AFTER;
    }
    if (HoodieOperation.isDelete(operation)) {
        return RowKind.DELETE;
    }
    throw new AssertionError();
}
Also used : HoodieOperation(org.apache.hudi.common.model.HoodieOperation)

Example 5 with HoodieOperation

use of org.apache.hudi.common.model.HoodieOperation in project hudi by apache.

the class RowDataToHoodieFunction method toHoodieRecord.

/**
 * Converts the given record to a {@link HoodieRecord}.
 *
 * <p>The input is first converted to an Avro {@link GenericRecord} using this function's
 * converter and Avro schema; the key and payload are then derived from that Avro record, and
 * the operation from the byte value of the input's row kind.
 *
 * @param record The input record
 * @return HoodieRecord built from the derived key, payload and operation
 * @throws Exception if error occurs
 */
@SuppressWarnings("rawtypes")
private HoodieRecord toHoodieRecord(I record) throws Exception {
    final GenericRecord avroRecord = (GenericRecord) this.converter.convert(this.avroSchema, record);
    final HoodieKey recordKey = keyGenerator.getKey(avroRecord);
    final HoodieRecordPayload recordPayload = payloadCreation.createPayload(avroRecord);
    // The operation comes from the original input's row kind, not from the Avro record.
    return new HoodieAvroRecord<>(recordKey, recordPayload, HoodieOperation.fromValue(record.getRowKind().toByteValue()));
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieOperation(org.apache.hudi.common.model.HoodieOperation) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload)

Aggregations

HoodieOperation (org.apache.hudi.common.model.HoodieOperation)5 HoodieKey (org.apache.hudi.common.model.HoodieKey)4 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)3 HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload)3 GenericRecord (org.apache.avro.generic.GenericRecord)2 Duration (java.time.Duration)1 Instant (java.time.Instant)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Collectors (java.util.stream.Collectors)1 HoodieAvroUtils.getNullableValAsString (org.apache.hudi.avro.HoodieAvroUtils.getNullableValAsString)1 WriteStatus (org.apache.hudi.client.WriteStatus)1 HoodieList (org.apache.hudi.common.data.HoodieList)1 HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext)1 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)1 WriteOperationType (org.apache.hudi.common.model.WriteOperationType)1 Pair (org.apache.hudi.common.util.collection.Pair)1 HoodieUpsertException (org.apache.hudi.exception.HoodieUpsertException)1 HoodieIndex (org.apache.hudi.index.HoodieIndex)1