Search in sources :

Example 1 with NULL_RECORDKEY_PLACEHOLDER

Use of org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER in the Apache Hudi project.

Class RowKeyGeneratorHelper, method getRecordKeyFromRow.

/**
 * Builds the record key string for a {@link Row}.
 *
 * <p>One part is produced per entry of {@code recordKeyFields}, in list order, and the parts are
 * joined with {@code ","}. A field mapped to exactly one position is read straight from the row:
 * a null cell yields {@code NULL_RECORDKEY_PLACEHOLDER} and a value whose string form is empty
 * yields {@code EMPTY_RECORDKEY_PLACEHOLDER}. A field mapped to any other number of positions is
 * resolved through {@code getNestedFieldVal}.
 *
 * @param row the {@link Row} to read the key values from
 * @param recordKeyFields names of the record key fields
 * @param recordKeyPositions for each name in {@code recordKeyFields}, the positions used to locate it in {@code row}
 * @param prefixFieldName {@code true} to emit every part as {@code field:value}; {@code false} to emit the value alone
 * @return the joined record key
 * @throws HoodieKeyException if no field produced a value free of the null/empty placeholders
 */
public static String getRecordKeyFromRow(Row row, List<String> recordKeyFields, Map<String, List<Integer>> recordKeyPositions, boolean prefixFieldName) {
    StringBuilder joinedKey = new StringBuilder();
    // Flips to true as soon as one field contributes a real (non-placeholder) value.
    boolean sawUsableValue = false;
    boolean firstPart = true;

    for (String field : recordKeyFields) {
        List<Integer> positions = recordKeyPositions.get(field);
        String part;

        if (positions.size() != 1) {
            // Nested field: the helper's result is inspected for embedded placeholders.
            part = getNestedFieldVal(row, positions).toString();
            boolean hasPlaceholder = part.contains(NULL_RECORDKEY_PLACEHOLDER) || part.contains(EMPTY_RECORDKEY_PLACEHOLDER);
            if (!hasPlaceholder) {
                sawUsableValue = true;
            }
        } else if (row.isNullAt(positions.get(0))) {
            // Simple field holding null.
            part = NULL_RECORDKEY_PLACEHOLDER;
        } else {
            // Simple field holding a value; an empty string form is replaced by its placeholder.
            String raw = row.getAs(field).toString();
            if (raw.isEmpty()) {
                part = EMPTY_RECORDKEY_PLACEHOLDER;
            } else {
                part = raw;
                sawUsableValue = true;
            }
        }

        if (firstPart) {
            firstPart = false;
        } else {
            joinedKey.append(",");
        }
        if (prefixFieldName) {
            joinedKey.append(field).append(":");
        }
        joinedKey.append(part);
    }

    String toReturn = joinedKey.toString();
    if (!sawUsableValue) {
        throw new HoodieKeyException("recordKey value: \"" + toReturn + "\" for fields: \"" + Arrays.toString(recordKeyFields.toArray()) + "\" cannot be null or empty.");
    }
    return toReturn;
}
Also used : DataType(org.apache.spark.sql.types.DataType) DataTypes(org.apache.spark.sql.types.DataTypes) StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) IntStream(java.util.stream.IntStream) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Arrays(java.util.Arrays) HUDI_DEFAULT_PARTITION_PATH(org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH) NULL_RECORDKEY_PLACEHOLDER(org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER) DEFAULT_PARTITION_PATH_SEPARATOR(org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) Timestamp(java.sql.Timestamp) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Row(org.apache.spark.sql.Row) Instant(java.time.Instant) Option(scala.Option) Collectors(java.util.stream.Collectors) EMPTY_RECORDKEY_PLACEHOLDER(org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Collections(java.util.Collections) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

Timestamp (java.sql.Timestamp)1 Instant (java.time.Instant)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Map (java.util.Map)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1 HoodieKeyException (org.apache.hudi.exception.HoodieKeyException)1 DEFAULT_PARTITION_PATH_SEPARATOR (org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR)1 EMPTY_RECORDKEY_PLACEHOLDER (org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER)1 HUDI_DEFAULT_PARTITION_PATH (org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH)1 NULL_RECORDKEY_PLACEHOLDER (org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER)1 Row (org.apache.spark.sql.Row)1 InternalRow (org.apache.spark.sql.catalyst.InternalRow)1 DataType (org.apache.spark.sql.types.DataType)1 DataTypes (org.apache.spark.sql.types.DataTypes)1 StructField (org.apache.spark.sql.types.StructField)1