Search in sources :

Example 1 with HoodieKeyException

use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.

the class RowKeyGeneratorHelper method getRecordKeyFromRow.

/**
 * Builds the record key string for a {@link Row} from the values of the given record key fields.
 *
 * <p>The per-field values are joined with {@code ","}. A field whose position list has exactly one
 * entry is read directly from the row; a null value is replaced by {@code NULL_RECORDKEY_PLACEHOLDER}
 * and an empty string value by {@code EMPTY_RECORDKEY_PLACEHOLDER}. Any other field is resolved
 * through {@code getNestedFieldVal}.
 *
 * @param row instance of {@link Row} of interest
 * @param recordKeyFields record key fields as a list
 * @param recordKeyPositions positions within the row for each entry of {@code recordKeyFields}
 * @param prefixFieldName {@code true} if every value should be emitted as {@code field:value},
 *                        {@code false} to emit the bare value
 * @return the generated record key
 * @throws HoodieKeyException if no field contributed a value free of the null/empty placeholders
 */
public static String getRecordKeyFromRow(Row row, List<String> recordKeyFields, Map<String, List<Integer>> recordKeyPositions, boolean prefixFieldName) {
    StringBuilder joined = new StringBuilder();
    // Flips to true as soon as one field yields a real (non-placeholder) value.
    boolean hasUsableValue = false;
    boolean needsDelimiter = false;
    for (String field : recordKeyFields) {
        List<Integer> positions = recordKeyPositions.get(field);
        String part;
        if (positions.size() != 1) {
            // nested fields
            part = getNestedFieldVal(row, positions).toString();
            boolean hasPlaceholder = part.contains(NULL_RECORDKEY_PLACEHOLDER) || part.contains(EMPTY_RECORDKEY_PLACEHOLDER);
            if (!hasPlaceholder) {
                hasUsableValue = true;
            }
        } else if (row.isNullAt(positions.get(0))) {
            // simple field holding null
            part = NULL_RECORDKEY_PLACEHOLDER;
        } else {
            // simple field holding a value
            String text = row.getAs(field).toString();
            if (text.isEmpty()) {
                part = EMPTY_RECORDKEY_PLACEHOLDER;
            } else {
                part = text;
                hasUsableValue = true;
            }
        }
        if (needsDelimiter) {
            joined.append(",");
        }
        needsDelimiter = true;
        if (prefixFieldName) {
            joined.append(field).append(":");
        }
        joined.append(part);
    }
    String recordKey = joined.toString();
    if (!hasUsableValue) {
        throw new HoodieKeyException("recordKey value: \"" + recordKey + "\" for fields: \"" + Arrays.toString(recordKeyFields.toArray()) + "\" cannot be null or empty.");
    }
    return recordKey;
}
Also used : DataType(org.apache.spark.sql.types.DataType) DataTypes(org.apache.spark.sql.types.DataTypes) StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) IntStream(java.util.stream.IntStream) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Arrays(java.util.Arrays) HUDI_DEFAULT_PARTITION_PATH(org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH) NULL_RECORDKEY_PLACEHOLDER(org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER) DEFAULT_PARTITION_PATH_SEPARATOR(org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) Timestamp(java.sql.Timestamp) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Row(org.apache.spark.sql.Row) Instant(java.time.Instant) Option(scala.Option) Collectors(java.util.stream.Collectors) EMPTY_RECORDKEY_PLACEHOLDER(org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Collections(java.util.Collections) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) ArrayList(java.util.ArrayList) List(java.util.List)

Example 2 with HoodieKeyException

use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.

the class CustomAvroKeyGenerator method getPartitionPath.

/**
 * Builds the partition path for the given record from the configured partition path fields.
 *
 * <p>Each configured entry is split with {@code SPLIT_REGEX} and must yield exactly two parts:
 * the field name and a {@code PartitionKeyType} name (matched case-insensitively). The segment for
 * each entry is produced by a {@code SimpleAvroKeyGenerator} or a {@code TimestampBasedAvroKeyGenerator}
 * and the segments are joined with {@code DEFAULT_PARTITION_PATH_SEPARATOR}.
 *
 * @param record the record to compute the partition path for
 * @return the partition path; the empty string when the only configured field is empty
 *         or when no fields are configured
 * @throws HoodieKeyException if the partition path fields are {@code null} or an entry does not
 *                            split into exactly two parts
 * @throws HoodieKeyGeneratorException if the timestamp based generator cannot be initialised or the
 *                                     key type is not handled
 */
@Override
public String getPartitionPath(GenericRecord record) {
    if (getPartitionPathFields() == null) {
        throw new HoodieKeyException("Unable to find field names for partition path in cfg");
    }
    // Corresponds to no partition case
    if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
        return "";
    }
    StringBuilder partitionPath = new StringBuilder();
    boolean firstSegment = true;
    for (String field : getPartitionPathFields()) {
        String[] fieldWithType = field.split(SPLIT_REGEX);
        if (fieldWithType.length != 2) {
            throw new HoodieKeyException("Unable to find field names for partition path in proper format");
        }
        String partitionPathField = fieldWithType[0];
        // Locale.ROOT keeps the enum lookup independent of the JVM default locale
        // (e.g. a Turkish locale would upper-case "simple" to "SİMPLE").
        PartitionKeyType keyType = PartitionKeyType.valueOf(fieldWithType[1].toUpperCase(java.util.Locale.ROOT));
        // Separator goes between segments only, so nothing has to be trimmed afterwards and an
        // empty field list no longer triggers a StringIndexOutOfBoundsException.
        if (!firstSegment) {
            partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
        }
        firstSegment = false;
        switch (keyType) {
            case SIMPLE:
                partitionPath.append(new SimpleAvroKeyGenerator(config, partitionPathField).getPartitionPath(record));
                break;
            case TIMESTAMP:
                try {
                    partitionPath.append(new TimestampBasedAvroKeyGenerator(config, partitionPathField).getPartitionPath(record));
                } catch (IOException e) {
                    throw new HoodieKeyGeneratorException("Unable to initialise TimestampBasedKeyGenerator class", e);
                }
                break;
            default:
                throw new HoodieKeyGeneratorException("Please provide valid PartitionKeyType with fields! You provided: " + keyType);
        }
    }
    return partitionPath.toString();
}
Also used : HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) IOException(java.io.IOException)

Example 3 with HoodieKeyException

use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.

the class CustomKeyGenerator method getPartitionPath.

/**
 * Builds the partition path from whichever input representation is present.
 *
 * <p>The inputs are checked in order: {@code record}, then {@code row}; if neither is present the
 * {@code internalRowStructTypePair} is used. Each configured partition path entry is split with
 * {@code SPLIT_REGEX} and must yield exactly two parts: the field name and a
 * {@code PartitionKeyType} name (matched case-insensitively). Segments are joined with the
 * separator returned by {@code customAvroKeyGenerator.getDefaultPartitionPathSeparator()}.
 *
 * @param record optional Avro record
 * @param row optional {@link Row}
 * @param internalRowStructTypePair optional internal row together with its schema
 * @return the partition path; the empty string when the only configured field is empty
 *         or when no fields are configured
 * @throws HoodieKeyException if the partition path fields are {@code null}
 * @throws HoodieKeyGeneratorException if an entry does not split into exactly two parts, the
 *                                     timestamp based generator cannot be initialised, or the key
 *                                     type is not handled
 */
private String getPartitionPath(Option<GenericRecord> record, Option<Row> row, Option<Pair<InternalRow, StructType>> internalRowStructTypePair) {
    if (getPartitionPathFields() == null) {
        throw new HoodieKeyException("Unable to find field names for partition path in cfg");
    }
    // Corresponds to no partition case
    if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
        return "";
    }
    StringBuilder partitionPath = new StringBuilder();
    boolean firstSegment = true;
    for (String field : getPartitionPathFields()) {
        String[] fieldWithType = field.split(customAvroKeyGenerator.SPLIT_REGEX);
        if (fieldWithType.length != 2) {
            throw new HoodieKeyGeneratorException("Unable to find field names for partition path in proper format");
        }
        String partitionPathField = fieldWithType[0];
        // Locale.ROOT keeps the enum lookup independent of the JVM default locale
        // (e.g. a Turkish locale would upper-case "simple" to "SİMPLE").
        CustomAvroKeyGenerator.PartitionKeyType keyType = CustomAvroKeyGenerator.PartitionKeyType.valueOf(fieldWithType[1].toUpperCase(java.util.Locale.ROOT));
        // Separator goes between segments only, so nothing has to be trimmed afterwards and an
        // empty field list no longer triggers a StringIndexOutOfBoundsException.
        if (!firstSegment) {
            partitionPath.append(customAvroKeyGenerator.getDefaultPartitionPathSeparator());
        }
        firstSegment = false;
        switch (keyType) {
            case SIMPLE: {
                SimpleKeyGenerator simpleKeyGenerator = new SimpleKeyGenerator(config, partitionPathField);
                if (record.isPresent()) {
                    partitionPath.append(simpleKeyGenerator.getPartitionPath(record.get()));
                } else if (row.isPresent()) {
                    partitionPath.append(simpleKeyGenerator.getPartitionPath(row.get()));
                } else {
                    partitionPath.append(simpleKeyGenerator.getPartitionPath(internalRowStructTypePair.get().getKey(), internalRowStructTypePair.get().getValue()));
                }
                break;
            }
            case TIMESTAMP:
                try {
                    TimestampBasedKeyGenerator timestampKeyGenerator = new TimestampBasedKeyGenerator(config, partitionPathField);
                    if (record.isPresent()) {
                        partitionPath.append(timestampKeyGenerator.getPartitionPath(record.get()));
                    } else if (row.isPresent()) {
                        partitionPath.append(timestampKeyGenerator.getPartitionPath(row.get()));
                    } else {
                        partitionPath.append(timestampKeyGenerator.getPartitionPath(internalRowStructTypePair.get().getKey(), internalRowStructTypePair.get().getValue()));
                    }
                } catch (IOException ioe) {
                    throw new HoodieKeyGeneratorException("Unable to initialise TimestampBasedKeyGenerator class", ioe);
                }
                break;
            default:
                throw new HoodieKeyGeneratorException("Please provide valid PartitionKeyType with fields! You provided: " + keyType);
        }
    }
    return partitionPath.toString();
}
Also used : HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) IOException(java.io.IOException)

Example 4 with HoodieKeyException

use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.

the class RowKeyGeneratorHelper method getNestedFieldIndices.

/**
 * Resolves a dot-separated field name into the chain of field positions that leads to it
 * within the given struct type, one position per nesting level.
 *
 * @param structType  schema of interest
 * @param field       dot-separated name of the field whose positions are requested
 * @param isRecordKey {@code true} if the field is a record key, {@code false} in case of a partition path
 * @return the positions from the outermost level to the requested field; for a partition path
 *         field that cannot be resolved, a single-element list containing {@code -1}
 * @throws HoodieKeyException if {@code isRecordKey} is {@code true} and a name segment is missing
 *                            or an intermediate segment is not a {@link StructType}
 */
public static List<Integer> getNestedFieldIndices(StructType structType, String field, boolean isRecordKey) {
    final String[] parts = field.split("\\.");
    final int lastPartIdx = parts.length - 1;
    final List<Integer> path = new ArrayList<>();
    // Schema of the level currently being searched; descends as segments are resolved.
    StructType currentType = structType;
    for (int depth = 0; depth <= lastPartIdx; depth++) {
        final String part = parts[depth];
        final Option<Object> lookup = currentType.getFieldIndex(part);
        if (!lookup.isDefined()) {
            if (isRecordKey) {
                throw new HoodieKeyException("Can't find " + part + " in StructType for the field " + field);
            }
            // Unresolvable partition path fields are signalled with a lone -1.
            return Collections.singletonList(-1);
        }
        final int pos = (int) lookup.get();
        path.add(pos);
        final StructField child = currentType.fields()[pos];
        if (depth < lastPartIdx) {
            // More segments follow, so this one has to be a struct we can descend into.
            if (child.dataType() instanceof StructType) {
                currentType = (StructType) child.dataType();
            } else if (isRecordKey) {
                throw new HoodieKeyException("Nested field should be of type StructType " + child);
            } else {
                return Collections.singletonList(-1);
            }
        }
    }
    return path;
}
Also used : StructField(org.apache.spark.sql.types.StructField) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) StructType(org.apache.spark.sql.types.StructType) ArrayList(java.util.ArrayList)

Aggregations

HoodieKeyException (org.apache.hudi.exception.HoodieKeyException)4 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HoodieKeyGeneratorException (org.apache.hudi.exception.HoodieKeyGeneratorException)2 StructField (org.apache.spark.sql.types.StructField)2 StructType (org.apache.spark.sql.types.StructType)2 Timestamp (java.sql.Timestamp)1 Instant (java.time.Instant)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Map (java.util.Map)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1 DEFAULT_PARTITION_PATH_SEPARATOR (org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR)1 EMPTY_RECORDKEY_PLACEHOLDER (org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER)1 HUDI_DEFAULT_PARTITION_PATH (org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH)1 NULL_RECORDKEY_PLACEHOLDER (org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER)1 Row (org.apache.spark.sql.Row)1