use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.
the class RowKeyGeneratorHelper method getRecordKeyFromRow.
/**
* Generates record key for the corresponding {@link Row}.
* @param row instance of {@link Row} of interest
* @param recordKeyFields record key fields as a list
* @param recordKeyPositions record key positions for the corresponding record keys in {@code recordKeyFields}
* @param prefixFieldName {@code true} if field name need to be prefixed in the returned result. {@code false} otherwise.
* @return the record key thus generated
*/
public static String getRecordKeyFromRow(Row row, List<String> recordKeyFields, Map<String, List<Integer>> recordKeyPositions, boolean prefixFieldName) {
AtomicBoolean keyIsNullOrEmpty = new AtomicBoolean(true);
String toReturn = recordKeyFields.stream().map(field -> {
String val = null;
List<Integer> fieldPositions = recordKeyPositions.get(field);
if (fieldPositions.size() == 1) {
// simple field
Integer fieldPos = fieldPositions.get(0);
if (row.isNullAt(fieldPos)) {
val = NULL_RECORDKEY_PLACEHOLDER;
} else {
val = row.getAs(field).toString();
if (val.isEmpty()) {
val = EMPTY_RECORDKEY_PLACEHOLDER;
} else {
keyIsNullOrEmpty.set(false);
}
}
} else {
// nested fields
val = getNestedFieldVal(row, recordKeyPositions.get(field)).toString();
if (!val.contains(NULL_RECORDKEY_PLACEHOLDER) && !val.contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
keyIsNullOrEmpty.set(false);
}
}
return prefixFieldName ? (field + ":" + val) : val;
}).collect(Collectors.joining(","));
if (keyIsNullOrEmpty.get()) {
throw new HoodieKeyException("recordKey value: \"" + toReturn + "\" for fields: \"" + Arrays.toString(recordKeyFields.toArray()) + "\" cannot be null or empty.");
}
return toReturn;
}
use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.
the class CustomAvroKeyGenerator method getPartitionPath.
@Override
public String getPartitionPath(GenericRecord record) {
if (getPartitionPathFields() == null) {
throw new HoodieKeyException("Unable to find field names for partition path in cfg");
}
String partitionPathField;
StringBuilder partitionPath = new StringBuilder();
// Corresponds to no partition case
if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
return "";
}
for (String field : getPartitionPathFields()) {
String[] fieldWithType = field.split(SPLIT_REGEX);
if (fieldWithType.length != 2) {
throw new HoodieKeyException("Unable to find field names for partition path in proper format");
}
partitionPathField = fieldWithType[0];
PartitionKeyType keyType = PartitionKeyType.valueOf(fieldWithType[1].toUpperCase());
switch(keyType) {
case SIMPLE:
partitionPath.append(new SimpleAvroKeyGenerator(config, partitionPathField).getPartitionPath(record));
break;
case TIMESTAMP:
try {
partitionPath.append(new TimestampBasedAvroKeyGenerator(config, partitionPathField).getPartitionPath(record));
} catch (IOException e) {
throw new HoodieKeyGeneratorException("Unable to initialise TimestampBasedKeyGenerator class", e);
}
break;
default:
throw new HoodieKeyGeneratorException("Please provide valid PartitionKeyType with fields! You provided: " + keyType);
}
partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
}
partitionPath.deleteCharAt(partitionPath.length() - 1);
return partitionPath.toString();
}
use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.
the class CustomKeyGenerator method getPartitionPath.
private String getPartitionPath(Option<GenericRecord> record, Option<Row> row, Option<Pair<InternalRow, StructType>> internalRowStructTypePair) {
if (getPartitionPathFields() == null) {
throw new HoodieKeyException("Unable to find field names for partition path in cfg");
}
String partitionPathField;
StringBuilder partitionPath = new StringBuilder();
// Corresponds to no partition case
if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
return "";
}
for (String field : getPartitionPathFields()) {
String[] fieldWithType = field.split(customAvroKeyGenerator.SPLIT_REGEX);
if (fieldWithType.length != 2) {
throw new HoodieKeyGeneratorException("Unable to find field names for partition path in proper format");
}
partitionPathField = fieldWithType[0];
CustomAvroKeyGenerator.PartitionKeyType keyType = CustomAvroKeyGenerator.PartitionKeyType.valueOf(fieldWithType[1].toUpperCase());
switch(keyType) {
case SIMPLE:
if (record.isPresent()) {
partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
} else if (row.isPresent()) {
partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
} else {
partitionPath.append(new SimpleKeyGenerator(config, partitionPathField).getPartitionPath(internalRowStructTypePair.get().getKey(), internalRowStructTypePair.get().getValue()));
}
break;
case TIMESTAMP:
try {
if (record.isPresent()) {
partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(record.get()));
} else if (row.isPresent()) {
partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(row.get()));
} else {
partitionPath.append(new TimestampBasedKeyGenerator(config, partitionPathField).getPartitionPath(internalRowStructTypePair.get().getKey(), internalRowStructTypePair.get().getValue()));
}
} catch (IOException ioe) {
throw new HoodieKeyGeneratorException("Unable to initialise TimestampBasedKeyGenerator class", ioe);
}
break;
default:
throw new HoodieKeyGeneratorException("Please provide valid PartitionKeyType with fields! You provided: " + keyType);
}
partitionPath.append(customAvroKeyGenerator.getDefaultPartitionPathSeparator());
}
partitionPath.deleteCharAt(partitionPath.length() - 1);
return partitionPath.toString();
}
use of org.apache.hudi.exception.HoodieKeyException in project hudi by apache.
the class RowKeyGeneratorHelper method getNestedFieldIndices.
/**
* Generate the tree style positions for the field requested for as per the defined struct type.
*
* @param structType schema of interest
* @param field field of interest for which the positions are requested for
* @param isRecordKey {@code true} if the field requested for is a record key. {@code false} in case of a partition path.
* @return the positions of the field as per the struct type.
*/
public static List<Integer> getNestedFieldIndices(StructType structType, String field, boolean isRecordKey) {
String[] slices = field.split("\\.");
List<Integer> positions = new ArrayList<>();
int index = 0;
int totalCount = slices.length;
while (index < totalCount) {
String slice = slices[index];
Option<Object> curIndexOpt = structType.getFieldIndex(slice);
if (curIndexOpt.isDefined()) {
int curIndex = (int) curIndexOpt.get();
positions.add(curIndex);
final StructField nestedField = structType.fields()[curIndex];
if (index < totalCount - 1) {
if (!(nestedField.dataType() instanceof StructType)) {
if (isRecordKey) {
throw new HoodieKeyException("Nested field should be of type StructType " + nestedField);
} else {
positions = Collections.singletonList(-1);
break;
}
}
structType = (StructType) nestedField.dataType();
}
} else {
if (isRecordKey) {
throw new HoodieKeyException("Can't find " + slice + " in StructType for the field " + field);
} else {
positions = Collections.singletonList(-1);
break;
}
}
index++;
}
return positions;
}
Aggregations