Search in sources:

Example 1 with HoodieKeyGeneratorException

use of org.apache.hudi.exception.HoodieKeyGeneratorException in project hudi by apache.

The method testKeyGeneratorTypes of the class TestCreateAvroKeyGeneratorByTypeWithFactory.

/**
 * Checks that {@code HoodieAvroKeyGeneratorFactory.createKeyGenerator} returns an instance of
 * the Avro key generator class that corresponds to the configured key generator type.
 *
 * @param keyGenType name of a {@code KeyGeneratorType} constant, provided by the
 *                   {@code configParams} method source
 * @throws IOException declared because the factory call is declared to throw it
 */
@ParameterizedTest
@MethodSource("configParams")
public void testKeyGeneratorTypes(String keyGenType) throws IOException {
    props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), keyGenType);
    KeyGeneratorType requestedType = KeyGeneratorType.valueOf(keyGenType);
    KeyGenerator generator = HoodieAvroKeyGeneratorFactory.createKeyGenerator(props);

    // Pick the class the factory is expected to have produced for this type.
    Class<?> expectedClass;
    switch (requestedType) {
        case SIMPLE:
            expectedClass = SimpleAvroKeyGenerator.class;
            break;
        case COMPLEX:
            expectedClass = ComplexAvroKeyGenerator.class;
            break;
        case TIMESTAMP:
            expectedClass = TimestampBasedAvroKeyGenerator.class;
            break;
        case CUSTOM:
            expectedClass = CustomAvroKeyGenerator.class;
            break;
        case NON_PARTITION:
            expectedClass = NonpartitionedAvroKeyGenerator.class;
            break;
        case GLOBAL_DELETE:
            expectedClass = GlobalAvroDeleteKeyGenerator.class;
            break;
        default:
            throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGenType);
    }

    Assertions.assertEquals(expectedClass.getName(), generator.getClass().getName());
}
Also used : HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) TimestampBasedAvroKeyGenerator(org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator) CustomAvroKeyGenerator(org.apache.hudi.keygen.CustomAvroKeyGenerator) ComplexAvroKeyGenerator(org.apache.hudi.keygen.ComplexAvroKeyGenerator) NonpartitionedAvroKeyGenerator(org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator) SimpleAvroKeyGenerator(org.apache.hudi.keygen.SimpleAvroKeyGenerator) KeyGeneratorType(org.apache.hudi.keygen.constant.KeyGeneratorType) NonpartitionedAvroKeyGenerator(org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator) SimpleAvroKeyGenerator(org.apache.hudi.keygen.SimpleAvroKeyGenerator) KeyGenerator(org.apache.hudi.keygen.KeyGenerator) ComplexAvroKeyGenerator(org.apache.hudi.keygen.ComplexAvroKeyGenerator) GlobalAvroDeleteKeyGenerator(org.apache.hudi.keygen.GlobalAvroDeleteKeyGenerator) TimestampBasedAvroKeyGenerator(org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator) CustomAvroKeyGenerator(org.apache.hudi.keygen.CustomAvroKeyGenerator) GlobalAvroDeleteKeyGenerator(org.apache.hudi.keygen.GlobalAvroDeleteKeyGenerator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 2 with HoodieKeyGeneratorException

use of org.apache.hudi.exception.HoodieKeyGeneratorException in project hudi by apache.

The method createAvroKeyGeneratorByType of the class HoodieAvroKeyGeneratorFactory.

/**
 * Creates the Avro key generator selected by the {@code HoodieWriteConfig.KEYGENERATOR_TYPE}
 * property.
 *
 * <p>When the property is null or empty, {@code KeyGeneratorType.SIMPLE} is used. The configured
 * value is upper-cased with {@code Locale.ROOT} before being resolved, so the lookup is
 * case-insensitive.
 *
 * @param props properties holding the key generator type and passed on to the generator's constructor
 * @return a new key generator instance for the resolved type
 * @throws HoodieKeyGeneratorException if the configured value is not a known {@code KeyGeneratorType}
 *         or the type has no Avro key generator mapped here
 * @throws IOException declared in the signature; presumably raised by a key generator
 *         constructor (TODO confirm which ones)
 */
private static KeyGenerator createAvroKeyGeneratorByType(TypedProperties props) throws IOException {
    // Use KeyGeneratorType.SIMPLE as default keyGeneratorType
    String keyGeneratorType = props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), null);
    if (StringUtils.isNullOrEmpty(keyGeneratorType)) {
        LOG.info("The value of {} is empty, using SIMPLE", HoodieWriteConfig.KEYGENERATOR_TYPE.key());
        keyGeneratorType = KeyGeneratorType.SIMPLE.name();
    }
    KeyGeneratorType keyGeneratorTypeEnum;
    try {
        keyGeneratorTypeEnum = KeyGeneratorType.valueOf(keyGeneratorType.toUpperCase(Locale.ROOT));
    } catch (IllegalArgumentException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGeneratorType, e);
    }
    switch (keyGeneratorTypeEnum) {
        case SIMPLE:
            return new SimpleAvroKeyGenerator(props);
        case COMPLEX:
            return new ComplexAvroKeyGenerator(props);
        case TIMESTAMP:
            return new TimestampBasedAvroKeyGenerator(props);
        case CUSTOM:
            return new CustomAvroKeyGenerator(props);
        case NON_PARTITION:
            return new NonpartitionedAvroKeyGenerator(props);
        case GLOBAL_DELETE:
            return new GlobalAvroDeleteKeyGenerator(props);
        default:
            // Reached only for enum constants that exist but have no Avro generator listed above.
            throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGeneratorType);
    }
}
Also used : HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) TimestampBasedAvroKeyGenerator(org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator) CustomAvroKeyGenerator(org.apache.hudi.keygen.CustomAvroKeyGenerator) ComplexAvroKeyGenerator(org.apache.hudi.keygen.ComplexAvroKeyGenerator) NonpartitionedAvroKeyGenerator(org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator) SimpleAvroKeyGenerator(org.apache.hudi.keygen.SimpleAvroKeyGenerator) KeyGeneratorType(org.apache.hudi.keygen.constant.KeyGeneratorType) GlobalAvroDeleteKeyGenerator(org.apache.hudi.keygen.GlobalAvroDeleteKeyGenerator)

Example 3 with HoodieKeyGeneratorException

use of org.apache.hudi.exception.HoodieKeyGeneratorException in project hudi by apache.

The method getPartitionPath of the class CustomAvroKeyGenerator.

/**
 * Builds the partition path for a record from the configured partition path fields.
 *
 * <p>Each configured field is split with {@code SPLIT_REGEX} and must yield exactly two parts:
 * the field name and a {@code PartitionKeyType} name. The per-field partition paths are
 * concatenated, each followed by {@code DEFAULT_PARTITION_PATH_SEPARATOR}, and the last
 * character of the result is removed before it is returned.
 *
 * @param record the record to compute the partition path for
 * @return the combined partition path, or an empty string when the only configured field is empty
 * @throws HoodieKeyException if no partition path fields are configured or a field does not
 *         split into exactly two parts
 * @throws HoodieKeyGeneratorException if the timestamp based key generator cannot be created or
 *         the key type is not handled here
 * @throws IllegalArgumentException if the type part does not name a {@code PartitionKeyType} constant
 */
@Override
public String getPartitionPath(GenericRecord record) {
    if (getPartitionPathFields() == null) {
        throw new HoodieKeyException("Unable to find field names for partition path in cfg");
    }
    String partitionPathField;
    StringBuilder partitionPath = new StringBuilder();
    // Corresponds to no partition case
    if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) {
        return "";
    }
    for (String field : getPartitionPathFields()) {
        String[] fieldWithType = field.split(SPLIT_REGEX);
        if (fieldWithType.length != 2) {
            throw new HoodieKeyException("Unable to find field names for partition path in proper format");
        }
        partitionPathField = fieldWithType[0];
        // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM default
        // locale (e.g. "simple" must become "SIMPLE" even under a Turkish locale). The class is
        // fully qualified so that no additional import is needed.
        PartitionKeyType keyType = PartitionKeyType.valueOf(fieldWithType[1].toUpperCase(java.util.Locale.ROOT));
        switch (keyType) {
            case SIMPLE:
                partitionPath.append(new SimpleAvroKeyGenerator(config, partitionPathField).getPartitionPath(record));
                break;
            case TIMESTAMP:
                try {
                    partitionPath.append(new TimestampBasedAvroKeyGenerator(config, partitionPathField).getPartitionPath(record));
                } catch (IOException e) {
                    throw new HoodieKeyGeneratorException("Unable to initialise TimestampBasedKeyGenerator class", e);
                }
                break;
            default:
                throw new HoodieKeyGeneratorException("Please provide valid PartitionKeyType with fields! You provided: " + keyType);
        }
        partitionPath.append(DEFAULT_PARTITION_PATH_SEPARATOR);
    }
    // Drop the last character, i.e. the separator appended after the final field
    // (assumes the separator is a single character — TODO confirm).
    partitionPath.deleteCharAt(partitionPath.length() - 1);
    return partitionPath.toString();
}
Also used : HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) HoodieKeyException(org.apache.hudi.exception.HoodieKeyException) IOException(java.io.IOException)

Example 4 with HoodieKeyGeneratorException

use of org.apache.hudi.exception.HoodieKeyGeneratorException in project hudi by apache.

The method testKeyGeneratorTypes of the class TestCreateKeyGeneratorByTypeWithFactory.

/**
 * Checks that {@code HoodieSparkKeyGeneratorFactory.createKeyGenerator} returns an instance of
 * the key generator class that corresponds to the configured key generator type.
 *
 * @param keyGenType name of a {@code KeyGeneratorType} constant, provided by the
 *                   {@code configParams} method source
 * @throws IOException declared because the factory call is declared to throw it
 */
@ParameterizedTest
@MethodSource("configParams")
public void testKeyGeneratorTypes(String keyGenType) throws IOException {
    props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), keyGenType);
    KeyGeneratorType requestedType = KeyGeneratorType.valueOf(keyGenType);
    KeyGenerator generator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);

    // Pick the class the factory is expected to have produced for this type.
    Class<?> expectedClass;
    switch (requestedType) {
        case SIMPLE:
            expectedClass = SimpleKeyGenerator.class;
            break;
        case COMPLEX:
            expectedClass = ComplexKeyGenerator.class;
            break;
        case TIMESTAMP:
            expectedClass = TimestampBasedKeyGenerator.class;
            break;
        case CUSTOM:
            expectedClass = CustomKeyGenerator.class;
            break;
        case NON_PARTITION:
            expectedClass = NonpartitionedKeyGenerator.class;
            break;
        case GLOBAL_DELETE:
            expectedClass = GlobalDeleteKeyGenerator.class;
            break;
        default:
            throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGenType);
    }

    Assertions.assertEquals(expectedClass.getName(), generator.getClass().getName());
}
Also used : HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) GlobalDeleteKeyGenerator(org.apache.hudi.keygen.GlobalDeleteKeyGenerator) CustomKeyGenerator(org.apache.hudi.keygen.CustomKeyGenerator) ComplexKeyGenerator(org.apache.hudi.keygen.ComplexKeyGenerator) TimestampBasedKeyGenerator(org.apache.hudi.keygen.TimestampBasedKeyGenerator) SimpleKeyGenerator(org.apache.hudi.keygen.SimpleKeyGenerator) KeyGeneratorType(org.apache.hudi.keygen.constant.KeyGeneratorType) CustomKeyGenerator(org.apache.hudi.keygen.CustomKeyGenerator) TimestampBasedKeyGenerator(org.apache.hudi.keygen.TimestampBasedKeyGenerator) ComplexKeyGenerator(org.apache.hudi.keygen.ComplexKeyGenerator) SimpleKeyGenerator(org.apache.hudi.keygen.SimpleKeyGenerator) GlobalDeleteKeyGenerator(org.apache.hudi.keygen.GlobalDeleteKeyGenerator) KeyGenerator(org.apache.hudi.keygen.KeyGenerator) NonpartitionedKeyGenerator(org.apache.hudi.keygen.NonpartitionedKeyGenerator) NonpartitionedKeyGenerator(org.apache.hudi.keygen.NonpartitionedKeyGenerator) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 5 with HoodieKeyGeneratorException

use of org.apache.hudi.exception.HoodieKeyGeneratorException in project hudi by apache.

The method runMetadataBootstrap of the class SparkBootstrapCommitActionExecutor.

/**
 * Runs the metadata bootstrap for every file of the given partitions.
 *
 * <p>Each partition path is translated with the configured
 * {@code BootstrapPartitionPathTranslator}; the resulting (partition, translated path, file)
 * entries are parallelized and handed to the metadata handler chosen for each file.
 *
 * @param partitions pairs of a partition path and the file statuses belonging to it; may be
 *                   null or empty
 * @return empty data when there is nothing to bootstrap, otherwise the write statuses produced
 *         by the metadata handlers
 * @throws HoodieKeyGeneratorException if creating the key generator fails with an {@code IOException}
 */
private HoodieData<BootstrapWriteStatus> runMetadataBootstrap(List<Pair<String, List<HoodieFileStatus>>> partitions) {
    if (partitions == null || partitions.isEmpty()) {
        return context.emptyHoodieData();
    }

    // Copy the write-config properties into a fresh TypedProperties instance.
    TypedProperties properties = new TypedProperties();
    properties.putAll(config.getProps());

    KeyGeneratorInterface keyGenerator;
    try {
        keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(properties);
    } catch (IOException e) {
        throw new HoodieKeyGeneratorException("Init keyGenerator failed ", e);
    }

    BootstrapPartitionPathTranslator translator = (BootstrapPartitionPathTranslator) ReflectionUtils.loadClass(
        config.getBootstrapPartitionPathTranslatorClass(), properties);

    // Flatten to one entry per file: (source partition, (translated partition path, file status)).
    List<Pair<String, Pair<String, HoodieFileStatus>>> bootstrapPaths = partitions.stream()
        .flatMap(partition -> {
            String translatedPartitionPath = translator.getBootstrapTranslatedPath(partition.getKey());
            return partition.getValue().stream()
                .map(fileStatus -> Pair.of(partition.getKey(), Pair.of(translatedPartitionPath, fileStatus)));
        })
        .collect(Collectors.toList());

    context.setJobStatus(this.getClass().getSimpleName(), "Bootstrap metadata table.");
    return context.parallelize(bootstrapPaths, config.getBootstrapParallelism())
        .map(pathEntry -> {
            Pair<String, HoodieFileStatus> translatedPathAndFile = pathEntry.getRight();
            return getMetadataHandler(config, table, translatedPathAndFile.getRight())
                .runMetadataBootstrap(pathEntry.getLeft(), translatedPathAndFile.getLeft(), keyGenerator);
        });
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) BootstrapMode(org.apache.hudi.client.bootstrap.BootstrapMode) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BaseCommitActionExecutor(org.apache.hudi.table.action.commit.BaseCommitActionExecutor) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) KeyGeneratorInterface(org.apache.hudi.keygen.KeyGeneratorInterface) Logger(org.apache.log4j.Logger) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Duration(java.time.Duration) Map(java.util.Map) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapPartitionPathTranslator(org.apache.hudi.client.bootstrap.translator.BootstrapPartitionPathTranslator) Collection(java.util.Collection) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) WRITE_STATUS_STORAGE_LEVEL_VALUE(org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) BootstrapWriteStatus(org.apache.hudi.client.bootstrap.BootstrapWriteStatus) SparkBulkInsertCommitActionExecutor(org.apache.hudi.table.action.commit.SparkBulkInsertCommitActionExecutor) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) FullRecordBootstrapDataProvider(org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider) MetadataBootstrapHandlerFactory.getMetadataHandler(org.apache.hudi.table.action.bootstrap.MetadataBootstrapHandlerFactory.getMetadataHandler) 
HoodieBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieBootstrapSchemaProvider) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) BaseSparkCommitActionExecutor(org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) BootstrapModeSelector(org.apache.hudi.client.bootstrap.selector.BootstrapModeSelector) HoodieData(org.apache.hudi.common.data.HoodieData) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) Iterator(java.util.Iterator) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieSparkBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieSparkBootstrapSchemaProvider) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) SparkValidatorUtils(org.apache.hudi.client.utils.SparkValidatorUtils) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) 
KeyGeneratorInterface(org.apache.hudi.keygen.KeyGeneratorInterface) HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) BootstrapPartitionPathTranslator(org.apache.hudi.client.bootstrap.translator.BootstrapPartitionPathTranslator) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) TypedProperties(org.apache.hudi.common.config.TypedProperties) Pair(org.apache.hudi.common.util.collection.Pair)

Aggregations

HoodieKeyGeneratorException (org.apache.hudi.exception.HoodieKeyGeneratorException): 7; KeyGeneratorType (org.apache.hudi.keygen.constant.KeyGeneratorType): 4; IOException (java.io.IOException): 3; HoodieKeyException (org.apache.hudi.exception.HoodieKeyException): 2; ComplexKeyGenerator (org.apache.hudi.keygen.ComplexKeyGenerator): 2; CustomKeyGenerator (org.apache.hudi.keygen.CustomKeyGenerator): 2; GlobalDeleteKeyGenerator (org.apache.hudi.keygen.GlobalDeleteKeyGenerator): 2; NonpartitionedKeyGenerator (org.apache.hudi.keygen.NonpartitionedKeyGenerator): 2; SimpleKeyGenerator (org.apache.hudi.keygen.SimpleKeyGenerator): 2; TimestampBasedKeyGenerator (org.apache.hudi.keygen.TimestampBasedKeyGenerator): 2; StandardCharsets (java.nio.charset.StandardCharsets): 1; Duration (java.time.Duration): 1; Instant (java.time.Instant): 1; Collection (java.util.Collection): 1; Iterator (java.util.Iterator): 1; List (java.util.List): 1; Map (java.util.Map): 1; Collectors (java.util.stream.Collectors): 1; FileSystem (org.apache.hadoop.fs.FileSystem): 1; HoodieFileStatus (org.apache.hudi.avro.model.HoodieFileStatus): 1