Search in sources:

Example 86 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class HoodieSinkTask, method bootstrap.

private void bootstrap(Collection<TopicPartition> partitions) {
    LOG.info(String.format("Bootstrap task for connector %s with id %s: assignment %s, partitions %s", connectorName, taskId, context.assignment(), partitions));
    for (TopicPartition partition : partitions) {
        try {
            // If this is the coordinator partition (partition 0), also instantiate and start the transaction coordinator (the leader)
            if (partition.partition() == ConnectTransactionCoordinator.COORDINATOR_KAFKA_PARTITION) {
                ConnectTransactionCoordinator coordinator = new ConnectTransactionCoordinator(connectConfigs, partition, controlKafkaClient);
                coordinator.start();
                transactionCoordinators.put(partition, coordinator);
            }
            ConnectTransactionParticipant worker = new ConnectTransactionParticipant(connectConfigs, partition, controlKafkaClient, context);
            transactionParticipants.put(partition, worker);
            worker.start();
        } catch (HoodieException exception) {
            LOG.error(String.format("Fatal error initializing task %s for partition %s", taskId, partition.partition()), exception);
        }
    }
}
Also used: ConnectTransactionCoordinator (org.apache.hudi.connect.transaction.ConnectTransactionCoordinator), TopicPartition (org.apache.kafka.common.TopicPartition), ConnectTransactionParticipant (org.apache.hudi.connect.transaction.ConnectTransactionParticipant), HoodieException (org.apache.hudi.exception.HoodieException)
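
Note that bootstrap is not called by user code; in a Kafka Connect sink this logic is typically driven from SinkTask#open, which the framework calls whenever partitions are (re)assigned. A minimal sketch of that wiring, assuming the fields and the bootstrap method from the example above already exist on the task:

@Override
public void open(Collection<TopicPartition> partitions) {
    // Kafka Connect invokes open() on partition (re)assignment; delegating to
    // bootstrap() creates the coordinator/participants for the new assignment.
    bootstrap(partitions);
}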

Example 87 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class KafkaConnectUtils, method getCommitMetadataForLatestInstant.

/**
 * Get the metadata from the latest commit file.
 *
 * @param metaClient The {@link HoodieTableMetaClient} used to access the table metadata.
 * @return An {@link Option} containing the {@link HoodieCommitMetadata} from the latest commit file, or empty if there is no completed commit.
 */
public static Option<HoodieCommitMetadata> getCommitMetadataForLatestInstant(HoodieTableMetaClient metaClient) {
    HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants()
        .filter(instant -> (metaClient.getTableType() == HoodieTableType.COPY_ON_WRITE
                && instant.getAction().equals(HoodieActiveTimeline.COMMIT_ACTION))
            || (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ
                && instant.getAction().equals(HoodieActiveTimeline.DELTA_COMMIT_ACTION)));
    Option<HoodieInstant> latestInstant = timeline.lastInstant();
    if (latestInstant.isPresent()) {
        try {
            byte[] data = timeline.getInstantDetails(latestInstant.get()).get();
            return Option.of(HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class));
        } catch (Exception e) {
            throw new HoodieException("Failed to read schema from commit metadata", e);
        }
    } else {
        return Option.empty();
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException), NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)
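
A minimal usage sketch for this helper; hadoopConf and tableBasePath below are assumed placeholders for an existing Hadoop Configuration and the Hudi table's base path:

HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(hadoopConf)          // assumed: an existing org.apache.hadoop.conf.Configuration
    .setBasePath(tableBasePath)   // assumed: base path of the Hudi table
    .build();
Option<HoodieCommitMetadata> latestCommitMetadata =
    KafkaConnectUtils.getCommitMetadataForLatestInstant(metaClient);
// Option.empty() means the table has no completed commit/deltacommit yet.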

Example 88 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class KafkaConnectUtils, method getDefaultHadoopConf.

/**
 * Returns the default Hadoop Configuration, seeded from the configured Hadoop conf
 * directories (or the default locations) and overlaid with all non-Hudi connector properties.
 *
 * @param connectConfigs The {@link KafkaConnectConfigs} supplying conf file locations and overrides.
 * @return The default Hadoop {@link Configuration}.
 */
public static Configuration getDefaultHadoopConf(KafkaConnectConfigs connectConfigs) {
    Configuration hadoopConf = new Configuration();
    // add hadoop config files
    if (!StringUtils.isNullOrEmpty(connectConfigs.getHadoopConfDir()) || !StringUtils.isNullOrEmpty(connectConfigs.getHadoopConfHome())) {
        try {
            List<Path> configFiles = getHadoopConfigFiles(connectConfigs.getHadoopConfDir(), connectConfigs.getHadoopConfHome());
            configFiles.forEach(f -> hadoopConf.addResource(new org.apache.hadoop.fs.Path(f.toAbsolutePath().toUri())));
        } catch (Exception e) {
            throw new HoodieException("Failed to read hadoop configuration!", e);
        }
    } else {
        DEFAULT_HADOOP_CONF_FILES.forEach(f -> hadoopConf.addResource(new org.apache.hadoop.fs.Path(f.toAbsolutePath().toUri())));
    }
    connectConfigs.getProps().keySet().stream().filter(prop -> {
        // filter out the hoodie-prefixed configuration items before passing to hadoop/hive configs
        return !prop.toString().startsWith(HOODIE_CONF_PREFIX);
    }).forEach(prop -> {
        hadoopConf.set(prop.toString(), connectConfigs.getProps().get(prop.toString()).toString());
    });
    return hadoopConf;
}
Also used: Path (java.nio.file.Path), Arrays (java.util.Arrays), CustomKeyGenerator (org.apache.hudi.keygen.CustomKeyGenerator), MessageDigest (java.security.MessageDigest), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieException (org.apache.hudi.exception.HoodieException), Option (org.apache.hudi.common.util.Option), SlashEncodedDayPartitionValueExtractor (org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor), BaseKeyGenerator (org.apache.hudi.keygen.BaseKeyGenerator), ArrayList (java.util.ArrayList), AdminClient (org.apache.kafka.clients.admin.AdminClient), Logger (org.apache.log4j.Logger), HoodieTableType (org.apache.hudi.common.model.HoodieTableType), StringUtils (org.apache.hudi.common.util.StringUtils), KeyGenerator (org.apache.hudi.keygen.KeyGenerator), DescribeTopicsResult (org.apache.kafka.clients.admin.DescribeTopicsResult), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Configuration (org.apache.hadoop.conf.Configuration), Map (java.util.Map), TopicDescription (org.apache.kafka.clients.admin.TopicDescription), CustomAvroKeyGenerator (org.apache.hudi.keygen.CustomAvroKeyGenerator), SerializationUtils (org.apache.hudi.common.util.SerializationUtils), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), Properties (java.util.Properties), TypedProperties (org.apache.hudi.common.config.TypedProperties), Files (java.nio.file.Files), ControlMessage (org.apache.hudi.connect.ControlMessage), HiveSyncConfig (org.apache.hudi.hive.HiveSyncConfig), KeyGeneratorOptions (org.apache.hudi.keygen.constant.KeyGeneratorOptions), HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), IOException (java.io.IOException), KafkaFuture (org.apache.kafka.common.KafkaFuture), Collectors (java.util.stream.Collectors), StandardCharsets (java.nio.charset.StandardCharsets), ByteString (com.google.protobuf.ByteString), Objects (java.util.Objects), WriteStatus (org.apache.hudi.client.WriteStatus), List (java.util.List), FileVisitOption (java.nio.file.FileVisitOption), Paths (java.nio.file.Paths), NoSuchAlgorithmException (java.security.NoSuchAlgorithmException), LogManager (org.apache.log4j.LogManager), Collections (java.util.Collections), KafkaConnectConfigs (org.apache.hudi.connect.writers.KafkaConnectConfigs)
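
A short usage sketch, assuming neither a Hadoop conf dir nor a Hadoop home is configured, so the default config file locations apply:

KafkaConnectConfigs connectConfigs = KafkaConnectConfigs.newBuilder().build();
Configuration hadoopConf = KafkaConnectUtils.getDefaultHadoopConf(connectConfigs);
// At this point every connector property that does not start with
// HOODIE_CONF_PREFIX has been copied into hadoopConf by the loop above.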

Example 89 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class TestAbstractConnectWriter, method testAbstractWriterForAllFormats.

@ParameterizedTest
@EnumSource(value = TestInputFormats.class)
public void testAbstractWriterForAllFormats(TestInputFormats inputFormats) throws Exception {
    Schema schema = schemaProvider.getSourceSchema();
    List<?> inputRecords;
    List<HoodieRecord> expectedRecords;
    String formatConverter;
    switch(inputFormats) {
        case JSON_STRING:
            formatConverter = AbstractConnectWriter.KAFKA_STRING_CONVERTER;
            GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(schema, schema);
            inputRecords = SchemaTestUtil.generateTestJsonRecords(0, NUM_RECORDS);
            expectedRecords = ((List<String>) inputRecords).stream().map(s -> {
                try {
                    return HoodieAvroUtils.rewriteRecord((GenericRecord) reader.read(null, DecoderFactory.get().jsonDecoder(schema, s)), schema);
                } catch (IOException exception) {
                    throw new HoodieException("Error converting JSON records to AVRO");
                }
            }).map(p -> convertToHoodieRecords(p, p.get(RECORD_KEY_INDEX).toString(), "000/00/00")).collect(Collectors.toList());
            break;
        case AVRO:
            formatConverter = AbstractConnectWriter.KAFKA_AVRO_CONVERTER;
            inputRecords = SchemaTestUtil.generateTestRecords(0, NUM_RECORDS);
            expectedRecords = inputRecords.stream().map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, schema)).map(p -> convertToHoodieRecords(p, p.get(RECORD_KEY_INDEX).toString(), "000/00/00")).collect(Collectors.toList());
            break;
        default:
            throw new HoodieException("Unknown test scenario " + inputFormats);
    }
    configs = KafkaConnectConfigs.newBuilder().withProperties(Collections.singletonMap(KafkaConnectConfigs.KAFKA_VALUE_CONVERTER, formatConverter)).build();
    AbstractHudiConnectWriterTestWrapper writer = new AbstractHudiConnectWriterTestWrapper(configs, keyGenerator, schemaProvider);
    for (int i = 0; i < NUM_RECORDS; i++) {
        writer.writeRecord(getNextKafkaRecord(inputRecords.get(i)));
    }
    validateRecords(writer.getWrittenRecords(), expectedRecords);
}
Also used: HoodieAvroPayload (org.apache.hudi.common.model.HoodieAvroPayload), BeforeEach (org.junit.jupiter.api.BeforeEach), HoodieAvroUtils (org.apache.hudi.avro.HoodieAvroUtils), HoodieException (org.apache.hudi.exception.HoodieException), Option (org.apache.hudi.common.util.Option), EnumSource (org.junit.jupiter.params.provider.EnumSource), AbstractConnectWriter (org.apache.hudi.connect.writers.AbstractConnectWriter), ArrayList (java.util.ArrayList), KeyGenerator (org.apache.hudi.keygen.KeyGenerator), SchemaTestUtil (org.apache.hudi.common.testutils.SchemaTestUtil), Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals), IndexedRecord (org.apache.avro.generic.IndexedRecord), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), GenericRecord (org.apache.avro.generic.GenericRecord), Schema (org.apache.avro.Schema), TypedProperties (org.apache.hudi.common.config.TypedProperties), Iterator (java.util.Iterator), IOException (java.io.IOException), Collectors (java.util.stream.Collectors), HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord), WriteStatus (org.apache.hudi.client.WriteStatus), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), List (java.util.List), SinkRecord (org.apache.kafka.connect.sink.SinkRecord), HoodieKey (org.apache.hudi.common.model.HoodieKey), SchemaProvider (org.apache.hudi.schema.SchemaProvider), Comparator (java.util.Comparator), Collections (java.util.Collections), GenericDatumReader (org.apache.avro.generic.GenericDatumReader), DecoderFactory (org.apache.avro.io.DecoderFactory), KafkaConnectConfigs (org.apache.hudi.connect.writers.KafkaConnectConfigs)
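
The JSON_STRING branch above relies on a standard Avro idiom: decoding a JSON-encoded record into a GenericRecord via GenericDatumReader and a JSON decoder. Isolated as a plain-Avro sketch (no Hudi involved; jsonToAvro is a made-up helper name):

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DecoderFactory;

public static GenericRecord jsonToAvro(Schema schema, String json) throws IOException {
    // The same schema serves as both writer and reader schema here.
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
    // jsonDecoder parses the JSON text against the given schema.
    return reader.read(null, DecoderFactory.get().jsonDecoder(schema, json));
}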

Example 90 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class TwoToOneDowngradeHandler, method downgrade.

@Override
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
    HoodieTable table = upgradeDowngradeHelper.getTable(config, context);
    HoodieTableMetaClient metaClient = table.getMetaClient();
    // re-create marker files if any partial timeline server based markers are found
    HoodieTimeline inflightTimeline = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
    List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
    for (HoodieInstant inflightInstant : commits) {
        // Converts the markers in new format to old format of direct markers
        try {
            convertToDirectMarkers(inflightInstant.getTimestamp(), table, context, config.getMarkersDeleteParallelism());
        } catch (IOException e) {
            throw new HoodieException("Converting marker files to DIRECT style failed during downgrade", e);
        }
    }
    return Collections.emptyMap();
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), HoodieTable (org.apache.hudi.table.HoodieTable), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException)
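
The pattern shared by all five examples is worth calling out: catch a checked exception at the boundary and rethrow it as the unchecked HoodieException, passing the original as the cause so its stack trace survives. Reduced to a sketch (doWork is a placeholder for any IO-throwing call):

try {
    doWork();
} catch (IOException e) {
    // Always pass the cause; a message alone would lose the underlying stack trace.
    throw new HoodieException("Describe what failed, including the instant or partition", e);
}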

Aggregations

HoodieException (org.apache.hudi.exception.HoodieException): 171
IOException (java.io.IOException): 87
Path (org.apache.hadoop.fs.Path): 45
Schema (org.apache.avro.Schema): 35
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 35
List (java.util.List): 30
ArrayList (java.util.ArrayList): 27
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 23
Collectors (java.util.stream.Collectors): 21
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
Option (org.apache.hudi.common.util.Option): 19
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18
Map (java.util.Map): 16
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 16
GenericRecord (org.apache.avro.generic.GenericRecord): 15
Arrays (java.util.Arrays): 14
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 14
Logger (org.apache.log4j.Logger): 14
FileStatus (org.apache.hadoop.fs.FileStatus): 13
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 13