Example 61 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class SparkBootstrapCommitActionExecutor, method execute:

@Override
public HoodieBootstrapWriteMetadata<HoodieData<WriteStatus>> execute() {
    validate();
    try {
        HoodieTableMetaClient metaClient = table.getMetaClient();
        Option<HoodieInstant> completedInstant = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
        ValidationUtils.checkArgument(!completedInstant.isPresent(), "Active Timeline is expected to be empty for bootstrap to be performed. " + "If you want to re-bootstrap, please rollback bootstrap first !!");
        Map<BootstrapMode, List<Pair<String, List<HoodieFileStatus>>>> partitionSelections = listAndProcessSourcePartitions();
        // First run metadata bootstrap which will auto commit
        Option<HoodieWriteMetadata<HoodieData<WriteStatus>>> metadataResult = metadataBootstrap(partitionSelections.get(BootstrapMode.METADATA_ONLY));
        // if a full-record bootstrap also needs to be performed, run that too
        Option<HoodieWriteMetadata<HoodieData<WriteStatus>>> fullBootstrapResult = fullBootstrap(partitionSelections.get(BootstrapMode.FULL_RECORD));
        // Delete the marker directory for the instant
        WriteMarkersFactory.get(config.getMarkersType(), table, instantTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
        return new HoodieBootstrapWriteMetadata(metadataResult, fullBootstrapResult);
    } catch (IOException ioe) {
        throw new HoodieIOException(ioe.getMessage(), ioe);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), IOException (java.io.IOException), HoodieIOException (org.apache.hudi.exception.HoodieIOException), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), BootstrapMode (org.apache.hudi.client.bootstrap.BootstrapMode), List (java.util.List), HoodieWriteMetadata (org.apache.hudi.table.action.HoodieWriteMetadata), BootstrapWriteStatus (org.apache.hudi.client.bootstrap.BootstrapWriteStatus), WriteStatus (org.apache.hudi.client.WriteStatus)

Example 62 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HoodieRowCreateHandle, method makeNewPath:

private Path makeNewPath(String partitionPath) {
    Path path = FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath);
    try {
        if (!fs.exists(path)) {
            // create a new partition as needed.
            fs.mkdirs(path);
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to make dir " + path, e);
    }
    HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
    return new Path(path.toString(), FSUtils.makeDataFileName(instantTime, getWriteToken(), fileId, tableConfig.getBaseFileFormat().getFileExtension()));
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieTableConfig (org.apache.hudi.common.table.HoodieTableConfig), HoodieIOException (org.apache.hudi.exception.HoodieIOException), IOException (java.io.IOException)
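A side note on the exists()/mkdirs() pair above: Hadoop's FileSystem.mkdirs has mkdir -p semantics and reports success when the directory already exists, so the exists() pre-check is an optimization rather than a correctness requirement. A minimal sketch under that assumption:

try {
    // mkdirs() succeeds even if the partition directory already exists,
    // so no separate existence check is strictly needed.
    fs.mkdirs(path);
} catch (IOException e) {
    throw new HoodieIOException("Failed to make dir " + path, e);
}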

Example 63 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class BuiltinKeyGenerator, method getPartitionPath:

/**
 * Fetch partition path from {@link InternalRow}.
 *
 * @param internalRow {@link InternalRow} instance from which the partition path needs to be fetched.
 * @param structType  schema of the internalRow.
 * @return the partition path.
 */
@Override
@PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
public String getPartitionPath(InternalRow internalRow, StructType structType) {
    try {
        initDeserializer(structType);
        Row row = sparkRowSerDe.deserializeRow(internalRow);
        return getPartitionPath(row);
    } catch (Exception e) {
        throw new HoodieIOException("Conversion of InternalRow to Row failed with exception " + e);
    }
}
Also used: HoodieIOException (org.apache.hudi.exception.HoodieIOException), InternalRow (org.apache.spark.sql.catalyst.InternalRow), Row (org.apache.spark.sql.Row), HoodieKeyException (org.apache.hudi.exception.HoodieKeyException), PublicAPIMethod (org.apache.hudi.PublicAPIMethod)
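Worth noting in this example: the HoodieIOException is built by concatenating the caught exception into the message string, so the original cause is never chained and its stack trace is lost. A cause-preserving variant, sketched using the HoodieIOException(String, IOException) constructor that Example 65 below relies on:

try {
    initDeserializer(structType);
    Row row = sparkRowSerDe.deserializeRow(internalRow);
    return getPartitionPath(row);
} catch (Exception e) {
    // Chain the cause instead of appending e.toString() to the message,
    // wrapping it in an IOException to match the constructor signature.
    throw new HoodieIOException("Conversion of InternalRow to Row failed", new IOException(e));
}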

Example 64 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HoodieSinkTask, method put:

@Override
public void put(Collection<SinkRecord> records) {
    for (SinkRecord record : records) {
        String topic = record.topic();
        int partition = record.kafkaPartition();
        TopicPartition tp = new TopicPartition(topic, partition);
        TransactionParticipant transactionParticipant = transactionParticipants.get(tp);
        if (transactionParticipant != null) {
            transactionParticipant.buffer(record);
        }
    }
    for (TopicPartition partition : context.assignment()) {
        if (transactionParticipants.get(partition) == null) {
            throw new RetriableException("TransactionParticipant should be created for each assigned partition, " + "but has not been created for the topic/partition: " + partition.topic() + ":" + partition.partition());
        }
        try {
            transactionParticipants.get(partition).processRecords();
        } catch (HoodieIOException exception) {
            throw new RetriableException("Intermittent write errors for Hudi " + " for the topic/partition: " + partition.topic() + ":" + partition.partition() + " , ensuring kafka connect will retry ", exception);
        }
    }
}
Also used: TransactionParticipant (org.apache.hudi.connect.transaction.TransactionParticipant), ConnectTransactionParticipant (org.apache.hudi.connect.transaction.ConnectTransactionParticipant), HoodieIOException (org.apache.hudi.exception.HoodieIOException), TopicPartition (org.apache.kafka.common.TopicPartition), SinkRecord (org.apache.kafka.connect.sink.SinkRecord), RetriableException (org.apache.kafka.connect.errors.RetriableException)
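The design choice here is the exception translation: a HoodieIOException, which may be transient, is rethrown as Kafka Connect's RetriableException, which tells the framework to redeliver the same batch instead of failing the task. The core of that pattern, as a minimal sketch:

try {
    transactionParticipants.get(partition).processRecords();
} catch (HoodieIOException e) {
    // RetriableException signals the Connect framework to retry this batch
    // rather than kill the sink task on an intermittent write error.
    throw new RetriableException("Transient Hudi write error for " + partition, e);
}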

Example 65 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class ConnectTransactionParticipant, method handleEndCommit:

private void handleEndCommit(ControlMessage message) {
    if (ongoingTransactionInfo == null) {
        LOG.warn(String.format("END_COMMIT %s is received while we were NOT in active transaction", message.getCommitTime()));
        return;
    } else if (!ongoingTransactionInfo.getCommitTime().equals(message.getCommitTime())) {
        LOG.error(String.format("Fatal error received END_COMMIT with commit time %s while local transaction commit time %s", message.getCommitTime(), ongoingTransactionInfo.getCommitTime()));
        // Recovery: A new END_COMMIT from leader caused interruption to an existing transaction,
        // explicitly reset Kafka commit offset to ensure no data loss
        cleanupOngoingTransaction();
        syncKafkaOffsetWithLeader(message);
        return;
    }
    context.pause(partition);
    ongoingTransactionInfo.commitInitiated();
    // send Writer Status Message and wait for ACK_COMMIT in async fashion
    try {
        // sendWriterStatus
        List<WriteStatus> writeStatuses = ongoingTransactionInfo.getWriter().close();
        ControlMessage writeStatusEvent = ControlMessage.newBuilder()
                .setProtocolVersion(KafkaConnectConfigs.CURRENT_PROTOCOL_VERSION)
                .setType(ControlMessage.EventType.WRITE_STATUS)
                .setTopicName(partition.topic())
                .setSenderType(ControlMessage.EntityType.PARTICIPANT)
                .setSenderPartition(partition.partition())
                .setReceiverType(ControlMessage.EntityType.COORDINATOR)
                .setReceiverPartition(ConnectTransactionCoordinator.COORDINATOR_KAFKA_PARTITION)
                .setCommitTime(ongoingTransactionInfo.getCommitTime())
                .setParticipantInfo(ControlMessage.ParticipantInfo.newBuilder()
                        .setWriteStatus(KafkaConnectUtils.buildWriteStatuses(writeStatuses))
                        .setKafkaOffset(ongoingTransactionInfo.getExpectedKafkaOffset())
                        .build())
                .build();
        kafkaControlAgent.publishMessage(writeStatusEvent);
    } catch (Exception exception) {
        LOG.error(String.format("Error writing records and ending commit %s for partition %s", message.getCommitTime(), partition.partition()), exception);
        throw new HoodieIOException(String.format("Error writing records and ending commit %s for partition %s", message.getCommitTime(), partition.partition()), new IOException(exception));
    }
}
Also used: HoodieIOException (org.apache.hudi.exception.HoodieIOException), IOException (java.io.IOException), WriteStatus (org.apache.hudi.client.WriteStatus), ControlMessage (org.apache.hudi.connect.ControlMessage), HoodieException (org.apache.hudi.exception.HoodieException)
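One detail worth calling out in the catch block above: HoodieIOException's cause parameter is typed as IOException, so the arbitrary Exception caught here is first adapted via new IOException(exception). A minimal sketch of that adapter pattern:

try {
    kafkaControlAgent.publishMessage(writeStatusEvent);
} catch (Exception e) {
    // HoodieIOException(String, IOException) requires an IOException cause;
    // non-I/O failures are adapted by wrapping them in a new IOException.
    throw new HoodieIOException("Failed to publish write status", new IOException(e));
}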

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139
IOException (java.io.IOException): 127
Path (org.apache.hadoop.fs.Path): 45
List (java.util.List): 31
ArrayList (java.util.ArrayList): 30
Option (org.apache.hudi.common.util.Option): 27
Collectors (java.util.stream.Collectors): 26
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
LogManager (org.apache.log4j.LogManager): 25
Logger (org.apache.log4j.Logger): 25
Map (java.util.Map): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
GenericRecord (org.apache.avro.generic.GenericRecord): 19
HashSet (java.util.HashSet): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
Set (java.util.Set): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17
HoodieException (org.apache.hudi.exception.HoodieException): 17
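Taken together, the counts above (IOException appears in 127 of the 139 usages) reflect one recurring idiom: catch the checked IOException at the storage boundary and rethrow it as the unchecked HoodieIOException with a contextual message and the original cause chained. A minimal self-contained sketch of the idiom; readFile and its path handling are illustrative, not a Hudi API:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.hudi.exception.HoodieIOException;

public class HoodieIoWrapExample {

    // Hypothetical helper: converts the checked IOException into Hudi's
    // unchecked HoodieIOException, adding context and chaining the cause.
    public static byte[] readFile(String path) {
        try {
            return Files.readAllBytes(Paths.get(path));
        } catch (IOException e) {
            throw new HoodieIOException("Failed to read " + path, e);
        }
    }
}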