
Example 26 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class StreamWriteOperatorCoordinator, method start.

@Override
public void start() throws Exception {
    // setup classloader for APIs that use reflection without taking ClassLoader param
    // reference: https://stackoverflow.com/questions/1771679/difference-between-threads-context-class-loader-and-normal-classloader
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    // initialize event buffer
    reset();
    this.gateways = new SubtaskGateway[this.parallelism];
    // init table, create if not exists.
    this.metaClient = initTableIfNotExists(this.conf);
    // the write client must be created after the table creation
    this.writeClient = StreamerUtil.createWriteClient(conf);
    this.tableState = TableState.create(conf);
    // start the executor
    this.executor = NonThrownExecutor.builder(LOG).exceptionHook((errMsg, t) -> this.context.failJob(new HoodieException(errMsg, t))).waitForTasksFinish(true).build();
    // start the hive sync service if required
    if (tableState.syncHive) {
        initHiveSync();
    }
    if (tableState.syncMetadata) {
        initMetadataSync();
    }
    this.ckpMetadata = CkpMetadata.getInstance(this.metaClient.getFs(), metaClient.getBasePath());
    this.ckpMetadata.bootstrap(this.metaClient);
}
Also used : Arrays(java.util.Arrays) CommitAckEvent(org.apache.hudi.sink.event.CommitAckEvent) HoodieException(org.apache.hudi.exception.HoodieException) HiveSyncContext(org.apache.hudi.sink.utils.HiveSyncContext) LoggerFactory(org.slf4j.LoggerFactory) WriteMetadataEvent(org.apache.hudi.sink.event.WriteMetadataEvent) Option(org.apache.hudi.common.util.Option) TaskNotRunningException(org.apache.flink.runtime.operators.coordination.TaskNotRunningException) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) CommitUtils(org.apache.hudi.common.util.CommitUtils) CkpMetadata(org.apache.hudi.sink.meta.CkpMetadata) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) OptionsResolver(org.apache.hudi.configuration.OptionsResolver) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Locale(java.util.Locale) Map(java.util.Map) StreamerUtil(org.apache.hudi.util.StreamerUtil) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CompactionUtil(org.apache.hudi.util.CompactionUtil) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Logger(org.slf4j.Logger) NonThrownExecutor(org.apache.hudi.sink.utils.NonThrownExecutor) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) StreamerUtil.initTableIfNotExists(org.apache.hudi.util.StreamerUtil.initTableIfNotExists) CompletionException(java.util.concurrent.CompletionException) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) Objects(java.util.Objects) WriteStatus(org.apache.hudi.client.WriteStatus) Nullable(org.jetbrains.annotations.Nullable) List(java.util.List) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) HoodieFlinkWriteClient(org.apache.hudi.client.HoodieFlinkWriteClient) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) Collections(java.util.Collections) FlinkOptions(org.apache.hudi.configuration.FlinkOptions) OperatorEvent(org.apache.flink.runtime.operators.coordination.OperatorEvent) HoodieException(org.apache.hudi.exception.HoodieException)
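
Every asynchronous task the coordinator runs is routed through a single exception hook that wraps the failure in a HoodieException and fails the Flink job, so errors are not silently swallowed on the executor thread. Below is a minimal, self-contained sketch of that hook pattern; ExceptionHookSketch and its failJob consumer are hypothetical stand-ins for the NonThrownExecutor plus OperatorCoordinator.Context#failJob wiring shown above, not Hudi API.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Consumer;

import org.apache.hudi.exception.HoodieException;

public class ExceptionHookSketch {

    private final ExecutorService executor = Executors.newSingleThreadExecutor();
    // hypothetical stand-in for OperatorCoordinator.Context#failJob
    private final Consumer<Throwable> failJob;

    ExceptionHookSketch(Consumer<Throwable> failJob) {
        this.failJob = failJob;
    }

    // Runs the action asynchronously; any throwable is wrapped in a HoodieException
    // and handed to failJob, the same shape as the exceptionHook wired in start().
    void execute(Runnable action, String actionName) {
        executor.submit(() -> {
            try {
                action.run();
            } catch (Throwable t) {
                failJob.accept(new HoodieException("Error while executing [" + actionName + "]", t));
            }
        });
    }

    public static void main(String[] args) {
        ExceptionHookSketch sketch =
                new ExceptionHookSketch(err -> System.err.println("failJob: " + err.getMessage()));
        sketch.execute(() -> { throw new IllegalStateException("boom"); }, "commit instant");
        sketch.executor.shutdown();
    }
}

Wrapping in HoodieException keeps the original cause attached while giving the job-failure path a single, recognizable exception type.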

Example 27 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class AppendWriteFunction, method initWriterHelper.

// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------
private void initWriterHelper() {
    this.currentInstant = instantToWrite(true);
    if (this.currentInstant == null) {
        // in case there are empty checkpoints that have no input data
        throw new HoodieException("No inflight instant when flushing data!");
    }
    this.writerHelper = new BulkInsertWriterHelper(this.config, this.writeClient.getHoodieTable(), this.writeClient.getConfig(), this.currentInstant, this.taskID, getRuntimeContext().getNumberOfParallelSubtasks(), getRuntimeContext().getAttemptNumber(), this.rowType);
}
Also used : BulkInsertWriterHelper(org.apache.hudi.sink.bulk.BulkInsertWriterHelper) HoodieException(org.apache.hudi.exception.HoodieException)
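
The null check on currentInstant turns an empty checkpoint (no data, so the coordinator never started an instant) into a fail-fast error instead of a write with a null instant time. A minimal sketch of that guard as a standalone helper follows; requireInflightInstant is a hypothetical name, not part of the Hudi API.

import org.apache.hudi.exception.HoodieException;

public class InstantGuardSketch {

    // Returns the instant time or fails fast, mirroring the null check in initWriterHelper:
    // a null instant means the coordinator has not started a new instant, which happens
    // for empty checkpoints that have no input data.
    static String requireInflightInstant(String maybeInstant) {
        if (maybeInstant == null) {
            throw new HoodieException("No inflight instant when flushing data!");
        }
        return maybeInstant;
    }

    public static void main(String[] args) {
        System.out.println(requireInflightInstant("20240101123000"));
        // requireInflightInstant(null); // would throw HoodieException
    }
}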

Example 28 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class BootstrapOperator, method loadRecords.

/**
 * Loads all the indices of the given partition path into the backup state.
 *
 * @param partitionPath The partition path
 */
@SuppressWarnings("unchecked")
protected void loadRecords(String partitionPath) throws Exception {
    long start = System.currentTimeMillis();
    final int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
    final int maxParallelism = getRuntimeContext().getMaxNumberOfParallelSubtasks();
    final int taskID = getRuntimeContext().getIndexOfThisSubtask();
    HoodieTimeline commitsTimeline = this.hoodieTable.getMetaClient().getCommitsTimeline();
    if (!StringUtils.isNullOrEmpty(lastInstantTime)) {
        commitsTimeline = commitsTimeline.findInstantsAfter(lastInstantTime);
    }
    Option<HoodieInstant> latestCommitTime = commitsTimeline.filterCompletedInstants().lastInstant();
    if (latestCommitTime.isPresent()) {
        BaseFileUtils fileUtils = BaseFileUtils.getInstance(this.hoodieTable.getBaseFileFormat());
        Schema schema = new TableSchemaResolver(this.hoodieTable.getMetaClient()).getTableAvroSchema();
        List<FileSlice> fileSlices = this.hoodieTable.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.get().getTimestamp(), true).collect(toList());
        for (FileSlice fileSlice : fileSlices) {
            if (!shouldLoadFile(fileSlice.getFileId(), maxParallelism, parallelism, taskID)) {
                continue;
            }
            LOG.info("Load records from {}.", fileSlice);
            // load parquet records
            fileSlice.getBaseFile().ifPresent(baseFile -> {
                // filter out corrupted files
                if (!isValidFile(baseFile.getFileStatus())) {
                    return;
                }
                try (ClosableIterator<HoodieKey> iterator = fileUtils.getHoodieKeyIterator(this.hadoopConf, new Path(baseFile.getPath()))) {
                    iterator.forEachRemaining(hoodieKey -> {
                        output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(hoodieKey, fileSlice))));
                    });
                }
            });
            // load avro log records
            List<String> logPaths = fileSlice.getLogFiles().filter(logFile -> isValidFile(logFile.getFileStatus())).map(logFile -> logFile.getPath().toString()).collect(toList());
            HoodieMergedLogRecordScanner scanner = FormatUtils.logScanner(logPaths, schema, latestCommitTime.get().getTimestamp(), writeConfig, hadoopConf);
            try {
                for (String recordKey : scanner.getRecords().keySet()) {
                    output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(new HoodieKey(recordKey, partitionPath), fileSlice))));
                }
            } catch (Exception e) {
                throw new HoodieException(String.format("Error when loading record keys from files: %s", logPaths), e);
            } finally {
                scanner.close();
            }
        }
    }
    long cost = System.currentTimeMillis() - start;
    LOG.info("Task [{}}:{}}] finish loading the index under partition {} and sending them to downstream, time cost: {} milliseconds.", this.getClass().getSimpleName(), taskID, partitionPath, cost);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) HoodieTable(org.apache.hudi.table.HoodieTable) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) LoggerFactory(org.slf4j.LoggerFactory) Option(org.apache.hudi.common.util.Option) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) BootstrapAggFunction(org.apache.hudi.sink.bootstrap.aggregate.BootstrapAggFunction) CkpMetadata(org.apache.hudi.sink.meta.CkpMetadata) ListState(org.apache.flink.api.common.state.ListState) StringUtils(org.apache.hudi.common.util.StringUtils) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FlinkTables(org.apache.hudi.util.FlinkTables) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Path(org.apache.hadoop.fs.Path) StreamerUtil(org.apache.hudi.util.StreamerUtil) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Types(org.apache.flink.api.common.typeinfo.Types) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) HoodieRecordGlobalLocation(org.apache.hudi.common.model.HoodieRecordGlobalLocation) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) Logger(org.slf4j.Logger) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) Configuration(org.apache.flink.configuration.Configuration) StreamerUtil.isValidFile(org.apache.hudi.util.StreamerUtil.isValidFile) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) GlobalAggregateManager(org.apache.flink.runtime.taskexecutor.GlobalAggregateManager) HoodieKey(org.apache.hudi.common.model.HoodieKey) Pattern(java.util.regex.Pattern) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) FlinkOptions(org.apache.hudi.configuration.FlinkOptions) FormatUtils(org.apache.hudi.table.format.FormatUtils) KeyGroupRangeAssignment(org.apache.flink.runtime.state.KeyGroupRangeAssignment) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieException(org.apache.hudi.exception.HoodieException) HoodieException(org.apache.hudi.exception.HoodieException) HoodieKey(org.apache.hudi.common.model.HoodieKey) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils)
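
loadRecords only indexes the file slices that belong to the current subtask, via shouldLoadFile(fileId, maxParallelism, parallelism, taskID). Below is a hedged sketch of how such a filter can be written with Flink's KeyGroupRangeAssignment, which the import list above hints at; the actual body of shouldLoadFile is not shown here, so treat this as an assumption.

import org.apache.flink.runtime.state.KeyGroupRangeAssignment;

public class FileAssignmentSketch {

    // Each subtask loads only the file slices whose file id hashes to its own index,
    // so the bootstrap work is spread across the parallel subtasks without coordination.
    static boolean shouldLoadFile(String fileId, int maxParallelism, int parallelism, int taskID) {
        return KeyGroupRangeAssignment.assignKeyToParallelOperator(fileId, maxParallelism, parallelism) == taskID;
    }

    public static void main(String[] args) {
        int maxParallelism = 128;
        int parallelism = 4;
        for (int taskID = 0; taskID < parallelism; taskID++) {
            System.out.printf("task %d loads file-0001: %s%n",
                    taskID, shouldLoadFile("file-0001", maxParallelism, parallelism, taskID));
        }
    }
}

Using the same hashing that keyBy uses keeps the assignment stable across restarts as long as the parallelism settings do not change.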

Example 29 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class CompactionCommitSink, method commitIfNecessary.

/**
 * Condition to commit: the commit buffer has the same size as the compaction plan operations
 * and all the compaction commit events {@link CompactionCommitEvent} have the same compaction instant time.
 *
 * @param instant Compaction commit instant time
 * @param events  Commit events ever received for the instant
 */
private void commitIfNecessary(String instant, Collection<CompactionCommitEvent> events) throws IOException {
    HoodieCompactionPlan compactionPlan = compactionPlanCache.computeIfAbsent(instant, k -> {
        try {
            return CompactionUtils.getCompactionPlan(this.writeClient.getHoodieTable().getMetaClient(), instant);
        } catch (IOException e) {
            throw new HoodieException(e);
        }
    });
    boolean isReady = compactionPlan.getOperations().size() == events.size();
    if (!isReady) {
        return;
    }
    try {
        doCommit(instant, events);
    } catch (Throwable throwable) {
        // make it fail-safe
        LOG.error("Error while committing compaction instant: " + instant, throwable);
    } finally {
        // reset the status
        reset(instant);
    }
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException)
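
The compaction plan lookup declares a checked IOException, but Map#computeIfAbsent only accepts a Function, which cannot throw checked exceptions, so the code rethrows it wrapped in the unchecked HoodieException. A small self-contained sketch of that wrap-and-cache pattern follows; PlanCacheSketch and loadPlan are hypothetical stand-ins for the writeClient/CompactionUtils calls above.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hudi.exception.HoodieException;

public class PlanCacheSketch {

    private final Map<String, String> planCache = new HashMap<>();

    // Pretend plan loader that declares a checked exception, like CompactionUtils.getCompactionPlan.
    private String loadPlan(String instant) throws IOException {
        return "plan-for-" + instant;
    }

    // computeIfAbsent takes a Function, so the checked IOException is rethrown
    // wrapped in the unchecked HoodieException, the same pattern as commitIfNecessary above.
    String getPlan(String instant) {
        return planCache.computeIfAbsent(instant, k -> {
            try {
                return loadPlan(k);
            } catch (IOException e) {
                throw new HoodieException(e);
            }
        });
    }

    public static void main(String[] args) {
        System.out.println(new PlanCacheSketch().getPlan("20240101123000"));
    }
}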

Example 30 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class StreamWriteFunction, method flushRemaining.

@SuppressWarnings("unchecked, rawtypes")
private void flushRemaining(boolean endInput) {
    this.currentInstant = instantToWrite(hasData());
    if (this.currentInstant == null) {
        // in case there are empty checkpoints that have no input data
        throw new HoodieException("No inflight instant when flushing data!");
    }
    final List<WriteStatus> writeStatus;
    if (buckets.size() > 0) {
        writeStatus = new ArrayList<>();
        this.buckets.values().forEach(bucket -> {
            List<HoodieRecord> records = bucket.writeBuffer();
            if (records.size() > 0) {
                if (config.getBoolean(FlinkOptions.PRE_COMBINE)) {
                    records = FlinkWriteHelper.newInstance().deduplicateRecords(records, (HoodieIndex) null, -1);
                }
                bucket.preWrite(records);
                writeStatus.addAll(writeFunction.apply(records, currentInstant));
                records.clear();
                bucket.reset();
            }
        });
    } else {
        LOG.info("No data to write in subtask [{}] for instant [{}]", taskID, currentInstant);
        writeStatus = Collections.emptyList();
    }
    final WriteMetadataEvent event = WriteMetadataEvent.builder().taskID(taskID).instantTime(currentInstant).writeStatus(writeStatus).lastBatch(true).endInput(endInput).build();
    this.eventGateway.sendEventToCoordinator(event);
    this.buckets.clear();
    this.tracer.reset();
    this.writeClient.cleanHandles();
    this.writeStatuses.addAll(writeStatus);
    // blocks flushing until the coordinator starts a new instant
    this.confirming = true;
}
Also used : HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieException(org.apache.hudi.exception.HoodieException) WriteMetadataEvent(org.apache.hudi.sink.event.WriteMetadataEvent) WriteStatus(org.apache.hudi.client.WriteStatus)
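
When FlinkOptions.PRE_COMBINE is enabled, the buffered records are deduplicated by record key before they are handed to the write function. Below is a simplified sketch of key-based deduplication that keeps the last record per key; the real FlinkWriteHelper#deduplicateRecords also applies the precombine-field ordering and index handling, which are omitted here, and the Record type is a hypothetical stand-in for HoodieRecord.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class PreCombineSketch {

    // Minimal record stand-in: a key plus a payload.
    static class Record {
        final String key;
        final String payload;
        Record(String key, String payload) { this.key = key; this.payload = payload; }
        @Override
        public String toString() { return key + "=" + payload; }
    }

    // Keeps only the last record seen for each key, a simplified version of what
    // pre-combine deduplication does before the buffer is written.
    static List<Record> deduplicate(List<Record> records) {
        Map<String, Record> latest = new LinkedHashMap<>();
        for (Record r : records) {
            latest.put(r.key, r);
        }
        return new ArrayList<>(latest.values());
    }

    public static void main(String[] args) {
        List<Record> buffer = List.of(
                new Record("id1", "v1"), new Record("id2", "v1"), new Record("id1", "v2"));
        System.out.println(deduplicate(buffer)); // [id1=v2, id2=v1] -> one record per key
    }
}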

Aggregations

HoodieException (org.apache.hudi.exception.HoodieException): 171
IOException (java.io.IOException): 87
Path (org.apache.hadoop.fs.Path): 45
Schema (org.apache.avro.Schema): 35
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 35
List (java.util.List): 30
ArrayList (java.util.ArrayList): 27
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 23
Collectors (java.util.stream.Collectors): 21
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
Option (org.apache.hudi.common.util.Option): 19
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18
Map (java.util.Map): 16
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 16
GenericRecord (org.apache.avro.generic.GenericRecord): 15
Arrays (java.util.Arrays): 14
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 14
Logger (org.apache.log4j.Logger): 14
FileStatus (org.apache.hadoop.fs.FileStatus): 13
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 13