
Example 91 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class HoodieActiveTimeline method deleteInstantFileIfExists.

private void deleteInstantFileIfExists(HoodieInstant instant) {
    LOG.info("Deleting instant " + instant);
    Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());
    try {
        if (metaClient.getFs().exists(inFlightCommitFilePath)) {
            boolean result = metaClient.getFs().delete(inFlightCommitFilePath, false);
            if (result) {
                LOG.info("Removed instant " + instant);
            } else {
                throw new HoodieIOException("Could not delete instant " + instant);
            }
        } else {
            LOG.warn("The commit " + inFlightCommitFilePath + " to remove does not exist");
        }
    } catch (IOException e) {
        throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
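
The pattern here, probe for the file, attempt the delete, and rethrow the checked IOException as an unchecked exception, is easy to try outside Hudi. Below is a minimal, self-contained sketch using java.nio.file and the JDK's UncheckedIOException in place of HoodieIOException; the class name DeleteIfExistsDemo and its messages are illustrative only, not Hudi code.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class DeleteIfExistsDemo {
    // Same shape as deleteInstantFileIfExists above: check existence, attempt
    // the delete, and wrap the checked IOException in an unchecked one.
    static void deleteIfExists(Path file) {
        try {
            if (Files.exists(file)) {
                if (!Files.deleteIfExists(file)) {
                    throw new UncheckedIOException(new IOException("Could not delete " + file));
                }
            } else {
                System.err.println("File to remove does not exist: " + file);
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Could not remove " + file, e);
        }
    }

    public static void main(String[] args) throws IOException {
        Path tmp = Files.createTempFile("instant", ".commit");
        deleteIfExists(tmp); // deletes the file we just created
    }
}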

Example 92 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class DFSPathSelector method getNextFilePathsAndMaxModificationTime.

/**
 * Get the list of files changed since last checkpoint.
 *
 * @param lastCheckpointStr the last checkpoint time string, empty if first run
 * @param sourceLimit       max bytes to read each time
 * @return the list of files concatenated and their latest modified time
 */
@Deprecated
public Pair<Option<String>, String> getNextFilePathsAndMaxModificationTime(Option<String> lastCheckpointStr, long sourceLimit) {
    try {
        // obtain all eligible files under root folder.
        log.info("Root path => " + props.getString(Config.ROOT_INPUT_PATH_PROP) + " source limit => " + sourceLimit);
        long lastCheckpointTime = lastCheckpointStr.map(Long::parseLong).orElse(Long.MIN_VALUE);
        List<FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(props.getString(Config.ROOT_INPUT_PATH_PROP)), lastCheckpointTime);
        // sort them by modification time.
        eligibleFiles.sort(Comparator.comparingLong(FileStatus::getModificationTime));
        // Filter based on checkpoint & input size, if needed
        long currentBytes = 0;
        long newCheckpointTime = lastCheckpointTime;
        List<FileStatus> filteredFiles = new ArrayList<>();
        for (FileStatus f : eligibleFiles) {
            if (currentBytes + f.getLen() >= sourceLimit && f.getModificationTime() > newCheckpointTime) {
                // stop only at a modification-time boundary, so files sharing the same
                // modification time are not skipped in the next read
                break;
            }
            newCheckpointTime = f.getModificationTime();
            currentBytes += f.getLen();
            filteredFiles.add(f);
        }
        // no data to read
        if (filteredFiles.isEmpty()) {
            return new ImmutablePair<>(Option.empty(), String.valueOf(newCheckpointTime));
        }
        // read the files out.
        String pathStr = filteredFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));
        return new ImmutablePair<>(Option.ofNullable(pathStr), String.valueOf(newCheckpointTime));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Arrays(java.util.Arrays) TypedProperties(org.apache.hudi.common.config.TypedProperties) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieException(org.apache.hudi.exception.HoodieException) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) DataSourceUtils(org.apache.hudi.DataSourceUtils) FileStatus(org.apache.hadoop.fs.FileStatus) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) List(java.util.List) Configuration(org.apache.hadoop.conf.Configuration) HoodieIOException(org.apache.hudi.exception.HoodieIOException) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
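
The interesting part of this selector is the stopping rule: files are taken in modification-time order until the byte budget is reached, but the loop never breaks in the middle of a group of files sharing a modification time, because the checkpoint is just a timestamp. A self-contained sketch of that rule, with a hypothetical FileInfo record standing in for Hadoop's FileStatus (requires Java 16+):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

public class CheckpointBatchDemo {

    // Hypothetical stand-in for Hadoop's FileStatus: path, size, modification time.
    record FileInfo(String path, long len, long modTime) {}

    // Same stopping rule as above: accumulate files in modification-time order
    // until the byte budget is hit, but only stop at a modification-time boundary
    // so files sharing a timestamp are never split across two reads.
    static String selectBatch(List<FileInfo> files, long lastCheckpoint, long sourceLimit) {
        List<FileInfo> eligible = files.stream()
                .filter(f -> f.modTime() > lastCheckpoint)
                .sorted(Comparator.comparingLong(FileInfo::modTime))
                .collect(Collectors.toList());
        long currentBytes = 0;
        long newCheckpoint = lastCheckpoint;
        List<FileInfo> picked = new ArrayList<>();
        for (FileInfo f : eligible) {
            if (currentBytes + f.len() >= sourceLimit && f.modTime() > newCheckpoint) {
                break;
            }
            newCheckpoint = f.modTime();
            currentBytes += f.len();
            picked.add(f);
        }
        return picked.stream().map(FileInfo::path).collect(Collectors.joining(","))
                + " @ checkpoint " + newCheckpoint;
    }

    public static void main(String[] args) {
        List<FileInfo> files = List.of(
                new FileInfo("/in/a", 100, 1),
                new FileInfo("/in/b", 100, 2),
                new FileInfo("/in/c", 100, 2), // same mtime as /in/b
                new FileInfo("/in/d", 100, 3));
        // With a 250-byte budget, /in/c still rides along with /in/b (same mtime)
        // and /in/d is deferred to the next read: prints "/in/a,/in/b,/in/c @ checkpoint 2"
        System.out.println(selectBatch(files, 0, 250));
    }
}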

Example 93 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class SchemaRegistryProvider method getTargetSchema.

@Override
public Schema getTargetSchema() {
    String registryUrl = config.getString(Config.SRC_SCHEMA_REGISTRY_URL_PROP);
    String targetRegistryUrl = config.getString(Config.TARGET_SCHEMA_REGISTRY_URL_PROP, registryUrl);
    try {
        return getSchema(targetRegistryUrl);
    } catch (IOException ioe) {
        throw new HoodieIOException("Error reading target schema from registry :" + registryUrl, ioe);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
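
The only logic here beyond the exception wrapping is the fallback from the target registry URL to the source registry URL. A tiny sketch of that defaulting with java.util.Properties; the property keys are made up for the demo and are not Hudi's config names:

import java.util.Properties;

public class RegistryFallbackDemo {

    // Same defaulting as above: use a dedicated target registry URL when
    // configured, otherwise fall back to the source registry URL.
    static String resolveTargetUrl(Properties props) {
        String sourceUrl = props.getProperty("demo.source.registry.url");
        return props.getProperty("demo.target.registry.url", sourceUrl);
    }

    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("demo.source.registry.url", "http://registry:8081");
        // No target URL configured, so the source URL is returned
        System.out.println(resolveTargetUrl(props));
    }
}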

Example 94 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class BaseHoodieWriteClient method archive.

/**
 * Trigger archival for the table. This ensures that the number of commits does not
 * grow unbounded over time.
 * @param table table to commit on.
 */
protected void archive(HoodieTable table) {
    if (!tableServicesEnabled(config)) {
        return;
    }
    try {
        // We cannot have unbounded commit files. Archive commits if we have to archive
        HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(config, table);
        archiver.archiveIfRequired(context);
    } catch (IOException ioe) {
        throw new HoodieIOException("Failed to archive", ioe);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
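
archiveIfRequired itself is not shown here. As a rough mental model, archival bounds the active timeline by moving the oldest instants into an archive once a maximum is exceeded, trimming down to a minimum to keep (Hudi has analogous min/max commits-to-keep settings). A toy, self-contained sketch of that bounding behavior, not the HoodieTimelineArchiver implementation:

import java.util.ArrayList;
import java.util.List;

public class ArchiveIfRequiredDemo {

    // Toy model only: once the active timeline exceeds maxToKeep entries,
    // move the oldest ones into the archive until minToKeep remain.
    static void archiveIfRequired(List<String> active, List<String> archived,
                                  int maxToKeep, int minToKeep) {
        if (active.size() > maxToKeep) {
            while (active.size() > minToKeep) {
                archived.add(active.remove(0)); // oldest instants first
            }
        }
    }

    public static void main(String[] args) {
        List<String> active = new ArrayList<>(List.of("c1", "c2", "c3", "c4", "c5", "c6"));
        List<String> archived = new ArrayList<>();
        archiveIfRequired(active, archived, 5, 3);
        // prints active=[c4, c5, c6] archived=[c1, c2, c3]
        System.out.println("active=" + active + " archived=" + archived);
    }
}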

Example 95 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class BaseHoodieWriteClient method setWriteSchemaForDeletes.

/**
 * Sets write schema from last instant since deletes may not have schema set in the config.
 */
protected void setWriteSchemaForDeletes(HoodieTableMetaClient metaClient) {
    try {
        HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
        Option<HoodieInstant> lastInstant = activeTimeline.filterCompletedInstants().filter(s -> s.getAction().equals(metaClient.getCommitActionType()) || s.getAction().equals(HoodieActiveTimeline.REPLACE_COMMIT_ACTION)).lastInstant();
        if (lastInstant.isPresent()) {
            HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class);
            if (commitMetadata.getExtraMetadata().containsKey(HoodieCommitMetadata.SCHEMA_KEY)) {
                config.setSchema(commitMetadata.getExtraMetadata().get(HoodieCommitMetadata.SCHEMA_KEY));
            } else {
                throw new HoodieIOException("Latest commit does not have any schema in commit metadata");
            }
        } else {
            throw new HoodieIOException("Deletes issued without any prior commits");
        }
    } catch (IOException e) {
        throw new HoodieIOException("IOException thrown while reading last commit metadata", e);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieRestorePlan(org.apache.hudi.avro.model.HoodieRestorePlan) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieException(org.apache.hudi.exception.HoodieException) HoodiePendingRollbackInfo(org.apache.hudi.common.HoodiePendingRollbackInfo) TransactionManager(org.apache.hudi.client.transaction.TransactionManager) Logger(org.apache.log4j.Logger) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) ParseException(java.text.ParseException) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) SupportsUpgradeDowngrade(org.apache.hudi.table.upgrade.SupportsUpgradeDowngrade) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) TableServiceType(org.apache.hudi.common.model.TableServiceType) HoodieMetrics(org.apache.hudi.metrics.HoodieMetrics) RollbackUtils(org.apache.hudi.table.action.rollback.RollbackUtils) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) Collection(java.util.Collection) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) SavepointHelpers(org.apache.hudi.table.action.savepoint.SavepointHelpers) Stream(java.util.stream.Stream) EmbeddedTimelineService(org.apache.hudi.client.embedded.EmbeddedTimelineService) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) Timer(com.codahale.metrics.Timer) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) HoodieWriteCommitCallbackMessage(org.apache.hudi.callback.common.HoodieWriteCommitCallbackMessage) HoodieRestoreException(org.apache.hudi.exception.HoodieRestoreException) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) CommitUtils(org.apache.hudi.common.util.CommitUtils) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) AsyncCleanerService(org.apache.hudi.async.AsyncCleanerService) LinkedHashMap(java.util.LinkedHashMap) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) UpgradeDowngrade(org.apache.hudi.table.upgrade.UpgradeDowngrade) AsyncArchiveService(org.apache.hudi.async.AsyncArchiveService) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) HoodieSavepointException(org.apache.hudi.exception.HoodieSavepointException) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieTableVersion(org.apache.hudi.common.table.HoodieTableVersion) TransactionUtils(org.apache.hudi.client.utils.TransactionUtils) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HeartbeatUtils(org.apache.hudi.client.heartbeat.HeartbeatUtils) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieWriteCommitCallback(org.apache.hudi.callback.HoodieWriteCommitCallback) HoodieCommitCallbackFactory(org.apache.hudi.callback.util.HoodieCommitCallbackFactory) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) Pair(org.apache.hudi.common.util.collection.Pair)
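
Stripped of the Hudi types, the lookup is: take the last completed commit's extra metadata, read its schema entry, and fail loudly at each missing step. A self-contained sketch; the "schema" key stands in for HoodieCommitMetadata.SCHEMA_KEY and the exception types are simplified:

import java.util.Map;
import java.util.Optional;

public class SchemaFromCommitDemo {

    // Mirrors the lookup above: fetch the writer schema from the last completed
    // commit's extra metadata, failing loudly when no commit or no schema exists.
    static String resolveWriteSchema(Optional<Map<String, String>> lastCommitExtraMetadata) {
        Map<String, String> extra = lastCommitExtraMetadata.orElseThrow(
                () -> new IllegalStateException("Deletes issued without any prior commits"));
        String schema = extra.get("schema"); // "schema" stands in for HoodieCommitMetadata.SCHEMA_KEY
        if (schema == null) {
            throw new IllegalStateException("Latest commit does not have any schema in commit metadata");
        }
        return schema;
    }

    public static void main(String[] args) {
        Map<String, String> extra = Map.of("schema", "{\"type\": \"record\", \"name\": \"demo\", \"fields\": []}");
        System.out.println(resolveWriteSchema(Optional.of(extra)));
    }
}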

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139
IOException (java.io.IOException): 127
Path (org.apache.hadoop.fs.Path): 45
List (java.util.List): 31
ArrayList (java.util.ArrayList): 30
Option (org.apache.hudi.common.util.Option): 27
Collectors (java.util.stream.Collectors): 26
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
LogManager (org.apache.log4j.LogManager): 25
Logger (org.apache.log4j.Logger): 25
Map (java.util.Map): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
GenericRecord (org.apache.avro.generic.GenericRecord): 19
HashSet (java.util.HashSet): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
Set (java.util.Set): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17
HoodieException (org.apache.hudi.exception.HoodieException): 17