
Example 6 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

Usage in class HoodieDeltaStreamer, method sync().

/**
 * Main method to start syncing.
 *
 * @throws Exception if bootstrap or the delta sync fails
 */
public void sync() throws Exception {
    if (bootstrapExecutor.isPresent()) {
        LOG.info("Performing bootstrap. Source=" + bootstrapExecutor.get().getBootstrapConfig().getBootstrapSourceBasePath());
        bootstrapExecutor.get().execute();
    } else {
        if (cfg.continuousMode) {
            deltaSyncService.ifPresent(ds -> {
                ds.start(this::onDeltaSyncShutdown);
                try {
                    ds.waitForShutdown();
                } catch (Exception e) {
                    throw new HoodieException(e.getMessage(), e);
                }
            });
            LOG.info("Delta Sync shutting down");
        } else {
            LOG.info("Delta Streamer running only single round");
            try {
                deltaSyncService.ifPresent(ds -> {
                    try {
                        ds.getDeltaSync().syncOnce();
                    } catch (IOException e) {
                        throw new HoodieIOException(e.getMessage(), e);
                    }
                });
            } catch (Exception ex) {
                LOG.error("Got error running delta sync once. Shutting down", ex);
                throw ex;
            } finally {
                deltaSyncService.ifPresent(DeltaSyncService::close);
                LOG.info("Shut down delta streamer");
            }
        }
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodieClusteringUpdateException(org.apache.hudi.exception.HoodieClusteringUpdateException)
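
Both branches above re-throw checked exceptions as unchecked Hudi exceptions inside ifPresent lambdas: java.util.function.Consumer#accept declares no checked exceptions, so an IOException raised inside the lambda cannot escape as-is. A minimal, self-contained sketch of that pattern (the Demo* classes are hypothetical stand-ins for Hudi's exception types, which the lambda usage above implies are unchecked; java.util.Optional stands in for Hudi's Option):

import java.io.IOException;
import java.util.Optional;

// Hypothetical stand-ins for org.apache.hudi.exception.HoodieException /
// HoodieIOException, assumed unchecked as in the snippet above.
class DemoHoodieException extends RuntimeException {
    DemoHoodieException(String msg, Throwable cause) { super(msg, cause); }
}
class DemoHoodieIOException extends DemoHoodieException {
    DemoHoodieIOException(String msg, IOException cause) { super(msg, cause); }
}

public class LambdaWrapDemo {
    // A checked-exception API, playing the role of DeltaSync#syncOnce.
    static void syncOnce() throws IOException {
        throw new IOException("simulated I/O failure");
    }

    public static void main(String[] args) {
        Optional<String> service = Optional.of("delta-sync");
        try {
            // Consumer#accept declares no checked exceptions, so the checked
            // IOException must be re-thrown as an unchecked wrapper inside the lambda.
            service.ifPresent(name -> {
                try {
                    syncOnce();
                } catch (IOException e) {
                    throw new DemoHoodieIOException(e.getMessage(), e);
                }
            });
        } catch (DemoHoodieIOException e) {
            System.out.println("Caught wrapped exception: " + e.getMessage());
        }
    }
}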

Example 7 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

Usage in class HoodieRepairTool, method run().

public boolean run() {
    Option<String> startingInstantOption = Option.ofNullable(cfg.startingInstantTime);
    Option<String> endingInstantOption = Option.ofNullable(cfg.endingInstantTime);
    if (startingInstantOption.isPresent() && endingInstantOption.isPresent()) {
        LOG.info(String.format("Start repairing completed instants between %s and %s (inclusive)", startingInstantOption.get(), endingInstantOption.get()));
    } else if (startingInstantOption.isPresent()) {
        LOG.info(String.format("Start repairing completed instants from %s (inclusive)", startingInstantOption.get()));
    } else if (endingInstantOption.isPresent()) {
        LOG.info(String.format("Start repairing completed instants till %s (inclusive)", endingInstantOption.get()));
    } else {
        LOG.info("Start repairing all completed instants");
    }
    try {
        Mode mode = Mode.valueOf(cfg.runningMode.toUpperCase());
        switch(mode) {
            case REPAIR:
                LOG.info(" ****** The repair tool is in REPAIR mode, dangling data and logs files " + "not belonging to any commit are going to be DELETED from the table ******");
                if (checkBackupPathForRepair() < 0) {
                    LOG.error("Backup path check failed.");
                    return false;
                }
                return doRepair(startingInstantOption, endingInstantOption, false);
            case DRY_RUN:
                LOG.info(" ****** The repair tool is in DRY_RUN mode, " + "only LOOKING FOR dangling data and log files from the table ******");
                return doRepair(startingInstantOption, endingInstantOption, true);
            case UNDO:
                if (checkBackupPathAgainstBasePath() < 0) {
                    LOG.error("Backup path check failed.");
                    return false;
                }
                return undoRepair();
            default:
                LOG.info("Unsupported running mode [" + cfg.runningMode + "], quit the job directly");
                return false;
        }
    } catch (IOException e) {
        throw new HoodieIOException("Unable to repair table in " + cfg.basePath, e);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
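
One subtlety in run(): Mode.valueOf(cfg.runningMode.toUpperCase()) throws IllegalArgumentException for an unknown name before the switch executes, and that exception is not caught here (only IOException is), so the default branch can fire only for a Mode constant that lacks its own case. A short sketch of that behavior (the Mode enum below is a local stand-in for the tool's enum):

public class ModeParseDemo {
    // Local stand-in for HoodieRepairTool's running-mode enum.
    enum Mode { REPAIR, DRY_RUN, UNDO }

    public static void main(String[] args) {
        // Valid name: parsed successfully before any switch would run.
        System.out.println(Mode.valueOf("repair".toUpperCase())); // prints REPAIR

        try {
            Mode.valueOf("COMPACT"); // not a declared constant
        } catch (IllegalArgumentException e) {
            // Thrown by valueOf itself, so a switch default branch is never reached.
            System.out.println("Rejected: " + e.getMessage());
        }
    }
}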

Example 8 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

Usage in class MarkerDirState, method writeMarkerTypeToFile().

/**
 * Writes marker type, "TIMELINE_SERVER_BASED", to file.
 */
private void writeMarkerTypeToFile() {
    Path dirPath = new Path(markerDirPath);
    try {
        if (!fileSystem.exists(dirPath)) {
            // There is no existing marker directory, create a new directory and write marker type
            fileSystem.mkdirs(dirPath);
            MarkerUtils.writeMarkerTypeToFile(MarkerType.TIMELINE_SERVER_BASED, fileSystem, markerDirPath);
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to write marker type file in " + markerDirPath + ": " + e.getMessage(), e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
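
The exists() check here does double duty: FileSystem#mkdirs is already idempotent, but guarding both calls ensures the marker-type file is written only when the directory is first created. A runnable sketch of the same check-then-create shape against the local filesystem (the path is illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MarkerDirDemo {
    public static void main(String[] args) throws IOException {
        // Local filesystem stands in for the table's storage.
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path markerDir = new Path("/tmp/hudi-demo/.temp/markers");

        // Same shape as writeMarkerTypeToFile: create the directory and write
        // the one-time marker metadata only on first use.
        if (!fs.exists(markerDir)) {
            fs.mkdirs(markerDir);
            // In Hudi this is where MarkerUtils.writeMarkerTypeToFile runs.
            System.out.println("Created marker dir: " + markerDir);
        } else {
            System.out.println("Marker dir already exists: " + markerDir);
        }
    }
}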

Example 9 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

Usage in class HiveIncrPullSource, method fetchNewData().

@Override
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCheckpointStr, long sourceLimit) {
    try {
        // find the source commit to pull
        Option<String> commitToPull = findCommitToPull(lastCheckpointStr);
        if (!commitToPull.isPresent()) {
            return new InputBatch<>(Option.empty(), lastCheckpointStr.isPresent() ? lastCheckpointStr.get() : "");
        }
        // read the files out.
        List<FileStatus> commitDeltaFiles = Arrays.asList(fs.listStatus(new Path(incrPullRootPath, commitToPull.get())));
        String pathStr = commitDeltaFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));
        JavaPairRDD<AvroKey, NullWritable> avroRDD = sparkContext.newAPIHadoopFile(pathStr, AvroKeyInputFormat.class, AvroKey.class, NullWritable.class, sparkContext.hadoopConfiguration());
        sparkContext.setJobGroup(this.getClass().getSimpleName(), "Fetch new data");
        return new InputBatch<>(Option.of(avroRDD.keys().map(r -> ((GenericRecord) r.datum()))), String.valueOf(commitToPull.get()));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Arrays(java.util.Arrays) NullWritable(org.apache.hadoop.io.NullWritable) FileSystem(org.apache.hadoop.fs.FileSystem) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) DataSourceUtils(org.apache.hudi.DataSourceUtils) AvroKey(org.apache.avro.mapred.AvroKey) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkSession(org.apache.spark.sql.SparkSession) SchemaProvider(org.apache.hudi.utilities.schema.SchemaProvider) GenericRecord(org.apache.avro.generic.GenericRecord) TypedProperties(org.apache.hudi.common.config.TypedProperties) IOException(java.io.IOException) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) Collectors(java.util.stream.Collectors) AvroKeyInputFormat(org.apache.avro.mapreduce.AvroKeyInputFormat) List(java.util.List) HiveIncrementalPuller(org.apache.hudi.utilities.HiveIncrementalPuller) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils)
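
Note the checkpoint contract in fetchNewData: when no new commit is found, it returns an empty batch that still carries the previous checkpoint, so the caller's progress marker never moves backwards. A tiny sketch of that contract (all types here are hypothetical simplifications; java.util.Optional replaces Hudi's Option):

import java.util.Optional;

public class CheckpointDemo {
    // Simplified stand-in for the InputBatch type used above.
    record Batch(Optional<String> payload, String checkpoint) {}

    static Batch fetchNewData(Optional<String> lastCheckpoint, Optional<String> commitToPull) {
        if (commitToPull.isEmpty()) {
            // Nothing new: empty payload, but echo the old checkpoint unchanged.
            return new Batch(Optional.empty(), lastCheckpoint.orElse(""));
        }
        return new Batch(Optional.of("records-of-" + commitToPull.get()), commitToPull.get());
    }

    public static void main(String[] args) {
        System.out.println(fetchNewData(Optional.of("001"), Optional.empty()).checkpoint());   // 001
        System.out.println(fetchNewData(Optional.of("001"), Optional.of("002")).checkpoint()); // 002
    }
}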

Example 10 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

Usage in class CompactionAdminClient, method validateCompactionOperation().

/**
 * Check if a compaction operation is valid.
 *
 * @param metaClient Hoodie Table Meta client
 * @param compactionInstant Compaction Instant
 * @param operation Compaction Operation
 * @param fsViewOpt File System View
 */
private ValidationOpResult validateCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt) throws IOException {
    HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    Option<HoodieInstant> lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant();
    try {
        if (lastInstant.isPresent()) {
            Option<FileSlice> fileSliceOptional = Option.fromJavaOptional(fileSystemView.getLatestUnCompactedFileSlices(operation.getPartitionPath()).filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst());
            if (fileSliceOptional.isPresent()) {
                FileSlice fs = fileSliceOptional.get();
                Option<HoodieBaseFile> df = fs.getBaseFile();
                if (operation.getDataFileName().isPresent()) {
                    String expPath = metaClient.getFs().getFileStatus(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), new Path(operation.getDataFileName().get()))).getPath().toString();
                    ValidationUtils.checkArgument(df.isPresent(), "Data File must be present. File Slice was : " + fs + ", operation :" + operation);
                    ValidationUtils.checkArgument(df.get().getPath().equals(expPath), "Base Path in operation is specified as " + expPath + " but got path " + df.get().getPath());
                }
                Set<HoodieLogFile> logFilesInFileSlice = fs.getLogFiles().collect(Collectors.toSet());
                Set<HoodieLogFile> logFilesInCompactionOp = operation.getDeltaFileNames().stream().map(dp -> {
                    try {
                        FileStatus[] fileStatuses = metaClient.getFs().listStatus(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), new Path(dp)));
                        ValidationUtils.checkArgument(fileStatuses.length == 1, "Expect only 1 file-status");
                        return new HoodieLogFile(fileStatuses[0]);
                    } catch (FileNotFoundException fe) {
                        throw new CompactionValidationException(fe.getMessage());
                    } catch (IOException ioe) {
                        throw new HoodieIOException(ioe.getMessage(), ioe);
                    }
                }).collect(Collectors.toSet());
                Set<HoodieLogFile> missing = logFilesInCompactionOp.stream().filter(lf -> !logFilesInFileSlice.contains(lf)).collect(Collectors.toSet());
                ValidationUtils.checkArgument(missing.isEmpty(), "Not all log files specified in the compaction operation are present. Missing :" + missing + ", Exp :" + logFilesInCompactionOp + ", Got :" + logFilesInFileSlice);
                Set<HoodieLogFile> diff = logFilesInFileSlice.stream().filter(lf -> !logFilesInCompactionOp.contains(lf)).collect(Collectors.toSet());
                ValidationUtils.checkArgument(diff.stream().allMatch(lf -> lf.getBaseCommitTime().equals(compactionInstant)), "There are some log-files which are neither specified in compaction plan " + "nor present after compaction request instant. Some of these :" + diff);
            } else {
                throw new CompactionValidationException("Unable to find file-slice for file-id (" + operation.getFileId() + " Compaction operation is invalid.");
            }
        } else {
            throw new CompactionValidationException("Unable to find any committed instant. Compaction Operation may be pointing to stale file-slices");
        }
    } catch (CompactionValidationException | IllegalArgumentException e) {
        return new ValidationOpResult(operation, false, Option.of(e));
    }
    return new ValidationOpResult(operation, true, Option.empty());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) OperationResult(org.apache.hudi.table.action.compact.OperationResult) FileStatus(org.apache.hadoop.fs.FileStatus) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair)
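
The catch clause at the end is the key design choice: expected validation failures (CompactionValidationException, plus IllegalArgumentException from ValidationUtils.checkArgument) are converted into a failed ValidationOpResult carrying the cause, while genuine I/O errors are re-thrown as unchecked HoodieIOException and propagate. A condensed sketch of that report-don't-throw pattern (the classes below are simplified stand-ins):

import java.util.Optional;

public class ValidationDemo {
    // Simplified stand-in for CompactionAdminClient.ValidationOpResult.
    record Result(boolean success, Optional<Exception> error) {}

    static Result validate(int fileStatusCount) {
        try {
            // Mirrors ValidationUtils.checkArgument(fileStatuses.length == 1, ...).
            if (fileStatusCount != 1) {
                throw new IllegalArgumentException("Expect only 1 file-status");
            }
            return new Result(true, Optional.empty());
        } catch (IllegalArgumentException e) {
            // Expected validation failure: reported to the caller, not re-thrown.
            return new Result(false, Optional.of(e));
        }
    }

    public static void main(String[] args) {
        System.out.println(validate(1)); // Result[success=true, error=Optional.empty]
        System.out.println(validate(2)); // Result[success=false, error=Optional[...]]
    }
}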

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139
IOException (java.io.IOException): 127
Path (org.apache.hadoop.fs.Path): 45
List (java.util.List): 31
ArrayList (java.util.ArrayList): 30
Option (org.apache.hudi.common.util.Option): 27
Collectors (java.util.stream.Collectors): 26
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
LogManager (org.apache.log4j.LogManager): 25
Logger (org.apache.log4j.Logger): 25
Map (java.util.Map): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
GenericRecord (org.apache.avro.generic.GenericRecord): 19
HashSet (java.util.HashSet): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
Set (java.util.Set): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17
HoodieException (org.apache.hudi.exception.HoodieException): 17