
Example 16 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HoodieActiveTimeline, method createRequestedReplaceCommit:

public void createRequestedReplaceCommit(String instantTime, String actionType) {
    try {
        HoodieInstant instant = new HoodieInstant(State.REQUESTED, actionType, instantTime);
        LOG.info("Creating a new instant " + instant);
        // Create the requested replace file
        createFileInMetaPath(instant.getFileName(), TimelineMetadataUtils.serializeRequestedReplaceMetadata(new HoodieRequestedReplaceMetadata()), false);
    } catch (IOException e) {
        throw new HoodieIOException("Error create requested replace commit ", e);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata)
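Since HoodieIOException is unchecked, callers of createRequestedReplaceCommit are not forced to handle it, but may still catch it to add context. A minimal caller sketch, assuming a scheduleReplace helper of our own (the helper and its error handling are illustrative, not Hudi API; HoodieTimeline.REPLACE_COMMIT_ACTION is the standard action constant):

import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.exception.HoodieIOException;

public class RequestedReplaceExample {
    // Hypothetical helper: schedules a replace commit and adds context on failure.
    static void scheduleReplace(HoodieActiveTimeline timeline, String instantTime) {
        try {
            timeline.createRequestedReplaceCommit(instantTime, HoodieTimeline.REPLACE_COMMIT_ACTION);
        } catch (HoodieIOException e) {
            // The wrapped IOException remains available as the cause for diagnostics.
            throw new IllegalStateException("Failed to schedule replace commit at " + instantTime, e);
        }
    }
}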

Example 17 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HoodieActiveTimeline, method transitionState:

private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant, Option<byte[]> data, boolean allowRedundantTransitions) {
    ValidationUtils.checkArgument(fromInstant.getTimestamp().equals(toInstant.getTimestamp()));
    try {
        if (metaClient.getTimelineLayoutVersion().isNullVersion()) {
            // Re-create the .inflight file by opening a new file and writing the commit metadata into it
            createFileInMetaPath(fromInstant.getFileName(), data, allowRedundantTransitions);
            Path fromInstantPath = new Path(metaClient.getMetaPath(), fromInstant.getFileName());
            Path toInstantPath = new Path(metaClient.getMetaPath(), toInstant.getFileName());
            boolean success = metaClient.getFs().rename(fromInstantPath, toInstantPath);
            if (!success) {
                throw new HoodieIOException("Could not rename " + fromInstantPath + " to " + toInstantPath);
            }
        } else {
            // Ensure the old state exists in the timeline
            LOG.info("Checking whether file exists: " + new Path(metaClient.getMetaPath(), fromInstant.getFileName()));
            ValidationUtils.checkArgument(metaClient.getFs().exists(new Path(metaClient.getMetaPath(), fromInstant.getFileName())));
            // Use write-once semantics to create the target file
            if (allowRedundantTransitions) {
                FileIOUtils.createFileInPath(metaClient.getFs(), new Path(metaClient.getMetaPath(), toInstant.getFileName()), data);
            } else {
                createImmutableFileInPath(new Path(metaClient.getMetaPath(), toInstant.getFileName()), data);
            }
            LOG.info("Create new file for toInstant ?" + new Path(metaClient.getMetaPath(), toInstant.getFileName()));
        }
    } catch (IOException e) {
        throw new HoodieIOException("Could not complete " + fromInstant, e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
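When the timeline layout version is non-null, the target instant file is created write-once rather than renamed into place, so a concurrent transition cannot silently overwrite a completed instant. A minimal sketch of that idea using only the plain Hadoop FileSystem API (the writeOnce helper name is an assumption; Hudi's actual implementation is createImmutableFileInPath):

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteOnceExample {
    // Creates the target file only if it does not already exist: with
    // overwrite=false, FileSystem.create fails instead of truncating.
    static void writeOnce(FileSystem fs, Path target, byte[] data) throws IOException {
        try (FSDataOutputStream out = fs.create(target, false)) {
            out.write(data);
        }
    }
}

The allowRedundantTransitions branch instead calls FileIOUtils.createFileInPath, which, as the flag's name suggests, apparently tolerates a repeated transition to the same target file.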

Example 18 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HoodieArchivedTimeline, method loadInstants:

/**
 * Reads the selected instants. Do NOT use this directly; use one of the helper methods above.
 * If loadInstantDetails is set to true, this also updates the 'readCommits' map with commit details.
 * If filter is specified, only the filtered instants are loaded.
 * If commitsFilter is specified, only the filtered records are loaded.
 */
private List<HoodieInstant> loadInstants(TimeRangeFilter filter, boolean loadInstantDetails, Function<GenericRecord, Boolean> commitsFilter) {
    try {
        // List all files
        FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
        // Sort files by version suffix in reverse (implies reverse chronological order)
        Arrays.sort(fsStatuses, new ArchiveFileVersionComparator());
        Set<HoodieInstant> instantsInRange = new HashSet<>();
        for (FileStatus fs : fsStatuses) {
            // Read the archived file
            try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(metaClient.getFs(), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
                int instantsInPreviousFile = instantsInRange.size();
                // Read the avro blocks
                while (reader.hasNext()) {
                    HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
                    // Read and filter each record in the block (e.g., by the
                    // startTime/endTime of the records in the block)
                    try (ClosableIterator<IndexedRecord> itr = blk.getRecordItr()) {
                        StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
                                .filter(r -> commitsFilter.apply((GenericRecord) r))
                                .map(r -> readCommit((GenericRecord) r, loadInstantDetails))
                                .filter(c -> filter == null || filter.isInRange(c))
                                .forEach(instantsInRange::add);
                    }
                }
                if (filter != null) {
                    int instantsInCurrentFile = instantsInRange.size() - instantsInPreviousFile;
                    if (instantsInPreviousFile > 0 && instantsInCurrentFile == 0) {
                        // This signals that we crossed the lower bound of the desired time window.
                        break;
                    }
                }
            } catch (Exception originalException) {
                // An uncompleted merged archive file (left behind while merging small
                // archive files) can fail the read; we need to ignore that case here.
                try {
                    Path planPath = new Path(metaClient.getArchivePath(), MERGE_ARCHIVE_PLAN_NAME);
                    HoodieWrapperFileSystem fileSystem = metaClient.getFs();
                    if (fileSystem.exists(planPath)) {
                        HoodieMergeArchiveFilePlan plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fileSystem, planPath).get(), HoodieMergeArchiveFilePlan.class);
                        String mergedArchiveFileName = plan.getMergedArchiveFileName();
                        if (!StringUtils.isNullOrEmpty(mergedArchiveFileName) && fs.getPath().getName().equalsIgnoreCase(mergedArchiveFileName)) {
                            LOG.warn("Catch exception because of reading uncompleted merging archive file " + mergedArchiveFileName + ". Ignore it here.");
                            continue;
                        }
                    }
                    throw originalException;
                } catch (Exception e) {
                    // For example, a corrupted archive file and a corrupted plan may both exist; rethrow the original exception.
                    throw originalException;
                }
            }
        }
        ArrayList<HoodieInstant> result = new ArrayList<>(instantsInRange);
        Collections.sort(result);
        return result;
    } catch (IOException e) {
        throw new HoodieIOException("Could not load archived commit timeline from path " + metaClient.getArchivePath(), e);
    }
}
Also used : HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) Spliterators(java.util.Spliterators) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) Matcher(java.util.regex.Matcher) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) StreamSupport(java.util.stream.StreamSupport) Nonnull(javax.annotation.Nonnull) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Set(java.util.Set) IOException(java.io.IOException) HoodieMergeArchiveFilePlan(org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) HoodiePartitionMetadata(org.apache.hudi.common.model.HoodiePartitionMetadata) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Collections(java.util.Collections) Spliterator(java.util.Spliterator)
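The inner loop converts each block's record iterator into a parallel Stream via Spliterators.spliteratorUnknownSize; the same idiom works for any plain Iterator. A self-contained sketch of the pattern (names and data here are made up for illustration):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

public class IteratorStreamExample {
    // Wraps an arbitrary Iterator as a Stream; the 'true' flag requests a
    // parallel stream, matching the flag used in loadInstants above.
    static <T> List<T> collectMatching(Iterator<T> itr, Predicate<T> keep) {
        return StreamSupport.stream(
                Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
            .filter(keep)
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<String> entries = Arrays.asList("commit1", "clean2", "commit3");
        // Keep only commit entries, analogous to commitsFilter in loadInstants.
        System.out.println(collectMatching(entries.iterator(), s -> s.startsWith("commit")));
    }
}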

Example 19 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class MarkerUtils, method readTimelineServerBasedMarkersFromFileSystem:

/**
 * Reads files containing the markers written by the timeline-server-based marker mechanism.
 *
 * @param markerDir   marker directory.
 * @param fileSystem  file system to use.
 * @param context     instance of {@link HoodieEngineContext} to use.
 * @param parallelism parallelism to use.
 * @return A {@code Map} of file name to the set of markers stored in the file.
 */
public static Map<String, Set<String>> readTimelineServerBasedMarkersFromFileSystem(String markerDir, FileSystem fileSystem, HoodieEngineContext context, int parallelism) {
    Path dirPath = new Path(markerDir);
    try {
        if (fileSystem.exists(dirPath)) {
            Predicate<FileStatus> prefixFilter = fileStatus -> fileStatus.getPath().getName().startsWith(MARKERS_FILENAME_PREFIX);
            Predicate<FileStatus> markerTypeFilter = fileStatus -> !fileStatus.getPath().getName().equals(MARKER_TYPE_FILENAME);
            return FSUtils.parallelizeSubPathProcess(context, fileSystem, dirPath, parallelism, prefixFilter.and(markerTypeFilter), pairOfSubPathAndConf -> {
                String markersFilePathStr = pairOfSubPathAndConf.getKey();
                SerializableConfiguration conf = pairOfSubPathAndConf.getValue();
                return readMarkersFromFile(new Path(markersFilePathStr), conf);
            });
        }
        return new HashMap<>();
    } catch (IOException ioe) {
        throw new HoodieIOException(ioe.getMessage(), ioe);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) BufferedWriter(java.io.BufferedWriter) Predicate(java.util.function.Predicate) HoodieException(org.apache.hudi.exception.HoodieException) Set(java.util.Set) IOException(java.io.IOException) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) FileStatus(org.apache.hadoop.fs.FileStatus) StandardCharsets(java.nio.charset.StandardCharsets) HashSet(java.util.HashSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Logger(org.apache.log4j.Logger) MarkerType(org.apache.hudi.common.table.marker.MarkerType) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) OutputStreamWriter(java.io.OutputStreamWriter) FileIOUtils.closeQuietly(org.apache.hudi.common.util.FileIOUtils.closeQuietly) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)
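Note how the two FileStatus predicates are composed with Predicate.and, so only files that carry the markers prefix and are not the MARKERS.type file get processed. A minimal, self-contained illustration of the same composition over plain strings (the file names are made up, and the literal "MARKERS" prefix is an assumption about MARKERS_FILENAME_PREFIX; only the MARKERS.type name is confirmed by the javadoc below):

import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;

public class MarkerFilterExample {
    public static void main(String[] args) {
        Predicate<String> prefixFilter = name -> name.startsWith("MARKERS");
        Predicate<String> markerTypeFilter = name -> !name.equals("MARKERS.type");
        List<String> files = Arrays.asList("MARKERS0", "MARKERS.type", "MARKERS1", "other.log");
        // The combined filter keeps MARKERS0 and MARKERS1 and drops the rest.
        System.out.println(files.stream()
                .filter(prefixFilter.and(markerTypeFilter))
                .collect(Collectors.toList()));
    }
}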

Example 20 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class MarkerUtils, method readMarkerType:

/**
 * Reads the marker type from the `MARKERS.type` file.
 *
 * @param fileSystem file system to use.
 * @param markerDir  marker directory.
 * @return the marker type, or empty if the marker type file does not exist.
 */
public static Option<MarkerType> readMarkerType(FileSystem fileSystem, String markerDir) {
    Path markerTypeFilePath = new Path(markerDir, MARKER_TYPE_FILENAME);
    FSDataInputStream fsDataInputStream = null;
    Option<MarkerType> content = Option.empty();
    try {
        if (!doesMarkerTypeFileExist(fileSystem, markerDir)) {
            return Option.empty();
        }
        fsDataInputStream = fileSystem.open(markerTypeFilePath);
        content = Option.of(MarkerType.valueOf(FileIOUtils.readAsUTFString(fsDataInputStream)));
    } catch (IOException e) {
        throw new HoodieIOException("Cannot read marker type file " + markerTypeFilePath.toString() + "; " + e.getMessage(), e);
    } finally {
        closeQuietly(fsDataInputStream);
    }
    return content;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) MarkerType(org.apache.hudi.common.table.marker.MarkerType) IOException(java.io.IOException)
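A caller typically branches on the returned Option rather than unwrapping it blindly. A hedged sketch, assuming a DIRECT default when the MARKERS.type file is absent (the resolveMarkerType helper and its fallback policy are assumptions for illustration, not Hudi's own behavior):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.util.MarkerUtils;
import org.apache.hudi.common.util.Option;

public class MarkerTypeExample {
    // Hypothetical helper: resolves the marker type for a marker directory,
    // defaulting to DIRECT when the MARKERS.type file does not exist
    // (the default is an assumption made for this sketch).
    static MarkerType resolveMarkerType(FileSystem fs, String markerDir) {
        Option<MarkerType> markerType = MarkerUtils.readMarkerType(fs, markerDir);
        return markerType.orElse(MarkerType.DIRECT);
    }
}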

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139 usages
IOException (java.io.IOException): 127 usages
Path (org.apache.hadoop.fs.Path): 45 usages
List (java.util.List): 31 usages
ArrayList (java.util.ArrayList): 30 usages
Option (org.apache.hudi.common.util.Option): 27 usages
Collectors (java.util.stream.Collectors): 26 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26 usages
Pair (org.apache.hudi.common.util.collection.Pair): 25 usages
LogManager (org.apache.log4j.LogManager): 25 usages
Logger (org.apache.log4j.Logger): 25 usages
Map (java.util.Map): 21 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 20 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 19 usages
HashSet (java.util.HashSet): 18 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18 usages
Set (java.util.Set): 17 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17 usages
HoodieException (org.apache.hudi.exception.HoodieException): 17 usages