Search in sources :

Example 1 with HoodieMergeArchiveFilePlan

Use of org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan in project hudi by apache.

From the class HoodieTimelineArchiver, the method verifyLastMergeArchiveFilesIfNecessary:

/**
 * Check for, and resolve, any failed or unfinished merge-small-archive-files operation.
 * @param context HoodieEngineContext, used to parallelize deleting the small archive files if necessary.
 * @throws IOException if reading the merge plan or deleting files fails.
 */
private void verifyLastMergeArchiveFilesIfNecessary(HoodieEngineContext context) throws IOException {
    if (shouldMergeSmallArchiveFies()) {
        Path planPath = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
        HoodieWrapperFileSystem fs = metaClient.getFs();
        // We need to either revert or complete the last action.
        if (fs.exists(planPath)) {
            HoodieMergeArchiveFilePlan plan = null;
            try {
                plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fs, planPath).get(), HoodieMergeArchiveFilePlan.class);
            } catch (IOException e) {
                LOG.warn("Parsing merge archive plan failed.", e);
                // A partial plan file means the last merge action failed while writing the plan file.
                fs.delete(planPath);
                return;
            }
            Path mergedArchiveFile = new Path(metaClient.getArchivePath(), plan.getMergedArchiveFileName());
            List<Path> candidates = plan.getCandidate().stream().map(Path::new).collect(Collectors.toList());
            if (candidateAllExists(candidates)) {
                // Revert the last action by deleting mergedArchiveFile if it exists.
                if (fs.exists(mergedArchiveFile)) {
                    fs.delete(mergedArchiveFile, false);
                }
            } else {
                // Try to complete the last action by deleting the candidate files.
                if (fs.exists(mergedArchiveFile)) {
                    deleteFilesParallelize(metaClient, plan.getCandidate(), context, true);
                }
            }
            fs.delete(planPath);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieMergeArchiveFilePlan(org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan)
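
The revert-or-complete decision above rests on candidateAllExists, whose body is not shown in this example. Below is a minimal sketch, inferred from the call site alone (treat it as an assumption, not the project's exact code): it only needs to confirm that every candidate small archive file still exists.

private boolean candidateAllExists(List<Path> candidates) throws IOException {
    // Sketch inferred from the call site above: if any candidate small archive
    // file is already gone, the previous merge got far enough to start deleting
    // candidates, so the merged archive file must be kept rather than reverted.
    for (Path archiveFile : candidates) {
        if (!metaClient.getFs().exists(archiveFile)) {
            return false;
        }
    }
    return true;
}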

Example 2 with HoodieMergeArchiveFilePlan

Use of org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan in project hudi by apache.

From the class HoodieArchivedTimeline, the method loadInstants:

/**
 * This method reads the selected instants. Do NOT use it directly; use one of the helper methods above.
 * If loadInstantDetails is set to true, this also updates the 'readCommits' map with commit details.
 * If filter is specified, only the filtered instants are loaded.
 * If commitsFilter is specified, only the filtered records are loaded.
 */
private List<HoodieInstant> loadInstants(TimeRangeFilter filter, boolean loadInstantDetails, Function<GenericRecord, Boolean> commitsFilter) {
    try {
        // List all files
        FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
        // Sort files by version suffix in reverse (implies reverse chronological order)
        Arrays.sort(fsStatuses, new ArchiveFileVersionComparator());
        Set<HoodieInstant> instantsInRange = new HashSet<>();
        for (FileStatus fs : fsStatuses) {
            // Read the archived file
            try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(metaClient.getFs(), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
                int instantsInPreviousFile = instantsInRange.size();
                // Read the avro blocks
                while (reader.hasNext()) {
                    HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
                    // The block stores no record-level metadata (such as startTime, endTime of its records), so records must be parsed to filter them.
                    try (ClosableIterator<IndexedRecord> itr = blk.getRecordItr()) {
                        StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
                                .filter(r -> commitsFilter.apply((GenericRecord) r))
                                .map(r -> readCommit((GenericRecord) r, loadInstantDetails))
                                .filter(c -> filter == null || filter.isInRange(c))
                                .forEach(instantsInRange::add);
                    }
                }
                if (filter != null) {
                    int instantsInCurrentFile = instantsInRange.size() - instantsInPreviousFile;
                    if (instantsInPreviousFile > 0 && instantsInCurrentFile == 0) {
                        // This signals that we crossed the lower bound of the desired time window.
                        break;
                    }
                }
            } catch (Exception originalException) {
                // An unfinished merge of small archive files can leave a partially written archive file; reading it fails, and that specific failure should be ignored here.
                try {
                    Path planPath = new Path(metaClient.getArchivePath(), MERGE_ARCHIVE_PLAN_NAME);
                    HoodieWrapperFileSystem fileSystem = metaClient.getFs();
                    if (fileSystem.exists(planPath)) {
                        HoodieMergeArchiveFilePlan plan = TimelineMetadataUtils.deserializeAvroMetadata(FileIOUtils.readDataFromPath(fileSystem, planPath).get(), HoodieMergeArchiveFilePlan.class);
                        String mergedArchiveFileName = plan.getMergedArchiveFileName();
                        if (!StringUtils.isNullOrEmpty(mergedArchiveFileName) && fs.getPath().getName().equalsIgnoreCase(mergedArchiveFileName)) {
                            LOG.warn("Catch exception because of reading uncompleted merging archive file " + mergedArchiveFileName + ". Ignore it here.");
                            continue;
                        }
                    }
                    throw originalException;
                } catch (Exception e) {
                    // For example, both a corrupted archive file and a corrupted plan file may exist; rethrow the original exception.
                    throw originalException;
                }
            }
        }
        ArrayList<HoodieInstant> result = new ArrayList<>(instantsInRange);
        Collections.sort(result);
        return result;
    } catch (IOException e) {
        throw new HoodieIOException("Could not load archived commit timeline from path " + metaClient.getArchivePath(), e);
    }
}
Also used : HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) Spliterators(java.util.Spliterators) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) StringUtils(org.apache.hudi.common.util.StringUtils) Matcher(java.util.regex.Matcher) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) StreamSupport(java.util.stream.StreamSupport) Nonnull(javax.annotation.Nonnull) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Set(java.util.Set) IOException(java.io.IOException) HoodieMergeArchiveFilePlan(org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) HoodiePartitionMetadata(org.apache.hudi.common.model.HoodiePartitionMetadata) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Collections(java.util.Collections) Spliterator(java.util.Spliterator)
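
The sort at the top of loadInstants relies on ArchiveFileVersionComparator, which is also not shown in this example. A minimal sketch, assuming archive file names carry a numeric version suffix (e.g. .commits_.archive.12) and that a larger suffix means a newer file:

private static class ArchiveFileVersionComparator implements Comparator<FileStatus>, Serializable {
    // Assumed name pattern for archived timeline files; the capture group is the version.
    private static final Pattern ARCHIVE_FILE_PATTERN =
        Pattern.compile("^\\.commits_\\.archive\\.([0-9]+).*");

    @Override
    public int compare(FileStatus f1, FileStatus f2) {
        // Reverse numeric order of the version suffix puts the newest file first.
        return Integer.compare(getArchivedFileSuffix(f2), getArchivedFileSuffix(f1));
    }

    private int getArchivedFileSuffix(FileStatus f) {
        Matcher matcher = ARCHIVE_FILE_PATTERN.matcher(f.getPath().getName());
        // Files that do not match the pattern sort last.
        return matcher.matches() ? Integer.parseInt(matcher.group(1)) : 0;
    }
}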

Example 3 with HoodieMergeArchiveFilePlan

Use of org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan in project hudi by apache.

From the class HoodieTimelineArchiver, the method buildArchiveMergePlan:

public void buildArchiveMergePlan(List<String> compactCandidate, Path planPath, String compactedArchiveFileName) throws IOException {
    LOG.info("Start to build archive merge plan.");
    HoodieMergeArchiveFilePlan plan = HoodieMergeArchiveFilePlan.newBuilder().setCandidate(compactCandidate).setMergedArchiveFileName(compactedArchiveFileName).build();
    Option<byte[]> content = TimelineMetadataUtils.serializeAvroMetadata(plan, HoodieMergeArchiveFilePlan.class);
    // building merge archive files plan.
    FileIOUtils.createFileInPath(metaClient.getFs(), planPath, content);
    LOG.info("Success to build archive merge plan");
}
Also used : HoodieMergeArchiveFilePlan(org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan)
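
For completeness, here is how a hypothetical caller might tie the three examples together; the names archiver, candidateFileNames, and mergedArchiveFileName below are illustrative, not taken from the project:

// Write the plan before touching any archive files, so a crash mid-merge can
// later be detected and resolved by verifyLastMergeArchiveFilesIfNecessary (Example 1).
Path planPath = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
archiver.buildArchiveMergePlan(candidateFileNames, planPath, mergedArchiveFileName);

// The plan round-trips through the same Avro helpers used by the readers above:
HoodieMergeArchiveFilePlan plan = TimelineMetadataUtils.deserializeAvroMetadata(
    FileIOUtils.readDataFromPath(metaClient.getFs(), planPath).get(),
    HoodieMergeArchiveFilePlan.class);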

Aggregations

HoodieMergeArchiveFilePlan (org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan) 3
IOException (java.io.IOException) 2
Path (org.apache.hadoop.fs.Path) 2
HoodieWrapperFileSystem (org.apache.hudi.common.fs.HoodieWrapperFileSystem) 2
HoodieIOException (org.apache.hudi.exception.HoodieIOException) 2
Serializable (java.io.Serializable) 1
StandardCharsets (java.nio.charset.StandardCharsets) 1
ArrayList (java.util.ArrayList) 1
Arrays (java.util.Arrays) 1
Collections (java.util.Collections) 1
Comparator (java.util.Comparator) 1
HashMap (java.util.HashMap) 1
HashSet (java.util.HashSet) 1
List (java.util.List) 1
Map (java.util.Map) 1
Set (java.util.Set) 1
Spliterator (java.util.Spliterator) 1
Spliterators (java.util.Spliterators) 1
Function (java.util.function.Function) 1
Matcher (java.util.regex.Matcher) 1