Search in sources :

Example 76 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project presto by prestodb.

The method recreateFileSplitWithCustomInfo of the class HudiRealtimeBootstrapBaseFileSplitConverter.

/**
 * Rebuilds a {@link HoodieRealtimeBootstrapBaseFileSplit} from the string-valued custom split info
 * that accompanies a plain {@link FileSplit}.
 *
 * <p>The conversion only happens when the entry under {@code CUSTOM_FILE_SPLIT_CLASS_KEY} equals the
 * class name of {@link HoodieRealtimeBootstrapBaseFileSplit}; for any other value (including a missing
 * or empty one) the split is not handled and {@link Optional#empty()} is returned.
 *
 * @param split the original file split that is wrapped by the recreated split
 * @param customSplitInfo key/value metadata describing the custom split; must not be null
 * @return the recreated split, or {@link Optional#empty()} if the info describes a different split class
 * @throws IOException declared by the interface contract and by the Hudi split constructor
 * @throws NullPointerException if {@code customSplitInfo} is null, or if one of the bootstrap split
 *         entries (path, start, length) is absent for a matching split class
 * @throws NumberFormatException if the bootstrap start or length entry is not a valid long
 */
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo) throws IOException {
    requireNonNull(customSplitInfo, "customSplitInfo is null");

    // equals() is already false for a null or empty value, so no separate emptiness check is needed.
    String customFileSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (!HoodieRealtimeBootstrapBaseFileSplit.class.getName().equals(customFileSplitClass)) {
        return Optional.empty();
    }

    // Delta log paths are optional: a missing or empty entry means "no log files".
    String deltaFilePaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
    List<String> deltaLogPaths = isNullOrEmpty(deltaFilePaths) ? Collections.emptyList() : Arrays.asList(deltaFilePaths.split(","));
    List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());

    // The bootstrap split cannot be built without these three entries; fail with a message that names
    // the missing key instead of an opaque error from Path or parseLong.
    String bootstrapPath = requireNonNull(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH), "BOOTSTRAP_FILE_SPLIT_PATH is missing");
    String bootstrapStart = requireNonNull(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START), "BOOTSTRAP_FILE_SPLIT_START is missing");
    String bootstrapLength = requireNonNull(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN), "BOOTSTRAP_FILE_SPLIT_LEN is missing");

    // The (String[]) cast selects the constructor overload taking a String[] as its last argument; a null array is passed.
    FileSplit bootstrapFileSplit = new FileSplit(new Path(bootstrapPath), parseLong(bootstrapStart), parseLong(bootstrapLength), (String[]) null);

    // NOTE(review): the trailing `false` and `Option.empty()` arguments are passed exactly as before;
    // their meaning is defined by the Hudi constructor and is not visible here.
    FileSplit recreatedSplit = new HoodieRealtimeBootstrapBaseFileSplit(split, customSplitInfo.get(BASE_PATH_KEY), deltaLogFiles, customSplitInfo.get(MAX_COMMIT_TIME_KEY), bootstrapFileSplit, false, Option.empty());
    return Optional.of(recreatedSplit);
}
Also used : CUSTOM_FILE_SPLIT_CLASS_KEY(com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY) Arrays(java.util.Arrays) ImmutableMap(com.google.common.collect.ImmutableMap) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Long.parseLong(java.lang.Long.parseLong) Collections(java.util.Collections) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit)

Example 77 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project presto by prestodb.

The method recreateFileSplitWithCustomInfo of the class HudiRealtimeSplitConverter.

/**
 * Recreates a {@link HoodieRealtimeFileSplit} from the custom split info attached to a file split.
 *
 * <p>Returns {@link Optional#empty()} unless the entry under {@code CUSTOM_FILE_SPLIT_CLASS_KEY}
 * equals the class name of {@link HoodieRealtimeFileSplit}. For a matching class, the delta file
 * paths, base path and max commit time entries are all required.
 *
 * @param split the original file split wrapped by the recreated realtime split
 * @param customSplitInfo key/value metadata describing the custom split
 * @return the recreated realtime split, or {@link Optional#empty()} for any other split class
 * @throws IOException declared by the interface contract and by the Hudi split constructor
 * @throws NullPointerException if a required entry is absent for a matching split class
 */
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo) throws IOException {
    // Guard clause: anything not tagged as a HoodieRealtimeFileSplit is left to other converters.
    String requestedSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (!HoodieRealtimeFileSplit.class.getName().equals(requestedSplitClass)) {
        return Optional.empty();
    }

    // Delta log file paths arrive as one delimited string; split it and wrap each path as a log file.
    String joinedDeltaPaths = requireNonNull(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY), "HUDI_DELTA_FILEPATHS_KEY is missing");
    List<HoodieLogFile> logFiles = SPLITTER.splitToList(joinedDeltaPaths)
            .stream()
            .map(rawPath -> new HoodieLogFile(new Path(rawPath)))
            .collect(Collectors.toList());

    String basePath = requireNonNull(customSplitInfo.get(HUDI_BASEPATH_KEY), "HUDI_BASEPATH_KEY is missing");
    String maxCommitTime = requireNonNull(customSplitInfo.get(HUDI_MAX_COMMIT_TIME_KEY), "HUDI_MAX_COMMIT_TIME_KEY is missing");

    // false as incremental query is not supported yet
    FileSplit realtimeSplit = new HoodieRealtimeFileSplit(split, basePath, logFiles, maxCommitTime, false, Option.empty());
    return Optional.of(realtimeSplit);
}
Also used : HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) CUSTOM_FILE_SPLIT_CLASS_KEY(com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY) ImmutableMap(com.google.common.collect.ImmutableMap) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Collectors(java.util.stream.Collectors) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Splitter(com.google.common.base.Splitter) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)

Aggregations

HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 77; IOException (java.io.IOException): 48; List (java.util.List): 46; Path (org.apache.hadoop.fs.Path): 45; Map (java.util.Map): 42; Collectors (java.util.stream.Collectors): 42; ArrayList (java.util.ArrayList): 38; Option (org.apache.hudi.common.util.Option): 37; FileSlice (org.apache.hudi.common.model.FileSlice): 34; HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 29; FileStatus (org.apache.hadoop.fs.FileStatus): 28; HashMap (java.util.HashMap): 26; FSUtils (org.apache.hudi.common.fs.FSUtils): 26; Pair (org.apache.hudi.common.util.collection.Pair): 25; HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24; HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 23; Set (java.util.Set): 22; LogManager (org.apache.log4j.LogManager): 22; Logger (org.apache.log4j.Logger): 22; HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat): 21