Search in sources:

Example 1 with HoodieRealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project urban-eureka by errir503.

the class HudiRealtimeBootstrapBaseFileSplitConverter method extractCustomSplitInfo.

/**
 * Flattens the Hudi-specific state of a realtime bootstrap base file split into a string map.
 *
 * @param split the file split to inspect
 * @return the custom split properties when {@code split} is a
 *         {@link HoodieRealtimeBootstrapBaseFileSplit}; otherwise an empty {@link Optional}
 */
@Override
public Optional<Map<String, String>> extractCustomSplitInfo(FileSplit split) {
    // Any other split type is not handled by this converter.
    if (!(split instanceof HoodieRealtimeBootstrapBaseFileSplit)) {
        return Optional.empty();
    }

    HoodieRealtimeBootstrapBaseFileSplit realtimeSplit = (HoodieRealtimeBootstrapBaseFileSplit) split;
    ImmutableMap.Builder<String, String> properties = ImmutableMap.builder();

    // The concrete class name is stored alongside the split's own fields.
    properties.put(CUSTOM_FILE_SPLIT_CLASS_KEY, HoodieRealtimeBootstrapBaseFileSplit.class.getName());
    properties.put(BASE_PATH_KEY, realtimeSplit.getBasePath());
    properties.put(MAX_COMMIT_TIME_KEY, realtimeSplit.getMaxCommitTime());
    // Delta log paths are stored as a single comma-separated value.
    properties.put(DELTA_FILE_PATHS_KEY, String.join(",", realtimeSplit.getDeltaLogPaths()));

    // The nested bootstrap split is stored as its path, start offset and length.
    FileSplit bootstrapSplit = realtimeSplit.getBootstrapFileSplit();
    properties.put(BOOTSTRAP_FILE_SPLIT_PATH, bootstrapSplit.getPath().toString());
    properties.put(BOOTSTRAP_FILE_SPLIT_START, String.valueOf(bootstrapSplit.getStart()));
    properties.put(BOOTSTRAP_FILE_SPLIT_LEN, String.valueOf(bootstrapSplit.getLength()));

    return Optional.of(properties.build());
}
Also used : ImmutableMap(com.google.common.collect.ImmutableMap) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit)

Example 2 with HoodieRealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project urban-eureka by errir503.

the class HudiRealtimeBootstrapBaseFileSplitConverter method recreateFileSplitWithCustomInfo.

/**
 * Rebuilds a {@link HoodieRealtimeBootstrapBaseFileSplit} around {@code split} from a string map
 * of custom split properties.
 *
 * @param split the file split to wrap
 * @param customSplitInfo the custom split properties; must not be null
 * @return the recreated split when the class name stored under
 *         {@code CUSTOM_FILE_SPLIT_CLASS_KEY} is that of
 *         {@link HoodieRealtimeBootstrapBaseFileSplit}; otherwise an empty {@link Optional}
 * @throws IOException declared by this method's signature
 */
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo) throws IOException {
    requireNonNull(customSplitInfo);

    String splitClassName = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (isNullOrEmpty(splitClassName) || !HoodieRealtimeBootstrapBaseFileSplit.class.getName().equals(splitClassName)) {
        return Optional.empty();
    }

    // Delta log paths are read from a single comma-separated value; absent or empty means none.
    String joinedLogPaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
    List<String> logPaths;
    if (isNullOrEmpty(joinedLogPaths)) {
        logPaths = Collections.emptyList();
    }
    else {
        logPaths = Arrays.asList(joinedLogPaths.split(","));
    }
    List<HoodieLogFile> logFiles = logPaths.stream()
            .map(logPath -> new HoodieLogFile(new Path(logPath)))
            .collect(Collectors.toList());

    // The bootstrap split is rebuilt from its path, start and length, with null locations.
    Path bootstrapPath = new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH));
    long bootstrapStart = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START));
    long bootstrapLength = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN));
    FileSplit bootstrapSplit = new FileSplit(bootstrapPath, bootstrapStart, bootstrapLength, (String[]) null);

    String basePath = customSplitInfo.get(BASE_PATH_KEY);
    String maxCommitTime = customSplitInfo.get(MAX_COMMIT_TIME_KEY);
    FileSplit recreated = new HoodieRealtimeBootstrapBaseFileSplit(split, basePath, logFiles, maxCommitTime, bootstrapSplit, false, Option.empty());
    return Optional.of(recreated);
}
Also used : CUSTOM_FILE_SPLIT_CLASS_KEY(com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY) Arrays(java.util.Arrays) ImmutableMap(com.google.common.collect.ImmutableMap) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Long.parseLong(java.lang.Long.parseLong) Collections(java.util.Collections) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit)

Example 3 with HoodieRealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project urban-eureka by errir503.

the class TestCustomSplitConversionUtils method testHudiRealtimeBootstrapBaseFileSplitConverter.

/**
 * Round-trips a {@link HoodieRealtimeBootstrapBaseFileSplit} through
 * {@code CustomSplitConversionUtils.extractCustomSplitInfo} and
 * {@code CustomSplitConversionUtils.recreateSplitWithCustomInfo}, then checks that the
 * recreated split reports the same base split, Hudi and bootstrap split values.
 */
@Test
public void testHudiRealtimeBootstrapBaseFileSplitConverter() throws IOException {
    // Fixture: Hudi-specific values.
    String expectedMaxCommitTime = "max_commit_time";
    List<String> expectedLogPaths = Arrays.asList("test1", "test2", "test3");
    List<HoodieLogFile> logFiles = expectedLogPaths.stream()
            .map(logPath -> new HoodieLogFile(new Path(logPath)))
            .collect(Collectors.toList());

    // Fixture: bootstrap source split.
    Path expectedBootstrapPath = new Path("/test/source/test.parquet");
    long expectedBootstrapStart = 0L;
    long expectedBootstrapLength = 200L;

    FileSplit plainSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit bootstrapSplit = new FileSplit(expectedBootstrapPath, expectedBootstrapStart, expectedBootstrapLength, new String[0]);
    FileSplit originalSplit = new HoodieRealtimeBootstrapBaseFileSplit(plainSplit, BASE_PATH, logFiles, expectedMaxCommitTime, bootstrapSplit, false, Option.empty());

    // Hudi split -> custom split properties
    Map<String, String> properties = CustomSplitConversionUtils.extractCustomSplitInfo(originalSplit);

    // (custom split properties + plain split) -> Hudi split
    FileSplit rebuilt = CustomSplitConversionUtils.recreateSplitWithCustomInfo(plainSplit, properties);
    HoodieRealtimeBootstrapBaseFileSplit actual = (HoodieRealtimeBootstrapBaseFileSplit) rebuilt;

    // Values of the wrapped plain split.
    assertEquals(FILE_PATH, actual.getPath());
    assertEquals(SPLIT_START_POS, actual.getStart());
    assertEquals(SPLIT_LENGTH, actual.getLength());
    assertEquals(SPLIT_HOSTS, actual.getLocations());

    // Hudi-specific values.
    assertEquals(BASE_PATH, actual.getBasePath());
    assertEquals(expectedLogPaths, actual.getDeltaLogPaths());
    assertEquals(expectedMaxCommitTime, actual.getMaxCommitTime());

    // Values of the nested bootstrap split.
    assertEquals(expectedBootstrapPath, actual.getBootstrapFileSplit().getPath());
    assertEquals(expectedBootstrapStart, actual.getBootstrapFileSplit().getStart());
    assertEquals(expectedBootstrapLength, actual.getBootstrapFileSplit().getLength());
}
Also used : Arrays(java.util.Arrays) Assert.assertEquals(org.testng.Assert.assertEquals) Option(org.apache.hudi.common.util.Option) Test(org.testng.annotations.Test) IOException(java.io.IOException) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) Test(org.testng.annotations.Test)

Example 4 with HoodieRealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project presto by prestodb.

the class TestCustomSplitConversionUtils method testHudiRealtimeBootstrapBaseFileSplitConverter.

/**
 * Round-trips a {@link HoodieRealtimeBootstrapBaseFileSplit} through
 * {@code CustomSplitConversionUtils.extractCustomSplitInfo} and
 * {@code CustomSplitConversionUtils.recreateSplitWithCustomInfo}, then checks that the
 * recreated split reports the same base split, Hudi and bootstrap split values.
 */
@Test
public void testHudiRealtimeBootstrapBaseFileSplitConverter() throws IOException {
    // Fixture: Hudi-specific values.
    String expectedMaxCommitTime = "max_commit_time";
    List<String> expectedLogPaths = Arrays.asList("test1", "test2", "test3");
    List<HoodieLogFile> logFiles = expectedLogPaths.stream()
            .map(logPath -> new HoodieLogFile(new Path(logPath)))
            .collect(Collectors.toList());

    // Fixture: bootstrap source split.
    Path expectedBootstrapPath = new Path("/test/source/test.parquet");
    long expectedBootstrapStart = 0L;
    long expectedBootstrapLength = 200L;

    FileSplit plainSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit bootstrapSplit = new FileSplit(expectedBootstrapPath, expectedBootstrapStart, expectedBootstrapLength, new String[0]);
    FileSplit originalSplit = new HoodieRealtimeBootstrapBaseFileSplit(plainSplit, BASE_PATH, logFiles, expectedMaxCommitTime, bootstrapSplit, false, Option.empty());

    // Hudi split -> custom split properties
    Map<String, String> properties = CustomSplitConversionUtils.extractCustomSplitInfo(originalSplit);

    // (custom split properties + plain split) -> Hudi split
    FileSplit rebuilt = CustomSplitConversionUtils.recreateSplitWithCustomInfo(plainSplit, properties);
    HoodieRealtimeBootstrapBaseFileSplit actual = (HoodieRealtimeBootstrapBaseFileSplit) rebuilt;

    // Values of the wrapped plain split.
    assertEquals(FILE_PATH, actual.getPath());
    assertEquals(SPLIT_START_POS, actual.getStart());
    assertEquals(SPLIT_LENGTH, actual.getLength());
    assertEquals(SPLIT_HOSTS, actual.getLocations());

    // Hudi-specific values.
    assertEquals(BASE_PATH, actual.getBasePath());
    assertEquals(expectedLogPaths, actual.getDeltaLogPaths());
    assertEquals(expectedMaxCommitTime, actual.getMaxCommitTime());

    // Values of the nested bootstrap split.
    assertEquals(expectedBootstrapPath, actual.getBootstrapFileSplit().getPath());
    assertEquals(expectedBootstrapStart, actual.getBootstrapFileSplit().getStart());
    assertEquals(expectedBootstrapLength, actual.getBootstrapFileSplit().getLength());
}
Also used : Arrays(java.util.Arrays) Assert.assertEquals(org.testng.Assert.assertEquals) Option(org.apache.hudi.common.util.Option) Test(org.testng.annotations.Test) IOException(java.io.IOException) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) Test(org.testng.annotations.Test)

Example 5 with HoodieRealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project presto by prestodb.

the class HudiRealtimeBootstrapBaseFileSplitConverter method recreateFileSplitWithCustomInfo.

/**
 * Rebuilds a {@link HoodieRealtimeBootstrapBaseFileSplit} around {@code split} from a string map
 * of custom split properties.
 *
 * @param split the file split to wrap
 * @param customSplitInfo the custom split properties; must not be null
 * @return the recreated split when the class name stored under
 *         {@code CUSTOM_FILE_SPLIT_CLASS_KEY} is that of
 *         {@link HoodieRealtimeBootstrapBaseFileSplit}; otherwise an empty {@link Optional}
 * @throws IOException declared by this method's signature
 */
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo) throws IOException {
    requireNonNull(customSplitInfo);

    String splitClassName = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (isNullOrEmpty(splitClassName) || !HoodieRealtimeBootstrapBaseFileSplit.class.getName().equals(splitClassName)) {
        return Optional.empty();
    }

    // Delta log paths are read from a single comma-separated value; absent or empty means none.
    String joinedLogPaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
    List<String> logPaths;
    if (isNullOrEmpty(joinedLogPaths)) {
        logPaths = Collections.emptyList();
    }
    else {
        logPaths = Arrays.asList(joinedLogPaths.split(","));
    }
    List<HoodieLogFile> logFiles = logPaths.stream()
            .map(logPath -> new HoodieLogFile(new Path(logPath)))
            .collect(Collectors.toList());

    // The bootstrap split is rebuilt from its path, start and length, with null locations.
    Path bootstrapPath = new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH));
    long bootstrapStart = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START));
    long bootstrapLength = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN));
    FileSplit bootstrapSplit = new FileSplit(bootstrapPath, bootstrapStart, bootstrapLength, (String[]) null);

    String basePath = customSplitInfo.get(BASE_PATH_KEY);
    String maxCommitTime = customSplitInfo.get(MAX_COMMIT_TIME_KEY);
    FileSplit recreated = new HoodieRealtimeBootstrapBaseFileSplit(split, basePath, logFiles, maxCommitTime, bootstrapSplit, false, Option.empty());
    return Optional.of(recreated);
}
Also used : CUSTOM_FILE_SPLIT_CLASS_KEY(com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY) Arrays(java.util.Arrays) ImmutableMap(com.google.common.collect.ImmutableMap) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Long.parseLong(java.lang.Long.parseLong) Collections(java.util.Collections) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit)

Aggregations

HoodieRealtimeBootstrapBaseFileSplit (org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit)6 ImmutableMap (com.google.common.collect.ImmutableMap)4 IOException (java.io.IOException)4 Arrays (java.util.Arrays)4 List (java.util.List)4 Map (java.util.Map)4 Collectors (java.util.stream.Collectors)4 Path (org.apache.hadoop.fs.Path)4 FileSplit (org.apache.hadoop.mapred.FileSplit)4 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)4 Option (org.apache.hudi.common.util.Option)4 CUSTOM_FILE_SPLIT_CLASS_KEY (com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY)2 Strings.isNullOrEmpty (com.google.common.base.Strings.isNullOrEmpty)2 ImmutableList (com.google.common.collect.ImmutableList)2 Long.parseLong (java.lang.Long.parseLong)2 Collections (java.util.Collections)2 Objects.requireNonNull (java.util.Objects.requireNonNull)2 Optional (java.util.Optional)2 BootstrapBaseFileSplit (org.apache.hudi.hadoop.BootstrapBaseFileSplit)2 HoodieRealtimeFileSplit (org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit)2