Search in sources :

Example 1 with RealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit in project presto by prestodb.

In class HudiRealtimeBootstrapBaseFileSplitConverter, method recreateFileSplitWithCustomInfo:

/**
 * Rebuilds a {@link RealtimeBootstrapBaseFileSplit} from a plain {@link FileSplit} plus the
 * string properties stored in {@code customSplitInfo}.
 *
 * @param split the base split that the recreated split wraps
 * @param customSplitInfo serialized split properties; must not be null
 * @return the recreated split, or {@link Optional#empty()} when {@code customSplitInfo} does not
 *         name {@link RealtimeBootstrapBaseFileSplit} under {@code CUSTOM_FILE_SPLIT_CLASS_KEY}
 */
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo) throws IOException {
    requireNonNull(customSplitInfo);

    // Only handle maps that were produced for this exact split class.
    String requestedSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    boolean handledHere = !isNullOrEmpty(requestedSplitClass)
            && RealtimeBootstrapBaseFileSplit.class.getName().equals(requestedSplitClass);
    if (!handledHere) {
        return Optional.empty();
    }

    // Delta log paths are stored as one comma-separated string; absent or empty means no logs.
    String joinedLogPaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
    List<String> logPaths;
    if (isNullOrEmpty(joinedLogPaths)) {
        logPaths = Collections.emptyList();
    } else {
        logPaths = Arrays.asList(joinedLogPaths.split(","));
    }

    // The bootstrap split is rebuilt from its path/start/length; hosts are passed as null.
    Path bootstrapPath = new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH));
    long bootstrapStart = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START));
    long bootstrapLength = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN));
    FileSplit bootstrapSplit = new FileSplit(bootstrapPath, bootstrapStart, bootstrapLength, (String[]) null);

    String basePath = customSplitInfo.get(BASE_PATH_KEY);
    String maxCommitTime = customSplitInfo.get(MAX_COMMIT_TIME_KEY);
    FileSplit recreated = new RealtimeBootstrapBaseFileSplit(split, basePath, logPaths, maxCommitTime, bootstrapSplit);
    return Optional.of(recreated);
}
Also used: Path(org.apache.hadoop.fs.Path) RealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit)

Example 2 with RealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit in project presto by prestodb.

In class HudiRealtimeBootstrapBaseFileSplitConverter, method extractCustomSplitInfo:

/**
 * Flattens a {@link RealtimeBootstrapBaseFileSplit} into a string-to-string map.
 *
 * <p>The map records the split class name, base path, max commit time, the delta log paths
 * joined with commas, and the path, start and length of the bootstrap file split.
 *
 * @param split the split to inspect
 * @return the property map, or {@link Optional#empty()} when {@code split} is not a
 *         {@link RealtimeBootstrapBaseFileSplit}
 */
@Override
public Optional<Map<String, String>> extractCustomSplitInfo(FileSplit split) {
    if (!(split instanceof RealtimeBootstrapBaseFileSplit)) {
        return Optional.empty();
    }

    RealtimeBootstrapBaseFileSplit realtimeSplit = (RealtimeBootstrapBaseFileSplit) split;
    ImmutableMap.Builder<String, String> properties = ImmutableMap.<String, String>builder()
            .put(CUSTOM_FILE_SPLIT_CLASS_KEY, RealtimeBootstrapBaseFileSplit.class.getName())
            .put(BASE_PATH_KEY, realtimeSplit.getBasePath())
            .put(MAX_COMMIT_TIME_KEY, realtimeSplit.getMaxCommitTime())
            .put(DELTA_FILE_PATHS_KEY, String.join(",", realtimeSplit.getDeltaLogPaths()))
            .put(BOOTSTRAP_FILE_SPLIT_PATH, realtimeSplit.getBootstrapFileSplit().getPath().toString())
            .put(BOOTSTRAP_FILE_SPLIT_START, String.valueOf(realtimeSplit.getBootstrapFileSplit().getStart()))
            .put(BOOTSTRAP_FILE_SPLIT_LEN, String.valueOf(realtimeSplit.getBootstrapFileSplit().getLength()));
    return Optional.of(properties.build());
}
Also used : RealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 3 with RealtimeBootstrapBaseFileSplit

use of org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit in project presto by prestodb.

In class TestCustomSplitConversionUtils, method testHudiRealtimeBootstrapBaseFileSplitConverter:

/**
 * Round-trips a {@link RealtimeBootstrapBaseFileSplit} through
 * {@code CustomSplitConversionUtils}: the split is flattened to a property map, rebuilt from
 * that map plus the base split, and every field of the rebuilt split is compared with the
 * value used to construct the original.
 */
@Test
public void testHudiRealtimeBootstrapBaseFileSplitConverter() throws IOException {
    List<String> deltaLogPaths = Arrays.asList("test1", "test2", "test3");
    String maxCommitTime = "max_commit_time";
    Path bootstrapSourceFilePath = new Path("/test/source/test.parquet");
    long bootstrapSourceSplitStartPos = 0L;
    long bootstrapSourceSplitLength = 200L;
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit bootstrapSourceSplit = new FileSplit(bootstrapSourceFilePath, bootstrapSourceSplitStartPos, bootstrapSourceSplitLength, new String[0]);
    FileSplit hudiSplit = new RealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogPaths, maxCommitTime, bootstrapSourceSplit);

    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);

    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    RealtimeBootstrapBaseFileSplit recreatedSplit = (RealtimeBootstrapBaseFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);

    // Arguments are (actual, expected) so that a failure message labels the values correctly.
    // NOTE(review): assumes assertEquals is org.testng.Assert.assertEquals — confirm against the imports.

    // Properties inherited from the base split.
    assertEquals(recreatedSplit.getPath(), FILE_PATH);
    assertEquals(recreatedSplit.getStart(), SPLIT_START_POS);
    assertEquals(recreatedSplit.getLength(), SPLIT_LENGTH);
    assertEquals(recreatedSplit.getLocations(), SPLIT_HOSTS);

    // Hudi-specific properties carried through the property map.
    assertEquals(recreatedSplit.getBasePath(), BASE_PATH);
    assertEquals(recreatedSplit.getDeltaLogPaths(), deltaLogPaths);
    assertEquals(recreatedSplit.getMaxCommitTime(), maxCommitTime);

    // Bootstrap source split properties.
    assertEquals(recreatedSplit.getBootstrapFileSplit().getPath(), bootstrapSourceFilePath);
    assertEquals(recreatedSplit.getBootstrapFileSplit().getStart(), bootstrapSourceSplitStartPos);
    assertEquals(recreatedSplit.getBootstrapFileSplit().getLength(), bootstrapSourceSplitLength);
}
Also used: Path(org.apache.hadoop.fs.Path) RealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Test(org.testng.annotations.Test)

Aggregations

RealtimeBootstrapBaseFileSplit (org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit): 3, Path (org.apache.hadoop.fs.Path): 2, FileSplit (org.apache.hadoop.mapred.FileSplit): 2, ImmutableMap (com.google.common.collect.ImmutableMap): 1, BootstrapBaseFileSplit (org.apache.hudi.hadoop.BootstrapBaseFileSplit): 1, HoodieRealtimeFileSplit (org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit): 1, Test (org.testng.annotations.Test): 1