Use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project urban-eureka by errir503.
The class HudiRealtimeBootstrapBaseFileSplitConverter, method extractCustomSplitInfo:
@Override
public Optional<Map<String, String>> extractCustomSplitInfo(FileSplit split)
{
    if (split instanceof HoodieRealtimeBootstrapBaseFileSplit) {
        ImmutableMap.Builder<String, String> customSplitInfo = ImmutableMap.builder();
        HoodieRealtimeBootstrapBaseFileSplit hudiSplit = (HoodieRealtimeBootstrapBaseFileSplit) split;
        customSplitInfo.put(CUSTOM_FILE_SPLIT_CLASS_KEY, HoodieRealtimeBootstrapBaseFileSplit.class.getName());
        customSplitInfo.put(BASE_PATH_KEY, hudiSplit.getBasePath());
        customSplitInfo.put(MAX_COMMIT_TIME_KEY, hudiSplit.getMaxCommitTime());
        customSplitInfo.put(DELTA_FILE_PATHS_KEY, String.join(",", hudiSplit.getDeltaLogPaths()));
        customSplitInfo.put(BOOTSTRAP_FILE_SPLIT_PATH, hudiSplit.getBootstrapFileSplit().getPath().toString());
        customSplitInfo.put(BOOTSTRAP_FILE_SPLIT_START, String.valueOf(hudiSplit.getBootstrapFileSplit().getStart()));
        customSplitInfo.put(BOOTSTRAP_FILE_SPLIT_LEN, String.valueOf(hudiSplit.getBootstrapFileSplit().getLength()));
        return Optional.of(customSplitInfo.build());
    }
    return Optional.empty();
}
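The keys referenced above (CUSTOM_FILE_SPLIT_CLASS_KEY, BASE_PATH_KEY, and so on) are string constants defined elsewhere in the converter classes; their actual values are not shown on this page. A minimal sketch of what those declarations could look like, with purely illustrative (assumed) values:

// Illustrative sketch only: the constant names match the references above,
// but every string value here is an assumption, not taken from the project.
static final String CUSTOM_FILE_SPLIT_CLASS_KEY = "custom_split_class";
static final String BASE_PATH_KEY = "base_path";
static final String MAX_COMMIT_TIME_KEY = "max_commit_time";
static final String DELTA_FILE_PATHS_KEY = "delta_file_paths";
static final String BOOTSTRAP_FILE_SPLIT_PATH = "bootstrap_file_split_path";
static final String BOOTSTRAP_FILE_SPLIT_START = "bootstrap_file_split_start";
static final String BOOTSTRAP_FILE_SPLIT_LEN = "bootstrap_file_split_len";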
Use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project urban-eureka by errir503.
The class HudiRealtimeBootstrapBaseFileSplitConverter, method recreateFileSplitWithCustomInfo:
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo)
        throws IOException
{
    requireNonNull(customSplitInfo);
    String customFileSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (!isNullOrEmpty(customFileSplitClass) && HoodieRealtimeBootstrapBaseFileSplit.class.getName().equals(customFileSplitClass)) {
        String deltaFilePaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
        List<String> deltaLogPaths = isNullOrEmpty(deltaFilePaths) ? Collections.emptyList() : Arrays.asList(deltaFilePaths.split(","));
        List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
        FileSplit bootstrapFileSplit = new FileSplit(
                new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH)),
                parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START)),
                parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN)),
                (String[]) null);
        split = new HoodieRealtimeBootstrapBaseFileSplit(
                split,
                customSplitInfo.get(BASE_PATH_KEY),
                deltaLogFiles,
                customSplitInfo.get(MAX_COMMIT_TIME_KEY),
                bootstrapFileSplit,
                false,
                Option.empty());
        return Optional.of(split);
    }
    return Optional.empty();
}
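Both methods above carry @Override, so the converter implements a shared contract. The interface below is a minimal sketch reconstructed from the two signatures shown on this page; the interface name and its package are assumptions rather than something taken from the project source.

// Minimal sketch of the assumed converter contract; only the two method
// signatures are taken from the snippets above.
public interface CustomSplitConverter
{
    Optional<Map<String, String>> extractCustomSplitInfo(FileSplit split);

    Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo)
            throws IOException;
}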
Use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project urban-eureka by errir503.
The class TestCustomSplitConversionUtils, method testHudiRealtimeBootstrapBaseFileSplitConverter:
@Test
public void testHudiRealtimeBootstrapBaseFileSplitConverter() throws IOException
{
    List<String> deltaLogPaths = Arrays.asList("test1", "test2", "test3");
    List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
    String maxCommitTime = "max_commit_time";
    Path bootstrapSourceFilePath = new Path("/test/source/test.parquet");
    long bootstrapSourceSplitStartPos = 0L;
    long bootstrapSourceSplitLength = 200L;
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit bootstrapSourceSplit = new FileSplit(bootstrapSourceFilePath, bootstrapSourceSplitStartPos, bootstrapSourceSplitLength, new String[0]);
    FileSplit hudiSplit = new HoodieRealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogFiles, maxCommitTime, bootstrapSourceSplit, false, Option.empty());

    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);

    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    HoodieRealtimeBootstrapBaseFileSplit recreatedSplit = (HoodieRealtimeBootstrapBaseFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);
    assertEquals(FILE_PATH, recreatedSplit.getPath());
    assertEquals(SPLIT_START_POS, recreatedSplit.getStart());
    assertEquals(SPLIT_LENGTH, recreatedSplit.getLength());
    assertEquals(SPLIT_HOSTS, recreatedSplit.getLocations());
    assertEquals(BASE_PATH, recreatedSplit.getBasePath());
    assertEquals(deltaLogPaths, recreatedSplit.getDeltaLogPaths());
    assertEquals(maxCommitTime, recreatedSplit.getMaxCommitTime());
    assertEquals(bootstrapSourceFilePath, recreatedSplit.getBootstrapFileSplit().getPath());
    assertEquals(bootstrapSourceSplitStartPos, recreatedSplit.getBootstrapFileSplit().getStart());
    assertEquals(bootstrapSourceSplitLength, recreatedSplit.getBootstrapFileSplit().getLength());
}
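The test drives the round trip through the static helpers CustomSplitConversionUtils.extractCustomSplitInfo and CustomSplitConversionUtils.recreateSplitWithCustomInfo. The sketch below shows one plausible shape for that dispatcher, assuming it simply asks each registered converter in turn and falls back to an empty map or the unchanged split when none matches; the converter list and the fallback behavior are assumptions, not taken from the actual class.

// Sketch under assumptions: a fixed converter list is tried in order and the first
// non-empty answer wins; imports are omitted here, as in the other snippets on this page.
public final class CustomSplitConversionUtils
{
    private static final List<CustomSplitConverter> CONVERTERS = ImmutableList.of(
            new HudiRealtimeBootstrapBaseFileSplitConverter());   // other converters omitted (assumption)

    private CustomSplitConversionUtils() {}

    public static Map<String, String> extractCustomSplitInfo(FileSplit split)
    {
        for (CustomSplitConverter converter : CONVERTERS) {
            Optional<Map<String, String>> customSplitInfo = converter.extractCustomSplitInfo(split);
            if (customSplitInfo.isPresent()) {
                return customSplitInfo.get();
            }
        }
        return ImmutableMap.of();
    }

    public static FileSplit recreateSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo)
            throws IOException
    {
        for (CustomSplitConverter converter : CONVERTERS) {
            Optional<FileSplit> recreated = converter.recreateFileSplitWithCustomInfo(split, customSplitInfo);
            if (recreated.isPresent()) {
                return recreated.get();
            }
        }
        return split;
    }
}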
Use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project presto by prestodb.
The class TestCustomSplitConversionUtils, method testHudiRealtimeBootstrapBaseFileSplitConverter:
@Test
public void testHudiRealtimeBootstrapBaseFileSplitConverter() throws IOException
{
    List<String> deltaLogPaths = Arrays.asList("test1", "test2", "test3");
    List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
    String maxCommitTime = "max_commit_time";
    Path bootstrapSourceFilePath = new Path("/test/source/test.parquet");
    long bootstrapSourceSplitStartPos = 0L;
    long bootstrapSourceSplitLength = 200L;
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit bootstrapSourceSplit = new FileSplit(bootstrapSourceFilePath, bootstrapSourceSplitStartPos, bootstrapSourceSplitLength, new String[0]);
    FileSplit hudiSplit = new HoodieRealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogFiles, maxCommitTime, bootstrapSourceSplit, false, Option.empty());

    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);

    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    HoodieRealtimeBootstrapBaseFileSplit recreatedSplit = (HoodieRealtimeBootstrapBaseFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);
    assertEquals(FILE_PATH, recreatedSplit.getPath());
    assertEquals(SPLIT_START_POS, recreatedSplit.getStart());
    assertEquals(SPLIT_LENGTH, recreatedSplit.getLength());
    assertEquals(SPLIT_HOSTS, recreatedSplit.getLocations());
    assertEquals(BASE_PATH, recreatedSplit.getBasePath());
    assertEquals(deltaLogPaths, recreatedSplit.getDeltaLogPaths());
    assertEquals(maxCommitTime, recreatedSplit.getMaxCommitTime());
    assertEquals(bootstrapSourceFilePath, recreatedSplit.getBootstrapFileSplit().getPath());
    assertEquals(bootstrapSourceSplitStartPos, recreatedSplit.getBootstrapFileSplit().getStart());
    assertEquals(bootstrapSourceSplitLength, recreatedSplit.getBootstrapFileSplit().getLength());
}
Use of org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit in project presto by prestodb.
The class HudiRealtimeBootstrapBaseFileSplitConverter, method recreateFileSplitWithCustomInfo:
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo)
        throws IOException
{
    requireNonNull(customSplitInfo);
    String customFileSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (!isNullOrEmpty(customFileSplitClass) && HoodieRealtimeBootstrapBaseFileSplit.class.getName().equals(customFileSplitClass)) {
        String deltaFilePaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
        List<String> deltaLogPaths = isNullOrEmpty(deltaFilePaths) ? Collections.emptyList() : Arrays.asList(deltaFilePaths.split(","));
        List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
        FileSplit bootstrapFileSplit = new FileSplit(
                new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH)),
                parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START)),
                parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN)),
                (String[]) null);
        split = new HoodieRealtimeBootstrapBaseFileSplit(
                split,
                customSplitInfo.get(BASE_PATH_KEY),
                deltaLogFiles,
                customSplitInfo.get(MAX_COMMIT_TIME_KEY),
                bootstrapFileSplit,
                false,
                Option.empty());
        return Optional.of(split);
    }
    return Optional.empty();
}