Search in sources :

Example 36 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project presto by prestodb.

Class TestCustomSplitConversionUtils, method testHudiRealtimeSplitConverterRoundTrip.

/**
 * Round-trips a {@link HoodieRealtimeFileSplit} carrying three delta log files through
 * {@code CustomSplitConversionUtils}: the split is first flattened into a string map, then
 * rebuilt from that map plus the plain base split. Every property read back from the rebuilt
 * split must match the value used to build the original one.
 *
 * @throws IOException declared because the conversion utilities may throw it
 */
@Test
public void testHudiRealtimeSplitConverterRoundTrip() throws IOException {
    List<String> deltaLogPaths = Arrays.asList("test1", "test2", "test3");
    List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream()
            .map(p -> new HoodieLogFile(new Path(p)))
            .collect(Collectors.toList());
    String expectedMaxCommitTime = "max_commit_time";
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, deltaLogFiles, expectedMaxCommitTime, false, Option.empty());

    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);

    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    HoodieRealtimeFileSplit recreatedSplit = (HoodieRealtimeFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);

    // TestNG's assertEquals takes (actual, expected); keeping that order makes failure
    // messages report the recreated value as "found" and the fixture value as "expected".
    assertEquals(recreatedSplit.getPath(), FILE_PATH);
    assertEquals(recreatedSplit.getStart(), SPLIT_START_POS);
    assertEquals(recreatedSplit.getLength(), SPLIT_LENGTH);
    assertEquals(recreatedSplit.getLocations(), SPLIT_HOSTS);
    assertEquals(recreatedSplit.getBasePath(), BASE_PATH);
    assertEquals(recreatedSplit.getDeltaLogPaths(), deltaLogPaths);
    assertEquals(recreatedSplit.getMaxCommitTime(), expectedMaxCommitTime);
}
Also used : Arrays(java.util.Arrays) Assert.assertEquals(org.testng.Assert.assertEquals) Option(org.apache.hudi.common.util.Option) Test(org.testng.annotations.Test) IOException(java.io.IOException) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Path(org.apache.hadoop.fs.Path) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) Test(org.testng.annotations.Test)

Example 37 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project presto by prestodb.

Class TestCustomSplitConversionUtils, method testHudiRealtimeSplitConverterNoLogRoundTrip.

/**
 * Round-trips a {@link HoodieRealtimeFileSplit} that has no delta log files through
 * {@code CustomSplitConversionUtils}: the split is flattened into a string map and rebuilt
 * from that map plus the base split. The rebuilt split must report an empty delta log path
 * list and the same remaining properties as the original.
 *
 * @throws IOException declared because the conversion utilities may throw it
 */
@Test
public void testHudiRealtimeSplitConverterNoLogRoundTrip() throws IOException {
    List<String> deltaLogPaths = ImmutableList.of();
    List<HoodieLogFile> deltaLogFiles = ImmutableList.of();
    String expectedMaxCommitTime = "max_commit_time";
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, deltaLogFiles, expectedMaxCommitTime, false, Option.empty());

    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);

    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    HoodieRealtimeFileSplit recreatedSplit = (HoodieRealtimeFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);

    // TestNG's assertEquals takes (actual, expected); keeping that order makes failure
    // messages report the recreated value as "found" and the fixture value as "expected".
    assertEquals(recreatedSplit.getPath(), FILE_PATH);
    assertEquals(recreatedSplit.getStart(), SPLIT_START_POS);
    assertEquals(recreatedSplit.getLength(), SPLIT_LENGTH);
    assertEquals(recreatedSplit.getLocations(), SPLIT_HOSTS);
    assertEquals(recreatedSplit.getBasePath(), BASE_PATH);
    assertEquals(recreatedSplit.getDeltaLogPaths(), deltaLogPaths);
    assertEquals(recreatedSplit.getMaxCommitTime(), expectedMaxCommitTime);
}
Also used : HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) Test(org.testng.annotations.Test)

Example 38 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project urban-eureka by errir503.

Class HudiRealtimeBootstrapBaseFileSplitConverter, method recreateFileSplitWithCustomInfo.

/**
 * Rebuilds a {@link HoodieRealtimeBootstrapBaseFileSplit} from a plain split and the string
 * properties stored in {@code customSplitInfo}.
 *
 * @param split the base file split that the rebuilt split wraps
 * @param customSplitInfo string properties describing the custom split; must not be null
 * @return the rebuilt split when the recorded custom split class is
 *         {@code HoodieRealtimeBootstrapBaseFileSplit}, otherwise an empty {@code Optional}
 * @throws IOException declared by the overridden method
 */
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo) throws IOException {
    requireNonNull(customSplitInfo);

    // equals() is false for a null or empty class name, so no separate emptiness check is needed.
    String recordedSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (!HoodieRealtimeBootstrapBaseFileSplit.class.getName().equals(recordedSplitClass)) {
        return Optional.empty();
    }

    // Delta log paths are stored as one comma-separated string; absent or empty means no logs.
    String joinedDeltaPaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
    List<String> deltaLogPaths;
    if (isNullOrEmpty(joinedDeltaPaths)) {
        deltaLogPaths = Collections.emptyList();
    }
    else {
        deltaLogPaths = Arrays.asList(joinedDeltaPaths.split(","));
    }
    List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream()
            .map(logPath -> new HoodieLogFile(new Path(logPath)))
            .collect(Collectors.toList());

    // The bootstrap source split is rebuilt from its path, start and length; hosts are passed as null.
    Path bootstrapPath = new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH));
    long bootstrapStart = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START));
    long bootstrapLength = parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN));
    FileSplit bootstrapFileSplit = new FileSplit(bootstrapPath, bootstrapStart, bootstrapLength, (String[]) null);

    FileSplit recreated = new HoodieRealtimeBootstrapBaseFileSplit(
            split,
            customSplitInfo.get(BASE_PATH_KEY),
            deltaLogFiles,
            customSplitInfo.get(MAX_COMMIT_TIME_KEY),
            bootstrapFileSplit,
            false,
            Option.empty());
    return Optional.of(recreated);
}
Also used : CUSTOM_FILE_SPLIT_CLASS_KEY(com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY) Arrays(java.util.Arrays) ImmutableMap(com.google.common.collect.ImmutableMap) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Long.parseLong(java.lang.Long.parseLong) Collections(java.util.Collections) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit)

Example 39 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project urban-eureka by errir503.

Class HudiRealtimeSplitConverter, method recreateFileSplitWithCustomInfo.

/**
 * Rebuilds a {@link HoodieRealtimeFileSplit} from a plain split and the string properties
 * stored in {@code customSplitInfo}.
 *
 * @param split the base file split that the rebuilt split wraps
 * @param customSplitInfo string properties describing the custom split
 * @return the rebuilt split when the recorded custom split class is
 *         {@code HoodieRealtimeFileSplit}, otherwise an empty {@code Optional}
 * @throws NullPointerException if the delta file paths, base path or max commit time entry
 *         is missing for a matching split class
 * @throws IOException declared by the overridden method
 */
@Override
public Optional<FileSplit> recreateFileSplitWithCustomInfo(FileSplit split, Map<String, String> customSplitInfo) throws IOException {
    String recordedSplitClass = customSplitInfo.get(CUSTOM_FILE_SPLIT_CLASS_KEY);
    if (!HoodieRealtimeFileSplit.class.getName().equals(recordedSplitClass)) {
        return Optional.empty();
    }

    // Required entries are checked in the order: delta paths, base path, max commit time.
    String joinedDeltaPaths = requireNonNull(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY), "HUDI_DELTA_FILEPATHS_KEY is missing");
    List<HoodieLogFile> deltaLogFiles = SPLITTER.splitToList(joinedDeltaPaths).stream()
            .map(logPath -> new HoodieLogFile(new Path(logPath)))
            .collect(Collectors.toList());
    String basePath = requireNonNull(customSplitInfo.get(HUDI_BASEPATH_KEY), "HUDI_BASEPATH_KEY is missing");
    String maxCommitTime = requireNonNull(customSplitInfo.get(HUDI_MAX_COMMIT_TIME_KEY), "HUDI_MAX_COMMIT_TIME_KEY is missing");

    FileSplit recreated = new HoodieRealtimeFileSplit(
            split,
            basePath,
            deltaLogFiles,
            maxCommitTime,
            // false as incremental query is not supported yet
            false,
            Option.empty());
    return Optional.of(recreated);
}
Also used : HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) CUSTOM_FILE_SPLIT_CLASS_KEY(com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY) ImmutableMap(com.google.common.collect.ImmutableMap) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Collectors(java.util.stream.Collectors) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Splitter(com.google.common.base.Splitter) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)

Example 40 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project urban-eureka by errir503.

Class TestCustomSplitConversionUtils, method testHudiRealtimeBootstrapBaseFileSplitConverter.

/**
 * Round-trips a {@link HoodieRealtimeBootstrapBaseFileSplit} (three delta log files plus a
 * bootstrap source split) through {@code CustomSplitConversionUtils}: the split is flattened
 * into a string map and rebuilt from that map plus the base split. The rebuilt split must
 * expose the same base split properties, delta log paths, max commit time, and the same
 * bootstrap source path, start and length.
 *
 * @throws IOException declared because the conversion utilities may throw it
 */
@Test
public void testHudiRealtimeBootstrapBaseFileSplitConverter() throws IOException {
    List<String> deltaLogPaths = Arrays.asList("test1", "test2", "test3");
    List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream()
            .map(p -> new HoodieLogFile(new Path(p)))
            .collect(Collectors.toList());
    String maxCommitTime = "max_commit_time";
    Path bootstrapSourceFilePath = new Path("/test/source/test.parquet");
    long bootstrapSourceSplitStartPos = 0L;
    long bootstrapSourceSplitLength = 200L;
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit bootstrapSourceSplit = new FileSplit(bootstrapSourceFilePath, bootstrapSourceSplitStartPos, bootstrapSourceSplitLength, new String[0]);
    FileSplit hudiSplit = new HoodieRealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogFiles, maxCommitTime, bootstrapSourceSplit, false, Option.empty());

    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);

    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    HoodieRealtimeBootstrapBaseFileSplit recreatedSplit = (HoodieRealtimeBootstrapBaseFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);

    // TestNG's assertEquals takes (actual, expected); keeping that order makes failure
    // messages report the recreated value as "found" and the fixture value as "expected".
    assertEquals(recreatedSplit.getPath(), FILE_PATH);
    assertEquals(recreatedSplit.getStart(), SPLIT_START_POS);
    assertEquals(recreatedSplit.getLength(), SPLIT_LENGTH);
    assertEquals(recreatedSplit.getLocations(), SPLIT_HOSTS);
    assertEquals(recreatedSplit.getBasePath(), BASE_PATH);
    assertEquals(recreatedSplit.getDeltaLogPaths(), deltaLogPaths);
    assertEquals(recreatedSplit.getMaxCommitTime(), maxCommitTime);
    assertEquals(recreatedSplit.getBootstrapFileSplit().getPath(), bootstrapSourceFilePath);
    assertEquals(recreatedSplit.getBootstrapFileSplit().getStart(), bootstrapSourceSplitStartPos);
    assertEquals(recreatedSplit.getBootstrapFileSplit().getLength(), bootstrapSourceSplitLength);
}
Also used : Arrays(java.util.Arrays) Assert.assertEquals(org.testng.Assert.assertEquals) Option(org.apache.hudi.common.util.Option) Test(org.testng.annotations.Test) IOException(java.io.IOException) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) Test(org.testng.annotations.Test)

Aggregations

HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 77; IOException (java.io.IOException): 48; List (java.util.List): 46; Path (org.apache.hadoop.fs.Path): 45; Map (java.util.Map): 42; Collectors (java.util.stream.Collectors): 42; ArrayList (java.util.ArrayList): 38; Option (org.apache.hudi.common.util.Option): 37; FileSlice (org.apache.hudi.common.model.FileSlice): 34; HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 29; FileStatus (org.apache.hadoop.fs.FileStatus): 28; HashMap (java.util.HashMap): 26; FSUtils (org.apache.hudi.common.fs.FSUtils): 26; Pair (org.apache.hudi.common.util.collection.Pair): 25; HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24; HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 23; Set (java.util.Set): 22; LogManager (org.apache.log4j.LogManager): 22; Logger (org.apache.log4j.Logger): 22; HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat): 21