
Example 41 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in the project urban-eureka by errir503.

From the class TestCustomSplitConversionUtils, method testHudiRealtimeSplitConverterRoundTrip.

@Test
public void testHudiRealtimeSplitConverterRoundTrip() throws IOException {
    List<String> deltaLogPaths = Arrays.asList("test1", "test2", "test3");
    List<HoodieLogFile> deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
    String expectedMaxCommitTime = "max_commit_time";
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, deltaLogFiles, expectedMaxCommitTime, false, Option.empty());
    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);
    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    HoodieRealtimeFileSplit recreatedSplit = (HoodieRealtimeFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);
    assertEquals(FILE_PATH, recreatedSplit.getPath());
    assertEquals(SPLIT_START_POS, recreatedSplit.getStart());
    assertEquals(SPLIT_LENGTH, recreatedSplit.getLength());
    assertEquals(SPLIT_HOSTS, recreatedSplit.getLocations());
    assertEquals(BASE_PATH, recreatedSplit.getBasePath());
    assertEquals(deltaLogPaths, recreatedSplit.getDeltaLogPaths());
    assertEquals(expectedMaxCommitTime, recreatedSplit.getMaxCommitTime());
}
Also used : Arrays(java.util.Arrays) Assert.assertEquals(org.testng.Assert.assertEquals) Option(org.apache.hudi.common.util.Option) Test(org.testng.annotations.Test) IOException(java.io.IOException) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Collectors(java.util.stream.Collectors) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) List(java.util.List) FileSplit(org.apache.hadoop.mapred.FileSplit) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)
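
The round trip above packs the Hudi-specific fields of the realtime split into a Map<String, String> that can travel alongside the plain base split. A minimal sketch of one way such an extraction could look follows; the key names and the comma-joined path encoding are hypothetical, invented for illustration, and the real CustomSplitConversionUtils may encode these fields differently.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class SplitInfoRoundTripSketch {
    // Hypothetical key names; the real CustomSplitConversionUtils may use different ones.
    static final String BASE_PATH_KEY = "hudi_base_path";
    static final String DELTA_LOGS_KEY = "hudi_delta_log_paths";
    static final String MAX_COMMIT_KEY = "hudi_max_commit_time";

    // The "extract" direction: serialize the split's Hudi fields into a flat String map.
    static Map<String, String> extract(String basePath, List<String> deltaLogPaths, String maxCommitTime) {
        Map<String, String> info = new HashMap<>();
        info.put(BASE_PATH_KEY, basePath);
        // Flatten the log-path list; assumes paths contain no commas.
        info.put(DELTA_LOGS_KEY, String.join(",", deltaLogPaths));
        info.put(MAX_COMMIT_KEY, maxCommitTime);
        return info;
    }
}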

Example 42 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in the project urban-eureka by errir503.

From the class TestCustomSplitConversionUtils, method testHudiRealtimeSplitConverterNoLogRoundTrip.

@Test
public void testHudiRealtimeSplitConverterNoLogRoundTrip() throws IOException {
    List<String> deltaLogPaths = ImmutableList.of();
    List<HoodieLogFile> deltaLogFiles = ImmutableList.of();
    String expectedMaxCommitTime = "max_commit_time";
    FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
    FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, deltaLogFiles, expectedMaxCommitTime, false, Option.empty());
    // Test conversion of HudiSplit -> customSplitInfo
    Map<String, String> customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);
    // Test conversion of (customSplitInfo + baseSplit) -> HudiSplit
    HoodieRealtimeFileSplit recreatedSplit = (HoodieRealtimeFileSplit) CustomSplitConversionUtils.recreateSplitWithCustomInfo(baseSplit, customSplitInfo);
    assertEquals(FILE_PATH, recreatedSplit.getPath());
    assertEquals(SPLIT_START_POS, recreatedSplit.getStart());
    assertEquals(SPLIT_LENGTH, recreatedSplit.getLength());
    assertEquals(SPLIT_HOSTS, recreatedSplit.getLocations());
    assertEquals(BASE_PATH, recreatedSplit.getBasePath());
    assertEquals(deltaLogPaths, recreatedSplit.getDeltaLogPaths());
    assertEquals(expectedMaxCommitTime, recreatedSplit.getMaxCommitTime());
}
Also used : HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) HoodieRealtimeBootstrapBaseFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit) FileSplit(org.apache.hadoop.mapred.FileSplit) Test(org.testng.annotations.Test)
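
This no-log variant exercises the same round trip with empty lists: the assertion on getDeltaLogPaths() verifies that a split recreated from custom info carries an empty delta-log list rather than null when the original split had no log files.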

Example 43 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in the project hudi by apache.

From the class TestFileSliceMetricUtils, method buildFileSlice.

private FileSlice buildFileSlice(long baseFileLen, List<Long> logFileLens) {
    final String baseFilePath = ".b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1";
    FileSlice slice = new FileSlice("partition_0", HoodieActiveTimeline.createNewInstantTime(), UUID.randomUUID().toString());
    HoodieBaseFile baseFile = new HoodieBaseFile(baseFilePath);
    baseFile.setFileLen(baseFileLen);
    slice.setBaseFile(baseFile);
    int logVersion = 1;
    for (long logFileLen : logFileLens) {
        String logFilePath = "." + UUID.randomUUID().toString() + "_20170101134598.log." + logVersion;
        HoodieLogFile logFile = new HoodieLogFile(logFilePath);
        logFile.setFileLen(logFileLen);
        slice.addLogFile(logFile);
        logVersion++;
    }
    return slice;
}
Also used : HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)
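
A short usage sketch for the helper above, assumed to run inside the same test class: build a slice with a 100-byte base file and two log files of 10 and 20 bytes.

FileSlice slice = buildFileSlice(100L, java.util.Arrays.asList(10L, 20L));
// The helper assigned log versions 1 and 2 in insertion order; getLogFiles()
// streams them back (assuming the Stream-returning accessor on FileSlice).
long numLogFiles = slice.getLogFiles().count(); // == 2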

Example 44 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in the project hudi by apache.

From the class HoodieWriteableTestTable, method appendRecordsToLogFile.

private Pair<String, HoodieLogFile> appendRecordsToLogFile(List<HoodieRecord> groupedRecords) throws Exception {
    String partitionPath = groupedRecords.get(0).getPartitionPath();
    HoodieRecordLocation location = groupedRecords.get(0).getCurrentLocation();
    try (HoodieLogFormat.Writer logWriter = HoodieLogFormat.newWriterBuilder().onParentPath(new Path(basePath, partitionPath)).withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId(location.getFileId()).overBaseCommit(location.getInstantTime()).withFs(fs).build()) {
        Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
        header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, location.getInstantTime());
        header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
        logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> {
            try {
                GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get();
                HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
                return (IndexedRecord) val;
            } catch (IOException e) {
                LOG.warn("Failed to convert record " + r.toString(), e);
                return null;
            }
        }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
        return Pair.of(partitionPath, logWriter.getLogFile());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileSystem(org.apache.hadoop.fs.FileSystem) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieAvroParquetConfig(org.apache.hudi.io.storage.HoodieAvroParquetConfig) FileCreateUtils.baseFileName(org.apache.hudi.common.testutils.FileCreateUtils.baseFileName) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieParquetWriter(org.apache.hudi.io.storage.HoodieParquetWriter) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) BloomFilter(org.apache.hudi.common.bloom.BloomFilter) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) CompressionKind(org.apache.orc.CompressionKind) TaskContextSupplier(org.apache.hudi.common.engine.TaskContextSupplier) HoodieOrcWriter(org.apache.hudi.io.storage.HoodieOrcWriter) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) HoodieOrcConfig(org.apache.hudi.io.storage.HoodieOrcConfig) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) List(java.util.List) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) Paths(java.nio.file.Paths) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieAvroWriteSupport(org.apache.hudi.avro.HoodieAvroWriteSupport) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) LogManager(org.apache.log4j.LogManager) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) Pair(org.apache.hudi.common.util.collection.Pair)
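
A hedged sketch of the reverse direction, reading the appended block back from the returned log file. It assumes HoodieLogFormat.newReader(fs, logFile, schema) with this signature is available in the Hudi version shown, and reuses fs, schema, and LOG from the surrounding class.

Pair<String, HoodieLogFile> written = appendRecordsToLogFile(groupedRecords);
try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, written.getRight(), schema)) {
    while (reader.hasNext()) {
        // The single HoodieAvroDataBlock appended above should round-trip.
        HoodieLogBlock block = reader.next();
        LOG.info("Read back block of type " + block.getBlockType());
    }
}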

Example 45 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in the project hudi by apache.

From the class TestCompactionUtils, method testBuildFromFileSlice.

@Test
public void testBuildFromFileSlice() {
    String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
    // Empty File-Slice with no data and log files
    FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
    HoodieCompactionOperation op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], emptyFileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(emptyFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
    // File Slice with data-file but no log files
    FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
    noLogFileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
    op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(noLogFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
    // File Slice with no data-file but log files present
    FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
    noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
    noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
    op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(noDataFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
    // File Slice with data-file and log files present
    FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
    fileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
    fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
    fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
    op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], fileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(fileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) DummyHoodieBaseFile(org.apache.hudi.common.testutils.CompactionTestUtils.DummyHoodieBaseFile) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
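
As a side note on the makeLogFileName calls above, HoodieLogFile can parse the generated name back into its components. A small hedged sketch; the exact name layout (typically .<fileId>_<baseInstant>.log.<version>_<writeToken>) and the accessor names are assumptions tied to this Hudi version.

Path logPath = new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN));
HoodieLogFile logFile = new HoodieLogFile(logPath);
// The parsed accessors should recover the pieces passed to makeLogFileName.
assertEquals("noData1", logFile.getFileId());
assertEquals("000", logFile.getBaseCommitTime());
assertEquals(1, logFile.getLogVersion());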

Aggregations

HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 77 usages
IOException (java.io.IOException): 48 usages
List (java.util.List): 46 usages
Path (org.apache.hadoop.fs.Path): 45 usages
Map (java.util.Map): 42 usages
Collectors (java.util.stream.Collectors): 42 usages
ArrayList (java.util.ArrayList): 38 usages
Option (org.apache.hudi.common.util.Option): 37 usages
FileSlice (org.apache.hudi.common.model.FileSlice): 34 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 29 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 28 usages
HashMap (java.util.HashMap): 26 usages
FSUtils (org.apache.hudi.common.fs.FSUtils): 26 usages
Pair (org.apache.hudi.common.util.collection.Pair): 25 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 23 usages
Set (java.util.Set): 22 usages
LogManager (org.apache.log4j.LogManager): 22 usages
Logger (org.apache.log4j.Logger): 22 usages
HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat): 21 usages