Example 11 with HoodieAvroDataBlock

use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class HoodieTimelineArchiver, method mergeArchiveFiles. It drains the Avro data blocks of each small archive file and rewrites the accumulated records, in batches, into a single merged archive file.

public void mergeArchiveFiles(List<FileStatus> compactCandidate) throws IOException {
    LOG.info("Starting to merge small archive files.");
    Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema();
    try {
        List<IndexedRecord> records = new ArrayList<>();
        for (FileStatus fs : compactCandidate) {
            // Read the archived file
            try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(
                    metaClient.getFs(), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
                // Read the avro blocks
                while (reader.hasNext()) {
                    HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
                    blk.getRecordItr().forEachRemaining(records::add);
                    if (records.size() >= this.config.getCommitArchivalBatchSize()) {
                        // writeToFile flushes the batched records into the merged archive file and empties the list
                        writeToFile(wrapperSchema, records);
                    }
                }
            }
        }
        writeToFile(wrapperSchema, records); // flush any records left over from the final partial batch
    } catch (Exception e) {
        throw new HoodieCommitException("Failed to merge small archive files", e);
    } finally {
        writer.close();
    }
    LOG.info("Success to merge small archive files.");
}
Also used : HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) FileStatus(org.apache.hadoop.fs.FileStatus) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieException(org.apache.hudi.exception.HoodieException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)
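The read loop in mergeArchiveFiles generalizes to any Hudi archive file. Below is a minimal sketch of that pattern in isolation, assembled only from calls that appear in these examples; the helper name readArchivedRecords and its parameters are hypothetical, and error handling is left to the caller. Note that, unlike mergeArchiveFiles, this sketch keeps every record in memory, which is exactly what the batching via getCommitArchivalBatchSize above avoids.

private List<IndexedRecord> readArchivedRecords(FileSystem fs, Path archivedFilePath) throws IOException {
    List<IndexedRecord> records = new ArrayList<>();
    // Archive files are ordinary Hudi log files whose blocks carry Avro-encoded HoodieArchivedMetaEntry records
    try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(
            fs, new HoodieLogFile(archivedFilePath), HoodieArchivedMetaEntry.getClassSchema())) {
        while (reader.hasNext()) {
            // Archive files contain only Avro data blocks, so the cast mirrors the examples above
            HoodieAvroDataBlock block = (HoodieAvroDataBlock) reader.next();
            // getRecordItr returns a ClosableIterator, so close it once the block is drained
            try (ClosableIterator<IndexedRecord> recordItr = block.getRecordItr()) {
                recordItr.forEachRemaining(records::add);
            }
        }
    }
    return records;
}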

Example 12 with HoodieAvroDataBlock

use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class TestHoodieLogFormat, method testHugeLogFileWrite. It appends one reusable data block until more than Integer.MAX_VALUE bytes have been written, reads every block back, and finally verifies that appending a batch of blocks that would overflow the size limit is rejected.

@Test
public void testHugeLogFileWrite() throws IOException, URISyntaxException, InterruptedException {
    Writer writer = HoodieLogFormat.newWriterBuilder()
            .onParentPath(partitionPath)
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
            .withFileId("test-fileid1")
            .overBaseCommit("100")
            .withFs(fs)
            .withSizeThreshold(3L * 1024 * 1024 * 1024)
            .build();
    Schema schema = getSimpleSchema();
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 1000);
    List<IndexedRecord> copyOfRecords = records.stream().map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
    Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    byte[] dataBlockContentBytes = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header).getContentBytes();
    HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = new HoodieLogBlock.HoodieLogBlockContentLocation(new Configuration(), null, 0, dataBlockContentBytes.length, 0);
    HoodieDataBlock reusableDataBlock = new HoodieAvroDataBlock(null, Option.ofNullable(dataBlockContentBytes), false, logBlockContentLoc, Option.ofNullable(getSimpleSchema()), header, new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD);
    long writtenSize = 0;
    int logBlockWrittenNum = 0;
    while (writtenSize < Integer.MAX_VALUE) {
        AppendResult appendResult = writer.appendBlock(reusableDataBlock);
        assertTrue(appendResult.size() > 0);
        writtenSize += appendResult.size();
        logBlockWrittenNum++;
    }
    writer.close();
    Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true, true);
    assertTrue(reader.hasNext(), "We wrote a block, we should be able to read it");
    HoodieLogBlock nextBlock = reader.next();
    assertEquals(DEFAULT_DATA_BLOCK_TYPE, nextBlock.getBlockType(), "The next block should be a data block");
    HoodieDataBlock dataBlockRead = (HoodieDataBlock) nextBlock;
    List<IndexedRecord> recordsRead = getRecords(dataBlockRead);
    assertEquals(copyOfRecords.size(), recordsRead.size(), "Read records size should be equal to the written records size");
    assertEquals(copyOfRecords, recordsRead, "Both records lists should be the same. (ordering guaranteed)");
    int logBlockReadNum = 1;
    while (reader.hasNext()) {
        reader.next();
        logBlockReadNum++;
    }
    assertEquals(logBlockWrittenNum, logBlockReadNum, "All written log should be correctly found");
    reader.close();
    // test writing oversize data block which should be rejected
    Writer oversizeWriter = HoodieLogFormat.newWriterBuilder()
            .onParentPath(partitionPath)
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
            .withFileId("test-fileid1")
            .overBaseCommit("100")
            .withSizeThreshold(3L * 1024 * 1024 * 1024)
            .withFs(fs)
            .build();
    List<HoodieLogBlock> dataBlocks = new ArrayList<>(logBlockWrittenNum + 1);
    for (int i = 0; i < logBlockWrittenNum + 1; i++) {
        dataBlocks.add(reusableDataBlock);
    }
    assertThrows(HoodieIOException.class, () -> {
        oversizeWriter.appendBlocks(dataBlocks);
    }, "Blocks appended may overflow. Please decrease log block size or log block amount");
    oversizeWriter.close();
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieHFileDataBlock(org.apache.hudi.common.table.log.block.HoodieHFileDataBlock) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) Assertions.assertNotEquals(org.junit.jupiter.api.Assertions.assertNotEquals) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) AfterAll(org.junit.jupiter.api.AfterAll) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) BeforeAll(org.junit.jupiter.api.BeforeAll) HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) SchemaTestUtil(org.apache.hudi.common.testutils.SchemaTestUtil) Path(org.apache.hadoop.fs.Path) HoodieParquetDataBlock(org.apache.hudi.common.table.log.block.HoodieParquetDataBlock) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) MethodSource(org.junit.jupiter.params.provider.MethodSource) Schema(org.apache.avro.Schema) Collection(java.util.Collection) Compression(org.apache.hadoop.hbase.io.compress.Compression) Set(java.util.Set) HoodieArchivedLogFile(org.apache.hudi.common.model.HoodieArchivedLogFile) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) UncheckedIOException(java.io.UncheckedIOException) MiniClusterUtil(org.apache.hudi.common.testutils.minicluster.MiniClusterUtil) List(java.util.List) Stream(java.util.stream.Stream) HadoopMapRedUtils(org.apache.hudi.common.testutils.HadoopMapRedUtils) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) Option(org.apache.hudi.common.util.Option) EnumSource(org.junit.jupiter.params.provider.EnumSource) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HashSet(java.util.HashSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.hudi.common.table.log.HoodieLogFormat.Writer) SchemaTestUtil.getSimpleSchema(org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Arguments.arguments(org.junit.jupiter.params.provider.Arguments.arguments) IndexedRecord(org.apache.avro.generic.IndexedRecord) ValueSource(org.junit.jupiter.params.provider.ValueSource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) AppendResult(org.apache.hudi.common.table.log.AppendResult) IOException(java.io.IOException) HoodieLogFileReader(org.apache.hudi.common.table.log.HoodieLogFileReader) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) BenchmarkCounter(org.apache.parquet.hadoop.util.counters.BenchmarkCounter) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) HoodieDeleteBlock(org.apache.hudi.common.table.log.block.HoodieDeleteBlock) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils)
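The test above reuses one pre-serialized block to write several gigabytes quickly. For the common case, the write path is much smaller: a writer, a header carrying the instant time and schema, and the records. A minimal sketch, assuming fs, partitionPath, schema, and a List<IndexedRecord> records are in scope; the file id and base commit time are placeholder values, and the three-argument HoodieAvroDataBlock constructor is the one used in Example 14 below.

Writer writer = HoodieLogFormat.newWriterBuilder()
        .onParentPath(partitionPath)
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
        .withFileId("example-fileid") // placeholder file id
        .overBaseCommit("100") // placeholder base commit time
        .withFs(fs)
        .build();
Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
// Wrap the records in an Avro data block keyed by the record-key metadata field, then append it
writer.appendBlock(new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
writer.close();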

Example 13 with HoodieAvroDataBlock

use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class ExportCommand, method copyArchivedInstants. It scans archived log files, decodes each entry into a HoodieArchivedMetaEntry, filters by the requested action types, and writes each instant's metadata to a local folder as JSON, up to the given limit.

private int copyArchivedInstants(List<FileStatus> statuses, Set<String> actionSet, int limit, String localFolder) throws Exception {
    int copyCount = 0;
    for (FileStatus fs : statuses) {
        // read the archived file
        Reader reader = HoodieLogFormat.newReader(
                FSUtils.getFs(HoodieCLI.getTableMetaClient().getBasePath(), HoodieCLI.conf),
                new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema());
        // read the avro blocks
        while (reader.hasNext() && copyCount < limit) {
            HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
            try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) {
                while (recordItr.hasNext()) {
                    IndexedRecord ir = recordItr.next();
                    // Archived instants are saved as Avro-encoded HoodieArchivedMetaEntry records. We need to get the
                    // metadata record from the entry and convert it to JSON.
                    HoodieArchivedMetaEntry archiveEntryRecord = (HoodieArchivedMetaEntry) SpecificData.get().deepCopy(HoodieArchivedMetaEntry.SCHEMA$, ir);
                    final String action = archiveEntryRecord.get("actionType").toString();
                    if (!actionSet.contains(action)) {
                        continue;
                    }
                    GenericRecord metadata = null;
                    switch(action) {
                        case HoodieTimeline.CLEAN_ACTION:
                            metadata = archiveEntryRecord.getHoodieCleanMetadata();
                            break;
                        case HoodieTimeline.COMMIT_ACTION:
                        case HoodieTimeline.DELTA_COMMIT_ACTION:
                            metadata = archiveEntryRecord.getHoodieCommitMetadata();
                            break;
                        case HoodieTimeline.ROLLBACK_ACTION:
                            metadata = archiveEntryRecord.getHoodieRollbackMetadata();
                            break;
                        case HoodieTimeline.SAVEPOINT_ACTION:
                            metadata = archiveEntryRecord.getHoodieSavePointMetadata();
                            break;
                        case HoodieTimeline.COMPACTION_ACTION:
                            metadata = archiveEntryRecord.getHoodieCompactionMetadata();
                            break;
                        default:
                            throw new HoodieException("Unknown type of action " + action);
                    }
                    final String instantTime = archiveEntryRecord.get("commitTime").toString();
                    final String outPath = localFolder + Path.SEPARATOR + instantTime + "." + action;
                    writeToFile(outPath, HoodieAvroUtils.avroToJson(metadata, true));
                    if (++copyCount == limit) {
                        break;
                    }
                }
            }
        }
        reader.close();
    }
    return copyCount;
}
Also used : HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) FileStatus(org.apache.hadoop.fs.FileStatus) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieException(org.apache.hudi.exception.HoodieException) GenericRecord(org.apache.avro.generic.GenericRecord)
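The subtle step in copyArchivedInstants is the deepCopy: records coming out of the block are generic IndexedRecords, and SpecificData.get().deepCopy rebinds one to the generated HoodieArchivedMetaEntry class so its typed getters become available. A minimal sketch of just that step, assuming ir is a record read from an archive block and the same imports as above; only the commit branch is shown, and avroToJson is assumed to return the JSON-encoded bytes, matching its use with writeToFile in the method.

HoodieArchivedMetaEntry entry =
        (HoodieArchivedMetaEntry) SpecificData.get().deepCopy(HoodieArchivedMetaEntry.SCHEMA$, ir);
String action = entry.get("actionType").toString();
if (HoodieTimeline.COMMIT_ACTION.equals(action)) {
    // pretty-print the nested commit metadata record as JSON
    byte[] json = HoodieAvroUtils.avroToJson(entry.getHoodieCommitMetadata(), true);
}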

Example 14 with HoodieAvroDataBlock

use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class TestHoodieLogFileCommand, method init. It creates a MERGE_ON_READ table, then writes a HoodieAvroDataBlock of 100 generated test records into a fresh log file for the tests that follow.

@BeforeEach
public void init() throws IOException, InterruptedException, URISyntaxException {
    HoodieCLI.conf = hadoopConf();
    // Create table and connect
    String tableName = tableName();
    tablePath = tablePath(tableName);
    partitionPath = Paths.get(tablePath, HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH).toString();
    new TableCommand().createTable(tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
    Files.createDirectories(Paths.get(partitionPath));
    fs = FSUtils.getFs(tablePath, hadoopConf());
    try (HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder()
            .onParentPath(new Path(partitionPath))
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
            .withFileId("test-log-fileid1")
            .overBaseCommit("100")
            .withFs(fs)
            .build()) {
        // write data to file
        List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
        Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
        header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, INSTANT_TIME);
        header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
        dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
        writer.appendBlock(dataBlock);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IndexedRecord(org.apache.avro.generic.IndexedRecord) HashMap(java.util.HashMap) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) BeforeEach(org.junit.jupiter.api.BeforeEach)

Example 15 with HoodieAvroDataBlock

use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class TestHoodieLogFileCommand, method testShowLogFileRecordsWithMerge.

/**
 * Test case for 'show logfile records' with merge.
 */
@Test
public void testShowLogFileRecordsWithMerge() throws IOException, InterruptedException, URISyntaxException {
    // create commit instant
    HoodieTestCommitMetadataGenerator.createCommitFile(tablePath, INSTANT_TIME, HoodieCLI.conf);
    // write to path '2015/03/16'.
    Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
    partitionPath = tablePath + Path.SEPARATOR + HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH;
    Files.createDirectories(Paths.get(partitionPath));
    HoodieLogFormat.Writer writer = null;
    try {
        // use a small size threshold so the writer rolls over and splits the log file
        writer = HoodieLogFormat.newWriterBuilder()
                .onParentPath(new Path(partitionPath))
                .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
                .withFileId("test-log-fileid1")
                .overBaseCommit(INSTANT_TIME)
                .withFs(fs)
                .withSizeThreshold(500)
                .build();
        List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
        Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
        header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, INSTANT_TIME);
        header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
        HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
        writer.appendBlock(dataBlock);
    } finally {
        if (writer != null) {
            writer.close();
        }
    }
    CommandResult cr = shell().executeCommand("show logfile records --logFilePathPattern " + partitionPath + "/* --mergeRecords true");
    assertTrue(cr.isSuccess());
    // get expected result of 10 records.
    List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(partitionPath + "/*"))).map(status -> status.getPath().toString()).collect(Collectors.toList());
    HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
            .withFileSystem(fs)
            .withBasePath(tablePath)
            .withLogFilePaths(logFilePaths)
            .withReaderSchema(schema)
            .withLatestInstantTime(INSTANT_TIME)
            .withMaxMemorySizeInBytes(HoodieMemoryConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES)
            .withReadBlocksLazily(Boolean.parseBoolean(HoodieCompactionConfig.COMPACTION_LAZY_BLOCK_READ_ENABLE.defaultValue()))
            .withReverseReader(Boolean.parseBoolean(HoodieCompactionConfig.COMPACTION_REVERSE_LOG_READ_ENABLE.defaultValue()))
            .withBufferSize(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE.defaultValue())
            .withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue())
            .withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
            .withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
            .build();
    Iterator<HoodieRecord<? extends HoodieRecordPayload>> records = scanner.iterator();
    int num = 0;
    int maxSize = 10;
    List<IndexedRecord> indexRecords = new ArrayList<>();
    while (records.hasNext() && num < maxSize) {
        Option<IndexedRecord> hoodieRecord = records.next().getData().getInsertValue(schema);
        indexRecords.add(hoodieRecord.get());
        num++;
    }
    String[][] rows = indexRecords.stream().map(r -> new String[] { r.toString() }).toArray(String[][]::new);
    assertNotNull(rows);
    String expected = HoodiePrintHelper.print(new String[] { HoodieTableHeaderFields.HEADER_RECORDS }, rows);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used : Path(org.apache.hadoop.fs.Path) BeforeEach(org.junit.jupiter.api.BeforeEach) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Arrays(java.util.Arrays) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Map(java.util.Map) HoodieMemoryConfig(org.apache.hudi.config.HoodieMemoryConfig) SchemaTestUtil.getSimpleSchema(org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema) SchemaTestUtil(org.apache.hudi.common.testutils.SchemaTestUtil) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Tag(org.junit.jupiter.api.Tag) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CLIFunctionalTestHarness(org.apache.hudi.cli.functional.CLIFunctionalTestHarness) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) Iterator(java.util.Iterator) Files(java.nio.file.Files) TableHeader(org.apache.hudi.cli.TableHeader) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HoodieTestCommitMetadataGenerator(org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieCLI(org.apache.hudi.cli.HoodieCLI) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Test(org.junit.jupiter.api.Test) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Paths(java.nio.file.Paths) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) CommandResult(org.springframework.shell.core.CommandResult) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) FSUtils(org.apache.hudi.common.fs.FSUtils)
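The scanner construction in this test spells out every option from config defaults, which hides the handful that matter for a basic merged read. A pared-down sketch, assuming fs, tablePath, logFilePaths, schema, and INSTANT_TIME as above; whether the omitted options may be left unset can vary by Hudi version, so treat this as illustrative rather than canonical.

HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
        .withFileSystem(fs) // filesystem holding the log files
        .withBasePath(tablePath) // table base path
        .withLogFilePaths(logFilePaths) // delta log files to merge
        .withReaderSchema(schema) // schema used to decode records
        .withLatestInstantTime(INSTANT_TIME) // ignore blocks from later instants
        .withMaxMemorySizeInBytes(HoodieMemoryConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES)
        .withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue())
        .build();
Iterator<HoodieRecord<? extends HoodieRecordPayload>> merged = scanner.iterator();

Each record the iterator yields is the merged view of all log entries for its key, which is why the test compares the scanner output against the CLI's --mergeRecords true result.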

Aggregations

HoodieAvroDataBlock (org.apache.hudi.common.table.log.block.HoodieAvroDataBlock): 16 usages
IndexedRecord (org.apache.avro.generic.IndexedRecord): 14 usages
HashMap (java.util.HashMap): 12 usages
ArrayList (java.util.ArrayList): 10 usages
Path (org.apache.hadoop.fs.Path): 10 usages
HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat): 10 usages
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 9 usages
IOException (java.io.IOException): 8 usages
Schema (org.apache.avro.Schema): 8 usages
List (java.util.List): 7 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 7 usages
Collectors (java.util.stream.Collectors): 6 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 6 usages
Option (org.apache.hudi.common.util.Option): 6 usages
Map (java.util.Map): 5 usages
HoodieAvroUtils (org.apache.hudi.avro.HoodieAvroUtils): 5 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 5 usages
HeaderMetadataType (org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType): 5 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
HoodieArchivedMetaEntry (org.apache.hudi.avro.model.HoodieArchivedMetaEntry): 4 usages