
Example 1 with HoodieLogBlockType

Use of org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType in project hudi by apache.

From the class TestHoodieLogFormat, method testBasicAppendAndRead:

@ParameterizedTest
@EnumSource(names = { "AVRO_DATA_BLOCK", "HFILE_DATA_BLOCK", "PARQUET_DATA_BLOCK" })
public void testBasicAppendAndRead(HoodieLogBlockType dataBlockType) throws IOException, URISyntaxException, InterruptedException {
    Writer writer = HoodieLogFormat.newWriterBuilder()
        .onParentPath(partitionPath)
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
        .withFileId("test-fileid1")
        .overBaseCommit("100")
        .withFs(fs)
        .build();
    List<IndexedRecord> records1 = SchemaTestUtil.generateTestRecords(0, 100);
    Schema schema = getSimpleSchema();
    List<IndexedRecord> copyOfRecords1 = records1.stream().map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
    Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieDataBlock dataBlock = getDataBlock(dataBlockType, records1, header);
    writer.appendBlock(dataBlock);
    writer.close();
    writer = HoodieLogFormat.newWriterBuilder()
        .onParentPath(partitionPath)
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
        .withFileId("test-fileid1")
        .overBaseCommit("100")
        .withFs(fs)
        .build();
    List<IndexedRecord> records2 = SchemaTestUtil.generateTestRecords(0, 100);
    List<IndexedRecord> copyOfRecords2 = records2.stream().map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    dataBlock = getDataBlock(dataBlockType, records2, header);
    writer.appendBlock(dataBlock);
    writer.close();
    // Close and Open again and append 100 more records
    writer = HoodieLogFormat.newWriterBuilder()
        .onParentPath(partitionPath)
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
        .withFileId("test-fileid1")
        .overBaseCommit("100")
        .withFs(fs)
        .build();
    List<IndexedRecord> records3 = SchemaTestUtil.generateTestRecords(0, 100);
    List<IndexedRecord> copyOfRecords3 = records3.stream().map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    dataBlock = getDataBlock(dataBlockType, records3, header);
    writer.appendBlock(dataBlock);
    writer.close();
    Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema());
    assertTrue(reader.hasNext(), "First block should be available");
    HoodieLogBlock nextBlock = reader.next();
    HoodieDataBlock dataBlockRead = (HoodieDataBlock) nextBlock;
    List<IndexedRecord> recordsRead1 = getRecords(dataBlockRead);
    assertEquals(copyOfRecords1.size(), recordsRead1.size(), "Read records size should be equal to the written records size");
    assertEquals(copyOfRecords1, recordsRead1, "Both records lists should be the same. (ordering guaranteed)");
    assertEquals(dataBlockRead.getSchema(), getSimpleSchema());
    assertTrue(reader.hasNext(), "Second block should be available");
    nextBlock = reader.next();
    dataBlockRead = (HoodieDataBlock) nextBlock;
    List<IndexedRecord> recordsRead2 = getRecords(dataBlockRead);
    assertEquals(copyOfRecords2.size(), recordsRead2.size(), "Read records size should be equal to the written records size");
    assertEquals(copyOfRecords2, recordsRead2, "Both records lists should be the same. (ordering guaranteed)");
    assertTrue(reader.hasNext(), "Third block should be available");
    nextBlock = reader.next();
    dataBlockRead = (HoodieDataBlock) nextBlock;
    List<IndexedRecord> recordsRead3 = getRecords(dataBlockRead);
    assertEquals(copyOfRecords3.size(), recordsRead3.size(), "Read records size should be equal to the written records size");
    assertEquals(copyOfRecords3, recordsRead3, "Both records lists should be the same. (ordering guaranteed)");
    reader.close();
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieHFileDataBlock(org.apache.hudi.common.table.log.block.HoodieHFileDataBlock) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) Assertions.assertNotEquals(org.junit.jupiter.api.Assertions.assertNotEquals) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) AfterAll(org.junit.jupiter.api.AfterAll) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) BeforeAll(org.junit.jupiter.api.BeforeAll) HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) SchemaTestUtil(org.apache.hudi.common.testutils.SchemaTestUtil) Path(org.apache.hadoop.fs.Path) HoodieParquetDataBlock(org.apache.hudi.common.table.log.block.HoodieParquetDataBlock) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) MethodSource(org.junit.jupiter.params.provider.MethodSource) Schema(org.apache.avro.Schema) Collection(java.util.Collection) Compression(org.apache.hadoop.hbase.io.compress.Compression) Set(java.util.Set) HoodieArchivedLogFile(org.apache.hudi.common.model.HoodieArchivedLogFile) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) UncheckedIOException(java.io.UncheckedIOException) MiniClusterUtil(org.apache.hudi.common.testutils.minicluster.MiniClusterUtil) List(java.util.List) Stream(java.util.stream.Stream) HadoopMapRedUtils(org.apache.hudi.common.testutils.HadoopMapRedUtils) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) Option(org.apache.hudi.common.util.Option) EnumSource(org.junit.jupiter.params.provider.EnumSource) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HashSet(java.util.HashSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Writer(org.apache.hudi.common.table.log.HoodieLogFormat.Writer) SchemaTestUtil.getSimpleSchema(org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Arguments.arguments(org.junit.jupiter.params.provider.Arguments.arguments) IndexedRecord(org.apache.avro.generic.IndexedRecord) ValueSource(org.junit.jupiter.params.provider.ValueSource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) 
HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) AppendResult(org.apache.hudi.common.table.log.AppendResult) IOException(java.io.IOException) HoodieLogFileReader(org.apache.hudi.common.table.log.HoodieLogFileReader) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) BenchmarkCounter(org.apache.parquet.hadoop.util.counters.BenchmarkCounter) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) HoodieDeleteBlock(org.apache.hudi.common.table.log.block.HoodieDeleteBlock) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils)
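
For reference, the round trip this test exercises reduces to a short sketch. The sketch is not part of the Hudi test itself: it assumes the same fixtures (partitionPath, fs) and the test helpers getDataBlock and getRecords shown above are in scope, and that the enclosing method declares the same checked exceptions as the test.

Writer writer = HoodieLogFormat.newWriterBuilder()
    .onParentPath(partitionPath)
    .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
    .withFileId("test-fileid1")
    .overBaseCommit("100")
    .withFs(fs)
    .build();
// Block headers carry the instant time and the writer schema.
Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10);
// getDataBlock is the test helper used above to build a data block of the requested type.
writer.appendBlock(getDataBlock(HoodieLogBlockType.AVRO_DATA_BLOCK, records, header));
writer.close();
// Read the single block back with the schema it was written with.
Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), getSimpleSchema());
while (reader.hasNext()) {
    HoodieDataBlock block = (HoodieDataBlock) reader.next();
    assertEquals(records.size(), getRecords(block).size());
}
reader.close();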

Example 2 with HoodieLogBlockType

Use of org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType in project hudi by apache.

From the class HoodieLogFileReader, method readBlock:

// TODO : convert content and block length to long by using ByteBuffer, raw byte [] allows
// for max of Integer size
private HoodieLogBlock readBlock() throws IOException {
    int blockSize;
    try {
        // 1. Read the total size of the block
        blockSize = (int) inputStream.readLong();
    } catch (EOFException | CorruptedLogFileException e) {
        // Create a corrupt block by finding the next MAGIC marker or EOF
        return createCorruptBlock();
    }
    // We may have had a crash which could have written this block partially
    // Skip blockSize in the stream and we should either find a sync marker (start of the next
    // block) or EOF. If we did not find either of it, then this block is a corrupted block.
    boolean isCorrupted = isBlockCorrupted(blockSize);
    if (isCorrupted) {
        return createCorruptBlock();
    }
    // 2. Read the version for this log format
    HoodieLogFormat.LogFormatVersion nextBlockVersion = readVersion();
    // 3. Read the block type for a log block
    HoodieLogBlockType blockType = tryReadBlockType(nextBlockVersion);
    // 4. Read the header for a log block, if present
    Map<HeaderMetadataType, String> header = nextBlockVersion.hasHeader() ? HoodieLogBlock.getLogMetadata(inputStream) : null;
    // 5. Read the content length for the content
    // Fallback to full-block size if no content-length
    // TODO replace w/ hasContentLength
    int contentLength = nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION ? (int) inputStream.readLong() : blockSize;
    // 6. Read the content or skip content based on IO vs Memory trade-off by client
    long contentPosition = inputStream.getPos();
    boolean shouldReadLazily = readBlockLazily && nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION;
    Option<byte[]> content = HoodieLogBlock.tryReadContent(inputStream, contentLength, shouldReadLazily);
    // 7. Read footer if any
    Map<HeaderMetadataType, String> footer = nextBlockVersion.hasFooter() ? HoodieLogBlock.getLogMetadata(inputStream) : null;
    // 8. Read the log block length, if present. This acts as a reverse pointer when traversing a
    // log file in reverse
    if (nextBlockVersion.hasLogBlockLength()) {
        inputStream.readLong();
    }
    // 9. Read the log block end position in the log file
    long blockEndPos = inputStream.getPos();
    HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = new HoodieLogBlock.HoodieLogBlockContentLocation(hadoopConf, logFile, contentPosition, contentLength, blockEndPos);
    switch(Objects.requireNonNull(blockType)) {
        case AVRO_DATA_BLOCK:
            if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) {
                return HoodieAvroDataBlock.getBlock(content.get(), readerSchema);
            } else {
                return new HoodieAvroDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, keyField);
            }
        case HFILE_DATA_BLOCK:
            checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieHFileDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, enableRecordLookups);
        case PARQUET_DATA_BLOCK:
            checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("Parquet block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieParquetDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, keyField);
        case DELETE_BLOCK:
            return new HoodieDeleteBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer);
        case COMMAND_BLOCK:
            return new HoodieCommandBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer);
        default:
            throw new HoodieNotSupportedException("Unsupported Block " + blockType);
    }
}
Also used : HoodieDeleteBlock(org.apache.hudi.common.table.log.block.HoodieDeleteBlock) CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) HoodieHFileDataBlock(org.apache.hudi.common.table.log.block.HoodieHFileDataBlock) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HoodieParquetDataBlock(org.apache.hudi.common.table.log.block.HoodieParquetDataBlock) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) EOFException(java.io.EOFException)
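
Callers of readBlock never see the per-type constructors; they receive a HoodieLogBlock and typically branch on its HoodieLogBlockType. The following consumer is a minimal sketch, not taken from the Hudi sources: fs, logFile and readerSchema are assumed to be supplied by the caller, and it uses only the reader API calls that appear in these examples.

// Sketch: iterate a log file and dispatch on the block type.
// Assumes the enclosing method declares throws IOException.
try (Reader reader = HoodieLogFormat.newReader(fs, logFile, readerSchema)) {
    while (reader.hasNext()) {
        HoodieLogBlock block = reader.next();
        switch (block.getBlockType()) {
            case AVRO_DATA_BLOCK:
            case HFILE_DATA_BLOCK:
            case PARQUET_DATA_BLOCK: {
                // Data blocks carry records; the header holds the instant time and schema.
                String instantTime = block.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                break;
            }
            case DELETE_BLOCK:
            case COMMAND_BLOCK:
                // Control blocks carry no data records.
                break;
            default:
                // Other block types (e.g. corrupt blocks) are ignored in this sketch.
                break;
        }
    }
}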

Example 3 with HoodieLogBlockType

Use of org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType in project hudi by apache.

From the class TestHoodieLogFormat, method testDataBlockFormatAppendAndReadWithProjectedSchema:

@ParameterizedTest
@EnumSource(names = { "AVRO_DATA_BLOCK", "HFILE_DATA_BLOCK", "PARQUET_DATA_BLOCK" })
public void testDataBlockFormatAppendAndReadWithProjectedSchema(HoodieLogBlockType dataBlockType) throws IOException, URISyntaxException, InterruptedException {
    Writer writer = HoodieLogFormat.newWriterBuilder()
        .onParentPath(partitionPath)
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
        .withFileId("test-fileid1")
        .overBaseCommit("100")
        .withFs(fs)
        .build();
    List<GenericRecord> records = SchemaTestUtil.generateTestGenericRecords(0, 1000);
    Schema schema = getSimpleSchema();
    Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<HoodieLogBlock.HeaderMetadataType, String>() {

        {
            put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
            put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
        }
    };
    // Init Benchmark to report number of bytes actually read from the Block
    BenchmarkCounter.initCounterFromReporter(HadoopMapRedUtils.createTestReporter(), fs.getConf());
    // NOTE: Have to use this ugly hack since List generic is not covariant in its type param
    HoodieDataBlock dataBlock = getDataBlock(dataBlockType, (List<IndexedRecord>) (List) records, header);
    writer.appendBlock(dataBlock);
    writer.close();
    Schema projectedSchema = HoodieAvroUtils.generateProjectionSchema(schema, Collections.singletonList("name"));
    List<GenericRecord> projectedRecords = HoodieAvroUtils.rewriteRecords(records, projectedSchema);
    try (Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), projectedSchema, true, false)) {
        assertTrue(reader.hasNext(), "First block should be available");
        HoodieLogBlock nextBlock = reader.next();
        HoodieDataBlock dataBlockRead = (HoodieDataBlock) nextBlock;
        Map<HoodieLogBlockType, Integer> expectedReadBytes = new HashMap<HoodieLogBlockType, Integer>() {

            {
                // not supported
                put(HoodieLogBlockType.AVRO_DATA_BLOCK, 0);
                // not supported
                put(HoodieLogBlockType.HFILE_DATA_BLOCK, 0);
                put(HoodieLogBlockType.PARQUET_DATA_BLOCK, 2605);
            }
        };
        List<IndexedRecord> recordsRead = getRecords(dataBlockRead);
        assertEquals(projectedRecords.size(), recordsRead.size(), "Read records size should be equal to the written records size");
        assertEquals(projectedRecords, recordsRead, "Both records lists should be the same. (ordering guaranteed)");
        assertEquals(dataBlockRead.getSchema(), projectedSchema);
        int bytesRead = (int) BenchmarkCounter.getBytesRead();
        assertEquals(expectedReadBytes.get(dataBlockType), bytesRead, "Read bytes have to match");
    }
}
Also used : HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) IndexedRecord(org.apache.avro.generic.IndexedRecord) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) SchemaTestUtil.getSimpleSchema(org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HoodieLogFileReader(org.apache.hudi.common.table.log.HoodieLogFileReader) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) List(java.util.List) ArrayList(java.util.ArrayList) GenericRecord(org.apache.avro.generic.GenericRecord) Writer(org.apache.hudi.common.table.log.HoodieLogFormat.Writer) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
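
Stripped of the benchmark counters, the projected read above comes down to two steps: derive a projection of the writer schema and pass it to the reader. A minimal sketch follows, assuming fs, a written logFile, the full writer schema, and the test's getRecords helper are available; as the expectedReadBytes map in the test indicates, only Parquet data blocks actually reduce the bytes read.

// Sketch: read a log file back materializing only the "name" column.
Schema projectedSchema = HoodieAvroUtils.generateProjectionSchema(schema, Collections.singletonList("name"));
try (Reader reader = HoodieLogFormat.newReader(fs, logFile, projectedSchema, true, false)) {
    while (reader.hasNext()) {
        HoodieDataBlock dataBlock = (HoodieDataBlock) reader.next();
        // Records come back conforming to the projected schema, not the full writer schema.
        List<IndexedRecord> recordsRead = getRecords(dataBlock);
    }
}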

Example 4 with HoodieLogBlockType

Use of org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType in project hudi by apache.

From the class HoodieLogFileCommand, method showLogFileCommits:

@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
public String showLogFileCommits(
        @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final String logFilePathPattern,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    FileSystem fs = HoodieCLI.getTableMetaClient().getFs();
    List<String> logFilePaths = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(logFilePathPattern)).stream().map(status -> status.getPath().toString()).collect(Collectors.toList());
    Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = new HashMap<>();
    int numCorruptBlocks = 0;
    int dummyInstantTimeCount = 0;
    for (String logFilePath : logFilePaths) {
        FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
        Schema writerSchema = new AvroSchemaConverter().convert(Objects.requireNonNull(TableSchemaResolver.readSchemaFromLogFile(fs, new Path(logFilePath))));
        Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
        // read the avro blocks
        while (reader.hasNext()) {
            HoodieLogBlock n = reader.next();
            String instantTime;
            AtomicInteger recordCount = new AtomicInteger(0);
            if (n instanceof HoodieCorruptBlock) {
                try {
                    instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                    if (instantTime == null) {
                        throw new Exception("Invalid instant time " + instantTime);
                    }
                } catch (Exception e) {
                    numCorruptBlocks++;
                    instantTime = "corrupt_block_" + numCorruptBlocks;
                // could not read metadata for corrupt block
                }
            } else {
                instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                if (instantTime == null) {
                    // This can happen when reading archived commit files since they were written without any instant time
                    dummyInstantTimeCount++;
                    instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
                }
                if (n instanceof HoodieDataBlock) {
                    try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) n).getRecordItr()) {
                        recordItr.forEachRemaining(r -> recordCount.incrementAndGet());
                    }
                }
            }
            if (commitCountAndMetadata.containsKey(instantTime)) {
                commitCountAndMetadata.get(instantTime).add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get()));
            } else {
                List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list = new ArrayList<>();
                list.add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get()));
                commitCountAndMetadata.put(instantTime, list);
            }
        }
        reader.close();
    }
    List<Comparable[]> rows = new ArrayList<>();
    ObjectMapper objectMapper = new ObjectMapper();
    for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata.entrySet()) {
        String instantTime = entry.getKey();
        for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry.getValue()) {
            Comparable[] output = new Comparable[5];
            output[0] = instantTime;
            output[1] = tuple3._3();
            output[2] = tuple3._1().toString();
            output[3] = objectMapper.writeValueAsString(tuple3._2()._1());
            output[4] = objectMapper.writeValueAsString(tuple3._2()._2());
            rows.add(output);
        }
    }
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_RECORD_COUNT).addTableHeaderField(HoodieTableHeaderFields.HEADER_BLOCK_TYPE).addTableHeaderField(HoodieTableHeaderFields.HEADER_HEADER_METADATA).addTableHeaderField(HoodieTableHeaderFields.HEADER_FOOTER_METADATA);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieCorruptBlock(org.apache.hudi.common.table.log.block.HoodieCorruptBlock) AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) FileSystem(org.apache.hadoop.fs.FileSystem) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) CliOption(org.springframework.shell.core.annotation.CliOption) ArrayList(java.util.ArrayList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) Map(java.util.Map) HoodieMemoryConfig(org.apache.hudi.config.HoodieMemoryConfig) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) CommandMarker(org.springframework.shell.core.CommandMarker) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) TableHeader(org.apache.hudi.cli.TableHeader) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Tuple3(scala.Tuple3) HoodieCLI(org.apache.hudi.cli.HoodieCLI) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Objects(java.util.Objects) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Component(org.springframework.stereotype.Component) List(java.util.List) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) Comparator(java.util.Comparator) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) FSUtils(org.apache.hudi.common.fs.FSUtils)
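
The per-block bookkeeping inside showLogFileCommits can be factored into a small helper that tallies record counts per HoodieLogBlockType. The sketch below is not part of HoodieLogFileCommand; the method name is hypothetical, and it reuses only the calls shown in the command above (schema resolution from the log file, the log format reader, getBlockType and getRecordItr).

// Hypothetical helper: count data records per block type in a single log file.
private static Map<HoodieLogBlockType, Integer> countRecordsByBlockType(FileSystem fs, Path logPath) throws IOException {
    // Resolve the writer schema from the log file itself, as the CLI command does.
    Schema writerSchema = new AvroSchemaConverter()
        .convert(Objects.requireNonNull(TableSchemaResolver.readSchemaFromLogFile(fs, logPath)));
    Map<HoodieLogBlockType, Integer> countsByType = new HashMap<>();
    try (Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(logPath), writerSchema)) {
        while (reader.hasNext()) {
            HoodieLogBlock block = reader.next();
            AtomicInteger recordCount = new AtomicInteger(0);
            if (block instanceof HoodieDataBlock) {
                // Only data blocks expose a record iterator.
                try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) block).getRecordItr()) {
                    recordItr.forEachRemaining(r -> recordCount.incrementAndGet());
                }
            }
            countsByType.merge(block.getBlockType(), recordCount.get(), Integer::sum);
        }
    }
    return countsByType;
}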

Aggregations

ArrayList (java.util.ArrayList) 3
HashMap (java.util.HashMap) 3
List (java.util.List) 3
Schema (org.apache.avro.Schema) 3
IndexedRecord (org.apache.avro.generic.IndexedRecord) 3
Reader (org.apache.hudi.common.table.log.HoodieLogFormat.Reader) 3
HoodieDataBlock (org.apache.hudi.common.table.log.block.HoodieDataBlock) 3
HoodieLogBlock (org.apache.hudi.common.table.log.block.HoodieLogBlock) 3
HeaderMetadataType (org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) 3
HoodieLogBlockType (org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) 3
IOException (java.io.IOException) 2
Map (java.util.Map) 2
Collectors (java.util.stream.Collectors) 2
GenericRecord (org.apache.avro.generic.GenericRecord) 2
FileStatus (org.apache.hadoop.fs.FileStatus) 2
FileSystem (org.apache.hadoop.fs.FileSystem) 2
Path (org.apache.hadoop.fs.Path) 2
FSUtils (org.apache.hudi.common.fs.FSUtils) 2
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile) 2
HoodieRecord (org.apache.hudi.common.model.HoodieRecord) 2