Example 6 with HoodieAvroDataBlock

Use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class ArchivedCommitsCommand, method showArchivedCommits.

@CliCommand(value = "show archived commit stats", help = "Read commits from archived files and show details")
public String showArchivedCommits(
        @CliOption(key = { "archiveFolderPattern" }, help = "Archive Folder", unspecifiedDefaultValue = "") String folder,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    System.out.println("===============> Showing only " + limit + " archived commits <===============");
    String basePath = HoodieCLI.getTableMetaClient().getBasePath();
    Path archivePath = new Path(HoodieCLI.getTableMetaClient().getArchivePath() + "/.commits_.archive*");
    if (folder != null && !folder.isEmpty()) {
        archivePath = new Path(basePath + "/.hoodie/" + folder);
    }
    FileStatus[] fsStatuses = FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath);
    List<Comparable[]> allStats = new ArrayList<>();
    for (FileStatus fs : fsStatuses) {
        // read the archived file
        Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema());
        List<IndexedRecord> readRecords = new ArrayList<>();
        // read the avro blocks
        while (reader.hasNext()) {
            HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
            blk.getRecordItr().forEachRemaining(readRecords::add);
        }
        List<Comparable[]> readCommits = readRecords.stream()
            .map(r -> (GenericRecord) r)
            .filter(r -> r.get("actionType").toString().equals(HoodieTimeline.COMMIT_ACTION)
                || r.get("actionType").toString().equals(HoodieTimeline.DELTA_COMMIT_ACTION))
            .flatMap(r -> {
            HoodieCommitMetadata metadata = (HoodieCommitMetadata) SpecificData.get().deepCopy(HoodieCommitMetadata.SCHEMA$, r.get("hoodieCommitMetadata"));
            final String instantTime = r.get("commitTime").toString();
            final String action = r.get("actionType").toString();
            return metadata.getPartitionToWriteStats().values().stream()
                .flatMap(hoodieWriteStats -> hoodieWriteStats.stream().map(hoodieWriteStat -> {
                List<Comparable> row = new ArrayList<>();
                row.add(action);
                row.add(instantTime);
                row.add(hoodieWriteStat.getPartitionPath());
                row.add(hoodieWriteStat.getFileId());
                row.add(hoodieWriteStat.getPrevCommit());
                row.add(hoodieWriteStat.getNumWrites());
                row.add(hoodieWriteStat.getNumInserts());
                row.add(hoodieWriteStat.getNumDeletes());
                row.add(hoodieWriteStat.getNumUpdateWrites());
                row.add(hoodieWriteStat.getTotalLogFiles());
                row.add(hoodieWriteStat.getTotalLogBlocks());
                row.add(hoodieWriteStat.getTotalCorruptLogBlock());
                row.add(hoodieWriteStat.getTotalRollbackBlocks());
                row.add(hoodieWriteStat.getTotalLogRecords());
                row.add(hoodieWriteStat.getTotalUpdatedRecordsCompacted());
                row.add(hoodieWriteStat.getTotalWriteBytes());
                row.add(hoodieWriteStat.getTotalWriteErrors());
                return row;
            })).map(rowList -> rowList.toArray(new Comparable[0]));
        }).collect(Collectors.toList());
        allStats.addAll(readCommits);
        reader.close();
    }
    TableHeader header = new TableHeader()
        .addTableHeaderField("action").addTableHeaderField("instant")
        .addTableHeaderField("partition").addTableHeaderField("file_id")
        .addTableHeaderField("prev_instant").addTableHeaderField("num_writes")
        .addTableHeaderField("num_inserts").addTableHeaderField("num_deletes")
        .addTableHeaderField("num_update_writes").addTableHeaderField("total_log_files")
        .addTableHeaderField("total_log_blocks").addTableHeaderField("total_corrupt_log_blocks")
        .addTableHeaderField("total_rollback_blocks").addTableHeaderField("total_log_records")
        .addTableHeaderField("total_updated_records_compacted").addTableHeaderField("total_write_bytes")
        .addTableHeaderField("total_write_errors");
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, allStats);
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) CliOption(org.springframework.shell.core.annotation.CliOption) ArrayList(java.util.ArrayList) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) SpecificData(org.apache.avro.specific.SpecificData) CommandMarker(org.springframework.shell.core.CommandMarker) GenericRecord(org.apache.avro.generic.GenericRecord) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableHeader(org.apache.hudi.cli.TableHeader) IOException(java.io.IOException) HoodieCommitMetadata(org.apache.hudi.avro.model.HoodieCommitMetadata) Collectors(java.util.stream.Collectors) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) List(java.util.List) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) FSUtils(org.apache.hudi.common.fs.FSUtils)
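
The loop above is the standard pattern for scanning Hudi's archived timeline: each archive file is an ordinary Hudi log file whose blocks are HoodieAvroDataBlocks of HoodieArchivedMetaEntry records. A minimal sketch of just that read path as a standalone helper (the method name readArchivedEntries is ours, and it assumes HoodieCLI has already been connected to a table, as in the command above):

private static List<IndexedRecord> readArchivedEntries(Path archivedLogFile) throws IOException {
    String basePath = HoodieCLI.getTableMetaClient().getBasePath();
    List<IndexedRecord> entries = new ArrayList<>();
    // Every block in an archive file is an avro data block of HoodieArchivedMetaEntry records
    try (Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf),
            new HoodieLogFile(archivedLogFile), HoodieArchivedMetaEntry.getClassSchema())) {
        while (reader.hasNext()) {
            HoodieAvroDataBlock block = (HoodieAvroDataBlock) reader.next();
            block.getRecordItr().forEachRemaining(entries::add);
        }
    }
    return entries;
}

Filtering by actionType and flattening the per-partition write stats, as the command does, then happens over the returned records.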

Example 7 with HoodieAvroDataBlock

Use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class TestHoodieLogFormat, method testV0Format.

@Test
public void testV0Format() throws IOException, URISyntaxException {
    // HoodieLogFormatVersion.DEFAULT_VERSION has been deprecated, so we cannot
    // create a writer for it. These assertions therefore only exercise the
    // HoodieAvroDataBlock serialization of the older (V0) format.
    Schema schema = getSimpleSchema();
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
    List<IndexedRecord> recordsCopy = new ArrayList<>(records);
    assertEquals(100, records.size());
    assertEquals(100, recordsCopy.size());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, schema);
    byte[] content = dataBlock.getBytes(schema);
    assertTrue(content.length > 0);
    HoodieLogBlock logBlock = HoodieAvroDataBlock.getBlock(content, schema);
    assertEquals(HoodieLogBlockType.AVRO_DATA_BLOCK, logBlock.getBlockType());
    List<IndexedRecord> readRecords = getRecords((HoodieAvroDataBlock) logBlock);
    assertEquals(readRecords.size(), recordsCopy.size());
    for (int i = 0; i < recordsCopy.size(); ++i) {
        assertEquals(recordsCopy.get(i), readRecords.get(i));
    }
    // The reader schema is optional when it matches the writer schema
    logBlock = HoodieAvroDataBlock.getBlock(content, null);
    assertEquals(HoodieLogBlockType.AVRO_DATA_BLOCK, logBlock.getBlockType());
    readRecords = getRecords((HoodieAvroDataBlock) logBlock);
    assertEquals(readRecords.size(), recordsCopy.size());
    for (int i = 0; i < recordsCopy.size(); ++i) {
        assertEquals(recordsCopy.get(i), readRecords.get(i));
    }
}
Also used : HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) SchemaTestUtil.getSimpleSchema(org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema) ArrayList(java.util.ArrayList) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
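
At its core the test is a byte-level round trip through the deprecated V0 format. A condensed sketch of that round trip, reusing the test's getRecords helper and the imports listed above (the method name roundTripV0 is ours):

// Serialize with the writer schema, then rehydrate. Passing null as the
// reader schema makes getBlock fall back to the schema embedded in the block,
// which is what the second half of the test verifies.
private static List<IndexedRecord> roundTripV0(List<IndexedRecord> records, Schema schema) throws IOException {
    byte[] content = new HoodieAvroDataBlock(records, schema).getBytes(schema);
    HoodieLogBlock block = HoodieAvroDataBlock.getBlock(content, null);
    return getRecords((HoodieAvroDataBlock) block);
}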

Example 8 with HoodieAvroDataBlock

Use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class TestHoodieLogFormatAppendFailure, method testFailedToGetAppendStreamFromHDFSNameNode.

@Test
@Timeout(60)
public void testFailedToGetAppendStreamFromHDFSNameNode() throws IOException, URISyntaxException, InterruptedException, TimeoutException {
    // Create a scratch directory on the MiniDFSCluster's HDFS; the append
    // failure is induced later by killing the DataNodes holding the file
    String uuid = UUID.randomUUID().toString();
    Path localPartitionPath = new Path("/tmp/");
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path(localPartitionPath, uuid);
    fs.mkdirs(testPath);
    // Write some records and append them as an avro data block.
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>(2);
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
    Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION)
        .withFileId("commits.archive").overBaseCommit("").withFs(fs).build();
    writer.appendBlock(dataBlock);
    // get the current log file version to compare later
    int logFileVersion = writer.getLogFile().getLogVersion();
    Path logFilePath = writer.getLogFile().getPath();
    writer.close();
    // Wait until the file reaches a replication factor of 3
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 3);
    // Shut down all DNs that have the last block location for the file
    LocatedBlocks lbs = cluster.getFileSystem().getClient().getNamenode().getBlockLocations("/tmp/" + uuid + "/" + logFilePath.getName(), 0, Long.MAX_VALUE);
    List<DataNode> dnsOfCluster = cluster.getDataNodes();
    DatanodeInfo[] dnsWithLocations = lbs.getLastLocatedBlock().getLocations();
    for (DataNode dn : dnsOfCluster) {
        for (DatanodeInfo loc : dnsWithLocations) {
            if (dn.getDatanodeId().equals(loc)) {
                dn.shutdown();
                cluster.stopDataNode(dn.getDisplayName());
                DFSTestUtil.waitForDatanodeDeath(dn);
            }
        }
    }
    // Wait for the replication of this file to go down to 0
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 0);
    // Opening a new Writer right now will throw IOException. The code should handle this, rollover the logfile and
    // return a new writer with a bumped up logVersion
    writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION)
        .withFileId("commits.archive").overBaseCommit("").withFs(fs).build();
    header = new HashMap<>();
    header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE, String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
    writer.appendBlock(new HoodieCommandBlock(header));
    // The log version should be different for this new writer
    assertNotEquals(writer.getLogFile().getLogVersion(), logFileVersion);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) IndexedRecord(org.apache.avro.generic.IndexedRecord) HashMap(java.util.HashMap) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) Writer(org.apache.hudi.common.table.log.HoodieLogFormat.Writer) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)
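
Stripped of the HDFS choreography, the contract under test is small: building a writer over a log file that can no longer be appended to must roll over to a new log version instead of failing. A minimal sketch of that contract, assuming the same testPath and fs as above:

// The first writer establishes the log file and its version.
Writer first = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
    .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION)
    .withFileId("commits.archive").overBaseCommit("").withFs(fs).build();
int versionBefore = first.getLogFile().getLogVersion();
first.close();
// ... render the existing file un-appendable (the test does this by killing
// the DataNodes that hold its last block) ...
// The builder catches the append failure internally and rolls over.
Writer second = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
    .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION)
    .withFileId("commits.archive").overBaseCommit("").withFs(fs).build();
assertNotEquals(versionBefore, second.getLogFile().getLogVersion());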

Example 9 with HoodieAvroDataBlock

Use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class HoodieWriteableTestTable, method appendRecordsToLogFile.

private Pair<String, HoodieLogFile> appendRecordsToLogFile(List<HoodieRecord> groupedRecords) throws Exception {
    String partitionPath = groupedRecords.get(0).getPartitionPath();
    HoodieRecordLocation location = groupedRecords.get(0).getCurrentLocation();
    try (HoodieLogFormat.Writer logWriter = HoodieLogFormat.newWriterBuilder()
            .onParentPath(new Path(basePath, partitionPath))
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
            .withFileId(location.getFileId())
            .overBaseCommit(location.getInstantTime())
            .withFs(fs).build()) {
        Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
        header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, location.getInstantTime());
        header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
        logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> {
            try {
                GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get();
                HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
                return (IndexedRecord) val;
            } catch (IOException e) {
                LOG.warn("Failed to convert record " + r.toString(), e);
                return null;
            }
        }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
        return Pair.of(partitionPath, logWriter.getLogFile());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileSystem(org.apache.hadoop.fs.FileSystem) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieAvroParquetConfig(org.apache.hudi.io.storage.HoodieAvroParquetConfig) FileCreateUtils.baseFileName(org.apache.hudi.common.testutils.FileCreateUtils.baseFileName) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieParquetWriter(org.apache.hudi.io.storage.HoodieParquetWriter) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) BloomFilter(org.apache.hudi.common.bloom.BloomFilter) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) CompressionKind(org.apache.orc.CompressionKind) TaskContextSupplier(org.apache.hudi.common.engine.TaskContextSupplier) HoodieOrcWriter(org.apache.hudi.io.storage.HoodieOrcWriter) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) HoodieOrcConfig(org.apache.hudi.io.storage.HoodieOrcConfig) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) List(java.util.List) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) Paths(java.nio.file.Paths) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieAvroWriteSupport(org.apache.hudi.avro.HoodieAvroWriteSupport) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) LogManager(org.apache.log4j.LogManager) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) Pair(org.apache.hudi.common.util.collection.Pair)
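
The helper keys everything off the first record, so callers must pre-group records by file slice and stamp a location on them beforehand. A hypothetical call site inside the same class, sketched to show that contract (recordsForOneFileSlice and fileId are placeholder names):

// All records in the group must share one partition path and one current
// location (instant time + file id); the first record decides where the
// log file is created.
for (HoodieRecord record : recordsForOneFileSlice) {
    record.unseal();
    record.setCurrentLocation(new HoodieRecordLocation("001", fileId));
    record.seal();
}
Pair<String, HoodieLogFile> appended = appendRecordsToLogFile(recordsForOneFileSlice);
LOG.info("Appended " + appended.getRight() + " under partition " + appended.getLeft());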

Example 10 with HoodieAvroDataBlock

Use of org.apache.hudi.common.table.log.block.HoodieAvroDataBlock in project hudi by apache.

The class HoodieLogFileReader, method readBlock.

// TODO: convert content and block length to long using ByteBuffer; a raw byte[]
// caps a block at Integer.MAX_VALUE bytes
private HoodieLogBlock readBlock() throws IOException {
    int blockSize;
    try {
        // 1. Read the total size of the block
        blockSize = (int) inputStream.readLong();
    } catch (EOFException | CorruptedLogFileException e) {
        // Create a corrupt block by finding the next MAGIC marker or EOF
        return createCorruptBlock();
    }
    // A crash may have left this block partially written. Skip blockSize bytes
    // ahead in the stream: we should land on either a sync marker (the start of
    // the next block) or EOF. If we find neither, this block is corrupted.
    boolean isCorrupted = isBlockCorrupted(blockSize);
    if (isCorrupted) {
        return createCorruptBlock();
    }
    // 2. Read the version for this log format
    HoodieLogFormat.LogFormatVersion nextBlockVersion = readVersion();
    // 3. Read the block type for a log block
    HoodieLogBlockType blockType = tryReadBlockType(nextBlockVersion);
    // 4. Read the header for a log block, if present
    Map<HeaderMetadataType, String> header = nextBlockVersion.hasHeader() ? HoodieLogBlock.getLogMetadata(inputStream) : null;
    // 5. Read the length of the block content; fall back to the full block size
    // for versions that do not record a content length
    // TODO replace w/ hasContentLength
    int contentLength = nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION ? (int) inputStream.readLong() : blockSize;
    // 6. Read the content eagerly, or skip it, per the client's IO-vs-memory trade-off
    long contentPosition = inputStream.getPos();
    boolean shouldReadLazily = readBlockLazily && nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION;
    Option<byte[]> content = HoodieLogBlock.tryReadContent(inputStream, contentLength, shouldReadLazily);
    // 7. Read footer if any
    Map<HeaderMetadataType, String> footer = nextBlockVersion.hasFooter() ? HoodieLogBlock.getLogMetadata(inputStream) : null;
    // 8. Read the log block length, if present; this trailing length acts as a
    // reverse pointer when traversing the log file in reverse
    if (nextBlockVersion.hasLogBlockLength()) {
        inputStream.readLong();
    }
    // 9. Read the log block end position in the log file
    long blockEndPos = inputStream.getPos();
    HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = new HoodieLogBlock.HoodieLogBlockContentLocation(hadoopConf, logFile, contentPosition, contentLength, blockEndPos);
    switch(Objects.requireNonNull(blockType)) {
        case AVRO_DATA_BLOCK:
            if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) {
                return HoodieAvroDataBlock.getBlock(content.get(), readerSchema);
            } else {
                return new HoodieAvroDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, keyField);
            }
        case HFILE_DATA_BLOCK:
            checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieHFileDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, enableRecordLookups);
        case PARQUET_DATA_BLOCK:
            checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("Parquet block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieParquetDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, keyField);
        case DELETE_BLOCK:
            return new HoodieDeleteBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer);
        case COMMAND_BLOCK:
            return new HoodieCommandBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer);
        default:
            throw new HoodieNotSupportedException("Unsupported Block " + blockType);
    }
}
Also used : HoodieDeleteBlock(org.apache.hudi.common.table.log.block.HoodieDeleteBlock) CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) HoodieHFileDataBlock(org.apache.hudi.common.table.log.block.HoodieHFileDataBlock) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HoodieParquetDataBlock(org.apache.hudi.common.table.log.block.HoodieParquetDataBlock) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) EOFException(java.io.EOFException)
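
Putting the numbered steps together, the byte layout that readBlock() walks for post-V0 blocks can be summarized as below. Field widths are inferred from the reads above (readLong for the sizes); the widths of the version and block-type fields are our assumption:

// [MAGIC]                      consumed before readBlock() is called
// [blockSize     : long]       1. total block size, used by the corruption probe
// [version       : int]        2. log format version (width assumed)
// [blockType     : int]        3. HoodieLogBlockType ordinal (width assumed)
// [header        : metadata]   4. only if nextBlockVersion.hasHeader()
// [contentLength : long]       5. falls back to blockSize on DEFAULT_VERSION
// [content       : bytes]      6. read eagerly, or skipped when reading lazily
// [footer        : metadata]   7. only if nextBlockVersion.hasFooter()
// [blockLength   : long]       8. reverse pointer, if hasLogBlockLength()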

Aggregations

HoodieAvroDataBlock (org.apache.hudi.common.table.log.block.HoodieAvroDataBlock): 16
IndexedRecord (org.apache.avro.generic.IndexedRecord): 14
HashMap (java.util.HashMap): 12
ArrayList (java.util.ArrayList): 10
Path (org.apache.hadoop.fs.Path): 10
HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat): 10
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 9
IOException (java.io.IOException): 8
Schema (org.apache.avro.Schema): 8
List (java.util.List): 7
GenericRecord (org.apache.avro.generic.GenericRecord): 7
Collectors (java.util.stream.Collectors): 6
FileStatus (org.apache.hadoop.fs.FileStatus): 6
Option (org.apache.hudi.common.util.Option): 6
Map (java.util.Map): 5
HoodieAvroUtils (org.apache.hudi.avro.HoodieAvroUtils): 5
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 5
HeaderMetadataType (org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
HoodieArchivedMetaEntry (org.apache.hudi.avro.model.HoodieArchivedMetaEntry): 4