Example 31 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class HoodieLogFileCommand, method showLogFileCommits.

@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
public String showLogFileCommits(
        @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final String logFilePathPattern,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    FileSystem fs = HoodieCLI.getTableMetaClient().getFs();
    List<String> logFilePaths = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(logFilePathPattern)).stream()
            .map(status -> status.getPath().toString()).collect(Collectors.toList());
    Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = new HashMap<>();
    int numCorruptBlocks = 0;
    int dummyInstantTimeCount = 0;
    for (String logFilePath : logFilePaths) {
        FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
        Schema writerSchema = new AvroSchemaConverter().convert(
                Objects.requireNonNull(TableSchemaResolver.readSchemaFromLogFile(fs, new Path(logFilePath))));
        Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
        // read the avro blocks
        while (reader.hasNext()) {
            HoodieLogBlock n = reader.next();
            String instantTime;
            AtomicInteger recordCount = new AtomicInteger(0);
            if (n instanceof HoodieCorruptBlock) {
                try {
                    instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                    if (instantTime == null) {
                        throw new Exception("Invalid instant time " + instantTime);
                    }
                } catch (Exception e) {
                    // could not read metadata for corrupt block
                    numCorruptBlocks++;
                    instantTime = "corrupt_block_" + numCorruptBlocks;
                }
            } else {
                instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                if (instantTime == null) {
                    // This can happen when reading archived commit files since they were written without any instant time
                    dummyInstantTimeCount++;
                    instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
                }
                if (n instanceof HoodieDataBlock) {
                    try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) n).getRecordItr()) {
                        recordItr.forEachRemaining(r -> recordCount.incrementAndGet());
                    }
                }
            }
            if (commitCountAndMetadata.containsKey(instantTime)) {
                commitCountAndMetadata.get(instantTime).add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get()));
            } else {
                List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list = new ArrayList<>();
                list.add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get()));
                commitCountAndMetadata.put(instantTime, list);
            }
        }
        reader.close();
    }
    List<Comparable[]> rows = new ArrayList<>();
    ObjectMapper objectMapper = new ObjectMapper();
    for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata.entrySet()) {
        String instantTime = entry.getKey();
        for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry.getValue()) {
            Comparable[] output = new Comparable[5];
            output[0] = instantTime;
            output[1] = tuple3._3();
            output[2] = tuple3._1().toString();
            output[3] = objectMapper.writeValueAsString(tuple3._2()._1());
            output[4] = objectMapper.writeValueAsString(tuple3._2()._2());
            rows.add(output);
        }
    }
    TableHeader header = new TableHeader()
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT_TIME)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_RECORD_COUNT)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_BLOCK_TYPE)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_HEADER_METADATA)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_FOOTER_METADATA);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieCorruptBlock(org.apache.hudi.common.table.log.block.HoodieCorruptBlock) AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) FileSystem(org.apache.hadoop.fs.FileSystem) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) CliOption(org.springframework.shell.core.annotation.CliOption) ArrayList(java.util.ArrayList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) Map(java.util.Map) HoodieMemoryConfig(org.apache.hudi.config.HoodieMemoryConfig) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) IndexedRecord(org.apache.avro.generic.IndexedRecord) CommandMarker(org.springframework.shell.core.CommandMarker) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) TableHeader(org.apache.hudi.cli.TableHeader) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Tuple3(scala.Tuple3) HoodieCLI(org.apache.hudi.cli.HoodieCLI) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Objects(java.util.Objects) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Component(org.springframework.stereotype.Component) List(java.util.List) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) Comparator(java.util.Comparator) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) FSUtils(org.apache.hudi.common.fs.FSUtils)
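
Every example in this listing follows the same rendering pattern: build a TableHeader with one addTableHeaderField call per column, collect each row as a Comparable[] of matching length, and hand both to HoodiePrintHelper.print. The following minimal sketch distills that pattern; the class name, column titles, and row values are illustrative placeholders rather than Hudi code.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.TableHeader;

public class TableHeaderSketch {

    public static String render() {
        // One header field per column, in display order.
        TableHeader header = new TableHeader()
                .addTableHeaderField("Instant Time")
                .addTableHeaderField("Record Count");
        // Each row is a Comparable[] whose length matches the header.
        List<Comparable[]> rows = new ArrayList<>();
        rows.add(new Comparable[] { "20220101000000", 42 });
        // Arguments: header, per-field converters, sortBy field, descending,
        // row limit (-1 for unlimited), header only, rows.
        return HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
    }
}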

Example 32 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class TestCommitsCommand, method testShowCommitPartitionsWithReplaceCommits.

@Test
public void testShowCommitPartitionsWithReplaceCommits() throws Exception {
    Map<HoodieInstant, Integer[]> data = generateMixedData();
    for (HoodieInstant commitInstant : data.keySet()) {
        CommandResult cr = shell().executeCommand(String.format("commit showpartitions --commit %s", commitInstant.getTimestamp()));
        assertTrue(cr.isSuccess());
        Integer[] value = data.get(commitInstant);
        List<Comparable[]> rows = new ArrayList<>();
        // prevCommit not null, so add 0, update 1
        Arrays.asList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).stream()
                .forEach(partition -> rows.add(new Comparable[] { commitInstant.getAction(), partition, 0, 1, 0, value[1],
                        HoodieTestCommitMetadataGenerator.DEFAULT_TOTAL_WRITE_BYTES, 0 }));
        Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
        fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry -> NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));
        TableHeader header = new TableHeader()
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_ADDED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_UPDATED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_INSERTED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS);
        String expected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, rows);
        expected = removeNonWordAndStripSpace(expected);
        String got = removeNonWordAndStripSpace(cr.getResult().toString());
        assertEquals(expected, got);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) TableHeader(org.apache.hudi.cli.TableHeader) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) CommandResult(org.springframework.shell.core.CommandResult) Function(java.util.function.Function) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
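
In this test, the fieldNameToConverterMap is what turns raw byte counts into human-readable sizes in the expected output. A short sketch isolating just that step, using the same HoodieTableHeaderFields constant and NumericUtils call as the test above:

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.common.util.NumericUtils;

public class ByteCountConverterSketch {

    // Map a column name to a formatter applied to every cell of that column
    // before HoodiePrintHelper.print renders the table.
    public static Map<String, Function<Object, String>> converters() {
        Map<String, Function<Object, String>> map = new HashMap<>();
        map.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN,
                entry -> NumericUtils.humanReadableByteCount(Long.parseLong(entry.toString())));
        return map;
    }
}

Examples that need no special formatting simply pass an empty HashMap to HoodiePrintHelper.print instead.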

Example 33 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class TestCommitsCommand, method testShowCommitFilesWithReplaceCommits.

@Test
public void testShowCommitFilesWithReplaceCommits() throws Exception {
    Map<HoodieInstant, Integer[]> data = generateMixedData();
    for (HoodieInstant commitInstant : data.keySet()) {
        CommandResult cr = shell().executeCommand(String.format("commit showfiles --commit %s", commitInstant.getTimestamp()));
        assertTrue(cr.isSuccess());
        Integer[] value = data.get(commitInstant);
        List<Comparable[]> rows = new ArrayList<>();
        Arrays.asList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).stream()
                .forEach(partition -> rows.add(new Comparable[] { commitInstant.getAction(), partition,
                        HoodieTestCommitMetadataGenerator.DEFAULT_FILEID, HoodieTestCommitMetadataGenerator.DEFAULT_PRE_COMMIT,
                        value[1], value[0], HoodieTestCommitMetadataGenerator.DEFAULT_TOTAL_WRITE_BYTES,
                        // default 0 errors and blank file with 0 size
                        0, 0 }));
        TableHeader header = new TableHeader()
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_PREVIOUS_COMMIT)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_SIZE);
        String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
        expected = removeNonWordAndStripSpace(expected);
        String got = removeNonWordAndStripSpace(cr.getResult().toString());
        assertEquals(expected, got);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) TableHeader(org.apache.hudi.cli.TableHeader) ArrayList(java.util.ArrayList) CommandResult(org.springframework.shell.core.CommandResult) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 34 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class TestCompactionCommand, method testCompactionsAll.

/**
 * Test case for command 'compactions show all'.
 */
@Test
public void testCompactionsAll() throws IOException {
    // create MOR table.
    new TableCommand().createTable(tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(), "", TimelineLayoutVersion.VERSION_1, HoodieAvroPayload.class.getName());
    CompactionTestUtils.setupAndValidateCompactionOperations(HoodieCLI.getTableMetaClient(), false, 3, 4, 3, 3);
    HoodieCLI.getTableMetaClient().reloadActiveTimeline();
    CommandResult cr = shell().executeCommand("compactions show all");
    System.out.println(cr.getResult().toString());
    TableHeader header = new TableHeader()
            .addTableHeaderField("Compaction Instant Time")
            .addTableHeaderField("State")
            .addTableHeaderField("Total FileIds to be Compacted");
    Map<String, Integer> fileIds = new HashMap<>();
    fileIds.put("001", 3);
    fileIds.put("003", 4);
    fileIds.put("005", 3);
    fileIds.put("007", 3);
    List<Comparable[]> rows = new ArrayList<>();
    Arrays.asList("001", "003", "005", "007").stream().sorted(Comparator.reverseOrder()).forEach(instant -> {
        rows.add(new Comparable[] { instant, "REQUESTED", fileIds.get(instant) });
    });
    String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
    assertEquals(expected, cr.getResult().toString());
}
Also used : TableHeader(org.apache.hudi.cli.TableHeader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HoodieAvroPayload(org.apache.hudi.common.model.HoodieAvroPayload) CommandResult(org.springframework.shell.core.CommandResult) Test(org.junit.jupiter.api.Test)

Example 35 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class TestFileSystemViewCommand, method testShowCommitsWithSpecifiedValues.

/**
 * Test case for 'show fsview all' with specified values.
 */
@Test
public void testShowCommitsWithSpecifiedValues() {
    // Test command with options, baseFileOnly and maxInstant is 2
    CommandResult cr = shell().executeCommand("show fsview all --baseFileOnly true --maxInstant 2");
    assertTrue(cr.isSuccess());
    List<Comparable[]> rows = new ArrayList<>();
    Stream<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath);
    // Only get instant 1, since maxInstant was specified 2
    fileGroups.forEach(fg -> fg.getAllFileSlices().filter(fs -> fs.getBaseInstantTime().equals("1")).forEach(fs -> {
        int idx = 0;
        // For base file only Views, do not display any delta-file related columns.
        Comparable[] row = new Comparable[5];
        row[idx++] = fg.getPartitionPath();
        row[idx++] = fg.getFileGroupId().getFileId();
        row[idx++] = fs.getBaseInstantTime();
        row[idx++] = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getPath() : "";
        row[idx++] = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getFileSize() : -1;
        rows.add(row);
    }));
    Function<Object, String> converterFunction = entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString())));
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILE_SIZE, converterFunction);
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE, converterFunction);
    TableHeader header = new TableHeader()
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE);
    String expected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, rows);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) HashMap(java.util.HashMap) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Tag(org.junit.jupiter.api.Tag) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CLIFunctionalTestHarness(org.apache.hudi.cli.functional.CLIFunctionalTestHarness) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) Files(java.nio.file.Files) TableHeader(org.apache.hudi.cli.TableHeader) HoodieTestCommitMetadataGenerator(org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator) IOException(java.io.IOException) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Test(org.junit.jupiter.api.Test) List(java.util.List) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) CommandResult(org.springframework.shell.core.CommandResult) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) FSUtils(org.apache.hudi.common.fs.FSUtils) NumericUtils(org.apache.hudi.common.util.NumericUtils)
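
The assertions in these tests compare strings only after passing both sides through removeNonWordAndStripSpace, so table borders, padding, and line breaks cannot cause spurious failures. That helper comes from the test harness and its implementation is not shown in this listing; a plausible stand-in (an assumption, not the project's code) would be:

public class NormalizeSketch {

    // Hypothetical normalizer: drop every non-word character so that only the
    // cell contents of the rendered table take part in the comparison.
    static String removeNonWordAndStripSpace(String s) {
        return s.replaceAll("\\W", "");
    }
}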

Aggregations

ArrayList (java.util.ArrayList): 45
TableHeader (org.apache.hudi.cli.TableHeader): 45
HashMap (java.util.HashMap): 33
CliCommand (org.springframework.shell.core.annotation.CliCommand): 22
Map (java.util.Map): 19
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
List (java.util.List): 18
Test (org.junit.jupiter.api.Test): 18
CommandResult (org.springframework.shell.core.CommandResult): 18
IOException (java.io.IOException): 17
Function (java.util.function.Function): 17
HoodieCLI (org.apache.hudi.cli.HoodieCLI): 15
HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper): 15
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 15
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 14
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 12
Collectors (java.util.stream.Collectors): 11
Path (org.apache.hadoop.fs.Path): 10
HoodieTableHeaderFields (org.apache.hudi.cli.HoodieTableHeaderFields): 10
FSUtils (org.apache.hudi.common.fs.FSUtils): 9