Search in sources :

Example 81 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class TestHoodieTableMetaClient method checkCommitTimeline.

/**
 * Checks that a commit written through an active timeline handle is not visible on the commit
 * timeline obtained from the meta client until the timeline is explicitly reloaded, and that
 * after the reload both the completed instant and its stored details can be read back.
 */
@Test
public void checkCommitTimeline() {
    HoodieActiveTimeline initialTimeline = metaClient.getActiveTimeline();
    assertTrue(initialTimeline.getCommitTimeline().empty(), "Should be empty commit timeline");

    // Create commit "1" and mark it complete with a small payload.
    HoodieInstant commitInstant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
    initialTimeline.createNewInstant(commitInstant);
    initialTimeline.saveAsComplete(commitInstant, Option.of("test-detail".getBytes()));

    // Asking the meta client for the timeline again must not pick up the new commit:
    // the timeline is expected to stay as it was until reload() is called.
    HoodieActiveTimeline refetchedTimeline = metaClient.getActiveTimeline();
    assertTrue(refetchedTimeline.getCommitTimeline().empty(), "Should be empty commit timeline");

    HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(commitInstant);

    // After an explicit reload the commit and its details become visible.
    HoodieTimeline reloadedCommits = refetchedTimeline.reload().getCommitTimeline();
    assertFalse(reloadedCommits.empty(), "Should be the 1 commit we made");
    assertEquals(completedInstant, reloadedCommits.getInstants().findFirst().get(), "Commit should be 1");
    assertArrayEquals("test-detail".getBytes(), reloadedCommits.getInstantDetails(completedInstant).get(), "Commit value should be \"test-detail\"");
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), Test (org.junit.jupiter.api.Test)

Example 82 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class TestTimelineUtils method testGetExtraMetadata.

/**
 * Exercises the extra-metadata lookups of {@code TimelineUtils} against a timeline holding two
 * regular commits ("0" without extra metadata, "1" with it) followed by a replace commit ("2")
 * written with {@code WriteOperationType.CLUSTER} that stores a different value for the same key.
 *
 * @throws Exception if building or saving any of the commit metadata fails
 */
@Test
public void testGetExtraMetadata() throws Exception {
    final String extraMetadataKey = "test_key";
    final String extraMetadataValue1 = "test_value1";

    HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
    assertTrue(timeline.getCommitTimeline().empty());
    assertFalse(TimelineUtils.getExtraMetadataFromLatest(metaClient, extraMetadataKey).isPresent());

    // Commit "0": completed with an empty extra-metadata map.
    final String firstTs = "0";
    HoodieInstant firstCommit = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstTs);
    timeline.createNewInstant(firstCommit);
    timeline.saveAsComplete(firstCommit, Option.of(getCommitMetadata(basePath, firstTs, firstTs, 2, Collections.emptyMap())));

    // Commit "1": completed with extraMetadataKey -> extraMetadataValue1.
    final String secondTs = "1";
    HoodieInstant secondCommit = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, secondTs);
    timeline.createNewInstant(secondCommit);
    Map<String, String> extraMetadata = new HashMap<>();
    extraMetadata.put(extraMetadataKey, extraMetadataValue1);
    timeline.saveAsComplete(secondCommit, Option.of(getCommitMetadata(basePath, secondTs, secondTs, 2, extraMetadata)));
    metaClient.reloadActiveTimeline();

    // The latest value for the key must be the one written with commit "1";
    // a key that was never written must stay absent.
    verifyExtraMetadataLatestValue(extraMetadataKey, extraMetadataValue1, false);
    assertFalse(TimelineUtils.getExtraMetadataFromLatest(metaClient, "unknownKey").isPresent());

    // Adding a clustering (replace) commit must not change what getExtraMetadataFromLatest reports.
    final String replaceTs = "2";
    HoodieInstant replaceCommit = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, replaceTs);
    timeline.createNewInstant(replaceCommit);
    final String replaceValue = "newValue2";
    extraMetadata.put(extraMetadataKey, replaceValue);
    timeline.saveAsComplete(replaceCommit, Option.of(getReplaceCommitMetadata(basePath, replaceTs, "p2", 0, "p2", 3, extraMetadata, WriteOperationType.CLUSTER)));
    metaClient.reloadActiveTimeline();

    // NOTE(review): the boolean flag presumably selects whether clustering commits are
    // considered by the helper -- confirm against verifyExtraMetadataLatestValue.
    verifyExtraMetadataLatestValue(extraMetadataKey, extraMetadataValue1, false);
    verifyExtraMetadataLatestValue(extraMetadataKey, replaceValue, true);
    assertFalse(TimelineUtils.getExtraMetadataFromLatest(metaClient, "unknownKey").isPresent());

    // The per-instant view has one entry for each of the three instants: "0" has no value,
    // "1" and "2" carry the values written above.
    Map<String, Option<String>> valuesByInstant = TimelineUtils.getAllExtraMetadataForKey(metaClient, extraMetadataKey);
    assertEquals(3, valuesByInstant.size());
    assertFalse(valuesByInstant.get("0").isPresent());
    assertTrue(valuesByInstant.get("1").isPresent());
    assertEquals(extraMetadataValue1, valuesByInstant.get("1").get());
    assertTrue(valuesByInstant.get("2").isPresent());
    assertEquals(replaceValue, valuesByInstant.get("2").get());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HashMap(java.util.HashMap) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Option(org.apache.hudi.common.util.Option) Test(org.junit.jupiter.api.Test)

Example 83 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class CommitsCommand method showCommitFiles.

/**
 * CLI command {@code commit showfiles}: prints one row per write stat found in the metadata of
 * the requested completed commit.
 *
 * @param exportTableName view name to store the output table (empty when unspecified)
 * @param instantTime     commit to show
 * @param limit           maximum number of rows handed to the printer ({@code -1} when unspecified)
 * @param sortByField     field to sort the output by (empty when unspecified)
 * @param descending      ordering flag passed to the printer
 * @param headerOnly      whether only the header should be printed
 * @return the rendered table, or a "not found" message when no metadata exists for the commit
 * @throws Exception if looking up the commit or its metadata fails
 */
@CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
public String showCommitFiles(
        @CliOption(key = { "createView" }, mandatory = false, help = "view name to store output table", unspecifiedDefaultValue = "") final String exportTableName,
        @CliOption(key = { "commit" }, help = "Commit to show") final String instantTime,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
    // Only completed instants of the commits timeline are searched.
    final HoodieTimeline completedCommits =
        HoodieCLI.getTableMetaClient().getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    final Option<HoodieInstant> requestedInstant = getCommitForInstant(completedCommits, instantTime);
    final Option<HoodieCommitMetadata> metadataOption = getHoodieCommitMetadata(completedCommits, requestedInstant);
    if (!metadataOption.isPresent()) {
        return "Commit " + instantTime + " not found in Commits " + completedCommits;
    }

    // One output row per (partition path, write stat) pair.
    final List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, List<HoodieWriteStat>> partitionStats : metadataOption.get().getPartitionToWriteStats().entrySet()) {
        final String action = requestedInstant.get().getAction();
        final String partitionPath = partitionStats.getKey();
        for (HoodieWriteStat writeStat : partitionStats.getValue()) {
            final Comparable[] row = new Comparable[] {
                action,
                partitionPath,
                writeStat.getFileId(),
                writeStat.getPrevCommit(),
                writeStat.getNumUpdateWrites(),
                writeStat.getNumWrites(),
                writeStat.getTotalWriteBytes(),
                writeStat.getTotalWriteErrors(),
                writeStat.getFileSizeInBytes()
            };
            rows.add(row);
        }
    }

    // Column order must match the order of the values in each row above.
    final TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PREVIOUS_COMMIT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_SIZE);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows, exportTableName);
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), TableHeader (org.apache.hudi.cli.TableHeader), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), ArrayList (java.util.ArrayList), HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), List (java.util.List), HashMap (java.util.HashMap), Map (java.util.Map), CliCommand (org.springframework.shell.core.annotation.CliCommand)

Example 84 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class CommitsCommand method showCommitPartitions.

/**
 * CLI command {@code commit showpartitions}: prints one row per partition touched by the
 * requested completed commit, aggregating the partition's write stats into file, record,
 * byte and error totals.
 *
 * <p>A write stat whose previous commit equals {@code HoodieWriteStat.NULL_COMMIT} is counted as
 * an added file; every other stat (including one with no previous commit recorded at all) is
 * counted as an updated file and contributes its update-write count.
 *
 * @param exportTableName view name to store the output table (empty when unspecified)
 * @param instantTime     commit to show
 * @param limit           maximum number of rows handed to the printer ({@code -1} when unspecified)
 * @param sortByField     field to sort the output by (empty when unspecified)
 * @param descending      ordering flag passed to the printer
 * @param headerOnly      whether only the header should be printed
 * @return the rendered table, or a "not found" message when no metadata exists for the commit
 * @throws Exception if looking up the commit or its metadata fails
 */
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
public String showCommitPartitions(@CliOption(key = { "createView" }, mandatory = false, help = "view name to store output table", unspecifiedDefaultValue = "") final String exportTableName, @CliOption(key = { "commit" }, help = "Commit to show") final String instantTime, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
    Option<HoodieInstant> hoodieInstantOption = getCommitForInstant(timeline, instantTime);
    Option<HoodieCommitMetadata> commitMetadataOptional = getHoodieCommitMetadata(timeline, hoodieInstantOption);
    if (!commitMetadataOptional.isPresent()) {
        return "Commit " + instantTime + " not found in Commits " + timeline;
    }
    HoodieCommitMetadata meta = commitMetadataOptional.get();
    // The action is the same for every partition of the commit, so read it once up front.
    String action = hoodieInstantOption.get().getAction();
    List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats().entrySet()) {
        String path = entry.getKey();
        List<HoodieWriteStat> stats = entry.getValue();
        long totalFilesAdded = 0;
        long totalFilesUpdated = 0;
        long totalRecordsUpdated = 0;
        long totalRecordsInserted = 0;
        long totalBytesWritten = 0;
        long totalWriteErrors = 0;
        for (HoodieWriteStat stat : stats) {
            // Constant-first comparison: a stat without a previous commit must not
            // abort the whole command with a NullPointerException.
            if (HoodieWriteStat.NULL_COMMIT.equals(stat.getPrevCommit())) {
                totalFilesAdded += 1;
            } else {
                totalFilesUpdated += 1;
                totalRecordsUpdated += stat.getNumUpdateWrites();
            }
            totalRecordsInserted += stat.getNumInserts();
            totalBytesWritten += stat.getTotalWriteBytes();
            totalWriteErrors += stat.getTotalWriteErrors();
        }
        // Value order must match the header fields added below.
        rows.add(new Comparable[] { action, path, totalFilesAdded, totalFilesUpdated, totalRecordsInserted, totalRecordsUpdated, totalBytesWritten, totalWriteErrors });
    }
    // Render the byte total in human-readable form instead of a raw number.
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry -> NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION).addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_ADDED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_UPDATED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_INSERTED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS);
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows, exportTableName);
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), TableHeader (org.apache.hudi.cli.TableHeader), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HashMap (java.util.HashMap), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), ArrayList (java.util.ArrayList), HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), Function (java.util.function.Function), List (java.util.List), Map (java.util.Map), CliCommand (org.springframework.shell.core.annotation.CliCommand)

Example 85 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class CommitsCommand method showWriteStats.

/**
 * CLI command {@code commit show_write_stats}: prints a single row with the total bytes written,
 * total records written and the average record size (bytes per record, rounded up) of the
 * requested completed commit.
 *
 * <p>When the commit wrote no records the average record size is reported as {@code 0} rather
 * than the result of a division by zero.
 *
 * @param exportTableName view name to store the output table (empty when unspecified)
 * @param instantTime     commit to show
 * @param limit           maximum number of rows handed to the printer ({@code -1} when unspecified)
 * @param sortByField     field to sort the output by (empty when unspecified)
 * @param descending      ordering flag passed to the printer
 * @param headerOnly      whether only the header should be printed
 * @return the rendered table, or a "not found" message when no metadata exists for the commit
 * @throws Exception if looking up the commit or its metadata fails
 */
@CliCommand(value = "commit show_write_stats", help = "Show write stats of a commit")
public String showWriteStats(@CliOption(key = { "createView" }, mandatory = false, help = "view name to store output table", unspecifiedDefaultValue = "") final String exportTableName, @CliOption(key = { "commit" }, help = "Commit to show") final String instantTime, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
    Option<HoodieInstant> hoodieInstantOption = getCommitForInstant(timeline, instantTime);
    Option<HoodieCommitMetadata> commitMetadataOptional = getHoodieCommitMetadata(timeline, hoodieInstantOption);
    if (!commitMetadataOptional.isPresent()) {
        return "Commit " + instantTime + " not found in Commits " + timeline;
    }
    HoodieCommitMetadata meta = commitMetadataOptional.get();
    String action = hoodieInstantOption.get().getAction();
    long recordsWritten = meta.fetchTotalRecordsWritten();
    long bytesWritten = meta.fetchTotalBytesWritten();
    // Guard the division: with zero records the floating-point quotient is Infinity (or NaN),
    // which a cast to long turns into Long.MAX_VALUE (or 0) -- a misleading "average".
    long avgRecSize = recordsWritten == 0 ? 0L : (long) Math.ceil((1.0 * bytesWritten) / recordsWritten);
    List<Comparable[]> rows = new ArrayList<>();
    // Value order must match the header fields added below.
    rows.add(new Comparable[] { action, bytesWritten, recordsWritten, avgRecSize });
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    // NOTE(review): the converter is registered under HEADER_TOTAL_BYTES_WRITTEN while the header
    // below uses HEADER_TOTAL_BYTES_WRITTEN_COMMIT. If those constants hold different names the
    // converter never applies to this table -- confirm which behavior is intended.
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry -> NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN_COMMIT).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN_COMMIT).addTableHeaderField(HoodieTableHeaderFields.HEADER_AVG_REC_SIZE_COMMIT);
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows, exportTableName);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HashMap(java.util.HashMap) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ArrayList(java.util.ArrayList) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Function(java.util.function.Function) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Aggregations

HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 95, HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 70, HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 47, Test (org.junit.jupiter.api.Test): 45, HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 37, ArrayList (java.util.ArrayList): 36, IOException (java.io.IOException): 32, List (java.util.List): 30, HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 30, HashMap (java.util.HashMap): 28, ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 26, Map (java.util.Map): 25, Option (org.apache.hudi.common.util.Option): 22, Pair (org.apache.hudi.common.util.collection.Pair): 22, Collectors (java.util.stream.Collectors): 21, Path (org.apache.hadoop.fs.Path): 21, Logger (org.apache.log4j.Logger): 21, LogManager (org.apache.log4j.LogManager): 20, Stream (java.util.stream.Stream): 19, HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 19