
Example 11 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class TestStatsCommand, method testWriteAmplificationStats:

/**
 * Test case for command 'stats wa'.
 */
@Test
public void testWriteAmplificationStats() throws Exception {
    // generate data and metadata
    Map<String, Integer[]> data = new LinkedHashMap<>();
    data.put("100", new Integer[] { 15, 10 });
    data.put("101", new Integer[] { 20, 10 });
    data.put("102", new Integer[] { 15, 15 });
    for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
        String k = entry.getKey();
        Integer[] v = entry.getValue();
        HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, k, hadoopConf(), Option.of(v[0]), Option.of(v[1]));
    }
    CommandResult cr = shell().executeCommand("stats wa");
    assertTrue(cr.isSuccess());
    // generate the expected output
    List<Comparable[]> rows = new ArrayList<>();
    DecimalFormat df = new DecimalFormat("#.00");
    data.forEach((key, value) -> {
        // the generated commit metadata spans two partitions, so each total is doubled
        rows.add(new Comparable[] { key, value[1] * 2, value[0] * 2, df.format((float) value[0] / value[1]) });
    });
    int totalWrite = data.values().stream().map(integers -> integers[0] * 2).mapToInt(s -> s).sum();
    int totalUpdate = data.values().stream().map(integers -> integers[1] * 2).mapToInt(s -> s).sum();
    rows.add(new Comparable[] { "Total", totalUpdate, totalWrite, df.format((float) totalWrite / totalUpdate) });
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN).addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
    String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used : Histogram(com.codahale.metrics.Histogram) BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) UniformReservoir(com.codahale.metrics.UniformReservoir) Map(java.util.Map) Tag(org.junit.jupiter.api.Tag) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CLIFunctionalTestHarness(org.apache.hudi.cli.functional.CLIFunctionalTestHarness) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) TableHeader(org.apache.hudi.cli.TableHeader) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) DecimalFormat(java.text.DecimalFormat) HoodieTestCommitMetadataGenerator(org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator) IOException(java.io.IOException) Snapshot(com.codahale.metrics.Snapshot) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Test(org.junit.jupiter.api.Test) List(java.util.List) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) CommandResult(org.springframework.shell.core.CommandResult) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper)
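
A distilled version of the pattern shared by all of these examples may help: add header fields to a TableHeader in the same order as the columns of each Comparable[] row, then render with HoodiePrintHelper.print. The sketch below is minimal and uses illustrative field names rather than the HoodieTableHeaderFields constants; the print arguments mirror the test above (no converters, no sort field, ascending, no row limit, full table rather than header-only).

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.TableHeader;

public class TableHeaderSketch {

    public static void main(String[] args) {
        // Column order in the header must match the element order in each row.
        TableHeader header = new TableHeader()
            .addTableHeaderField("CommitTime")
            .addTableHeaderField("Total Written");
        List<Comparable[]> rows = new ArrayList<>();
        rows.add(new Comparable[] { "100", 15 });
        rows.add(new Comparable[] { "101", 20 });
        // Arguments: header, converters, sortByField, descending, limit (-1 = all), headerOnly, rows.
        System.out.println(HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows));
    }
}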

Example 12 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class MetadataCommand, method listPartitions:

@CliCommand(value = "metadata list-partitions", help = "List all partitions from metadata")
public String listPartitions(@CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master) throws IOException {
    HoodieCLI.getTableMetaClient();
    initJavaSparkContext(Option.of(master));
    HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build();
    HoodieBackedTableMetadata metadata = new HoodieBackedTableMetadata(new HoodieSparkEngineContext(jsc), config, HoodieCLI.basePath, "/tmp");
    if (!metadata.enabled()) {
        return "[ERROR] Metadata Table not enabled/initialized\n\n";
    }
    HoodieTimer timer = new HoodieTimer().startTimer();
    List<String> partitions = metadata.getAllPartitionPaths();
    LOG.debug("Took " + timer.endTimer() + " ms");
    final List<Comparable[]> rows = new ArrayList<>();
    partitions.stream().sorted(Comparator.reverseOrder()).forEach(p -> {
        Comparable[] row = new Comparable[1];
        row[0] = p;
        rows.add(row);
    });
    TableHeader header = new TableHeader().addTableHeaderField("partition");
    return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) TableHeader(org.apache.hudi.cli.TableHeader) ArrayList(java.util.ArrayList) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieBackedTableMetadata(org.apache.hudi.metadata.HoodieBackedTableMetadata) CliCommand(org.springframework.shell.core.annotation.CliCommand)
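
The timing idiom in listPartitions is easy to miss: HoodieTimer measures the wall-clock time between startTimer() and endTimer(), with endTimer() returning the elapsed milliseconds. A minimal sketch, assuming only the two calls the command itself uses; the sleep is a stand-in for real work:

import org.apache.hudi.common.util.HoodieTimer;

public class TimerSketch {

    public static void main(String[] args) throws InterruptedException {
        HoodieTimer timer = new HoodieTimer().startTimer();
        // Stand-in for the metadata lookup being measured.
        Thread.sleep(50);
        long elapsedMs = timer.endTimer();
        System.out.println("Took " + elapsedMs + " ms");
    }
}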

Example 13 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class RollbacksCommand, method showRollback:

@CliCommand(value = "show rollback", help = "Show details of a rollback instant")
public String showRollback(@CliOption(key = { "instant" }, help = "Rollback instant", mandatory = true) String rollbackInstant, @CliOption(key = { "limit" }, help = "Limit  #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.getTableMetaClient());
    final List<Comparable[]> rows = new ArrayList<>();
    HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeAvroMetadata(activeTimeline.getInstantDetails(new HoodieInstant(State.COMPLETED, ROLLBACK_ACTION, rollbackInstant)).get(), HoodieRollbackMetadata.class);
    metadata.getPartitionMetadata().forEach((key, value) -> Stream.concat(value.getSuccessDeleteFiles().stream().map(f -> Pair.of(f, true)), value.getFailedDeleteFiles().stream().map(f -> Pair.of(f, false))).forEach(fileWithDeleteStatus -> {
        Comparable[] row = new Comparable[5];
        row[0] = metadata.getStartRollbackTime();
        row[1] = metadata.getCommitsRollback().toString();
        row[2] = key;
        row[3] = fileWithDeleteStatus.getLeft();
        row[4] = fileWithDeleteStatus.getRight();
        rows.add(row);
    }));
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT).addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT).addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION).addTableHeaderField(HoodieTableHeaderFields.HEADER_DELETED_FILE).addTableHeaderField(HoodieTableHeaderFields.HEADER_SUCCEEDED);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableHeader(org.apache.hudi.cli.TableHeader) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) HashMap(java.util.HashMap) CliOption(org.springframework.shell.core.annotation.CliOption) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) List(java.util.List) Stream(java.util.stream.Stream) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) ROLLBACK_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Pair(org.apache.hudi.common.util.collection.Pair) CommandMarker(org.springframework.shell.core.CommandMarker)
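
The Stream.concat idiom above deserves a note: the success and failure file lists are each tagged with a boolean via Pair, then flattened into a single stream so one loop body can emit a row per file. A self-contained sketch of just that idiom, with made-up file names:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import org.apache.hudi.common.util.collection.Pair;

public class ConcatSketch {

    public static void main(String[] args) {
        List<String> succeeded = Arrays.asList("part-0001.parquet");
        List<String> failed = Arrays.asList("part-0002.parquet");
        // Tag each file with its delete status, then handle both lists uniformly.
        Stream.concat(
                succeeded.stream().map(f -> Pair.of(f, true)),
                failed.stream().map(f -> Pair.of(f, false)))
            .forEach(p -> System.out.println(p.getLeft() + " deleted=" + p.getRight()));
    }
}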

Example 14 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class StatsCommand, method writeAmplificationStats:

@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many " + "records were actually written")
public String writeAmplificationStats(@CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    long totalRecordsUpserted = 0;
    long totalRecordsWritten = 0;
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
    List<Comparable[]> rows = new ArrayList<>();
    DecimalFormat df = new DecimalFormat("#.00");
    for (HoodieInstant instantTime : timeline.getInstants().collect(Collectors.toList())) {
        String waf = "0";
        HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(instantTime).get(), HoodieCommitMetadata.class);
        if (commit.fetchTotalUpdateRecordsWritten() > 0) {
            waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
        }
        rows.add(new Comparable[] { instantTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(), commit.fetchTotalRecordsWritten(), waf });
        totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
        totalRecordsWritten += commit.fetchTotalRecordsWritten();
    }
    String waf = "0";
    if (totalRecordsUpserted > 0) {
        waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
    }
    rows.add(new Comparable[] { "Total", totalRecordsUpserted, totalRecordsWritten, waf });
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN).addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) DecimalFormat(java.text.DecimalFormat) ArrayList(java.util.ArrayList) CliCommand(org.springframework.shell.core.annotation.CliCommand)
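
To make the arithmetic concrete: a commit that upserts 20 records but physically rewrites 30 has a write amplification factor of 30 / 20 = 1.50. A tiny sketch of the same float division and DecimalFormat("#.00") formatting the command performs:

import java.text.DecimalFormat;

public class WafSketch {

    public static void main(String[] args) {
        long totalRecordsWritten = 30;
        long totalRecordsUpserted = 20;
        DecimalFormat df = new DecimalFormat("#.00");
        // Cast to float before dividing, exactly as the command does.
        String waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
        System.out.println(waf); // prints 1.50
    }
}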

Example 15 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class StatsCommand, method fileSizeStats:

@CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
public String fileSizeStats(@CliOption(key = { "partitionPath" }, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") final String globRegex, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    FileSystem fs = HoodieCLI.fs;
    String globPath = String.format("%s/%s/*", HoodieCLI.getTableMetaClient().getBasePath(), globRegex);
    List<FileStatus> statuses = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(globPath));
    // per-commit and global file-size histograms: min, 10th/50th/95th percentiles, avg, max, file count, std dev
    Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
    HashMap<String, Histogram> commitHistoMap = new HashMap<>();
    for (FileStatus fileStatus : statuses) {
        String instantTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
        long sz = fileStatus.getLen();
        if (!commitHistoMap.containsKey(instantTime)) {
            commitHistoMap.put(instantTime, new Histogram(new UniformReservoir(MAX_FILES)));
        }
        commitHistoMap.get(instantTime).update(sz);
        globalHistogram.update(sz);
    }
    List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, Histogram> entry : commitHistoMap.entrySet()) {
        Snapshot s = entry.getValue().getSnapshot();
        rows.add(printFileSizeHistogram(entry.getKey(), s));
    }
    Snapshot s = globalHistogram.getSnapshot();
    rows.add(printFileSizeHistogram("ALL", s));
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV);
    return HoodiePrintHelper.print(header, getFieldNameToConverterMap(), sortByField, descending, limit, headerOnly, rows);
}
Also used : Path(org.apache.hadoop.fs.Path) Histogram(com.codahale.metrics.Histogram) FileStatus(org.apache.hadoop.fs.FileStatus) TableHeader(org.apache.hudi.cli.TableHeader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Snapshot(com.codahale.metrics.Snapshot) FileSystem(org.apache.hadoop.fs.FileSystem) UniformReservoir(com.codahale.metrics.UniformReservoir) Map(java.util.Map) CliCommand(org.springframework.shell.core.annotation.CliCommand)
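
The file-size statistics lean on Codahale Metrics: a Histogram backed by a UniformReservoir keeps a bounded random sample of the observed sizes, and its Snapshot exposes the percentiles the table prints. A minimal sketch; the reservoir size of 1024 is an illustrative stand-in for StatsCommand's MAX_FILES constant:

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;

public class HistogramSketch {

    public static void main(String[] args) {
        Histogram histogram = new Histogram(new UniformReservoir(1024));
        // Feed in some file sizes in bytes.
        for (long size : new long[] { 1_000_000L, 5_000_000L, 120_000_000L }) {
            histogram.update(size);
        }
        Snapshot s = histogram.getSnapshot();
        System.out.println("min=" + s.getMin() + " p50=" + s.getMedian()
            + " avg=" + s.getMean() + " p95=" + s.get95thPercentile()
            + " max=" + s.getMax() + " stddev=" + s.getStdDev() + " n=" + s.size());
    }
}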

Aggregations

Counts give the number of examples (out of 45) in which each class appears alongside TableHeader:

ArrayList (java.util.ArrayList): 45
TableHeader (org.apache.hudi.cli.TableHeader): 45
HashMap (java.util.HashMap): 33
CliCommand (org.springframework.shell.core.annotation.CliCommand): 22
Map (java.util.Map): 19
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
List (java.util.List): 18
Test (org.junit.jupiter.api.Test): 18
CommandResult (org.springframework.shell.core.CommandResult): 18
IOException (java.io.IOException): 17
Function (java.util.function.Function): 17
HoodieCLI (org.apache.hudi.cli.HoodieCLI): 15
HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper): 15
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 15
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 14
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 12
Collectors (java.util.stream.Collectors): 11
Path (org.apache.hadoop.fs.Path): 10
HoodieTableHeaderFields (org.apache.hudi.cli.HoodieTableHeaderFields): 10
FSUtils (org.apache.hudi.common.fs.FSUtils): 9