Use of org.apache.hudi.cli.TableHeader in project hudi by apache: class TestStatsCommand, method testWriteAmplificationStats.
/**
 * Test case for command 'stats wa'.
 */
@Test
public void testWriteAmplificationStats() throws Exception {
  // generate data and metadata
  Map<String, Integer[]> data = new LinkedHashMap<>();
  data.put("100", new Integer[] {15, 10});
  data.put("101", new Integer[] {20, 10});
  data.put("102", new Integer[] {15, 15});
  for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
    String k = entry.getKey();
    Integer[] v = entry.getValue();
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, k, hadoopConf(),
        Option.of(v[0]), Option.of(v[1]));
  }

  CommandResult cr = shell().executeCommand("stats wa");
  assertTrue(cr.isSuccess());

  // generate the expected output
  List<Comparable[]> rows = new ArrayList<>();
  DecimalFormat df = new DecimalFormat("#.00");
  data.forEach((key, value) -> {
    // the test generator writes to two partitions, so the counts are doubled
    rows.add(new Comparable[] {key, value[1] * 2, value[0] * 2, df.format((float) value[0] / value[1])});
  });
  int totalWrite = data.values().stream().map(integers -> integers[0] * 2).mapToInt(s -> s).sum();
  int totalUpdate = data.values().stream().map(integers -> integers[1] * 2).mapToInt(s -> s).sum();
  rows.add(new Comparable[] {"Total", totalUpdate, totalWrite, df.format((float) totalWrite / totalUpdate)});

  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
  String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
  expected = removeNonWordAndStripSpace(expected);
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expected, got);
}
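The pattern exercised by this test recurs in every snippet on this page: build a TableHeader with one addTableHeaderField call per column, collect rows as Comparable[] arrays in the same column order, and render both with HoodiePrintHelper.print. A minimal standalone sketch, using illustrative column names rather than HoodieTableHeaderFields constants:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.TableHeader;

public class TableHeaderSketch {
  public static void main(String[] args) {
    // one header field per column, in display order
    // ("Commit" and "Files" are illustrative names, not library constants)
    TableHeader header = new TableHeader()
        .addTableHeaderField("Commit")
        .addTableHeaderField("Files");
    // each row is a Comparable[] whose entries line up with the header fields
    List<Comparable[]> rows = new ArrayList<>();
    rows.add(new Comparable[] {"20220101000000", 12});
    // no field converters, no sorting, no row limit (-1), print header and body
    System.out.println(HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows));
  }
}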
Use of org.apache.hudi.cli.TableHeader in project hudi by apache: class MetadataCommand, method listPartitions.
@CliCommand(value = "metadata list-partitions", help = "List all partitions from metadata")
public String listPartitions(@CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master) throws IOException {
HoodieCLI.getTableMetaClient();
initJavaSparkContext(Option.of(master));
HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build();
HoodieBackedTableMetadata metadata = new HoodieBackedTableMetadata(new HoodieSparkEngineContext(jsc), config, HoodieCLI.basePath, "/tmp");
if (!metadata.enabled()) {
return "[ERROR] Metadata Table not enabled/initialized\n\n";
}
HoodieTimer timer = new HoodieTimer().startTimer();
List<String> partitions = metadata.getAllPartitionPaths();
LOG.debug("Took " + timer.endTimer() + " ms");
final List<Comparable[]> rows = new ArrayList<>();
partitions.stream().sorted(Comparator.reverseOrder()).forEach(p -> {
Comparable[] row = new Comparable[1];
row[0] = p;
rows.add(row);
});
TableHeader header = new TableHeader().addTableHeaderField("partition");
return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
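From the Hudi CLI shell this is invoked as "metadata list-partitions", optionally with --sparkMaster; the result is a one-column table of partition paths in reverse-sorted order, printed without any row limit.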
Use of org.apache.hudi.cli.TableHeader in project hudi by apache: class RollbacksCommand, method showRollback.
@CliCommand(value = "show rollback", help = "Show details of a rollback instant")
public String showRollback(@CliOption(key = { "instant" }, help = "Rollback instant", mandatory = true) String rollbackInstant, @CliOption(key = { "limit" }, help = "Limit #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.getTableMetaClient());
final List<Comparable[]> rows = new ArrayList<>();
HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeAvroMetadata(activeTimeline.getInstantDetails(new HoodieInstant(State.COMPLETED, ROLLBACK_ACTION, rollbackInstant)).get(), HoodieRollbackMetadata.class);
metadata.getPartitionMetadata().forEach((key, value) -> Stream.concat(value.getSuccessDeleteFiles().stream().map(f -> Pair.of(f, true)), value.getFailedDeleteFiles().stream().map(f -> Pair.of(f, false))).forEach(fileWithDeleteStatus -> {
Comparable[] row = new Comparable[5];
row[0] = metadata.getStartRollbackTime();
row[1] = metadata.getCommitsRollback().toString();
row[2] = key;
row[3] = fileWithDeleteStatus.getLeft();
row[4] = fileWithDeleteStatus.getRight();
rows.add(row);
}));
TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT).addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT).addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION).addTableHeaderField(HoodieTableHeaderFields.HEADER_DELETED_FILE).addTableHeaderField(HoodieTableHeaderFields.HEADER_SUCCEEDED);
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
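From the CLI shell this would be invoked as, for example, show rollback --instant 20220101000000 --limit 20 (the instant value here is illustrative); each output row pairs one deleted file with a flag indicating whether its delete succeeded.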
Use of org.apache.hudi.cli.TableHeader in project hudi by apache: class StatsCommand, method writeAmplificationStats.
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many " + "records were actually written")
public String writeAmplificationStats(@CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
long totalRecordsUpserted = 0;
long totalRecordsWritten = 0;
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
List<Comparable[]> rows = new ArrayList<>();
DecimalFormat df = new DecimalFormat("#.00");
for (HoodieInstant instantTime : timeline.getInstants().collect(Collectors.toList())) {
String waf = "0";
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(instantTime).get(), HoodieCommitMetadata.class);
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
}
rows.add(new Comparable[] { instantTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(), commit.fetchTotalRecordsWritten(), waf });
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
totalRecordsWritten += commit.fetchTotalRecordsWritten();
}
String waf = "0";
if (totalRecordsUpserted > 0) {
waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
}
rows.add(new Comparable[] { "Total", totalRecordsUpserted, totalRecordsWritten, waf });
TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN).addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
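As a worked example: a commit that upserted 10 records but physically wrote 15 has a write amplification factor of 15 / 10 = 1.50, which is exactly the "100" entry verified by the test at the top of this page.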
Use of org.apache.hudi.cli.TableHeader in project hudi by apache: class StatsCommand, method fileSizeStats.
@CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
public String fileSizeStats(@CliOption(key = { "partitionPath" }, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") final String globRegex, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
FileSystem fs = HoodieCLI.fs;
String globPath = String.format("%s/%s/*", HoodieCLI.getTableMetaClient().getBasePath(), globRegex);
List<FileStatus> statuses = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(globPath));
// max, min, #small files < 10MB, 50th, avg, 95th
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
HashMap<String, Histogram> commitHistoMap = new HashMap<>();
for (FileStatus fileStatus : statuses) {
String instantTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
long sz = fileStatus.getLen();
if (!commitHistoMap.containsKey(instantTime)) {
commitHistoMap.put(instantTime, new Histogram(new UniformReservoir(MAX_FILES)));
}
commitHistoMap.get(instantTime).update(sz);
globalHistogram.update(sz);
}
List<Comparable[]> rows = new ArrayList<>();
for (Map.Entry<String, Histogram> entry : commitHistoMap.entrySet()) {
Snapshot s = entry.getValue().getSnapshot();
rows.add(printFileSizeHistogram(entry.getKey(), s));
}
Snapshot s = globalHistogram.getSnapshot();
rows.add(printFileSizeHistogram("ALL", s));
TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES).addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV);
return HoodiePrintHelper.print(header, getFieldNameToConverterMap(), sortByField, descending, limit, headerOnly, rows);
}
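printFileSizeHistogram is a helper defined elsewhere in StatsCommand and is not shown in this snippet. A plausible sketch, assuming it simply maps the Dropwizard metrics Snapshot onto the nine header columns declared above (the actual implementation may differ):

// a sketch, not the verified StatsCommand implementation: maps a
// com.codahale.metrics.Snapshot onto the nine columns in the header
public Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) {
  return new Comparable[] {
      instantTime,
      s.getMin(),             // HEADER_HISTOGRAM_MIN
      s.getValue(0.1),        // HEADER_HISTOGRAM_10TH
      s.getMedian(),          // HEADER_HISTOGRAM_50TH
      s.getMean(),            // HEADER_HISTOGRAM_AVG
      s.get95thPercentile(),  // HEADER_HISTOGRAM_95TH
      s.getMax(),             // HEADER_HISTOGRAM_MAX
      s.size(),               // HEADER_HISTOGRAM_NUM_FILES
      s.getStdDev()           // HEADER_HISTOGRAM_STD_DEV
  };
}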