Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
The class CommitsCommand, method showCommitFiles.
@CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
public String showCommitFiles(@CliOption(key = { "createView" }, mandatory = false, help = "view name to store output table", unspecifiedDefaultValue = "") final String exportTableName, @CliOption(key = { "commit" }, help = "Commit to show") final String instantTime, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
Option<HoodieInstant> hoodieInstantOption = getCommitForInstant(timeline, instantTime);
Option<HoodieCommitMetadata> commitMetadataOptional = getHoodieCommitMetadata(timeline, hoodieInstantOption);
if (!commitMetadataOptional.isPresent()) {
return "Commit " + instantTime + " not found in Commits " + timeline;
}
HoodieCommitMetadata meta = commitMetadataOptional.get();
List<Comparable[]> rows = new ArrayList<>();
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats().entrySet()) {
String action = hoodieInstantOption.get().getAction();
String path = entry.getKey();
List<HoodieWriteStat> stats = entry.getValue();
for (HoodieWriteStat stat : stats) {
rows.add(new Comparable[] { action, path, stat.getFileId(), stat.getPrevCommit(), stat.getNumUpdateWrites(), stat.getNumWrites(), stat.getTotalWriteBytes(), stat.getTotalWriteErrors(), stat.getFileSizeInBytes() });
}
}
TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION).addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH).addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID).addTableHeaderField(HoodieTableHeaderFields.HEADER_PREVIOUS_COMMIT).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS).addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_SIZE);
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows, exportTableName);
}
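Each HoodieWriteStat contributes exactly one row to the table above. A minimal standalone sketch of that per-stat row construction; all stat values below are made up for illustration:

import java.util.Arrays;
import org.apache.hudi.common.model.HoodieWriteStat;

public class WriteStatRowExample {
  public static void main(String[] args) {
    // Populate a stat the way a Hudi writer would; values are illustrative only.
    HoodieWriteStat stat = new HoodieWriteStat();
    stat.setFileId("file-group-1");
    stat.setPrevCommit("20220101000000");
    stat.setNumUpdateWrites(10);
    stat.setNumWrites(100);
    stat.setTotalWriteBytes(4096);
    stat.setTotalWriteErrors(0);

    // Same row layout showCommitFiles builds for each stat.
    Comparable[] row = new Comparable[] { "commit", "2021/09/01", stat.getFileId(), stat.getPrevCommit(),
        stat.getNumUpdateWrites(), stat.getNumWrites(), stat.getTotalWriteBytes(), stat.getTotalWriteErrors(),
        stat.getFileSizeInBytes() };
    System.out.println(Arrays.toString(row));
  }
}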
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
The class CommitsCommand, method printCommitsWithMetadata.
private String printCommitsWithMetadata(HoodieDefaultTimeline timeline, final Integer limit, final String sortByField,
    final boolean descending, final boolean headerOnly, final String tempTableName) throws IOException {
  final List<Comparable[]> rows = new ArrayList<>();
  final List<HoodieInstant> commits = timeline.getCommitsTimeline().filterCompletedInstants()
      .getInstants().collect(Collectors.toList());
  // The timeline can be read from multiple files, so an explicit sort is needed rather than simply reversing the collection.
  Collections.sort(commits, HoodieInstant.COMPARATOR.reversed());
  for (final HoodieInstant commit : commits) {
    final HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
        timeline.getInstantDetails(commit).get(), HoodieCommitMetadata.class);
    for (Map.Entry<String, List<HoodieWriteStat>> partitionWriteStat : commitMetadata.getPartitionToWriteStats().entrySet()) {
      for (HoodieWriteStat hoodieWriteStat : partitionWriteStat.getValue()) {
        rows.add(new Comparable[] { commit.getAction(), commit.getTimestamp(), hoodieWriteStat.getPartitionPath(),
            hoodieWriteStat.getFileId(), hoodieWriteStat.getPrevCommit(), hoodieWriteStat.getNumWrites(),
            hoodieWriteStat.getNumInserts(), hoodieWriteStat.getNumDeletes(), hoodieWriteStat.getNumUpdateWrites(),
            hoodieWriteStat.getTotalWriteErrors(), hoodieWriteStat.getTotalLogBlocks(),
            hoodieWriteStat.getTotalCorruptLogBlock(), hoodieWriteStat.getTotalRollbackBlocks(),
            hoodieWriteStat.getTotalLogRecords(), hoodieWriteStat.getTotalUpdatedRecordsCompacted(),
            hoodieWriteStat.getTotalWriteBytes() });
      }
    }
  }
  final Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
  fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN,
      entry -> NumericUtils.humanReadableByteCount(Double.valueOf(entry.toString())));
  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_PREVIOUS_COMMIT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_WRITES)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_INSERTS)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELETES)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_UPDATE_WRITES)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_LOG_BLOCKS)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_CORRUPT_LOG_BLOCKS)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ROLLBACK_BLOCKS)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_LOG_RECORDS)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPDATED_RECORDS_COMPACTED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN);
  // Pass the converter map so HEADER_TOTAL_BYTES_WRITTEN is rendered human-readable.
  return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows, tempTableName);
}
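The converter map is how the CLI post-processes a raw column value before printing. A minimal sketch of the same pattern in isolation; the byte-count helper below is a stand-in for org.apache.hudi.common.util.NumericUtils.humanReadableByteCount, and the column name is illustrative:

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

public class ConverterMapExample {
  // Stand-in for NumericUtils.humanReadableByteCount, not Hudi's implementation.
  static String humanReadableByteCount(double bytes) {
    if (bytes < 1024) {
      return bytes + " B";
    }
    int exp = (int) (Math.log(bytes) / Math.log(1024));
    return String.format("%.1f %sB", bytes / Math.pow(1024, exp), "KMGTPE".charAt(exp - 1));
  }

  public static void main(String[] args) {
    // Column name -> rendering function, keyed the same way the CLI keys on header fields.
    Map<String, Function<Object, String>> converters = new HashMap<>();
    converters.put("Total Bytes Written", entry -> humanReadableByteCount(Double.valueOf(entry.toString())));
    System.out.println(converters.get("Total Bytes Written").apply(123456789L)); // prints e.g. "117.7 MB"
  }
}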
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
The class CommitsCommand, method showCommitPartitions.
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
public String showCommitPartitions(@CliOption(key = { "createView" }, mandatory = false, help = "view name to store output table", unspecifiedDefaultValue = "") final String exportTableName, @CliOption(key = { "commit" }, help = "Commit to show") final String instantTime, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
Option<HoodieInstant> hoodieInstantOption = getCommitForInstant(timeline, instantTime);
Option<HoodieCommitMetadata> commitMetadataOptional = getHoodieCommitMetadata(timeline, hoodieInstantOption);
if (!commitMetadataOptional.isPresent()) {
return "Commit " + instantTime + " not found in Commits " + timeline;
}
HoodieCommitMetadata meta = commitMetadataOptional.get();
List<Comparable[]> rows = new ArrayList<>();
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats().entrySet()) {
String action = hoodieInstantOption.get().getAction();
String path = entry.getKey();
List<HoodieWriteStat> stats = entry.getValue();
long totalFilesAdded = 0;
long totalFilesUpdated = 0;
long totalRecordsUpdated = 0;
long totalRecordsInserted = 0;
long totalBytesWritten = 0;
long totalWriteErrors = 0;
for (HoodieWriteStat stat : stats) {
if (stat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) {
totalFilesAdded += 1;
} else {
totalFilesUpdated += 1;
totalRecordsUpdated += stat.getNumUpdateWrites();
}
totalRecordsInserted += stat.getNumInserts();
totalBytesWritten += stat.getTotalWriteBytes();
totalWriteErrors += stat.getTotalWriteErrors();
}
rows.add(new Comparable[] { action, path, totalFilesAdded, totalFilesUpdated, totalRecordsInserted, totalRecordsUpdated, totalBytesWritten, totalWriteErrors });
}
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry -> NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));
TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION).addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_ADDED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_UPDATED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_INSERTED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS);
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows, exportTableName);
}
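showCommitPartitions decides added vs. updated per file by checking the stat's previous commit against HoodieWriteStat.NULL_COMMIT. A minimal sketch of that classification over two hand-built stats; the values are made up:

import java.util.Arrays;
import org.apache.hudi.common.model.HoodieWriteStat;

public class PartitionAggregationExample {
  public static void main(String[] args) {
    HoodieWriteStat added = new HoodieWriteStat();
    added.setPrevCommit(HoodieWriteStat.NULL_COMMIT); // no prior version of the file: counted as added
    HoodieWriteStat updated = new HoodieWriteStat();
    updated.setPrevCommit("20220101000000"); // a prior version exists: counted as updated
    updated.setNumUpdateWrites(20);

    long totalFilesAdded = 0;
    long totalFilesUpdated = 0;
    long totalRecordsUpdated = 0;
    for (HoodieWriteStat stat : Arrays.asList(added, updated)) {
      if (stat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) {
        totalFilesAdded++;
      } else {
        totalFilesUpdated++;
        totalRecordsUpdated += stat.getNumUpdateWrites();
      }
    }
    System.out.println(totalFilesAdded + " added, " + totalFilesUpdated + " updated, "
        + totalRecordsUpdated + " records updated");
  }
}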
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
The class HoodieTestReplaceCommitMetadataGenerator, method generateReplaceCommitMetadata.
private static HoodieReplaceCommitMetadata generateReplaceCommitMetadata(HashMap<String, List<String>> partitionToFilePaths,
    Option<Integer> writes, Option<Integer> updates) {
  HoodieReplaceCommitMetadata metadata = new HoodieReplaceCommitMetadata();
  partitionToFilePaths.forEach((key, value) -> value.forEach(f -> {
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setPartitionPath(key);
    writeStat.setPath(DEFAULT_PATH);
    writeStat.setFileId(DEFAULT_FILEID);
    writeStat.setTotalWriteBytes(DEFAULT_TOTAL_WRITE_BYTES);
    writeStat.setPrevCommit(DEFAULT_PRE_COMMIT);
    writeStat.setNumWrites(writes.orElse(DEFAULT_NUM_WRITES));
    writeStat.setNumUpdateWrites(updates.orElse(DEFAULT_NUM_UPDATE_WRITES));
    writeStat.setTotalLogBlocks(DEFAULT_TOTAL_LOG_BLOCKS);
    writeStat.setTotalLogRecords(DEFAULT_TOTAL_LOG_RECORDS);
    metadata.addWriteStat(key, writeStat);
  }));
  metadata.setPartitionToReplaceFileIds(new HashMap<String, List<String>>() {
    {
      // TODO fix
      put(DEFAULT_FIRST_PARTITION_PATH, createImmutableList(baseFileName(DEFAULT_FIRST_PARTITION_PATH, "1")));
    }
  });
  return metadata;
}
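Outside the test generator, the same two calls assemble a replace-commit payload: addWriteStat records what was written, and setPartitionToReplaceFileIds records which file groups the commit supersedes. A minimal sketch with illustrative partition and file ids:

import java.util.Collections;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;

public class ReplaceMetadataExample {
  public static void main(String[] args) {
    HoodieReplaceCommitMetadata metadata = new HoodieReplaceCommitMetadata();

    // One stat for the new file produced by the replace (e.g. by clustering).
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setPartitionPath("2021/09/01");
    writeStat.setFileId("new-file-id");
    writeStat.setNumWrites(100);
    metadata.addWriteStat("2021/09/01", writeStat);

    // File groups whose data this commit replaces.
    metadata.setPartitionToReplaceFileIds(
        Collections.singletonMap("2021/09/01", Collections.singletonList("old-file-id")));
    System.out.println(metadata.getPartitionToReplaceFileIds());
  }
}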
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
The class HoodieSparkCopyOnWriteTable, method updateColumnsStatsIndex.
private void updateColumnsStatsIndex(@Nonnull HoodieEngineContext context, @Nonnull List<HoodieWriteStat> updatedFilesStats,
    @Nonnull String instantTime) throws Exception {
  String sortColsList = config.getClusteringSortColumns();
  String basePath = metaClient.getBasePath();
  String indexPath = metaClient.getColumnStatsIndexPath();
  List<String> touchedFiles = updatedFilesStats.stream()
      .map(s -> new Path(basePath, s.getPath()).toString())
      .collect(Collectors.toList());
  if (touchedFiles.isEmpty() || StringUtils.isNullOrEmpty(sortColsList) || StringUtils.isNullOrEmpty(indexPath)) {
    return;
  }
  LOG.info(String.format("Updating column-statistics index table (%s)", indexPath));
  List<String> sortCols = Arrays.stream(sortColsList.split(","))
      .map(String::trim)
      .collect(Collectors.toList());
  HoodieSparkEngineContext sparkEngineContext = (HoodieSparkEngineContext) context;
  // Fetch the table schema to appropriately construct the col-stats index schema
  Schema tableWriteSchema = HoodieAvroUtils.createHoodieWriteSchema(
      new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields());
  List<String> completedCommits = metaClient.getCommitsTimeline().filterCompletedInstants()
      .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
  ColumnStatsIndexHelper.updateColumnStatsIndexFor(sparkEngineContext.getSqlContext().sparkSession(),
      AvroConversionUtils.convertAvroSchemaToStructType(tableWriteSchema), touchedFiles, sortCols, indexPath,
      instantTime, completedCommits);
  LOG.info(String.format("Successfully updated column-statistics index at instant (%s)", instantTime));
}
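The index update resolves each stat's table-relative path against the table base path before handing the file list to the helper. A minimal sketch of that resolution step; the base path and file path are illustrative:

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieWriteStat;

public class TouchedFilesExample {
  public static void main(String[] args) {
    String basePath = "/tmp/hudi_table"; // illustrative table base path
    HoodieWriteStat stat = new HoodieWriteStat();
    stat.setPath("2021/09/01/file1.parquet"); // stored relative to the table base

    // Same resolution updateColumnsStatsIndex applies to every updated file's stat.
    List<String> touchedFiles = Stream.of(stat)
        .map(s -> new Path(basePath, s.getPath()).toString())
        .collect(Collectors.toList());
    System.out.println(touchedFiles); // [/tmp/hudi_table/2021/09/01/file1.parquet]
  }
}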