Use of org.apache.hudi.cli.TableHeader in project hudi by apache.
The class HoodieLogFileCommand, method showLogFileCommits.
@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
public String showLogFileCommits(
    @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final String logFilePathPattern,
    @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
    @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
    @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
    @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
  FileSystem fs = HoodieCLI.getTableMetaClient().getFs();
  List<String> logFilePaths = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(logFilePathPattern)).stream()
      .map(status -> status.getPath().toString()).collect(Collectors.toList());
  Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = new HashMap<>();
  int numCorruptBlocks = 0;
  int dummyInstantTimeCount = 0;
  for (String logFilePath : logFilePaths) {
    FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
    Schema writerSchema = new AvroSchemaConverter()
        .convert(Objects.requireNonNull(TableSchemaResolver.readSchemaFromLogFile(fs, new Path(logFilePath))));
    Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
    // read the avro blocks
    while (reader.hasNext()) {
      HoodieLogBlock n = reader.next();
      String instantTime;
      AtomicInteger recordCount = new AtomicInteger(0);
      if (n instanceof HoodieCorruptBlock) {
        try {
          instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
          if (instantTime == null) {
            throw new Exception("Invalid instant time " + instantTime);
          }
        } catch (Exception e) {
          numCorruptBlocks++;
          instantTime = "corrupt_block_" + numCorruptBlocks;
          // could not read metadata for corrupt block
        }
      } else {
        instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
        if (instantTime == null) {
          // This can happen when reading archived commit files since they were written without any instant time
          dummyInstantTimeCount++;
          instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
        }
        if (n instanceof HoodieDataBlock) {
          try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) n).getRecordItr()) {
            recordItr.forEachRemaining(r -> recordCount.incrementAndGet());
          }
        }
      }
      if (commitCountAndMetadata.containsKey(instantTime)) {
        commitCountAndMetadata.get(instantTime)
            .add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get()));
      } else {
        List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list = new ArrayList<>();
        list.add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get()));
        commitCountAndMetadata.put(instantTime, list);
      }
    }
    reader.close();
  }
  List<Comparable[]> rows = new ArrayList<>();
  ObjectMapper objectMapper = new ObjectMapper();
  for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata.entrySet()) {
    String instantTime = entry.getKey();
    for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry.getValue()) {
      Comparable[] output = new Comparable[5];
      output[0] = instantTime;
      output[1] = tuple3._3();
      output[2] = tuple3._1().toString();
      output[3] = objectMapper.writeValueAsString(tuple3._2()._1());
      output[4] = objectMapper.writeValueAsString(tuple3._2()._2());
      rows.add(output);
    }
  }
  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT_TIME)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_RECORD_COUNT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_BLOCK_TYPE)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HEADER_METADATA)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_FOOTER_METADATA);
  return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
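Stripped of the log-reading details, the command above follows the same two-step pattern every example on this page repeats: declare columns with TableHeader, then hand the rows to HoodiePrintHelper.print. A minimal, self-contained sketch of just that pattern (the column names and row values here are made up for illustration) might look like:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.TableHeader;

public class TableHeaderSketch {
  public static void main(String[] args) {
    // Columns appear in the order they are added to the header.
    TableHeader header = new TableHeader()
        .addTableHeaderField("Instant Time")
        .addTableHeaderField("Record Count");
    // Each row is a Comparable[] whose positions line up with the header fields.
    List<Comparable[]> rows = new ArrayList<>();
    rows.add(new Comparable[] {"20230101010101", 42});
    rows.add(new Comparable[] {"20230102020202", 7});
    // No converters, no sort field, no row limit, and print rows as well as the header.
    System.out.println(HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows));
  }
}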
Use of org.apache.hudi.cli.TableHeader in project hudi by apache.
The class TestCommitsCommand, method testShowCommitPartitionsWithReplaceCommits.
@Test
public void testShowCommitPartitionsWithReplaceCommits() throws Exception {
  Map<HoodieInstant, Integer[]> data = generateMixedData();
  for (HoodieInstant commitInstant : data.keySet()) {
    CommandResult cr = shell().executeCommand(String.format("commit showpartitions --commit %s", commitInstant.getTimestamp()));
    assertTrue(cr.isSuccess());
    Integer[] value = data.get(commitInstant);
    List<Comparable[]> rows = new ArrayList<>();
    // prevCommit not null, so add 0, update 1
    Arrays.asList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).stream()
        .forEach(partition -> rows.add(new Comparable[] { commitInstant.getAction(), partition, 0, 1, 0, value[1],
            HoodieTestCommitMetadataGenerator.DEFAULT_TOTAL_WRITE_BYTES, 0 }));
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN,
        entry -> NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_ADDED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_UPDATED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_INSERTED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS);
    String expected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, rows);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
  }
}
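The fieldNameToConverterMap argument is what lets a column be post-processed before rendering; in the test above the total-bytes column is turned into a human-readable size while every other column is printed verbatim. A small sketch of the converter idea in isolation, assuming only the column name used above:
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.common.util.NumericUtils;

public class ConverterSketch {
  // Columns without an entry in the map are printed as-is; this one is humanized.
  static Map<String, Function<Object, String>> bytesConverter() {
    Map<String, Function<Object, String>> converters = new HashMap<>();
    converters.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN,
        value -> NumericUtils.humanReadableByteCount(Long.parseLong(value.toString())));
    return converters;
  }
}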
Use of org.apache.hudi.cli.TableHeader in project hudi by apache.
The class TestCommitsCommand, method testShowCommitFilesWithReplaceCommits.
@Test
public void testShowCommitFilesWithReplaceCommits() throws Exception {
  Map<HoodieInstant, Integer[]> data = generateMixedData();
  for (HoodieInstant commitInstant : data.keySet()) {
    CommandResult cr = shell().executeCommand(String.format("commit showfiles --commit %s", commitInstant.getTimestamp()));
    assertTrue(cr.isSuccess());
    Integer[] value = data.get(commitInstant);
    List<Comparable[]> rows = new ArrayList<>();
    Arrays.asList(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).stream()
        .forEach(partition -> rows.add(new Comparable[] { commitInstant.getAction(), partition,
            HoodieTestCommitMetadataGenerator.DEFAULT_FILEID, HoodieTestCommitMetadataGenerator.DEFAULT_PRE_COMMIT,
            value[1], value[0], HoodieTestCommitMetadataGenerator.DEFAULT_TOTAL_WRITE_BYTES,
            // default 0 errors and blank file with 0 size
            0, 0 }));
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PREVIOUS_COMMIT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_SIZE);
    String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
  }
}
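Both commit tests compare the expected and captured tables only after running them through removeNonWordAndStripSpace, so border characters and column padding cannot cause spurious failures. The helper itself is not shown on this page; a hypothetical stand-in (the name and regex are illustrative, not Hudi's implementation) could be as simple as:
// Hypothetical normalizer: keep only word characters so table borders,
// whitespace, and alignment differences drop out of the comparison.
static String normalizeTable(String rendered) {
  return rendered.replaceAll("\\W+", "");
}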
Use of org.apache.hudi.cli.TableHeader in project hudi by apache.
The class TestCompactionCommand, method testCompactionsAll.
/**
* Test case for command 'compactions show all'.
*/
@Test
public void testCompactionsAll() throws IOException {
  // create MOR table.
  new TableCommand().createTable(tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(), "",
      TimelineLayoutVersion.VERSION_1, HoodieAvroPayload.class.getName());
  CompactionTestUtils.setupAndValidateCompactionOperations(HoodieCLI.getTableMetaClient(), false, 3, 4, 3, 3);
  HoodieCLI.getTableMetaClient().reloadActiveTimeline();
  CommandResult cr = shell().executeCommand("compactions show all");
  System.out.println(cr.getResult().toString());
  TableHeader header = new TableHeader()
      .addTableHeaderField("Compaction Instant Time")
      .addTableHeaderField("State")
      .addTableHeaderField("Total FileIds to be Compacted");
  Map<String, Integer> fileIds = new HashMap<>();
  fileIds.put("001", 3);
  fileIds.put("003", 4);
  fileIds.put("005", 3);
  fileIds.put("007", 3);
  List<Comparable[]> rows = new ArrayList<>();
  Arrays.asList("001", "003", "005", "007").stream().sorted(Comparator.reverseOrder()).forEach(instant -> {
    rows.add(new Comparable[] { instant, "REQUESTED", fileIds.get(instant) });
  });
  String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
  assertEquals(expected, cr.getResult().toString());
}
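The test pre-sorts the instants with Comparator.reverseOrder() and passes an empty sort field to HoodiePrintHelper.print, which suggests 'compactions show all' lists the newest compaction first. The sortByField, descending, and limit parameters that the CLI commands above pass straight through can also do that ordering inside the print helper; a minimal sketch, assuming the sort field is matched against the header name:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.TableHeader;

public class SortedPrintSketch {
  public static void main(String[] args) {
    TableHeader header = new TableHeader()
        .addTableHeaderField("Compaction Instant Time")
        .addTableHeaderField("State")
        .addTableHeaderField("Total FileIds to be Compacted");
    List<Comparable[]> rows = new ArrayList<>();
    rows.add(new Comparable[] {"001", "REQUESTED", 3});
    rows.add(new Comparable[] {"003", "REQUESTED", 4});
    rows.add(new Comparable[] {"005", "REQUESTED", 3});
    // Sort by instant time, descending, and keep only the newest two rows.
    System.out.println(HoodiePrintHelper.print(
        header, new HashMap<>(), "Compaction Instant Time", true, 2, false, rows));
  }
}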
Use of org.apache.hudi.cli.TableHeader in project hudi by apache.
The class TestFileSystemViewCommand, method testShowCommitsWithSpecifiedValues.
/**
* Test case for 'show fsview all' with specified values.
*/
@Test
public void testShowCommitsWithSpecifiedValues() {
  // Test command with options: baseFileOnly is true and maxInstant is 2
  CommandResult cr = shell().executeCommand("show fsview all --baseFileOnly true --maxInstant 2");
  assertTrue(cr.isSuccess());
  List<Comparable[]> rows = new ArrayList<>();
  Stream<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath);
  // Only expect instant 1, since maxInstant was specified as 2
  fileGroups.forEach(fg -> fg.getAllFileSlices().filter(fs -> fs.getBaseInstantTime().equals("1")).forEach(fs -> {
    int idx = 0;
    // For base-file-only views, do not display any delta-file related columns.
    Comparable[] row = new Comparable[5];
    row[idx++] = fg.getPartitionPath();
    row[idx++] = fg.getFileGroupId().getFileId();
    row[idx++] = fs.getBaseInstantTime();
    row[idx++] = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getPath() : "";
    row[idx++] = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getFileSize() : -1;
    rows.add(row);
  }));
  Function<Object, String> converterFunction =
      entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString())));
  Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
  fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILE_SIZE, converterFunction);
  fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE, converterFunction);
  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE);
  String expected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, rows);
  expected = removeNonWordAndStripSpace(expected);
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expected, got);
}