Search in sources :

Example 6 with HoodieBackedTableMetadata

Use of org.apache.hudi.metadata.HoodieBackedTableMetadata in the Apache Hudi project.

From the class MetadataCommand, method stats().

/**
 * CLI command that prints the key/value statistics reported by the metadata table.
 *
 * @return a printable two-column table (stat key, stat value)
 * @throws IOException if the metadata table cannot be read
 */
@CliCommand(value = "metadata stats", help = "Print stats about the metadata")
public String stats() throws IOException {
    HoodieCLI.getTableMetaClient();
    // Open a metadata-table reader for the current table.
    // NOTE(review): "/tmp" is presumably the reader's local spill/working directory — confirm.
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build();
    HoodieBackedTableMetadata tableMetadata = new HoodieBackedTableMetadata(new HoodieLocalEngineContext(HoodieCLI.conf), metadataConfig, HoodieCLI.basePath, "/tmp");
    Map<String, String> statsByKey = tableMetadata.stats();
    // One row per stat entry: column 0 = key, column 1 = value.
    final List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, String> stat : statsByKey.entrySet()) {
        rows.add(new Comparable[] { stat.getKey(), stat.getValue() });
    }
    TableHeader header = new TableHeader().addTableHeaderField("stat key").addTableHeaderField("stat value");
    return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
Also used : TableHeader(org.apache.hudi.cli.TableHeader) ArrayList(java.util.ArrayList) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieBackedTableMetadata(org.apache.hudi.metadata.HoodieBackedTableMetadata) HashMap(java.util.HashMap) Map(java.util.Map) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 7 with HoodieBackedTableMetadata

Use of org.apache.hudi.metadata.HoodieBackedTableMetadata in the Apache Hudi project.

From the class MetadataCommand, method validateFiles().

/**
 * CLI command that cross-validates the file listings served by the metadata table
 * against direct file-system listings, partition by partition.
 *
 * <p>Mismatches (files present on only one side, or with differing lengths) are
 * logged at ERROR level and reported in the returned table.
 *
 * @param verbose when {@code true}, every file is included in the output table;
 *                otherwise only mismatching files are included
 * @return a printable table of (partition, file name, FS presence, metadata presence,
 *         FS size, metadata size)
 * @throws IOException if listing partitions or files fails
 */
@CliCommand(value = "metadata validate-files", help = "Validate all files in all partitions from the metadata")
public String validateFiles(@CliOption(key = { "verbose" }, help = "Print all file details", unspecifiedDefaultValue = "false") final boolean verbose) throws IOException {
    HoodieCLI.getTableMetaClient();
    // Reader backed by the metadata table (enable=true).
    // NOTE(review): "/tmp" is presumably the reader's local spill/working directory — confirm.
    HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build();
    HoodieBackedTableMetadata metadataReader = new HoodieBackedTableMetadata(new HoodieLocalEngineContext(HoodieCLI.conf), config, HoodieCLI.basePath, "/tmp");
    if (!metadataReader.enabled()) {
        return "[ERROR] Metadata Table not enabled/initialized\n\n";
    }
    // Reader with the metadata table disabled (enable=false), which falls back to
    // direct file-system listing — the "ground truth" side of the comparison.
    HoodieMetadataConfig fsConfig = HoodieMetadataConfig.newBuilder().enable(false).build();
    HoodieBackedTableMetadata fsMetaReader = new HoodieBackedTableMetadata(new HoodieLocalEngineContext(HoodieCLI.conf), fsConfig, HoodieCLI.basePath, "/tmp");
    HoodieTimer timer = new HoodieTimer().startTimer();
    List<String> metadataPartitions = metadataReader.getAllPartitionPaths();
    LOG.debug("Listing partitions Took " + timer.endTimer() + " ms");
    List<String> fsPartitions = fsMetaReader.getAllPartitionPaths();
    // Sort both listings so equals() below compares content, not arrival order.
    Collections.sort(fsPartitions);
    Collections.sort(metadataPartitions);
    // Union of both sides: validate every partition seen by either lister.
    Set<String> allPartitions = new HashSet<>();
    allPartitions.addAll(fsPartitions);
    allPartitions.addAll(metadataPartitions);
    if (!fsPartitions.equals(metadataPartitions)) {
        LOG.error("FS partition listing is not matching with metadata partition listing!");
        LOG.error("All FS partitions: " + Arrays.toString(fsPartitions.toArray()));
        LOG.error("All Metadata partitions: " + Arrays.toString(metadataPartitions.toArray()));
    }
    final List<Comparable[]> rows = new ArrayList<>();
    for (String partition : allPartitions) {
        // Index each side's files by file name for pairwise comparison.
        Map<String, FileStatus> fileStatusMap = new HashMap<>();
        Map<String, FileStatus> metadataFileStatusMap = new HashMap<>();
        FileStatus[] metadataStatuses = metadataReader.getAllFilesInPartition(new Path(HoodieCLI.basePath, partition));
        Arrays.stream(metadataStatuses).forEach(entry -> metadataFileStatusMap.put(entry.getPath().getName(), entry));
        FileStatus[] fsStatuses = fsMetaReader.getAllFilesInPartition(new Path(HoodieCLI.basePath, partition));
        Arrays.stream(fsStatuses).forEach(entry -> fileStatusMap.put(entry.getPath().getName(), entry));
        // Union of file names from both sides.
        Set<String> allFiles = new HashSet<>();
        allFiles.addAll(fileStatusMap.keySet());
        allFiles.addAll(metadataFileStatusMap.keySet());
        for (String file : allFiles) {
            Comparable[] row = new Comparable[6];
            row[0] = partition;
            FileStatus fsFileStatus = fileStatusMap.get(file);
            FileStatus metaFileStatus = metadataFileStatusMap.get(file);
            boolean doesFsFileExists = fsFileStatus != null;
            boolean doesMetadataFileExists = metaFileStatus != null;
            // Length 0 stands in for "absent" on a side that does not have the file.
            long fsFileLength = doesFsFileExists ? fsFileStatus.getLen() : 0;
            long metadataFileLength = doesMetadataFileExists ? metaFileStatus.getLen() : 0;
            row[1] = file;
            row[2] = doesFsFileExists;
            row[3] = doesMetadataFileExists;
            row[4] = fsFileLength;
            row[5] = metadataFileLength;
            if (verbose) {
                // if verbose print all files
                rows.add(row);
            } else if ((doesFsFileExists != doesMetadataFileExists) || (fsFileLength != metadataFileLength)) {
                // if non verbose, print only non matching files
                rows.add(row);
            }
        }
        if (metadataStatuses.length != fsStatuses.length) {
            LOG.error(" FS and metadata files count not matching for " + partition + ". FS files count " + fsStatuses.length + ", metadata base files count " + metadataStatuses.length);
        }
        // FS -> metadata direction: every FS file must be known to metadata with the same length.
        for (Map.Entry<String, FileStatus> entry : fileStatusMap.entrySet()) {
            if (!metadataFileStatusMap.containsKey(entry.getKey())) {
                LOG.error("FS file not found in metadata " + entry.getKey());
            } else {
                if (entry.getValue().getLen() != metadataFileStatusMap.get(entry.getKey()).getLen()) {
                    LOG.error(" FS file size mismatch " + entry.getKey() + ", size equality " + (entry.getValue().getLen() == metadataFileStatusMap.get(entry.getKey()).getLen()) + ". FS size " + entry.getValue().getLen() + ", metadata size " + metadataFileStatusMap.get(entry.getKey()).getLen());
                }
            }
        }
        // Metadata -> FS direction: every metadata file must exist on the FS with the same length.
        for (Map.Entry<String, FileStatus> entry : metadataFileStatusMap.entrySet()) {
            if (!fileStatusMap.containsKey(entry.getKey())) {
                LOG.error("Metadata file not found in FS " + entry.getKey());
            } else {
                if (entry.getValue().getLen() != fileStatusMap.get(entry.getKey()).getLen()) {
                    // BUGFIX: the FS size must come from fileStatusMap; the original read it
                    // from metadataFileStatusMap, logging the metadata size twice.
                    LOG.error(" Metadata file size mismatch " + entry.getKey() + ", size equality " + (entry.getValue().getLen() == fileStatusMap.get(entry.getKey()).getLen()) + ". Metadata size " + entry.getValue().getLen() + ", FS size " + fileStatusMap.get(entry.getKey()).getLen());
                }
            }
        }
    }
    TableHeader header = new TableHeader().addTableHeaderField("Partition").addTableHeaderField("File Name").addTableHeaderField(" Is Present in FS ").addTableHeaderField(" Is Present in Metadata").addTableHeaderField(" FS size").addTableHeaderField(" Metadata size");
    return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) TableHeader(org.apache.hudi.cli.TableHeader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieLocalEngineContext(org.apache.hudi.common.engine.HoodieLocalEngineContext) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieBackedTableMetadata(org.apache.hudi.metadata.HoodieBackedTableMetadata) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Aggregations

HoodieBackedTableMetadata (org.apache.hudi.metadata.HoodieBackedTableMetadata)7 ArrayList (java.util.ArrayList)5 HoodieMetadataConfig (org.apache.hudi.common.config.HoodieMetadataConfig)5 Map (java.util.Map)4 FileStatus (org.apache.hadoop.fs.FileStatus)4 Path (org.apache.hadoop.fs.Path)4 TableHeader (org.apache.hudi.cli.TableHeader)4 CliCommand (org.springframework.shell.core.annotation.CliCommand)4 HashMap (java.util.HashMap)3 HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext)3 HoodieTimer (org.apache.hudi.common.util.HoodieTimer)3 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)3 EnumSource (org.junit.jupiter.params.provider.EnumSource)3 IOException (java.io.IOException)2 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2 HashSet (java.util.HashSet)2 List (java.util.List)2 HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext)2 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)2