Example 16 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class FileSystemViewCommand, method showLatestFileSlices.

@CliCommand(value = "show fsview latest", help = "Show latest file-system view")
public String showLatestFileSlices(
        @CliOption(key = { "partitionPath" }, help = "A valid partition path", mandatory = true) String partition,
        @CliOption(key = { "baseFileOnly" }, help = "Only display base file view", unspecifiedDefaultValue = "false") boolean baseFileOnly,
        @CliOption(key = { "maxInstant" }, help = "File-Slices up to this instant are displayed", unspecifiedDefaultValue = "") String maxInstant,
        @CliOption(key = { "merge" }, help = "Merge File Slices due to pending compaction", unspecifiedDefaultValue = "true") final boolean merge,
        @CliOption(key = { "includeMax" }, help = "Include Max Instant", unspecifiedDefaultValue = "false") boolean includeMaxInstant,
        @CliOption(key = { "includeInflight" }, help = "Include Inflight Instants", unspecifiedDefaultValue = "false") boolean includeInflight,
        @CliOption(key = { "excludeCompaction" }, help = "Exclude compaction Instants", unspecifiedDefaultValue = "false") boolean excludeCompaction,
        @CliOption(key = { "limit" }, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    HoodieTableFileSystemView fsView = buildFileSystemView(partition, maxInstant, baseFileOnly, includeMaxInstant, includeInflight, excludeCompaction);
    List<Comparable[]> rows = new ArrayList<>();
    final Stream<FileSlice> fileSliceStream;
    if (!merge) {
        fileSliceStream = fsView.getLatestFileSlices(partition);
    } else {
        if (maxInstant.isEmpty()) {
            maxInstant = HoodieCLI.getTableMetaClient().getActiveTimeline().filterCompletedAndCompactionInstants().lastInstant().get().getTimestamp();
        }
        fileSliceStream = fsView.getLatestMergedFileSlicesBeforeOrOn(partition, maxInstant);
    }
    fileSliceStream.forEach(fs -> {
        int idx = 0;
        Comparable[] row = new Comparable[baseFileOnly ? 5 : 13];
        row[idx++] = partition;
        row[idx++] = fs.getFileId();
        row[idx++] = fs.getBaseInstantTime();
        row[idx++] = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getPath() : "";
        long dataFileSize = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getFileSize() : -1;
        row[idx++] = dataFileSize;
        if (!baseFileOnly) {
            row[idx++] = fs.getLogFiles().count();
            row[idx++] = fs.getLogFiles().mapToLong(HoodieLogFile::getFileSize).sum();
            long logFilesScheduledForCompactionTotalSize = fs.getLogFiles()
                    .filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                    .mapToLong(HoodieLogFile::getFileSize).sum();
            row[idx++] = logFilesScheduledForCompactionTotalSize;
            long logFilesUnscheduledTotalSize = fs.getLogFiles()
                    .filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                    .mapToLong(HoodieLogFile::getFileSize).sum();
            row[idx++] = logFilesUnscheduledTotalSize;
            double logSelectedForCompactionToBaseRatio = dataFileSize > 0 ? logFilesScheduledForCompactionTotalSize / (dataFileSize * 1.0) : -1;
            row[idx++] = logSelectedForCompactionToBaseRatio;
            double logUnscheduledToBaseRatio = dataFileSize > 0 ? logFilesUnscheduledTotalSize / (dataFileSize * 1.0) : -1;
            row[idx++] = logUnscheduledToBaseRatio;
            row[idx++] = fs.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                    .collect(Collectors.toList()).toString();
            row[idx++] = fs.getLogFiles().filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
                    .collect(Collectors.toList()).toString();
        }
        rows.add(row);
    });
    Function<Object, String> converterFunction =
            entry -> NumericUtils.humanReadableByteCount(Double.parseDouble(entry.toString()));
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE, converterFunction);
    if (!baseFileOnly) {
        fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_SIZE, converterFunction);
        fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DELTA_SIZE_SCHEDULED, converterFunction);
        fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DELTA_SIZE_UNSCHEDULED, converterFunction);
    }
    TableHeader header = new TableHeader()
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE);
    if (!baseFileOnly) {
        header = header
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_SIZE)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_SIZE_SCHEDULED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_SIZE_UNSCHEDULED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_BASE_SCHEDULED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_BASE_UNSCHEDULED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_SCHEDULED)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_UNSCHEDULED);
    }
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
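All of these commands follow the same rendering contract: each Comparable[] row must carry exactly one entry per header field, in declaration order, and the keys of the converter map must match header field names (unmatched fields print raw). A minimal, self-contained sketch of that contract, using only the Hudi CLI classes shown above; the field names and row values here are invented for illustration:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.common.util.NumericUtils;

public class TableHeaderSketch {
    public static void main(String[] args) {
        // One header field per row position, in the same order the rows are filled.
        TableHeader header = new TableHeader()
                .addTableHeaderField("File Id")   // hypothetical field name
                .addTableHeaderField("Size");     // hypothetical field name

        List<Comparable[]> rows = new ArrayList<>();
        rows.add(new Comparable[] { "file-0001", 1048576L });  // made-up row

        // Converters are keyed by header field name, exactly as in showLatestFileSlices above.
        Map<String, Function<Object, String>> converters = new HashMap<>();
        converters.put("Size", entry ->
                NumericUtils.humanReadableByteCount(Double.parseDouble(entry.toString())));

        // sortByField = "" (no sort), descending = false, limit = -1 (all rows), headerOnly = false
        System.out.println(
                HoodiePrintHelper.print(header, converters, "", false, -1, false, rows));
    }
}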
Also used: FileSystem (org.apache.hadoop.fs.FileSystem), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), FileSlice (org.apache.hudi.common.model.FileSlice), HoodieTableHeaderFields (org.apache.hudi.cli.HoodieTableHeaderFields), Option (org.apache.hudi.common.util.Option), HashMap (java.util.HashMap), FileStatus (org.apache.hadoop.fs.FileStatus), CliOption (org.springframework.shell.core.annotation.CliOption), Function (java.util.function.Function), ArrayList (java.util.ArrayList), BiPredicate (java.util.function.BiPredicate), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Map (java.util.Map), Path (org.apache.hadoop.fs.Path), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), CommandMarker (org.springframework.shell.core.CommandMarker), CliCommand (org.springframework.shell.core.annotation.CliCommand), TableHeader (org.apache.hudi.cli.TableHeader), IOException (java.io.IOException), HoodieDefaultTimeline (org.apache.hudi.common.table.timeline.HoodieDefaultTimeline), Collectors (java.util.stream.Collectors), HoodieTableFileSystemView (org.apache.hudi.common.table.view.HoodieTableFileSystemView), Serializable (java.io.Serializable), HoodieCLI (org.apache.hudi.cli.HoodieCLI), Component (org.springframework.stereotype.Component), List (java.util.List), Stream (java.util.stream.Stream), HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper), FSUtils (org.apache.hudi.common.fs.FSUtils), NumericUtils (org.apache.hudi.common.util.NumericUtils)

Example 17 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class CompactionCommand, method printAllCompactions.

/**
 * Prints all compaction details.
 */
private String printAllCompactions(HoodieDefaultTimeline timeline,
        Function<HoodieInstant, HoodieCompactionPlan> compactionPlanReader,
        boolean includeExtraMetadata, String sortByField, boolean descending, int limit, boolean headerOnly) {
    Stream<HoodieInstant> instantsStream = timeline.getWriteTimeline().getReverseOrderedInstants();
    List<Pair<HoodieInstant, HoodieCompactionPlan>> compactionPlans = instantsStream
            .map(instant -> Pair.of(instant, compactionPlanReader.apply(instant)))
            .filter(pair -> pair.getRight() != null)
            .collect(Collectors.toList());
    Set<String> committedInstants = timeline.getCommitTimeline().filterCompletedInstants()
            .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
    List<Comparable[]> rows = new ArrayList<>();
    for (Pair<HoodieInstant, HoodieCompactionPlan> compactionPlan : compactionPlans) {
        HoodieCompactionPlan plan = compactionPlan.getRight();
        HoodieInstant instant = compactionPlan.getLeft();
        final HoodieInstant.State state;
        if (committedInstants.contains(instant.getTimestamp())) {
            state = HoodieInstant.State.COMPLETED;
        } else {
            state = instant.getState();
        }
        if (includeExtraMetadata) {
            rows.add(new Comparable[] { instant.getTimestamp(), state.toString(),
                plan.getOperations() == null ? 0 : plan.getOperations().size(),
                plan.getExtraMetadata().toString() });
        } else {
            rows.add(new Comparable[] { instant.getTimestamp(), state.toString(),
                plan.getOperations() == null ? 0 : plan.getOperations().size() });
        }
    }
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    TableHeader header = new TableHeader()
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMPACTION_INSTANT_TIME)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_STATE)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_TO_BE_COMPACTED);
    if (includeExtraMetadata) {
        header = header.addTableHeaderField(HoodieTableHeaderFields.HEADER_EXTRA_METADATA);
    }
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
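The compactionPlanReader is supplied by the calling command and maps a timeline instant to its plan; returning null lets the filter above drop instants without a readable plan. One plausible shape for an active-timeline reader, assuming Hudi's CompactionUtils.getCompactionPlan helper is available in this version (hypothetical wiring, not the method's actual caller):

import java.util.function.Function;

import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.CompactionUtils;

public class CompactionPlanReaderSketch {
    // Returns null when the instant has no readable plan, so printAllCompactions skips it.
    static Function<HoodieInstant, HoodieCompactionPlan> activeTimelineReader(HoodieTableMetaClient metaClient) {
        return instant -> {
            try {
                return CompactionUtils.getCompactionPlan(metaClient, instant.getTimestamp());
            } catch (Exception e) {
                return null;  // treat unreadable plans the same as missing ones
            }
        };
    }
}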
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieArchivedTimeline (org.apache.hudi.common.table.timeline.HoodieArchivedTimeline), FileSystem (org.apache.hadoop.fs.FileSystem), BiFunction (java.util.function.BiFunction), HoodieException (org.apache.hudi.exception.HoodieException), ObjectInputStream (java.io.ObjectInputStream), HoodieTableHeaderFields (org.apache.hudi.cli.HoodieTableHeaderFields), OperationResult (org.apache.hudi.table.action.compact.OperationResult), Logger (org.apache.log4j.Logger), HoodieTableType (org.apache.hudi.common.model.HoodieTableType), Utils (org.apache.spark.util.Utils), Map (java.util.Map), Path (org.apache.hadoop.fs.Path), RenameOpResult (org.apache.hudi.client.CompactionAdminClient.RenameOpResult), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), CommandMarker (org.springframework.shell.core.CommandMarker), SparkCommand (org.apache.hudi.cli.commands.SparkMain.SparkCommand), UtilHelpers (org.apache.hudi.utilities.UtilHelpers), TableHeader (org.apache.hudi.cli.TableHeader), Set (java.util.Set), TimelineMetadataUtils (org.apache.hudi.common.table.timeline.TimelineMetadataUtils), UUID (java.util.UUID), Collectors (java.util.stream.Collectors), InputStreamConsumer (org.apache.hudi.cli.utils.InputStreamConsumer), HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation), List (java.util.List), Stream (java.util.stream.Stream), HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan), Option (org.apache.hudi.common.util.Option), HashMap (java.util.HashMap), CliOption (org.springframework.shell.core.annotation.CliOption), Function (java.util.function.Function), ArrayList (java.util.ArrayList), StringUtils (org.apache.hudi.common.util.StringUtils), CommitUtil (org.apache.hudi.cli.utils.CommitUtil), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), ValidationOpResult (org.apache.hudi.client.CompactionAdminClient.ValidationOpResult), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), CliCommand (org.springframework.shell.core.annotation.CliCommand), SparkLauncher (org.apache.spark.launcher.SparkLauncher), IOException (java.io.IOException), HoodieDefaultTimeline (org.apache.hudi.common.table.timeline.HoodieDefaultTimeline), SparkUtil (org.apache.hudi.cli.utils.SparkUtil), HoodieCLI (org.apache.hudi.cli.HoodieCLI), Component (org.springframework.stereotype.Component), HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper), HoodieIOException (org.apache.hudi.exception.HoodieIOException), LogManager (org.apache.log4j.LogManager), Pair (org.apache.hudi.common.util.collection.Pair)

Example 18 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class CompactionCommand, method validateCompaction.

@CliCommand(value = "compaction validate", help = "Validate Compaction")
public String validateCompaction(
        @CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
        @CliOption(key = { "parallelism" }, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
        @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
        @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
    HoodieTableMetaClient client = checkAndGetMetaClient();
    boolean initialized = HoodieCLI.initConf();
    HoodieCLI.initFS(initialized);
    String outputPathStr = getTmpSerializerFile();
    Path outputPath = new Path(outputPathStr);
    String output;
    try {
        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
        SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
        sparkLauncher.addAppArgs(SparkCommand.COMPACT_VALIDATE.toString(), master, sparkMemory, client.getBasePath(), compactionInstant, outputPathStr, parallelism);
        Process process = sparkLauncher.launch();
        InputStreamConsumer.captureOutput(process);
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            return "Failed to validate compaction for " + compactionInstant;
        }
        List<ValidationOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
        boolean valid = res.stream().map(OperationResult::isSuccess).reduce(Boolean::logicalAnd).orElse(true);
        String message = "\n\n\t COMPACTION PLAN " + (valid ? "VALID" : "INVALID") + "\n\n";
        List<Comparable[]> rows = new ArrayList<>();
        res.forEach(r -> {
            Comparable[] row = new Comparable[] {
                r.getOperation().getFileId(),
                r.getOperation().getBaseInstantTime(),
                r.getOperation().getDataFileName().isPresent() ? r.getOperation().getDataFileName().get() : "",
                r.getOperation().getDeltaFileNames().size(),
                r.isSuccess(),
                r.getException().isPresent() ? r.getException().get().getMessage() : "" };
            rows.add(row);
        });
        Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
        TableHeader header = new TableHeader()
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT_TIME)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_DATA_FILE)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_VALID)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);
        output = message + HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
    } finally {
        // Delete tmp file used to serialize result
        if (HoodieCLI.fs.exists(outputPath)) {
            HoodieCLI.fs.delete(outputPath, false);
        }
    }
    return output;
}
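deSerializeOperationResult is not shown in this excerpt. Given the FSDataInputStream and ObjectInputStream imports listed for this class, a plausible sketch of how the Spark job's serialized results could be read back from the tmp file; this is an assumed shape, not the verified implementation:

import java.io.ObjectInputStream;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ResultDeserializerSketch {
    // Reads back a List<T> that the Spark job wrote with ObjectOutputStream to a tmp file.
    @SuppressWarnings("unchecked")
    static <T> List<T> deserialize(String path, FileSystem fs) throws Exception {
        try (ObjectInputStream in = new ObjectInputStream(fs.open(new Path(path)))) {
            return (List<T>) in.readObject();
        }
    }
}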
Also used: Path (org.apache.hadoop.fs.Path), TableHeader (org.apache.hudi.cli.TableHeader), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), OperationResult (org.apache.hudi.table.action.compact.OperationResult), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), BiFunction (java.util.function.BiFunction), Function (java.util.function.Function), ValidationOpResult (org.apache.hudi.client.CompactionAdminClient.ValidationOpResult), SparkLauncher (org.apache.spark.launcher.SparkLauncher), CliCommand (org.springframework.shell.core.annotation.CliCommand)

Example 19 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class ArchivedCommitsCommand, method showArchivedCommits.

@CliCommand(value = "show archived commit stats", help = "Read commits from archived files and show details")
public String showArchivedCommits(
        @CliOption(key = { "archiveFolderPattern" }, help = "Archive Folder", unspecifiedDefaultValue = "") String folder,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    System.out.println("===============> Showing only " + limit + " archived commits <===============");
    String basePath = HoodieCLI.getTableMetaClient().getBasePath();
    Path archivePath = new Path(HoodieCLI.getTableMetaClient().getArchivePath() + "/.commits_.archive*");
    if (folder != null && !folder.isEmpty()) {
        archivePath = new Path(basePath + "/.hoodie/" + folder);
    }
    FileStatus[] fsStatuses = FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath);
    List<Comparable[]> allStats = new ArrayList<>();
    for (FileStatus fs : fsStatuses) {
        // read the archived file
        Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema());
        List<IndexedRecord> readRecords = new ArrayList<>();
        // read the avro blocks
        while (reader.hasNext()) {
            HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
            blk.getRecordItr().forEachRemaining(readRecords::add);
        }
        List<Comparable[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r)
                .filter(r -> r.get("actionType").toString().equals(HoodieTimeline.COMMIT_ACTION)
                        || r.get("actionType").toString().equals(HoodieTimeline.DELTA_COMMIT_ACTION))
                .flatMap(r -> {
            HoodieCommitMetadata metadata = (HoodieCommitMetadata) SpecificData.get().deepCopy(HoodieCommitMetadata.SCHEMA$, r.get("hoodieCommitMetadata"));
            final String instantTime = r.get("commitTime").toString();
            final String action = r.get("actionType").toString();
            return metadata.getPartitionToWriteStats().values().stream()
                    .flatMap(hoodieWriteStats -> hoodieWriteStats.stream().map(hoodieWriteStat -> {
                List<Comparable> row = new ArrayList<>();
                row.add(action);
                row.add(instantTime);
                row.add(hoodieWriteStat.getPartitionPath());
                row.add(hoodieWriteStat.getFileId());
                row.add(hoodieWriteStat.getPrevCommit());
                row.add(hoodieWriteStat.getNumWrites());
                row.add(hoodieWriteStat.getNumInserts());
                row.add(hoodieWriteStat.getNumDeletes());
                row.add(hoodieWriteStat.getNumUpdateWrites());
                row.add(hoodieWriteStat.getTotalLogFiles());
                row.add(hoodieWriteStat.getTotalLogBlocks());
                row.add(hoodieWriteStat.getTotalCorruptLogBlock());
                row.add(hoodieWriteStat.getTotalRollbackBlocks());
                row.add(hoodieWriteStat.getTotalLogRecords());
                row.add(hoodieWriteStat.getTotalUpdatedRecordsCompacted());
                row.add(hoodieWriteStat.getTotalWriteBytes());
                row.add(hoodieWriteStat.getTotalWriteErrors());
                return row;
            })).map(rowList -> rowList.toArray(new Comparable[0]));
        }).collect(Collectors.toList());
        allStats.addAll(readCommits);
        reader.close();
    }
    TableHeader header = new TableHeader().addTableHeaderField("action").addTableHeaderField("instant").addTableHeaderField("partition").addTableHeaderField("file_id").addTableHeaderField("prev_instant").addTableHeaderField("num_writes").addTableHeaderField("num_inserts").addTableHeaderField("num_deletes").addTableHeaderField("num_update_writes").addTableHeaderField("total_log_files").addTableHeaderField("total_log_blocks").addTableHeaderField("total_corrupt_log_blocks").addTableHeaderField("total_rollback_blocks").addTableHeaderField("total_log_records").addTableHeaderField("total_updated_records_compacted").addTableHeaderField("total_write_bytes").addTableHeaderField("total_write_errors");
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, allStats);
}
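One caveat in the loop above: reader.close() is only reached when no exception is thrown mid-read. A sketch of the same read loop with try-with-resources, using only the calls already shown and assuming HoodieLogFormat.Reader implements Closeable:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Reader;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;

public class ArchivedFileReaderSketch {
    // Same avro-block read loop as above, but the reader is closed even on failure.
    static List<IndexedRecord> readArchivedRecords(String basePath, FileStatus fs) throws IOException {
        List<IndexedRecord> readRecords = new ArrayList<>();
        try (Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf),
                new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
            while (reader.hasNext()) {
                HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
                blk.getRecordItr().forEachRemaining(readRecords::add);
            }
        }
        return readRecords;
    }
}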
Also used: Path (org.apache.hadoop.fs.Path), HoodieArchivedMetaEntry (org.apache.hudi.avro.model.HoodieArchivedMetaEntry), Reader (org.apache.hudi.common.table.log.HoodieLogFormat.Reader), Option (org.apache.hudi.common.util.Option), HashMap (java.util.HashMap), ClosableIterator (org.apache.hudi.common.util.ClosableIterator), FileStatus (org.apache.hadoop.fs.FileStatus), CliOption (org.springframework.shell.core.annotation.CliOption), ArrayList (java.util.ArrayList), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), IndexedRecord (org.apache.avro.generic.IndexedRecord), SpecificData (org.apache.avro.specific.SpecificData), CommandMarker (org.springframework.shell.core.CommandMarker), GenericRecord (org.apache.avro.generic.GenericRecord), CliCommand (org.springframework.shell.core.annotation.CliCommand), TableHeader (org.apache.hudi.cli.TableHeader), IOException (java.io.IOException), HoodieCommitMetadata (org.apache.hudi.avro.model.HoodieCommitMetadata), Collectors (java.util.stream.Collectors), HoodieCLI (org.apache.hudi.cli.HoodieCLI), Component (org.springframework.stereotype.Component), List (java.util.List), HoodieAvroDataBlock (org.apache.hudi.common.table.log.block.HoodieAvroDataBlock), HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper), FSUtils (org.apache.hudi.common.fs.FSUtils)

Example 20 with TableHeader

Use of org.apache.hudi.cli.TableHeader in project hudi by apache.

From the class BootstrapCommand, method showBootstrapIndexMapping.

@CliCommand(value = "bootstrap index showmapping", help = "Show bootstrap index mapping")
public String showBootstrapIndexMapping(
        @CliOption(key = { "partitionPath" }, unspecifiedDefaultValue = "", help = "A valid partition path") String partitionPath,
        @CliOption(key = { "fileIds" }, unspecifiedDefaultValue = "", help = "Valid fileIds split by comma") String fileIds,
        @CliOption(key = { "limit" }, unspecifiedDefaultValue = "-1", help = "Limit rows to be displayed") Integer limit,
        @CliOption(key = { "sortBy" }, unspecifiedDefaultValue = "", help = "Sorting Field") final String sortByField,
        @CliOption(key = { "desc" }, unspecifiedDefaultValue = "false", help = "Ordering") final boolean descending,
        @CliOption(key = { "headeronly" }, unspecifiedDefaultValue = "false", help = "Print Header Only") final boolean headerOnly) {
    if (partitionPath.isEmpty() && !fileIds.isEmpty()) {
        throw new IllegalStateException("PartitionPath is mandatory when passing fileIds.");
    }
    BootstrapIndex.IndexReader indexReader = createBootstrapIndexReader();
    List<String> indexedPartitions = indexReader.getIndexedPartitionPaths();
    if (!partitionPath.isEmpty() && !indexedPartitions.contains(partitionPath)) {
        return partitionPath + " is not an valid indexed partition";
    }
    List<BootstrapFileMapping> mappingList = new ArrayList<>();
    if (!fileIds.isEmpty()) {
        List<HoodieFileGroupId> fileGroupIds = Arrays.stream(fileIds.split(","))
                .map(fileId -> new HoodieFileGroupId(partitionPath, fileId))
                .collect(Collectors.toList());
        mappingList.addAll(indexReader.getSourceFileMappingForFileIds(fileGroupIds).values());
    } else if (!partitionPath.isEmpty()) {
        mappingList.addAll(indexReader.getSourceFileMappingForPartition(partitionPath));
    } else {
        for (String part : indexedPartitions) {
            mappingList.addAll(indexReader.getSourceFileMappingForPartition(part));
        }
    }
    final List<Comparable[]> rows = convertBootstrapSourceFileMapping(mappingList);
    final TableHeader header = new TableHeader()
            .addTableHeaderField("Hudi Partition").addTableHeaderField("FileId")
            .addTableHeaderField("Source File Base Path").addTableHeaderField("Source File Partition")
            .addTableHeaderField("Source File Path");
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
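createBootstrapIndexReader is a private helper not shown in this excerpt. A plausible reconstruction, assuming Hudi's BootstrapIndex factory methods (getBootstrapIndex, useIndex, createReader) behave as their names suggest; the error message is hypothetical:

import org.apache.hudi.common.bootstrap.index.BootstrapIndex;
import org.apache.hudi.common.table.HoodieTableMetaClient;

public class BootstrapIndexReaderSketch {
    // Resolves the table's bootstrap index and opens a reader over it.
    static BootstrapIndex.IndexReader createBootstrapIndexReader(HoodieTableMetaClient metaClient) {
        BootstrapIndex index = BootstrapIndex.getBootstrapIndex(metaClient);
        if (!index.useIndex()) {
            throw new IllegalStateException("This is not a bootstrapped table");  // hypothetical message
        }
        return index.createReader();
    }
}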
Also used: Arrays (java.util.Arrays), URISyntaxException (java.net.URISyntaxException), HoodieException (org.apache.hudi.exception.HoodieException), HashMap (java.util.HashMap), CliOption (org.springframework.shell.core.annotation.CliOption), ArrayList (java.util.ArrayList), Utils (org.apache.spark.util.Utils), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId), CommandMarker (org.springframework.shell.core.CommandMarker), BootstrapFileMapping (org.apache.hudi.common.model.BootstrapFileMapping), CliCommand (org.springframework.shell.core.annotation.CliCommand), BootstrapIndex (org.apache.hudi.common.bootstrap.index.BootstrapIndex), SparkLauncher (org.apache.spark.launcher.SparkLauncher), SparkCommand (org.apache.hudi.cli.commands.SparkMain.SparkCommand), UtilHelpers (org.apache.hudi.utilities.UtilHelpers), TableHeader (org.apache.hudi.cli.TableHeader), IOException (java.io.IOException), SparkUtil (org.apache.hudi.cli.utils.SparkUtil), Collectors (java.util.stream.Collectors), HoodieCLI (org.apache.hudi.cli.HoodieCLI), InputStreamConsumer (org.apache.hudi.cli.utils.InputStreamConsumer), Component (org.springframework.stereotype.Component), List (java.util.List), JavaConverters (scala.collection.JavaConverters), HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper)

Aggregations

ArrayList (java.util.ArrayList): 45
TableHeader (org.apache.hudi.cli.TableHeader): 45
HashMap (java.util.HashMap): 33
CliCommand (org.springframework.shell.core.annotation.CliCommand): 22
Map (java.util.Map): 19
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
List (java.util.List): 18
Test (org.junit.jupiter.api.Test): 18
CommandResult (org.springframework.shell.core.CommandResult): 18
IOException (java.io.IOException): 17
Function (java.util.function.Function): 17
HoodieCLI (org.apache.hudi.cli.HoodieCLI): 15
HoodiePrintHelper (org.apache.hudi.cli.HoodiePrintHelper): 15
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 15
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 14
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 12
Collectors (java.util.stream.Collectors): 11
Path (org.apache.hadoop.fs.Path): 10
HoodieTableHeaderFields (org.apache.hudi.cli.HoodieTableHeaderFields): 10
FSUtils (org.apache.hudi.common.fs.FSUtils): 9