Example 66 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class HoodieBackedTableMetadata, method openReadersIfNeeded.

/**
 * Create a file reader and the record scanner for a given partition and file slice
 * if readers are not already available.
 *
 * @param partitionName - Partition name
 * @param slice         - The file slice to open readers for
 * @return File reader and the record scanner pair for the requested file slice
 */
private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> openReadersIfNeeded(String partitionName, FileSlice slice) {
    return partitionReaders.computeIfAbsent(Pair.of(partitionName, slice.getFileId()), k -> {
        try {
            HoodieTimer timer = new HoodieTimer().startTimer();
            // Open base file reader
            Pair<HoodieFileReader, Long> baseFileReaderOpenTimePair = getBaseFileReader(slice, timer);
            HoodieFileReader baseFileReader = baseFileReaderOpenTimePair.getKey();
            final long baseFileOpenMs = baseFileReaderOpenTimePair.getValue();
            // Open the log record scanner using the log files from the latest file slice
            List<HoodieLogFile> logFiles = slice.getLogFiles().collect(Collectors.toList());
            Pair<HoodieMetadataMergedLogRecordReader, Long> logRecordScannerOpenTimePair = getLogRecordScanner(logFiles, partitionName);
            HoodieMetadataMergedLogRecordReader logRecordScanner = logRecordScannerOpenTimePair.getKey();
            final long logScannerOpenMs = logRecordScannerOpenTimePair.getValue();
            metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.SCAN_STR, baseFileOpenMs + logScannerOpenMs));
            return Pair.of(baseFileReader, logRecordScanner);
        } catch (IOException e) {
            throw new HoodieIOException("Error opening readers for metadata table partition " + partitionName, e);
        }
    });
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) IOException(java.io.IOException)
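
The caching idiom above is worth noting: computeIfAbsent keys the reader pair by (partition, fileId), so the expensive open path runs only on a cache miss and later lookups reuse the already-open readers. A minimal sketch of the same idiom, where CachedReaders, getOrOpen and openReader are hypothetical names for illustration:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hudi.common.util.collection.Pair;

class CachedReaders {

    private final Map<Pair<String, String>, AutoCloseable> readers = new ConcurrentHashMap<>();

    // The mapping function runs only when the key is absent, so repeated
    // lookups for the same (partition, fileId) reuse the open resource.
    AutoCloseable getOrOpen(String partition, String fileId) {
        return readers.computeIfAbsent(Pair.of(partition, fileId), k -> openReader(k));
    }

    private AutoCloseable openReader(Pair<String, String> key) {
        // Placeholder for the real open logic; returns a no-op closeable.
        return () -> { };
    }
}

A cache like this also needs a matching teardown path that closes each cached reader and clears the map.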

Example 67 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class HoodieMetadataMetrics, method getStats.

private Map<String, String> getStats(HoodieTableFileSystemView fsView, boolean detailed, HoodieTableMetadata tableMetadata) throws IOException {
    Map<String, String> stats = new HashMap<>();
    // For each metadata partition, tally the total size and count of base/log files
    for (String metadataPartition : MetadataPartitionType.allPaths()) {
        List<FileSlice> latestSlices = fsView.getLatestFileSlices(metadataPartition).collect(Collectors.toList());
        long totalBaseFileSizeInBytes = 0;
        long totalLogFileSizeInBytes = 0;
        int baseFileCount = 0;
        int logFileCount = 0;
        for (FileSlice slice : latestSlices) {
            if (slice.getBaseFile().isPresent()) {
                totalBaseFileSizeInBytes += slice.getBaseFile().get().getFileStatus().getLen();
                ++baseFileCount;
            }
            Iterator<HoodieLogFile> it = slice.getLogFiles().iterator();
            while (it.hasNext()) {
                totalLogFileSizeInBytes += it.next().getFileSize();
                ++logFileCount;
            }
        }
        stats.put(metadataPartition + "." + STAT_TOTAL_BASE_FILE_SIZE, String.valueOf(totalBaseFileSizeInBytes));
        stats.put(metadataPartition + "." + STAT_TOTAL_LOG_FILE_SIZE, String.valueOf(totalLogFileSizeInBytes));
        stats.put(metadataPartition + "." + STAT_COUNT_BASE_FILES, String.valueOf(baseFileCount));
        stats.put(metadataPartition + "." + STAT_COUNT_LOG_FILES, String.valueOf(logFileCount));
    }
    if (detailed) {
        stats.put(HoodieMetadataMetrics.STAT_COUNT_PARTITION, String.valueOf(tableMetadata.getAllPartitionPaths().size()));
    }
    return stats;
}
Also used : HashMap(java.util.HashMap) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)
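
The same accumulation can be expressed with streams, exactly as Example 69 below does with mapToLong(HoodieLogFile::getFileSize); note that Example 69 also calls getLogFiles() several times with different terminal operations, which implies each call returns a fresh Stream. A hedged per-slice sketch on that assumption:

import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieLogFile;

final class SliceStats {

    // Stream equivalent of the iterator loop above for a single slice.
    static long totalLogBytes(FileSlice slice) {
        return slice.getLogFiles().mapToLong(HoodieLogFile::getFileSize).sum();
    }

    // Separate call to getLogFiles(), assuming it returns a new Stream each time.
    static long logFileCount(FileSlice slice) {
        return slice.getLogFiles().count();
    }
}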

Example 68 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class ExportCommand, method copyArchivedInstants.

private int copyArchivedInstants(List<FileStatus> statuses, Set<String> actionSet, int limit, String localFolder) throws Exception {
    int copyCount = 0;
    for (FileStatus fs : statuses) {
        // read the archived file
        Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(HoodieCLI.getTableMetaClient().getBasePath(), HoodieCLI.conf), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema());
        // read the avro blocks
        while (reader.hasNext() && copyCount < limit) {
            HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
            try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) {
                while (recordItr.hasNext()) {
                    IndexedRecord ir = recordItr.next();
                    // Archived instants are saved as avro encoded HoodieArchivedMetaEntry records. We need to get the
                    // metadata record from the entry and convert it to json.
                    HoodieArchivedMetaEntry archiveEntryRecord = (HoodieArchivedMetaEntry) SpecificData.get().deepCopy(HoodieArchivedMetaEntry.SCHEMA$, ir);
                    final String action = archiveEntryRecord.get("actionType").toString();
                    if (!actionSet.contains(action)) {
                        continue;
                    }
                    GenericRecord metadata = null;
                    switch(action) {
                        case HoodieTimeline.CLEAN_ACTION:
                            metadata = archiveEntryRecord.getHoodieCleanMetadata();
                            break;
                        case HoodieTimeline.COMMIT_ACTION:
                        case HoodieTimeline.DELTA_COMMIT_ACTION:
                            metadata = archiveEntryRecord.getHoodieCommitMetadata();
                            break;
                        case HoodieTimeline.ROLLBACK_ACTION:
                            metadata = archiveEntryRecord.getHoodieRollbackMetadata();
                            break;
                        case HoodieTimeline.SAVEPOINT_ACTION:
                            metadata = archiveEntryRecord.getHoodieSavePointMetadata();
                            break;
                        case HoodieTimeline.COMPACTION_ACTION:
                            metadata = archiveEntryRecord.getHoodieCompactionMetadata();
                            break;
                        default:
                            throw new HoodieException("Unknown type of action " + action);
                    }
                    final String instantTime = archiveEntryRecord.get("commitTime").toString();
                    final String outPath = localFolder + Path.SEPARATOR + instantTime + "." + action;
                    writeToFile(outPath, HoodieAvroUtils.avroToJson(metadata, true));
                    if (++copyCount == limit) {
                        break;
                    }
                }
            }
        }
        reader.close();
    }
    return copyCount;
}
Also used : HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) FileStatus(org.apache.hadoop.fs.FileStatus) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieException(org.apache.hudi.exception.HoodieException) GenericRecord(org.apache.avro.generic.GenericRecord)
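
One fragility in the loop above: reader.close() at the end is skipped if an exception escapes the block iteration, leaking the handle. Since the example calls close() directly, wrapping the Reader in try-with-resources should work, assuming HoodieLogFormat.Reader implements AutoCloseable; a sketch with ArchiveScanner and countBlocks as hypothetical names:

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Reader;

final class ArchiveScanner {

    // Counts avro blocks in one archived file, closing the reader even if
    // reading a block throws mid-iteration.
    static int countBlocks(FileSystem fileSystem, FileStatus status, int limit) throws Exception {
        int seen = 0;
        try (Reader reader = HoodieLogFormat.newReader(fileSystem,
                new HoodieLogFile(status.getPath()), HoodieArchivedMetaEntry.getClassSchema())) {
            while (reader.hasNext() && seen < limit) {
                reader.next();
                seen++;
            }
        }
        return seen;
    }
}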

Example 69 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class FileSystemViewCommand, method showAllFileSlices.

@CliCommand(value = "show fsview all", help = "Show entire file-system view")
public String showAllFileSlices(
        @CliOption(key = { "pathRegex" }, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") String globRegex,
        @CliOption(key = { "baseFileOnly" }, help = "Only display base files view", unspecifiedDefaultValue = "false") boolean baseFileOnly,
        @CliOption(key = { "maxInstant" }, help = "File-Slices upto this instant are displayed", unspecifiedDefaultValue = "") String maxInstant,
        @CliOption(key = { "includeMax" }, help = "Include Max Instant", unspecifiedDefaultValue = "false") boolean includeMaxInstant,
        @CliOption(key = { "includeInflight" }, help = "Include Inflight Instants", unspecifiedDefaultValue = "false") boolean includeInflight,
        @CliOption(key = { "excludeCompaction" }, help = "Exclude compaction Instants", unspecifiedDefaultValue = "false") boolean excludeCompaction,
        @CliOption(key = { "limit" }, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    HoodieTableFileSystemView fsView = buildFileSystemView(globRegex, maxInstant, baseFileOnly, includeMaxInstant, includeInflight, excludeCompaction);
    List<Comparable[]> rows = new ArrayList<>();
    fsView.getAllFileGroups().forEach(fg -> fg.getAllFileSlices().forEach(fs -> {
        int idx = 0;
        // For base file only Views, do not display any delta-file related columns
        Comparable[] row = new Comparable[baseFileOnly ? 5 : 8];
        row[idx++] = fg.getPartitionPath();
        row[idx++] = fg.getFileGroupId().getFileId();
        row[idx++] = fs.getBaseInstantTime();
        row[idx++] = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getPath() : "";
        row[idx++] = fs.getBaseFile().isPresent() ? fs.getBaseFile().get().getFileSize() : -1;
        if (!baseFileOnly) {
            row[idx++] = fs.getLogFiles().count();
            row[idx++] = fs.getLogFiles().mapToLong(HoodieLogFile::getFileSize).sum();
            row[idx++] = fs.getLogFiles().collect(Collectors.toList()).toString();
        }
        rows.add(row);
    }));
    Function<Object, String> converterFunction = entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString())));
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILE_SIZE, converterFunction);
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE, converterFunction);
    TableHeader header = new TableHeader()
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE)
            .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_SIZE);
    if (!baseFileOnly) {
        header = header.addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILE_SIZE)
                .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES);
    }
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) CliOption(org.springframework.shell.core.annotation.CliOption) Function(java.util.function.Function) ArrayList(java.util.ArrayList) BiPredicate(java.util.function.BiPredicate) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CommandMarker(org.springframework.shell.core.CommandMarker) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableHeader(org.apache.hudi.cli.TableHeader) IOException(java.io.IOException) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Serializable(java.io.Serializable) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) List(java.util.List) Stream(java.util.stream.Stream) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) FSUtils(org.apache.hudi.common.fs.FSUtils) NumericUtils(org.apache.hudi.common.util.NumericUtils)
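
From the Hudi CLI shell, the command above can be invoked with the options declared in its @CliOption annotations; an illustrative invocation (the path regex is a made-up placeholder):

show fsview all --pathRegex "2021/08/*" --baseFileOnly false --limit 10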

Example 70 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class HoodieLogFileCommand, method showLogFileRecords.

@CliCommand(value = "show logfile records", help = "Read records from log files")
public String showLogFileRecords(
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit,
        @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified paths for the log files") final String logFilePathPattern,
        @CliOption(key = "mergeRecords", help = "If the records in the log files should be merged", unspecifiedDefaultValue = "false") final Boolean shouldMerge) throws IOException {
    System.out.println("===============> Showing only " + limit + " records <===============");
    HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
    FileSystem fs = client.getFs();
    List<String> logFilePaths = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(logFilePathPattern)).stream()
            .map(status -> status.getPath().toString())
            .sorted(Comparator.reverseOrder())
            .collect(Collectors.toList());
    // At least one log file must match the pattern
    assert logFilePaths.size() > 0 : "There is no log file";
    // TODO : readerSchema can change across blocks/log files, fix this inside Scanner
    AvroSchemaConverter converter = new AvroSchemaConverter();
    // get schema from last log file
    Schema readerSchema = converter.convert(Objects.requireNonNull(TableSchemaResolver.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1)))));
    List<IndexedRecord> allRecords = new ArrayList<>();
    if (shouldMerge) {
        System.out.println("===========================> MERGING RECORDS <===================");
        HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
                .withFileSystem(fs)
                .withBasePath(client.getBasePath())
                .withLogFilePaths(logFilePaths)
                .withReaderSchema(readerSchema)
                .withLatestInstantTime(client.getActiveTimeline().getCommitTimeline().lastInstant().get().getTimestamp())
                .withReadBlocksLazily(Boolean.parseBoolean(HoodieCompactionConfig.COMPACTION_LAZY_BLOCK_READ_ENABLE.defaultValue()))
                .withReverseReader(Boolean.parseBoolean(HoodieCompactionConfig.COMPACTION_REVERSE_LOG_READ_ENABLE.defaultValue()))
                .withBufferSize(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE.defaultValue())
                .withMaxMemorySizeInBytes(HoodieMemoryConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES)
                .withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH.defaultValue())
                .withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
                .withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
                .build();
        for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : scanner) {
            Option<IndexedRecord> record = hoodieRecord.getData().getInsertValue(readerSchema);
            if (allRecords.size() < limit) {
                allRecords.add(record.get());
            }
        }
    } else {
        for (String logFile : logFilePaths) {
            Schema writerSchema = new AvroSchemaConverter().convert(Objects.requireNonNull(TableSchemaResolver.readSchemaFromLogFile(client.getFs(), new Path(logFile))));
            HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(new Path(logFile)), writerSchema);
            // read the avro blocks
            while (reader.hasNext()) {
                HoodieLogBlock n = reader.next();
                if (n instanceof HoodieDataBlock) {
                    HoodieDataBlock blk = (HoodieDataBlock) n;
                    try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) {
                        recordItr.forEachRemaining(record -> {
                            if (allRecords.size() < limit) {
                                allRecords.add(record);
                            }
                        });
                    }
                }
            }
            reader.close();
            if (allRecords.size() >= limit) {
                break;
            }
        }
    }
    String[][] rows = new String[allRecords.size()][];
    int i = 0;
    for (IndexedRecord record : allRecords) {
        String[] data = new String[1];
        data[0] = record.toString();
        rows[i] = data;
        i++;
    }
    return HoodiePrintHelper.print(new String[] { HoodieTableHeaderFields.HEADER_RECORDS }, rows);
}
Also used : Path(org.apache.hadoop.fs.Path) AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) CliCommand(org.springframework.shell.core.annotation.CliCommand)
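
An illustrative invocation of this command from the Hudi CLI shell (the glob is a made-up placeholder; mergeRecords switches between the merged-scanner branch and the raw per-file branch above):

show logfile records --logFilePathPattern "/tmp/hudi-table/2021/08/.*.log.*" --mergeRecords true --limit 5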

Aggregations

HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile) 77
IOException (java.io.IOException) 48
List (java.util.List) 46
Path (org.apache.hadoop.fs.Path) 45
Map (java.util.Map) 42
Collectors (java.util.stream.Collectors) 42
ArrayList (java.util.ArrayList) 38
Option (org.apache.hudi.common.util.Option) 37
FileSlice (org.apache.hudi.common.model.FileSlice) 34
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 29
FileStatus (org.apache.hadoop.fs.FileStatus) 28
HashMap (java.util.HashMap) 26
FSUtils (org.apache.hudi.common.fs.FSUtils) 26
Pair (org.apache.hudi.common.util.collection.Pair) 25
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 24
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 23
Set (java.util.Set) 22
LogManager (org.apache.log4j.LogManager) 22
Logger (org.apache.log4j.Logger) 22
HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat) 21