Search in sources :

Example 1 with RuntimeStats

use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.

In the class HoodieAppendHandle, method updateRuntimeStats:

/**
 * Replaces the runtime stats on {@code stat} with a new {@link RuntimeStats} whose total
 * upsert time is the value returned by ending this handle's timer.
 *
 * @param stat the delta write stat that receives the new runtime stats
 */
private void updateRuntimeStats(HoodieDeltaWriteStat stat) {
    RuntimeStats upsertTimings = new RuntimeStats();
    // endTimer() is invoked exactly once; its result becomes the total upsert time.
    upsertTimings.setTotalUpsertTime(timer.endTimer());

    stat.setRuntimeStats(upsertTimings);
}
Also used : RuntimeStats(org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats)

Example 2 with RuntimeStats

use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.

In the class HoodieAppendHandle, method accumulateRuntimeStats:

/**
 * Adds the value returned by ending this handle's timer to the total upsert time already
 * recorded in the runtime stats of {@code stat}.
 *
 * @param stat the delta write stat whose existing runtime stats are updated in place; its
 *             runtime stats are asserted to be non-null
 */
private void accumulateRuntimeStats(HoodieDeltaWriteStat stat) {
    RuntimeStats existingStats = stat.getRuntimeStats();
    assert existingStats != null;
    // Read the previous total first, then end the timer, and store the sum back.
    existingStats.setTotalUpsertTime(
        existingStats.getTotalUpsertTime() + timer.endTimer());
}
Also used : RuntimeStats(org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats)

Example 3 with RuntimeStats

use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.

In the class HoodieTestUtils, method generateFakeHoodieWriteStat:

/**
 * Builds a list of synthetic {@link HoodieWriteStat} entries.
 *
 * <p>Every entry gets a random UUID as file id, fixed counters (0 deletes, 100 update
 * writes, 100 writes, 100 log files compacted), a fake path and partition path suffixed
 * with the entry's index, and a {@link RuntimeStats} whose scan, create and upsert times
 * are all 100.
 *
 * @param limit number of entries to generate; no entries are produced when it is not positive
 * @return a newly created list with one generated stat per index in {@code [0, limit)}
 */
public static List<HoodieWriteStat> generateFakeHoodieWriteStat(int limit) {
    List<HoodieWriteStat> fakeStats = new ArrayList<>();
    int index = 0;
    while (index < limit) {
        // Fixed timing figures shared by every generated entry.
        RuntimeStats timings = new RuntimeStats();
        timings.setTotalScanTime(100);
        timings.setTotalCreateTime(100);
        timings.setTotalUpsertTime(100);

        HoodieWriteStat fakeStat = new HoodieWriteStat();
        fakeStat.setFileId(UUID.randomUUID().toString());
        // Locations are made distinct per entry by appending the index.
        fakeStat.setPath("/some/fake/path" + index);
        fakeStat.setPartitionPath("/some/fake/partition/path" + index);
        fakeStat.setNumDeletes(0);
        fakeStat.setNumUpdateWrites(100);
        fakeStat.setNumWrites(100);
        fakeStat.setTotalLogFilesCompacted(100L);
        fakeStat.setRuntimeStats(timings);

        fakeStats.add(fakeStat);
        index++;
    }
    return fakeStats;
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) RuntimeStats(org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats) ArrayList(java.util.ArrayList)

Example 4 with RuntimeStats

use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.

In the class HoodieCompactor, method compact:

/**
 * Execute a single compaction operation and report back status.
 *
 * <p>The operation's delta (log) files are read through a {@link HoodieMergedLogRecordScanner}.
 * The scanned records are then passed to the compaction handler: via {@code handleUpdate} when
 * the operation has a base file, via {@code handleInsert} otherwise. Each resulting
 * {@link WriteStatus} has its stat populated with the scanner's counters and scan time.
 *
 * <p>The scanner is closed on every exit path, including when the handler or the scanner
 * itself throws, and always before the handler's result iterator is consumed.
 *
 * @param compactionHandler   handler that writes the scanned records as updates or inserts
 * @param metaClient          source of the file system, base path and active timeline
 * @param config              write config supplying the schema and scanner settings
 * @param operation           the compaction operation (file id, partition, base/delta files, metrics)
 * @param instantTime         instant time passed through to the handler
 * @param taskContextSupplier used to compute the maximum memory available to this compaction
 * @return the write statuses produced by the handler, or an empty list when the scanner yields
 *         no records and the operation has no base file
 * @throws IOException if one of the underlying operations raises it
 */
public List<WriteStatus> compact(HoodieCompactionHandler compactionHandler, HoodieTableMetaClient metaClient, HoodieWriteConfig config, CompactionOperation operation, String instantTime, TaskContextSupplier taskContextSupplier) throws IOException {
    FileSystem fs = metaClient.getFs();
    Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()), config.allowOperationMetadataField());
    LOG.info("Compacting base " + operation.getDataFileName() + " with delta files " + operation.getDeltaFileNames() + " for commit " + instantTime);
    // TODO - FIX THIS
    // Reads the entire avro file. Always only specific blocks should be read from the avro file
    // (failure recover).
    // Load all the delta commits since the last compaction commit and get all the blocks to be
    // loaded and load it using CompositeAvroLogReader
    // Since a DeltaCommit is not defined yet, reading all the records. revisit this soon.
    String maxInstantTime = metaClient.getActiveTimeline()
        .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
        .filterCompletedInstants()
        .lastInstant()
        .get()
        .getTimestamp();
    long maxMemoryPerCompaction = IOUtils.getMaxMemoryPerCompaction(taskContextSupplier, config);
    LOG.info("MaxMemoryPerCompaction => " + maxMemoryPerCompaction);
    // Resolve every delta file name against the operation's partition directory.
    List<String> logFiles = operation.getDeltaFileNames().stream()
        .map(p -> new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), p).toString())
        .collect(toList());
    HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
        .withFileSystem(fs)
        .withBasePath(metaClient.getBasePath())
        .withLogFilePaths(logFiles)
        .withReaderSchema(readerSchema)
        .withLatestInstantTime(maxInstantTime)
        .withMaxMemorySizeInBytes(maxMemoryPerCompaction)
        .withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled())
        .withReverseReader(config.getCompactionReverseLogReadEnabled())
        .withBufferSize(config.getMaxDFSStreamBufferSize())
        .withSpillableMapBasePath(config.getSpillableMapBasePath())
        .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
        .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
        .withOperationField(config.allowOperationMetadataField())
        .withPartition(operation.getPartitionPath())
        .build();

    // Compacting is very similar to applying updates to existing file
    Iterator<List<WriteStatus>> result;
    try {
        Option<HoodieBaseFile> oldDataFileOpt = operation.getBaseFile(metaClient.getBasePath(), operation.getPartitionPath());
        // The scanner yielded no records. Without a base file there is nothing to write, so
        // report no statuses. With a base file we fall through to handleUpdate so that the
        // base file is still carried over into the new file slice.
        // TODO: we may directly rename original parquet file if there is not evolution/devolution of schema
        if (!scanner.iterator().hasNext() && !oldDataFileOpt.isPresent()) {
            return new ArrayList<>();
        }
        // If the dataFile is present, perform updates else perform inserts into a new base file.
        if (oldDataFileOpt.isPresent()) {
            result = compactionHandler.handleUpdate(instantTime, operation.getPartitionPath(), operation.getFileId(), scanner.getRecords(), oldDataFileOpt.get());
        } else {
            result = compactionHandler.handleInsert(instantTime, operation.getPartitionPath(), operation.getFileId(), scanner.getRecords());
        }
    } finally {
        // Close on every path (early return, normal completion, exception) so a failing
        // handler or scanner call cannot leak the scanner's resources.
        scanner.close();
    }

    // The scanner is closed at this point; only its already-collected counters are read below.
    Iterable<List<WriteStatus>> resultIterable = () -> result;
    return StreamSupport.stream(resultIterable.spliterator(), false).flatMap(Collection::stream).peek(s -> {
        s.getStat().setTotalUpdatedRecordsCompacted(scanner.getNumMergedRecordsInLog());
        s.getStat().setTotalLogFilesCompacted(scanner.getTotalLogFiles());
        s.getStat().setTotalLogRecords(scanner.getTotalLogRecords());
        s.getStat().setPartitionPath(operation.getPartitionPath());
        s.getStat().setTotalLogSizeCompacted(operation.getMetrics().get(CompactionStrategy.TOTAL_LOG_FILE_SIZE).longValue());
        s.getStat().setTotalLogBlocks(scanner.getTotalLogBlocks());
        s.getStat().setTotalCorruptLogBlock(scanner.getTotalCorruptBlocks());
        s.getStat().setTotalRollbackBlocks(scanner.getTotalRollbacks());
        RuntimeStats runtimeStats = new RuntimeStats();
        runtimeStats.setTotalScanTime(scanner.getTotalTimeTakenToReadAndMergeBlocks());
        s.getStat().setRuntimeStats(runtimeStats);
    }).collect(toList());
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieAccumulator(org.apache.hudi.common.data.HoodieAccumulator) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) RuntimeStats(org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) StreamSupport(java.util.stream.StreamSupport) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieData(org.apache.hudi.common.data.HoodieData) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) TaskContextSupplier(org.apache.hudi.common.engine.TaskContextSupplier) Collection(java.util.Collection) Set(java.util.Set) IOException(java.io.IOException) CompactionStrategy(org.apache.hudi.table.action.compact.strategy.CompactionStrategy) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) WriteStatus(org.apache.hudi.client.WriteStatus) 
HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieCompactionHandler(org.apache.hudi.table.HoodieCompactionHandler) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) SliceView(org.apache.hudi.common.table.view.TableFileSystemView.SliceView) IOUtils(org.apache.hudi.io.IOUtils) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) Path(org.apache.hadoop.fs.Path) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) RuntimeStats(org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) FileSystem(org.apache.hadoop.fs.FileSystem) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList)

Example 5 with RuntimeStats

use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.

In the class HoodieCreateHandle, method setupWriteStatus:

/**
 * Populates the stat object of this handle's write status.
 *
 * <p>Copies partition path, file id and error count from the write status, records the
 * write/delete/insert counters held by this handle, marks the previous commit as
 * {@code HoodieWriteStat.NULL_COMMIT}, stores the written file's size as both total write
 * bytes and file size, and attaches a {@link RuntimeStats} carrying the create time
 * returned by ending the timer.
 *
 * @throws IOException if error occurs
 */
protected void setupWriteStatus() throws IOException {
    HoodieWriteStat writeStat = writeStatus.getStat();

    // Identity, counters and location of the file produced by this handle.
    writeStat.setPartitionPath(writeStatus.getPartitionPath());
    writeStat.setNumWrites(recordsWritten);
    writeStat.setNumDeletes(recordsDeleted);
    writeStat.setNumInserts(insertRecordsWritten);
    writeStat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
    writeStat.setFileId(writeStatus.getFileId());
    writeStat.setPath(new Path(config.getBasePath()), path);
    writeStat.setTotalWriteErrors(writeStatus.getTotalErrorRecords());

    // The same size value is reported as bytes written and as the file size.
    long bytesOnDisk = FSUtils.getFileSize(fs, path);
    writeStat.setTotalWriteBytes(bytesOnDisk);
    writeStat.setFileSizeInBytes(bytesOnDisk);

    RuntimeStats createTimings = new RuntimeStats();
    createTimings.setTotalCreateTime(timer.endTimer());
    writeStat.setRuntimeStats(createTimings);
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) RuntimeStats(org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats)

Aggregations

RuntimeStats (org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats)6 HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Path (org.apache.hadoop.fs.Path)2 Serializable (java.io.Serializable)1 Collection (java.util.Collection)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Set (java.util.Set)1 Collectors.toList (java.util.stream.Collectors.toList)1 StreamSupport (java.util.stream.StreamSupport)1 Schema (org.apache.avro.Schema)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 HoodieAvroUtils (org.apache.hudi.avro.HoodieAvroUtils)1 HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation)1 HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan)1 WriteStatus (org.apache.hudi.client.WriteStatus)1 HoodieAccumulator (org.apache.hudi.common.data.HoodieAccumulator)1 HoodieData (org.apache.hudi.common.data.HoodieData)1