Use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.
Class HoodieAppendHandle, method updateRuntimeStats.
private void updateRuntimeStats(HoodieDeltaWriteStat stat) {
  RuntimeStats runtimeStats = new RuntimeStats();
  runtimeStats.setTotalUpsertTime(timer.endTimer());
  stat.setRuntimeStats(runtimeStats);
}
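For illustration only, a minimal sketch (not part of HoodieAppendHandle) of reading that upsert time back out of the stat; the helper name reportUpsertTime is hypothetical, and only the RuntimeStats accessors shown above are assumed.
// Hypothetical helper, not Hudi code: read back the upsert time recorded by
// updateRuntimeStats, guarding against a stat that has no RuntimeStats yet.
private static long reportUpsertTime(HoodieDeltaWriteStat stat) {
  RuntimeStats runtimeStats = stat.getRuntimeStats();
  return runtimeStats == null ? 0L : runtimeStats.getTotalUpsertTime();
}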
Use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.
Class HoodieAppendHandle, method accumulateRuntimeStats.
private void accumulateRuntimeStats(HoodieDeltaWriteStat stat) {
  RuntimeStats runtimeStats = stat.getRuntimeStats();
  assert runtimeStats != null;
  runtimeStats.setTotalUpsertTime(runtimeStats.getTotalUpsertTime() + timer.endTimer());
}
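The difference from updateRuntimeStats is that the elapsed time is folded into the existing total rather than overwriting it, which matters when the handle rolls over multiple log files. A standalone sketch, assuming only the getter/setter pair shown above (the helper name addUpsertTime is hypothetical):
// Hypothetical illustration, not Hudi code: fold a new elapsed time into an
// existing RuntimeStats total, mirroring what accumulateRuntimeStats does.
static void addUpsertTime(RuntimeStats runtimeStats, long elapsedMillis) {
  runtimeStats.setTotalUpsertTime(runtimeStats.getTotalUpsertTime() + elapsedMillis);
}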
Use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.
Class HoodieTestUtils, method generateFakeHoodieWriteStat.
public static List<HoodieWriteStat> generateFakeHoodieWriteStat(int limit) {
  List<HoodieWriteStat> writeStatList = new ArrayList<>();
  for (int i = 0; i < limit; i++) {
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId(UUID.randomUUID().toString());
    writeStat.setNumDeletes(0);
    writeStat.setNumUpdateWrites(100);
    writeStat.setNumWrites(100);
    writeStat.setPath("/some/fake/path" + i);
    writeStat.setPartitionPath("/some/fake/partition/path" + i);
    writeStat.setTotalLogFilesCompacted(100L);
    RuntimeStats runtimeStats = new RuntimeStats();
    runtimeStats.setTotalScanTime(100);
    runtimeStats.setTotalCreateTime(100);
    runtimeStats.setTotalUpsertTime(100);
    writeStat.setRuntimeStats(runtimeStats);
    writeStatList.add(writeStat);
  }
  return writeStatList;
}
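A hedged usage sketch (the surrounding test is hypothetical; java.util.Objects plus the imports already used by HoodieTestUtils are assumed) showing how the fake stats could be consumed, e.g. to assert on aggregated runtime totals:
// Hypothetical usage, not from the Hudi test suite: total the scan time across
// the fake write stats produced above.
List<HoodieWriteStat> stats = HoodieTestUtils.generateFakeHoodieWriteStat(10);
long totalScanTime = stats.stream()
    .map(HoodieWriteStat::getRuntimeStats)
    .filter(Objects::nonNull)
    .mapToLong(RuntimeStats::getTotalScanTime)
    .sum();
// With the values set above, totalScanTime is 10 * 100 = 1000.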
Use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.
Class HoodieCompactor, method compact.
/**
 * Execute a single compaction operation and report back status.
 */
public List<WriteStatus> compact(HoodieCompactionHandler compactionHandler, HoodieTableMetaClient metaClient,
    HoodieWriteConfig config, CompactionOperation operation, String instantTime,
    TaskContextSupplier taskContextSupplier) throws IOException {
  FileSystem fs = metaClient.getFs();
  Schema readerSchema = HoodieAvroUtils.addMetadataFields(
      new Schema.Parser().parse(config.getSchema()), config.allowOperationMetadataField());
  LOG.info("Compacting base " + operation.getDataFileName() + " with delta files "
      + operation.getDeltaFileNames() + " for commit " + instantTime);
  // TODO - FIX THIS
  // Reads the entire avro file. Always only specific blocks should be read from the avro file
  // (failure recover).
  // Load all the delta commits since the last compaction commit and get all the blocks to be
  // loaded and load it using CompositeAvroLogReader
  // Since a DeltaCommit is not defined yet, reading all the records. revisit this soon.
  String maxInstantTime = metaClient.getActiveTimeline()
      .getTimelineOfActions(CollectionUtils.createSet(
          HoodieTimeline.COMMIT_ACTION, HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
      .filterCompletedInstants().lastInstant().get().getTimestamp();
  long maxMemoryPerCompaction = IOUtils.getMaxMemoryPerCompaction(taskContextSupplier, config);
  LOG.info("MaxMemoryPerCompaction => " + maxMemoryPerCompaction);
  List<String> logFiles = operation.getDeltaFileNames().stream()
      .map(p -> new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), p).toString())
      .collect(toList());
  HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
      .withFileSystem(fs)
      .withBasePath(metaClient.getBasePath())
      .withLogFilePaths(logFiles)
      .withReaderSchema(readerSchema)
      .withLatestInstantTime(maxInstantTime)
      .withMaxMemorySizeInBytes(maxMemoryPerCompaction)
      .withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled())
      .withReverseReader(config.getCompactionReverseLogReadEnabled())
      .withBufferSize(config.getMaxDFSStreamBufferSize())
      .withSpillableMapBasePath(config.getSpillableMapBasePath())
      .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType())
      .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled())
      .withOperationField(config.allowOperationMetadataField())
      .withPartition(operation.getPartitionPath())
      .build();
  Option<HoodieBaseFile> oldDataFileOpt = operation.getBaseFile(metaClient.getBasePath(), operation.getPartitionPath());
  // If all log blocks in this file slice were rolled back, the scanner is empty. Even then we need to
  // carry the base file forward; otherwise the following file slice would lose its base file.
  if (!scanner.iterator().hasNext()) {
    if (!oldDataFileOpt.isPresent()) {
      scanner.close();
      return new ArrayList<>();
    } else {
      // TODO: we may directly rename the original parquet file if there is no evolution/devolution of schema
      /*
      TaskContextSupplier taskContextSupplier = hoodieCopyOnWriteTable.getTaskContextSupplier();
      String newFileName = FSUtils.makeDataFileName(instantTime,
          FSUtils.makeWriteToken(taskContextSupplier.getPartitionIdSupplier().get(), taskContextSupplier.getStageIdSupplier().get(), taskContextSupplier.getAttemptIdSupplier().get()),
          operation.getFileId(), hoodieCopyOnWriteTable.getBaseFileExtension());
      Path oldFilePath = new Path(oldDataFileOpt.get().getPath());
      Path newFilePath = new Path(oldFilePath.getParent(), newFileName);
      FileUtil.copy(fs, oldFilePath, fs, newFilePath, false, fs.getConf());
      */
    }
  }
  // Compacting is very similar to applying updates to an existing file
  Iterator<List<WriteStatus>> result;
  // If the data file is present, perform updates; else perform inserts into a new base file.
  if (oldDataFileOpt.isPresent()) {
    result = compactionHandler.handleUpdate(instantTime, operation.getPartitionPath(), operation.getFileId(),
        scanner.getRecords(), oldDataFileOpt.get());
  } else {
    result = compactionHandler.handleInsert(instantTime, operation.getPartitionPath(), operation.getFileId(),
        scanner.getRecords());
  }
  scanner.close();
  Iterable<List<WriteStatus>> resultIterable = () -> result;
  return StreamSupport.stream(resultIterable.spliterator(), false).flatMap(Collection::stream).peek(s -> {
    s.getStat().setTotalUpdatedRecordsCompacted(scanner.getNumMergedRecordsInLog());
    s.getStat().setTotalLogFilesCompacted(scanner.getTotalLogFiles());
    s.getStat().setTotalLogRecords(scanner.getTotalLogRecords());
    s.getStat().setPartitionPath(operation.getPartitionPath());
    s.getStat().setTotalLogSizeCompacted(operation.getMetrics().get(CompactionStrategy.TOTAL_LOG_FILE_SIZE).longValue());
    s.getStat().setTotalLogBlocks(scanner.getTotalLogBlocks());
    s.getStat().setTotalCorruptLogBlock(scanner.getTotalCorruptBlocks());
    s.getStat().setTotalRollbackBlocks(scanner.getTotalRollbacks());
    RuntimeStats runtimeStats = new RuntimeStats();
    runtimeStats.setTotalScanTime(scanner.getTotalTimeTakenToReadAndMergeBlocks());
    s.getStat().setRuntimeStats(runtimeStats);
  }).collect(toList());
}
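A hedged caller-side sketch of rolling up the per-file scan times that compact attaches to each WriteStatus; the variable names (compactor, statuses) are hypothetical and java.util.Objects is assumed:
// Hypothetical aggregation, not part of HoodieCompactor: sum the compaction scan
// time recorded in the RuntimeStats of every returned WriteStatus.
List<WriteStatus> statuses = compactor.compact(
    compactionHandler, metaClient, config, operation, instantTime, taskContextSupplier);
long totalScanTime = statuses.stream()
    .map(ws -> ws.getStat().getRuntimeStats())
    .filter(Objects::nonNull)
    .mapToLong(RuntimeStats::getTotalScanTime)
    .sum();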
Use of org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats in project hudi by apache.
Class HoodieCreateHandle, method setupWriteStatus.
/**
 * Set up the write status.
 *
 * @throws IOException if error occurs
 */
protected void setupWriteStatus() throws IOException {
  HoodieWriteStat stat = writeStatus.getStat();
  stat.setPartitionPath(writeStatus.getPartitionPath());
  stat.setNumWrites(recordsWritten);
  stat.setNumDeletes(recordsDeleted);
  stat.setNumInserts(insertRecordsWritten);
  stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
  stat.setFileId(writeStatus.getFileId());
  stat.setPath(new Path(config.getBasePath()), path);
  stat.setTotalWriteErrors(writeStatus.getTotalErrorRecords());
  long fileSize = FSUtils.getFileSize(fs, path);
  stat.setTotalWriteBytes(fileSize);
  stat.setFileSizeInBytes(fileSize);
  RuntimeStats runtimeStats = new RuntimeStats();
  runtimeStats.setTotalCreateTime(timer.endTimer());
  stat.setRuntimeStats(runtimeStats);
}
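For illustration, a hedged sketch of surfacing the create time recorded above, e.g. for logging; the snippet is hypothetical and assumes the handle's existing LOG, writeStatus, and RuntimeStats accessors:
// Hypothetical, not Hudi code: log the create time recorded by setupWriteStatus.
HoodieWriteStat stat = writeStatus.getStat();
RuntimeStats runtimeStats = stat.getRuntimeStats();
if (runtimeStats != null) {
  LOG.info("Created " + stat.getPath() + " in " + runtimeStats.getTotalCreateTime() + " ms");
}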