Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
Class HoodieMergeHandle, method close().
@Override
public List<WriteStatus> close() {
  try {
    writeIncomingRecords();

    if (keyToNewRecords instanceof ExternalSpillableMap) {
      ((ExternalSpillableMap) keyToNewRecords).close();
    } else {
      keyToNewRecords.clear();
    }
    writtenRecordKeys.clear();

    if (fileWriter != null) {
      fileWriter.close();
      fileWriter = null;
    }

    long fileSizeInBytes = FSUtils.getFileSize(fs, newFilePath);
    HoodieWriteStat stat = writeStatus.getStat();
    stat.setTotalWriteBytes(fileSizeInBytes);
    stat.setFileSizeInBytes(fileSizeInBytes);
    stat.setNumWrites(recordsWritten);
    stat.setNumDeletes(recordsDeleted);
    stat.setNumUpdateWrites(updatedRecordsWritten);
    stat.setNumInserts(insertRecordsWritten);
    stat.setTotalWriteErrors(writeStatus.getTotalErrorRecords());
    RuntimeStats runtimeStats = new RuntimeStats();
    runtimeStats.setTotalUpsertTime(timer.endTimer());
    stat.setRuntimeStats(runtimeStats);

    performMergeDataValidationCheck(writeStatus);

    LOG.info(String.format("MergeHandle for partitionPath %s fileID %s, took %d ms.",
        stat.getPartitionPath(), stat.getFileId(), runtimeStats.getTotalUpsertTime()));
    return Collections.singletonList(writeStatus);
  } catch (IOException e) {
    throw new HoodieUpsertException("Failed to close UpdateHandle", e);
  }
}
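Downstream, the statuses returned by close() are typically folded into a HoodieCommitMetadata keyed by partition. A minimal sketch of that aggregation step; the wrapper method is hypothetical, while getStat(), getPartitionPath(), and addWriteStat() are the real accessors used on this page (imports assume the hudi-client WriteStatus):

import java.util.List;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;

// Hypothetical aggregation step: fold per-handle write statuses into commit metadata.
static HoodieCommitMetadata toCommitMetadata(List<WriteStatus> statuses) {
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  for (WriteStatus writeStatus : statuses) {
    HoodieWriteStat stat = writeStatus.getStat();
    // Each stat is keyed by the partition the handle wrote to.
    commitMetadata.addWriteStat(stat.getPartitionPath(), stat);
  }
  return commitMetadata;
}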
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
Class IncrementalTimelineSyncFileSystemView, method updatePartitionWriteFileGroups().
private void updatePartitionWriteFileGroups(Map<String, List<HoodieWriteStat>> partitionToWriteStats,
    HoodieTimeline timeline, HoodieInstant instant) {
  partitionToWriteStats.entrySet().stream().forEach(entry -> {
    String partition = entry.getKey();
    if (isPartitionAvailableInStore(partition)) {
      LOG.info("Syncing partition (" + partition + ") of instant (" + instant + ")");
      FileStatus[] statuses = entry.getValue().stream().map(p -> {
        FileStatus status = new FileStatus(p.getFileSizeInBytes(), false, 0, 0, 0, 0, null, null, null,
            new Path(String.format("%s/%s", metaClient.getBasePath(), p.getPath())));
        return status;
      }).toArray(FileStatus[]::new);
      List<HoodieFileGroup> fileGroups =
          buildFileGroups(statuses, timeline.filterCompletedAndCompactionInstants(), false);
      applyDeltaFileSlicesToPartitionView(partition, fileGroups, DeltaApplyMode.ADD);
    } else {
      LOG.warn("Skipping partition (" + partition + ") when syncing instant (" + instant + ") as it is not loaded");
    }
  });
  LOG.info("Done Syncing committed instant (" + instant + ")");
}
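This method expects the per-partition write stats of a completed instant. A sketch of how a caller in the same class could produce them from the timeline; fromBytes(), getInstantDetails(), and getPartitionToWriteStats() are real Hudi APIs, while the method wrapper itself is illustrative:

import java.io.IOException;

import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

// Illustrative caller: deserialize the instant's commit metadata, then apply it.
private void syncCommitInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
  HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
      timeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
  // Hand the per-partition stats to the delta-apply logic shown above.
  updatePartitionWriteFileGroups(commitMetadata.getPartitionToWriteStats(), timeline, instant);
}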
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
Class HiveTestUtil, method createLogFiles().
private static HoodieCommitMetadata createLogFiles(Map<String, List<HoodieWriteStat>> partitionWriteStats,
    boolean isLogSchemaSimple, boolean useSchemaFromCommitMetadata)
    throws InterruptedException, IOException, URISyntaxException {
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  for (Entry<String, List<HoodieWriteStat>> wEntry : partitionWriteStats.entrySet()) {
    String partitionPath = wEntry.getKey();
    for (HoodieWriteStat wStat : wEntry.getValue()) {
      Path path = new Path(wStat.getPath());
      HoodieBaseFile dataFile = new HoodieBaseFile(fileSystem.getFileStatus(path));
      HoodieLogFile logFile = generateLogData(path, isLogSchemaSimple);
      HoodieDeltaWriteStat writeStat = new HoodieDeltaWriteStat();
      writeStat.setFileId(dataFile.getFileId());
      writeStat.setPath(logFile.getPath().toString());
      commitMetadata.addWriteStat(partitionPath, writeStat);
    }
  }
  addSchemaToCommitMetadata(commitMetadata, isLogSchemaSimple, useSchemaFromCommitMetadata);
  return commitMetadata;
}
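For orientation, a hedged sketch of inspecting the metadata this helper returns: each HoodieDeltaWriteStat pairs the base file's id with the new log file's path. getPartitionToWriteStats(), getFileId(), and getPath() are the real accessors; the printing loop is illustrative only:

import java.util.List;
import java.util.Map;

import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;

// Illustrative only: dump what createLogFiles recorded for each partition.
static void printDeltaStats(HoodieCommitMetadata commitMetadata) {
  for (Map.Entry<String, List<HoodieWriteStat>> entry : commitMetadata.getPartitionToWriteStats().entrySet()) {
    for (HoodieWriteStat stat : entry.getValue()) {
      System.out.println(entry.getKey() + " -> fileId=" + stat.getFileId() + ", logPath=" + stat.getPath());
    }
  }
}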
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
Class HiveTestUtil, method createTestData().
private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String instantTime)
    throws IOException, URISyntaxException {
  List<HoodieWriteStat> writeStats = new ArrayList<>();
  for (int i = 0; i < 5; i++) {
    // Create 5 files
    String fileId = UUID.randomUUID().toString();
    Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(instantTime, "1-0-1", fileId));
    generateParquetData(filePath, isParquetSchemaSimple);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId(fileId);
    writeStat.setPath(filePath.toString());
    writeStats.add(writeStat);
  }
  return writeStats;
}
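These per-file stats are what createLogFiles above consumes (the "1-0-1" argument to FSUtils.makeDataFileName is the write-token portion of the data file name). A hypothetical helper in the same class showing the shape of that hand-off; the partition name and instant time are made up:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieWriteStat;

// Hypothetical glue: map each partition to the base-file stats created above.
static Map<String, List<HoodieWriteStat>> buildPartitionWriteStats(Path basePath)
    throws IOException, URISyntaxException {
  Map<String, List<HoodieWriteStat>> partitionWriteStats = new HashMap<>();
  String partition = "2016/05/01"; // illustrative partition path
  partitionWriteStats.put(partition,
      createTestData(new Path(basePath, partition), true, "100")); // "100" is a made-up instant time
  return partitionWriteStats;
}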
Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.
Class HiveTestUtil, method createPartitions().
private static HoodieCommitMetadata createPartitions(int numberOfPartitions, boolean isParquetSchemaSimple,
    boolean useSchemaFromCommitMetadata, ZonedDateTime startFrom, String instantTime, String basePath)
    throws IOException, URISyntaxException {
  startFrom = startFrom.truncatedTo(ChronoUnit.DAYS);
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  for (int i = 0; i < numberOfPartitions; i++) {
    String partitionPath = startFrom.format(dtfOut);
    Path partPath = new Path(basePath + "/" + partitionPath);
    fileSystem.makeQualified(partPath);
    fileSystem.mkdirs(partPath);
    List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple, instantTime);
    startFrom = startFrom.minusDays(1);
    writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s));
  }
  addSchemaToCommitMetadata(commitMetadata, isParquetSchemaSimple, useSchemaFromCommitMetadata);
  return commitMetadata;
}
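createPartitions walks backwards one day per partition, so each partition folder is just a formatted date. A standalone sketch of that naming loop; the yyyy/MM/dd pattern is an assumption about dtfOut, which is defined elsewhere in HiveTestUtil:

import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;

public class PartitionNamingSketch {
  public static void main(String[] args) {
    // Assumption: dtfOut formats dates as day-granularity partition paths.
    DateTimeFormatter dtfOut = DateTimeFormatter.ofPattern("yyyy/MM/dd");
    ZonedDateTime startFrom = ZonedDateTime.now().truncatedTo(ChronoUnit.DAYS);
    for (int i = 0; i < 3; i++) {
      // Each pass yields a path like 2016/05/01, stepping one day back.
      System.out.println(startFrom.format(dtfOut));
      startFrom = startFrom.minusDays(1);
    }
  }
}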