use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
In class HoodieClientTestHarness, the method validateMetadata:
/**
* Validates the metadata table's contents to ensure they match what is on the file system.
*/
public void validateMetadata(HoodieTestTable testTable, List<String> inflightCommits, HoodieWriteConfig writeConfig,
                             String metadataTableBasePath, boolean doFullValidation) throws IOException {
  HoodieTableMetadata tableMetadata = metadata(writeConfig, context);
  assertNotNull(tableMetadata, "MetadataReader should have been initialized");
  if (!writeConfig.isMetadataTableEnabled()) {
    return;
  }
  if (!tableMetadata.getSyncedInstantTime().isPresent() || tableMetadata instanceof FileSystemBackedTableMetadata) {
    throw new IllegalStateException("Metadata should have synced some commits and tableMetadata should not be an instance of FileSystemBackedTableMetadata");
  }
  assertEquals(inflightCommits, testTable.inflightCommits());

  HoodieTimer timer = new HoodieTimer().startTimer();
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);

  // Partitions should match
  List<java.nio.file.Path> fsPartitionPaths = testTable.getAllPartitionPaths();
  List<String> fsPartitions = new ArrayList<>();
  fsPartitionPaths.forEach(entry -> fsPartitions.add(entry.getFileName().toString()));
  if (fsPartitions.isEmpty()) {
    fsPartitions.add("");
  }
  List<String> metadataPartitions = tableMetadata.getAllPartitionPaths();
  Collections.sort(fsPartitions);
  Collections.sort(metadataPartitions);
  assertEquals(fsPartitions.size(), metadataPartitions.size(), "Partitions should match");
  assertEquals(fsPartitions, metadataPartitions, "Partitions should match");

  // Files within each partition should match
  metaClient = HoodieTableMetaClient.reload(metaClient);
  HoodieTable table = HoodieSparkTable.create(writeConfig, engineContext, true);
  TableFileSystemView tableView = table.getHoodieView();
  List<String> fullPartitionPaths = fsPartitions.stream().map(partition -> basePath + "/" + partition).collect(Collectors.toList());
  Map<String, FileStatus[]> partitionToFilesMap = tableMetadata.getAllFilesInPartitions(fullPartitionPaths);
  assertEquals(fsPartitions.size(), partitionToFilesMap.size());
  fsPartitions.forEach(partition -> {
    try {
      validateFilesPerPartition(testTable, tableMetadata, tableView, partitionToFilesMap, partition);
    } catch (IOException e) {
      fail("Exception should not be raised: " + e);
    }
  });

  if (doFullValidation) {
    runFullValidation(writeConfig, metadataTableBasePath, engineContext);
  }
  LOG.info("Validation time=" + timer.endTimer());
}
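The snippets on this page all follow HoodieTimer's basic idiom: construct a timer, call startTimer(), do the work, then read the elapsed milliseconds from endTimer(). A minimal, self-contained sketch of that idiom, with Thread.sleep standing in for the validation work being timed:

import org.apache.hudi.common.util.HoodieTimer;

public class TimerIdiomSketch {
  public static void main(String[] args) throws InterruptedException {
    HoodieTimer timer = new HoodieTimer().startTimer();
    Thread.sleep(50); // stand-in for the work being measured
    System.out.println("Validation time=" + timer.endTimer()); // elapsed time in milliseconds
  }
}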
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
In class AbstractTableFileSystemView, the method addFilesToView:
/**
* Adds the provided statuses into the file system view and also caches them inside this object.
*/
public List<HoodieFileGroup> addFilesToView(FileStatus[] statuses) {
  HoodieTimer timer = new HoodieTimer().startTimer();
  List<HoodieFileGroup> fileGroups = buildFileGroups(statuses, visibleCommitsAndCompactionTimeline, true);
  long fgBuildTimeTakenMs = timer.endTimer();
  timer.startTimer();

  // Group by partition for efficient updates to both in-memory and disk-based structures.
  fileGroups.stream().collect(Collectors.groupingBy(HoodieFileGroup::getPartitionPath)).forEach((partition, value) -> {
    if (!isPartitionAvailableInStore(partition)) {
      if (bootstrapIndex.useIndex()) {
        try (BootstrapIndex.IndexReader reader = bootstrapIndex.createReader()) {
          LOG.info("Bootstrap Index available for partition " + partition);
          List<BootstrapFileMapping> sourceFileMappings = reader.getSourceFileMappingForPartition(partition);
          addBootstrapBaseFileMapping(sourceFileMappings.stream()
              .map(s -> new BootstrapBaseFileMapping(new HoodieFileGroupId(s.getPartitionPath(), s.getFileId()), s.getBootstrapFileStatus())));
        }
      }
      storePartitionView(partition, value);
    }
  });
  long storePartitionsTs = timer.endTimer();

  LOG.info("addFilesToView: NumFiles=" + statuses.length + ", NumFileGroups=" + fileGroups.size()
      + ", FileGroupsCreationTime=" + fgBuildTimeTakenMs + ", StoreTimeTaken=" + storePartitionsTs);
  return fileGroups;
}
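addFilesToView re-arms a single timer for two consecutive measurements: endTimer() closes the first phase and a fresh startTimer() begins the second. A sketch of that two-phase pattern, with sleeps standing in for buildFileGroups and the partition-view stores:

import org.apache.hudi.common.util.HoodieTimer;

public class TwoPhaseTimingSketch {
  public static void main(String[] args) throws InterruptedException {
    HoodieTimer timer = new HoodieTimer().startTimer();
    Thread.sleep(30); // phase 1: stand-in for buildFileGroups(...)
    long buildTimeMs = timer.endTimer();
    timer.startTimer(); // re-arm the same timer for the next phase
    Thread.sleep(20); // phase 2: stand-in for storing the partition views
    long storeTimeMs = timer.endTimer();
    System.out.println("FileGroupsCreationTime=" + buildTimeMs + ", StoreTimeTaken=" + storeTimeMs);
  }
}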
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
In class HiveQueryDDLExecutor, the method updateHiveSQLs:
private List<CommandProcessorResponse> updateHiveSQLs(List<String> sqls) {
  List<CommandProcessorResponse> responses = new ArrayList<>();
  try {
    for (String sql : sqls) {
      if (hiveDriver != null) {
        HoodieTimer timer = new HoodieTimer().startTimer();
        responses.add(hiveDriver.run(sql));
        LOG.info(String.format("Time taken to execute [%s]: %s ms", sql, timer.endTimer()));
      }
    }
  } catch (Exception e) {
    throw new HoodieHiveSyncException("Failed in executing SQL", e);
  }
  return responses;
}
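The same per-statement pattern works for any batch of operations: create a fresh timer inside the loop so each element gets its own measurement. A sketch under that assumption; runStatement is a hypothetical stand-in for hiveDriver.run(sql):

import java.util.Arrays;
import java.util.List;
import org.apache.hudi.common.util.HoodieTimer;

public class PerStatementTimingSketch {
  public static void main(String[] args) throws InterruptedException {
    List<String> sqls = Arrays.asList("MSCK REPAIR TABLE t", "ALTER TABLE t ADD COLUMNS (x int)");
    for (String sql : sqls) {
      HoodieTimer timer = new HoodieTimer().startTimer(); // one timer per statement
      runStatement(sql); // hypothetical stand-in for hiveDriver.run(sql)
      System.out.println(String.format("Time taken to execute [%s]: %s ms", sql, timer.endTimer()));
    }
  }

  private static void runStatement(String sql) throws InterruptedException {
    Thread.sleep(10); // simulate statement execution
  }
}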
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
In class RequestHandler, the method jsonifyResult:
/**
* Serializes the result into a JSON string.
*
* @param ctx Javalin context
* @param obj object to serialize
* @param metricsRegistry {@code Registry} instance for storing metrics
* @param objectMapper JSON object mapper
* @param logger {@code Logger} instance
* @return JSON String from the input object
* @throws JsonProcessingException if the object cannot be serialized to JSON
*/
public static String jsonifyResult(Context ctx, Object obj, Registry metricsRegistry, ObjectMapper objectMapper, Logger logger) throws JsonProcessingException {
  HoodieTimer timer = new HoodieTimer().startTimer();
  boolean prettyPrint = ctx.queryParam("pretty") != null;
  String result = prettyPrint
      ? objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj)
      : objectMapper.writeValueAsString(obj);
  final long jsonifyTime = timer.endTimer();
  metricsRegistry.add("WRITE_VALUE_CNT", 1);
  metricsRegistry.add("WRITE_VALUE_TIME", jsonifyTime);
  if (logger.isDebugEnabled()) {
    logger.debug("Jsonify TimeTaken=" + jsonifyTime);
  }
  return result;
}
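Outside a Javalin Context, the serialize-and-measure logic can be exercised directly with Jackson. In this sketch, the prettyPrint flag replaces the ctx.queryParam("pretty") check and a small map stands in for the result object:

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Collections;
import java.util.Map;
import org.apache.hudi.common.util.HoodieTimer;

public class JsonifySketch {
  public static void main(String[] args) throws JsonProcessingException {
    ObjectMapper objectMapper = new ObjectMapper();
    Map<String, String> obj = Collections.singletonMap("status", "ok"); // stand-in payload
    boolean prettyPrint = true; // stands in for ctx.queryParam("pretty") != null

    HoodieTimer timer = new HoodieTimer().startTimer();
    String result = prettyPrint
        ? objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj)
        : objectMapper.writeValueAsString(obj);
    System.out.println("Jsonify TimeTaken=" + timer.endTimer());
    System.out.println(result);
  }
}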
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
In class HoodieBackedTableMetadataWriter, the method initializeIfNeeded:
/**
* Initialize the metadata table if needed.
*
* @param dataMetaClient - meta client for the data table
* @param actionMetadata - optional action metadata
* @param inflightInstantTimestamp - timestamp of an instant in progress on the dataset
* @param <T> - action metadata types extending Avro generated SpecificRecordBase
* @throws IOException
*/
protected <T extends SpecificRecordBase> void initializeIfNeeded(HoodieTableMetaClient dataMetaClient, Option<T> actionMetadata,
                                                                 Option<String> inflightInstantTimestamp) throws IOException {
  HoodieTimer timer = new HoodieTimer().startTimer();
  boolean exists = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME));
  boolean reInitialize = false;

  // If the un-synced instants have been archived, then the metadata table will need to be initialized again.
  if (exists) {
    HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get())
        .setBasePath(metadataWriteConfig.getBasePath()).build();
    if (dataWriteConfig.getMetadataConfig().populateMetaFields() != metadataMetaClient.getTableConfig().populateMetaFields()) {
      LOG.info("Re-initiating metadata table properties since populate meta fields have changed");
      metadataMetaClient = initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
    }
    final Option<HoodieInstant> latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
    reInitialize = isBootstrapNeeded(latestMetadataInstant, actionMetadata);
  }

  if (reInitialize) {
    metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.REBOOTSTRAP_STR, 1));
    LOG.info("Deleting Metadata Table directory so that it can be re-initialized");
    dataMetaClient.getFs().delete(new Path(metadataWriteConfig.getBasePath()), true);
    exists = false;
  }

  if (!exists) {
    // Initialize for the first time by listing partitions and files directly from the file system
    if (initializeFromFilesystem(dataMetaClient, inflightInstantTimestamp)) {
      metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.INITIALIZE_STR, timer.endTimer()));
    }
  }
}
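Note that initializeIfNeeded reads the timer only on the path where initialization actually ran, so no INITIALIZE metric is emitted for a no-op call. A sketch of that conditional-metric pattern, with java.util.Optional and a plain map standing in for the metrics Option used above:

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.hudi.common.util.HoodieTimer;

public class ConditionalMetricSketch {
  public static void main(String[] args) throws InterruptedException {
    Optional<Map<String, Long>> metrics = Optional.of(new HashMap<>()); // stand-in for the metrics registry
    HoodieTimer timer = new HoodieTimer().startTimer();
    if (initializeFromFilesystem()) { // hypothetical stand-in for the real bootstrap call
      long elapsedMs = timer.endTimer(); // elapsed time is read only when work was done
      metrics.ifPresent(m -> m.put("initialize", elapsedMs));
    }
    metrics.ifPresent(m -> System.out.println("metrics=" + m));
  }

  private static boolean initializeFromFilesystem() throws InterruptedException {
    Thread.sleep(10); // simulate listing partitions and files
    return true; // pretend the metadata table was bootstrapped
  }
}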