Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
In the class ITTestCompactionCommand, method generateCommits:
private void generateCommits() throws IOException {
  HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
  // Create the write client to write some records in
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withDeleteParallelism(2)
      .forTable(tableName)
      .withIndexConfig(HoodieIndexConfig.newBuilder()
          .withIndexType(HoodieIndex.IndexType.BLOOM)
          .build())
      .build();
  SparkRDDWriteClient<HoodieAvroPayload> client =
      new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jsc), cfg);
  List<HoodieRecord> records = insert(jsc, client, dataGen);
  upsert(jsc, client, dataGen, records);
  delete(jsc, client, records);
}
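For context, here is a minimal, self-contained sketch of the same wiring: a JavaSparkContext wrapped in a HoodieSparkEngineContext and handed to a SparkRDDWriteClient. The master URL, application name, table path, and table name below are illustrative placeholders, not values from the test above.

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class EngineContextSketch {
  public static void main(String[] args) {
    // Local Spark context; master URL and app name are placeholders.
    JavaSparkContext jsc = new JavaSparkContext(
        new SparkConf().setMaster("local[2]").setAppName("hudi-engine-context-sketch"));
    // HoodieSparkEngineContext adapts the JavaSparkContext to Hudi's
    // engine-agnostic HoodieEngineContext abstraction.
    HoodieSparkEngineContext context = new HoodieSparkEngineContext(jsc);
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hudi/sketch_table")  // illustrative base path
        .forTable("sketch_table")            // illustrative table name
        .build();
    SparkRDDWriteClient<HoodieAvroPayload> client = new SparkRDDWriteClient<>(context, cfg);
    // insert/upsert/delete calls would follow here, as in the test above.
    client.close();
    jsc.stop();
  }
}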
Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
In the class HoodieSparkCopyOnWriteTable, method updateColumnsStatsIndex:
private void updateColumnsStatsIndex(@Nonnull HoodieEngineContext context,
                                     @Nonnull List<HoodieWriteStat> updatedFilesStats,
                                     @Nonnull String instantTime) throws Exception {
  String sortColsList = config.getClusteringSortColumns();
  String basePath = metaClient.getBasePath();
  String indexPath = metaClient.getColumnStatsIndexPath();
  List<String> touchedFiles = updatedFilesStats.stream()
      .map(s -> new Path(basePath, s.getPath()).toString())
      .collect(Collectors.toList());
  if (touchedFiles.isEmpty() || StringUtils.isNullOrEmpty(sortColsList) || StringUtils.isNullOrEmpty(indexPath)) {
    return;
  }
  LOG.info(String.format("Updating column-statistics index table (%s)", indexPath));
  List<String> sortCols = Arrays.stream(sortColsList.split(","))
      .map(String::trim)
      .collect(Collectors.toList());
  HoodieSparkEngineContext sparkEngineContext = (HoodieSparkEngineContext) context;
  // Fetch table schema to appropriately construct col-stats index schema
  Schema tableWriteSchema = HoodieAvroUtils.createHoodieWriteSchema(
      new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields());
  List<String> completedCommits = metaClient.getCommitsTimeline()
      .filterCompletedInstants()
      .getInstants()
      .map(HoodieInstant::getTimestamp)
      .collect(Collectors.toList());
  ColumnStatsIndexHelper.updateColumnStatsIndexFor(
      sparkEngineContext.getSqlContext().sparkSession(),
      AvroConversionUtils.convertAvroSchemaToStructType(tableWriteSchema),
      touchedFiles, sortCols, indexPath, instantTime, completedCommits);
  LOG.info(String.format("Successfully updated column-statistics index at instant (%s)", instantTime));
}
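The method relies on the caller passing a Spark-backed engine context; the cast then exposes the SparkSession. Below is a minimal sketch of that access pattern in isolation. The class SparkSessionAccess and method sparkSessionFrom are hypothetical helpers for illustration, not Hudi API.

import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.spark.sql.SparkSession;

public class SparkSessionAccess {
  // Safe only when the job runs on the Spark engine: other engines supply
  // different HoodieEngineContext subclasses and this cast would fail.
  public static SparkSession sparkSessionFrom(HoodieEngineContext context) {
    HoodieSparkEngineContext sparkEngineContext = (HoodieSparkEngineContext) context;
    return sparkEngineContext.getSqlContext().sparkSession();
  }
}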
Use of org.apache.hudi.client.common.HoodieSparkEngineContext in project hudi by apache.
In the class SparkRDDWriteClient, method initWrapperFSMetrics:
@Override
protected void initWrapperFSMetrics() {
  if (config.isMetricsOn()) {
    Registry registry;
    Registry registryMeta;
    JavaSparkContext jsc = ((HoodieSparkEngineContext) context).getJavaSparkContext();
    if (config.isExecutorMetricsEnabled()) {
      // Create a distributed registry for HoodieWrapperFileSystem
      registry = Registry.getRegistry(
          HoodieWrapperFileSystem.class.getSimpleName(), DistributedRegistry.class.getName());
      ((DistributedRegistry) registry).register(jsc);
      registryMeta = Registry.getRegistry(
          HoodieWrapperFileSystem.class.getSimpleName() + "MetaFolder", DistributedRegistry.class.getName());
      ((DistributedRegistry) registryMeta).register(jsc);
    } else {
      registry = Registry.getRegistry(HoodieWrapperFileSystem.class.getSimpleName());
      registryMeta = Registry.getRegistry(HoodieWrapperFileSystem.class.getSimpleName() + "MetaFolder");
    }
    HoodieWrapperFileSystem.setMetricsRegistry(registry, registryMeta);
  }
}
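For completeness, here is a runnable sketch of retrieving the wrapped JavaSparkContext from a HoodieSparkEngineContext, as the method above does before registering its distributed registries. It assumes a local Spark master; the class name, app name, and sample data are illustrative.

import java.util.Arrays;
import java.util.List;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaSparkContextAccess {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext(
        new SparkConf().setMaster("local[2]").setAppName("jsc-access-sketch"));
    HoodieSparkEngineContext context = new HoodieSparkEngineContext(jsc);
    // getJavaSparkContext() returns the same JavaSparkContext the engine
    // context wraps, so Spark-native operations stay available.
    List<Integer> doubled = context.getJavaSparkContext()
        .parallelize(Arrays.asList(1, 2, 3), 2)
        .map(x -> x * 2)
        .collect();
    System.out.println(doubled);  // prints [2, 4, 6]
    jsc.stop();
  }
}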