Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by Apache.
The class HoodieCleaner, method run.
public void run() {
  // Build the write config for the target table and trigger a standalone clean.
  HoodieWriteConfig hoodieCfg = getHoodieClientConfig();
  SparkRDDWriteClient client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg);
  client.clean();
}
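The getHoodieClientConfig() helper is not shown above; a minimal sketch of the kind of config it could return, assuming the cleaner holds the table base path in a cfg.basePath field (names here are illustrative, and the real method may read further properties):
private HoodieWriteConfig getHoodieClientConfig() {
  // Sketch only: build a write config pointed at the table to clean.
  return HoodieWriteConfig.newBuilder()
      .withPath(cfg.basePath) // hypothetical field holding the table base path
      .build();
}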
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by Apache.
The class ITTestClusteringCommand, method generateCommits.
private void generateCommits() throws IOException {
  HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
  // Create the write client used to insert two batches of test records.
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withDeleteParallelism(2)
      .forTable(tableName)
      .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build();
  SparkRDDWriteClient<HoodieAvroPayload> client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jsc), cfg);
  insert(jsc, client, dataGen, "001");
  insert(jsc, client, dataGen, "002");
}
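The insert helper is defined elsewhere in the test class; a minimal sketch of what such a helper might look like, assuming the test class's existing imports and that the helper starts a commit at the given instant time and inserts freshly generated records (signature and body are illustrative):
private void insert(JavaSparkContext jsc, SparkRDDWriteClient client,
    HoodieTestDataGenerator dataGen, String commitTime) throws IOException {
  // Sketch only: generate records, wrap them in an RDD, and write them at commitTime.
  client.startCommitWithTime(commitTime);
  List<HoodieRecord> records = dataGen.generateInserts(commitTime, 10);
  JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
  client.insert(writeRecords, commitTime);
}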
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by Apache.
The class AsyncCleanerService, method startAsyncCleaningIfEnabled.
public static AsyncCleanerService startAsyncCleaningIfEnabled(BaseHoodieWriteClient writeClient) {
  HoodieWriteConfig config = writeClient.getConfig();
  // Async cleaning only runs when both auto-clean and async-clean are enabled.
  if (!config.isAutoClean() || !config.isAsyncClean()) {
    LOG.info("The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start.");
    return null;
  }
  AsyncCleanerService asyncCleanerService = new AsyncCleanerService(writeClient);
  asyncCleanerService.start(null);
  return asyncCleanerService;
}
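For the service to start, both flags must evaluate to true on the write config. A minimal sketch of a config that satisfies the check, assuming this Hudi version exposes the auto-clean and async-clean switches on HoodieCompactionConfig's builder (the path value is illustrative):
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hudi_table") // illustrative table path
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withAutoClean(true)   // assumption: builder method in this version
        .withAsyncClean(true)  // assumption: builder method in this version
        .build())
    .build();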
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by Apache.
The class HoodieJavaWriteClientExample, method main.
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: HoodieJavaWriteClientExample <tablePath> <tableName>");
    System.exit(1);
  }
  String tablePath = args[0];
  String tableName = args[1];
  // Generator of some records to be loaded in.
  HoodieExampleDataGenerator<HoodieAvroPayload> dataGen = new HoodieExampleDataGenerator<>();
  Configuration hadoopConf = new Configuration();
  // Initialize the table, if not done already.
  Path path = new Path(tablePath);
  FileSystem fs = FSUtils.getFs(tablePath, hadoopConf);
  if (!fs.exists(path)) {
    HoodieTableMetaClient.withPropertyBuilder()
        .setTableType(tableType)
        .setTableName(tableName)
        .setPayloadClassName(HoodieAvroPayload.class.getName())
        .initTable(hadoopConf, tablePath);
  }
  // Create the write client used to write records into the table.
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath)
      .withSchema(HoodieExampleDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withDeleteParallelism(2)
      .forTable(tableName)
      .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(20, 30).build())
      .build();
  HoodieJavaWriteClient<HoodieAvroPayload> client = new HoodieJavaWriteClient<>(new HoodieJavaEngineContext(hadoopConf), cfg);
  // Inserts.
  String newCommitTime = client.startCommit();
  LOG.info("Starting commit " + newCommitTime);
  List<HoodieRecord<HoodieAvroPayload>> records = dataGen.generateInserts(newCommitTime, 10);
  List<HoodieRecord<HoodieAvroPayload>> recordsSoFar = new ArrayList<>(records);
  List<HoodieRecord<HoodieAvroPayload>> writeRecords =
      recordsSoFar.stream().map(r -> new HoodieAvroRecord<HoodieAvroPayload>(r)).collect(Collectors.toList());
  client.insert(writeRecords, newCommitTime);
  // Updates.
  newCommitTime = client.startCommit();
  LOG.info("Starting commit " + newCommitTime);
  List<HoodieRecord<HoodieAvroPayload>> toBeUpdated = dataGen.generateUpdates(newCommitTime, 2);
  records.addAll(toBeUpdated);
  recordsSoFar.addAll(toBeUpdated);
  writeRecords = recordsSoFar.stream().map(r -> new HoodieAvroRecord<HoodieAvroPayload>(r)).collect(Collectors.toList());
  client.upsert(writeRecords, newCommitTime);
  // Deletes: remove just half of the records written so far.
  newCommitTime = client.startCommit();
  LOG.info("Starting commit " + newCommitTime);
  int numToDelete = recordsSoFar.size() / 2;
  List<HoodieKey> toBeDeleted = recordsSoFar.stream().map(HoodieRecord::getKey).limit(numToDelete).collect(Collectors.toList());
  client.delete(toBeDeleted, newCommitTime);
  client.close();
}
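The method references tableType and LOG without showing their declarations; in the example class these are static fields, roughly as sketched below (the real example uses a COPY_ON_WRITE table; other details may differ):
// Sketch of the class-level declarations the method relies on.
private static final String tableType = HoodieTableType.COPY_ON_WRITE.name();
private static final Logger LOG = LogManager.getLogger(HoodieJavaWriteClientExample.class);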
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by Apache.
The class BucketAssignFunction, method open.
@Override
public void open(Configuration parameters) throws Exception {
  super.open(parameters);
  HoodieWriteConfig writeConfig = StreamerUtil.getHoodieClientConfig(this.conf, true);
  HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(
      new SerializableConfiguration(StreamerUtil.getHadoopConf()),
      new FlinkTaskContextSupplier(getRuntimeContext()));
  this.bucketAssigner = BucketAssigners.create(
      getRuntimeContext().getIndexOfThisSubtask(),
      getRuntimeContext().getMaxNumberOfParallelSubtasks(),
      getRuntimeContext().getNumberOfParallelSubtasks(),
      ignoreSmallFiles(),
      HoodieTableType.valueOf(conf.getString(FlinkOptions.TABLE_TYPE)),
      context,
      writeConfig);
  this.payloadCreation = PayloadCreation.instance(this.conf);
}
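Here the write config is derived from the Flink job configuration via StreamerUtil.getHoodieClientConfig. A minimal sketch of the kind of options that configuration carries, with illustrative values only (FlinkOptions exposes many more settings):
// Sketch: a Flink Configuration feeding this.conf for the operator.
Configuration conf = new Configuration();
conf.setString(FlinkOptions.PATH.key(), "file:///tmp/hudi_table"); // illustrative path
conf.setString(FlinkOptions.TABLE_TYPE.key(), HoodieTableType.MERGE_ON_READ.name());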