
Example 86 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class HoodieCleaner method run.

public void run() {
    // Build the write config from the properties supplied to HoodieCleaner
    HoodieWriteConfig hoodieCfg = getHoodieClientConfig();
    // A raw-typed client suffices here: clean() does not depend on the payload type
    SparkRDDWriteClient client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg);
    // Run a single clean action, removing file versions per the configured cleaner policy
    client.clean();
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig)
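
The getHoodieClientConfig() helper is not shown above. As a rough, hedged sketch of an equivalent hand-built config (placeholder path and table name; builder calls taken from the other examples on this page), not the HoodieCleaner source:

HoodieWriteConfig hoodieCfg = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hoodie/trips")  // placeholder base path of the table to clean
    .forTable("trips")              // placeholder table name
    .build();
SparkRDDWriteClient client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg);
client.clean();  // one clean action under the configured cleaning policy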

Example 87 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class ITTestClusteringCommand method generateCommits.

private void generateCommits() throws IOException {
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
    // Create the write client to write some records in
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
        .withPath(tablePath)
        .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
        .withParallelism(2, 2)
        .withDeleteParallelism(2)
        .forTable(tableName)
        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
        .build();
    SparkRDDWriteClient<HoodieAvroPayload> client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jsc), cfg);
    insert(jsc, client, dataGen, "001");
    insert(jsc, client, dataGen, "002");
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieAvroPayload(org.apache.hudi.common.model.HoodieAvroPayload)
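
The insert(...) calls above delegate to a test helper whose body is not shown. A hedged sketch of what one such round presumably does, using the write-client API that Example 89 below demonstrates (org.apache.spark.api.java.JavaRDD assumed on the classpath):

// Hypothetical expansion of insert(jsc, client, dataGen, "001"); not the helper's actual source.
String commitTime = client.startCommit();
List<HoodieRecord> records = dataGen.generateInserts(commitTime, 10);  // 10 is an arbitrary batch size
JavaRDD<HoodieRecord> writeRdd = jsc.parallelize(records, 1);
client.insert(writeRdd, commitTime);  // returns a JavaRDD<WriteStatus> callers can inspect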

Example 88 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class AsyncCleanerService method startAsyncCleaningIfEnabled.

public static AsyncCleanerService startAsyncCleaningIfEnabled(BaseHoodieWriteClient writeClient) {
    HoodieWriteConfig config = writeClient.getConfig();
    if (!config.isAutoClean() || !config.isAsyncClean()) {
        LOG.info("The HoodieWriteClient is not configured to auto & async clean. Async clean service will not start.");
        return null;
    }
    AsyncCleanerService asyncCleanerService = new AsyncCleanerService(writeClient);
    asyncCleanerService.start(null);
    return asyncCleanerService;
}
Also used : HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig)
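
Both flags must be set for the service to start. A minimal sketch of enabling them when building the config, assuming withAutoClean/withAsyncClean live on HoodieCompactionConfig's builder in this version of Hudi (verify against your release):

HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(tablePath)   // placeholder variables
    .forTable(tableName)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withAutoClean(true)   // assumed setter for hoodie.clean.automatic
        .withAsyncClean(true)  // assumed setter for hoodie.clean.async
        .build())
    .build();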

Example 89 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class HoodieJavaWriteClientExample method main.

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: HoodieJavaWriteClientExample <tablePath> <tableName>");
        System.exit(1);
    }
    String tablePath = args[0];
    String tableName = args[1];
    // Generator of some records to be loaded in.
    HoodieExampleDataGenerator<HoodieAvroPayload> dataGen = new HoodieExampleDataGenerator<>();
    Configuration hadoopConf = new Configuration();
    // initialize the table, if not done already
    Path path = new Path(tablePath);
    FileSystem fs = FSUtils.getFs(tablePath, hadoopConf);
    if (!fs.exists(path)) {
        HoodieTableMetaClient.withPropertyBuilder()
            .setTableType(tableType)
            .setTableName(tableName)
            .setPayloadClassName(HoodieAvroPayload.class.getName())
            .initTable(hadoopConf, tablePath);
    }
    // Create the write client to write some records in
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
        .withPath(tablePath)
        .withSchema(HoodieExampleDataGenerator.TRIP_EXAMPLE_SCHEMA)
        .withParallelism(2, 2)
        .withDeleteParallelism(2)
        .forTable(tableName)
        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
        .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(20, 30).build())
        .build();
    HoodieJavaWriteClient<HoodieAvroPayload> client = new HoodieJavaWriteClient<>(new HoodieJavaEngineContext(hadoopConf), cfg);
    // inserts
    String newCommitTime = client.startCommit();
    LOG.info("Starting commit " + newCommitTime);
    List<HoodieRecord<HoodieAvroPayload>> records = dataGen.generateInserts(newCommitTime, 10);
    List<HoodieRecord<HoodieAvroPayload>> recordsSoFar = new ArrayList<>(records);
    List<HoodieRecord<HoodieAvroPayload>> writeRecords = recordsSoFar.stream().map(r -> new HoodieAvroRecord<HoodieAvroPayload>(r)).collect(Collectors.toList());
    client.insert(writeRecords, newCommitTime);
    // updates
    newCommitTime = client.startCommit();
    LOG.info("Starting commit " + newCommitTime);
    List<HoodieRecord<HoodieAvroPayload>> toBeUpdated = dataGen.generateUpdates(newCommitTime, 2);
    records.addAll(toBeUpdated);
    recordsSoFar.addAll(toBeUpdated);
    writeRecords = recordsSoFar.stream().map(r -> new HoodieAvroRecord<HoodieAvroPayload>(r)).collect(Collectors.toList());
    client.upsert(writeRecords, newCommitTime);
    // Delete
    newCommitTime = client.startCommit();
    LOG.info("Starting commit " + newCommitTime);
    // just delete half of the records
    int numToDelete = recordsSoFar.size() / 2;
    List<HoodieKey> toBeDeleted = recordsSoFar.stream().map(HoodieRecord::getKey).limit(numToDelete).collect(Collectors.toList());
    client.delete(toBeDeleted, newCommitTime);
    client.close();
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieAvroPayload(org.apache.hudi.common.model.HoodieAvroPayload) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieExampleDataGenerator(org.apache.hudi.examples.common.HoodieExampleDataGenerator) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieJavaEngineContext(org.apache.hudi.client.common.HoodieJavaEngineContext) HoodieJavaWriteClient(org.apache.hudi.client.HoodieJavaWriteClient) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieIndex(org.apache.hudi.index.HoodieIndex) ArrayList(java.util.ArrayList) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) List(java.util.List) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieKey(org.apache.hudi.common.model.HoodieKey) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils)
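
One caveat in the example: if any write step throws, client.close() is skipped. A minimal sketch of the same lifecycle with try-with-resources, assuming the write client inherits AutoCloseable from its base client in this version of Hudi:

try (HoodieJavaWriteClient<HoodieAvroPayload> client =
         new HoodieJavaWriteClient<>(new HoodieJavaEngineContext(hadoopConf), cfg)) {
    String newCommitTime = client.startCommit();
    client.insert(writeRecords, newCommitTime);
}  // close() now runs even when a write step throws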

Example 90 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class BucketAssignFunction method open.

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    HoodieWriteConfig writeConfig = StreamerUtil.getHoodieClientConfig(this.conf, true);
    HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(new SerializableConfiguration(StreamerUtil.getHadoopConf()), new FlinkTaskContextSupplier(getRuntimeContext()));
    this.bucketAssigner = BucketAssigners.create(
        getRuntimeContext().getIndexOfThisSubtask(),
        getRuntimeContext().getMaxNumberOfParallelSubtasks(),
        getRuntimeContext().getNumberOfParallelSubtasks(),
        ignoreSmallFiles(),
        HoodieTableType.valueOf(conf.getString(FlinkOptions.TABLE_TYPE)),
        context,
        writeConfig);
    this.payloadCreation = PayloadCreation.instance(this.conf);
}
Also used : SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieFlinkEngineContext(org.apache.hudi.client.common.HoodieFlinkEngineContext) FlinkTaskContextSupplier(org.apache.hudi.client.FlinkTaskContextSupplier)
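
StreamerUtil.getHoodieClientConfig(this.conf, true) derives the HoodieWriteConfig from the operator's Flink Configuration. A hedged sketch of the minimal options involved; FlinkOptions.PATH and FlinkOptions.TABLE_NAME are assumed option names, while FlinkOptions.TABLE_TYPE appears in the code above:

Configuration conf = new Configuration();  // org.apache.flink.configuration.Configuration
conf.setString(FlinkOptions.PATH, "/tmp/hoodie/trips");    // placeholder base path (assumed option)
conf.setString(FlinkOptions.TABLE_NAME, "trips");          // placeholder table name (assumed option)
conf.setString(FlinkOptions.TABLE_TYPE, "COPY_ON_WRITE");  // drives HoodieTableType.valueOf(...)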

Aggregations

HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 327
Test (org.junit.jupiter.api.Test): 179
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 173
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 169
ArrayList (java.util.ArrayList): 136
List (java.util.List): 133
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 126
HoodieTable (org.apache.hudi.table.HoodieTable): 117
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 111
HashMap (java.util.HashMap): 93
Path (org.apache.hadoop.fs.Path): 92
WriteStatus (org.apache.hudi.client.WriteStatus): 86
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 84
Collectors (java.util.stream.Collectors): 81
Map (java.util.Map): 76
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 76
Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals): 74
Arrays (java.util.Arrays): 73
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 72
Option (org.apache.hudi.common.util.Option): 69