Search in sources :

Example 96 with HoodieTable

use of org.apache.hudi.table.HoodieTable in project hudi by apache.

the class TestAsyncCompaction method testCompactionAfterTwoDeltaCommits.

@Test
public void testCompactionAfterTwoDeltaCommits() throws Exception {
    // No Delta Commits after compaction request
    HoodieWriteConfig cfg = getConfig(true);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        String firstInstantTime = "001";
        String secondInstantTime = "004";
        String compactionInstantTime = "005";
        int numRecs = 2000;
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
        scheduleAndExecuteCompaction(compactionInstantTime, client, hoodieTable, cfg, numRecs, false);
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test)

Example 97 with HoodieTable

use of org.apache.hudi.table.HoodieTable in project hudi by apache.

the class TestHoodieCompactor method testCompactionEmpty.

@Test
public void testCompactionEmpty() throws Exception {
    HoodieWriteConfig config = getConfig();
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable table = HoodieSparkTable.create(getConfig(), context, metaClient);
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        String newCommitTime = writeClient.startCommit();
        List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
        JavaRDD<HoodieRecord> recordsRDD = jsc.parallelize(records, 1);
        writeClient.insert(recordsRDD, newCommitTime).collect();
        String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime();
        Option<HoodieCompactionPlan> plan = table.scheduleCompaction(context, compactionInstantTime, Option.empty());
        assertFalse(plan.isPresent(), "If there is nothing to compact, result will be empty");
    }
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test)

Example 98 with HoodieTable

use of org.apache.hudi.table.HoodieTable in project hudi by apache.

the class HoodieClientRollbackTestBase method twoUpsertCommitDataWithTwoPartitions.

protected void twoUpsertCommitDataWithTwoPartitions(List<FileSlice> firstPartitionCommit2FileSlices, List<FileSlice> secondPartitionCommit2FileSlices, HoodieWriteConfig cfg, boolean commitSecondUpsert) throws IOException {
    // just generate two partitions
    dataGen = new HoodieTestDataGenerator(new String[] { DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH });
    // 1. prepare data
    HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, new String[] { DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH }, basePath);
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    /**
     * Write 1 (only inserts)
     */
    String newCommitTime = "001";
    client.startCommitWithTime(newCommitTime);
    List<HoodieRecord> records = dataGen.generateInsertsContainsAllPartitions(newCommitTime, 2);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    JavaRDD<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime);
    Assertions.assertNoWriteErrors(statuses.collect());
    client.commit(newCommitTime, statuses);
    /**
     * Write 2 (updates)
     */
    newCommitTime = "002";
    client.startCommitWithTime(newCommitTime);
    records = dataGen.generateUpdates(newCommitTime, records);
    statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime);
    Assertions.assertNoWriteErrors(statuses.collect());
    if (commitSecondUpsert) {
        client.commit(newCommitTime, statuses);
    }
    // 2. assert file group and get the first partition file slice
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    SyncableFileSystemView fsView = getFileSystemViewWithUnCommittedSlices(table.getMetaClient());
    List<HoodieFileGroup> firstPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList());
    assertEquals(1, firstPartitionCommit2FileGroups.size());
    firstPartitionCommit2FileSlices.addAll(firstPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));
    // 3. assert file group and get the second partition file slice
    List<HoodieFileGroup> secondPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).collect(Collectors.toList());
    assertEquals(1, secondPartitionCommit2FileGroups.size());
    secondPartitionCommit2FileSlices.addAll(secondPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));
    // 4. assert file slice
    HoodieTableType tableType = this.getTableType();
    if (tableType.equals(HoodieTableType.COPY_ON_WRITE)) {
        assertEquals(2, firstPartitionCommit2FileSlices.size());
        assertEquals(2, secondPartitionCommit2FileSlices.size());
    } else {
        assertEquals(1, firstPartitionCommit2FileSlices.size());
        assertEquals(1, secondPartitionCommit2FileSlices.size());
    }
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) WriteStatus(org.apache.hudi.client.WriteStatus)

Example 99 with HoodieTable

use of org.apache.hudi.table.HoodieTable in project hudi by apache.

the class HoodieClientRollbackTestBase method insertOverwriteCommitDataWithTwoPartitions.

protected void insertOverwriteCommitDataWithTwoPartitions(List<FileSlice> firstPartitionCommit2FileSlices, List<FileSlice> secondPartitionCommit2FileSlices, HoodieWriteConfig cfg, boolean commitSecondInsertOverwrite) throws IOException {
    // just generate two partitions
    dataGen = new HoodieTestDataGenerator(new String[] { DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH });
    HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, new String[] { DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH }, basePath);
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    /**
     * Write 1 (upsert)
     */
    String newCommitTime = "001";
    List<HoodieRecord> records = dataGen.generateInsertsContainsAllPartitions(newCommitTime, 2);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    client.startCommitWithTime(newCommitTime);
    JavaRDD<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime);
    Assertions.assertNoWriteErrors(statuses.collect());
    client.commit(newCommitTime, statuses);
    // get fileIds written
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    SyncableFileSystemView fsView = getFileSystemViewWithUnCommittedSlices(table.getMetaClient());
    List<HoodieFileGroup> firstPartitionCommit1FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList());
    assertEquals(1, firstPartitionCommit1FileGroups.size());
    Set<String> partition1Commit1FileIds = firstPartitionCommit1FileGroups.get(0).getAllFileSlices().map(FileSlice::getFileId).collect(Collectors.toSet());
    List<HoodieFileGroup> secondPartitionCommit1FileGroups = fsView.getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).collect(Collectors.toList());
    assertEquals(1, secondPartitionCommit1FileGroups.size());
    Set<String> partition2Commit1FileIds = secondPartitionCommit1FileGroups.get(0).getAllFileSlices().map(FileSlice::getFileId).collect(Collectors.toSet());
    /**
     * Write 2 (one insert_overwrite)
     */
    String commitActionType = HoodieTimeline.REPLACE_COMMIT_ACTION;
    newCommitTime = "002";
    records = dataGen.generateInsertsContainsAllPartitions(newCommitTime, 2);
    writeRecords = jsc.parallelize(records, 1);
    client.startCommitWithTime(newCommitTime, commitActionType);
    HoodieWriteResult result = client.insertOverwrite(writeRecords, newCommitTime);
    statuses = result.getWriteStatuses();
    Assertions.assertNoWriteErrors(statuses.collect());
    if (commitSecondInsertOverwrite) {
        client.commit(newCommitTime, statuses, Option.empty(), commitActionType, result.getPartitionToReplaceFileIds());
    }
    metaClient.reloadActiveTimeline();
    // get new fileIds written as part of insert_overwrite
    fsView = getFileSystemViewWithUnCommittedSlices(metaClient);
    List<HoodieFileGroup> firstPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).filter(fg -> !partition1Commit1FileIds.contains(fg.getFileGroupId().getFileId())).collect(Collectors.toList());
    firstPartitionCommit2FileSlices.addAll(firstPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));
    List<HoodieFileGroup> secondPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).filter(fg -> !partition2Commit1FileIds.contains(fg.getFileGroupId().getFileId())).collect(Collectors.toList());
    secondPartitionCommit2FileSlices.addAll(secondPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));
    assertEquals(1, firstPartitionCommit2FileSlices.size());
    assertEquals(1, secondPartitionCommit2FileSlices.size());
}
Also used : SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) DEFAULT_SECOND_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Set(java.util.Set) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) DEFAULT_FIRST_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) Collectors(java.util.stream.Collectors) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions(org.apache.hudi.testutils.Assertions) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieWriteResult(org.apache.hudi.client.HoodieWriteResult) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteResult(org.apache.hudi.client.HoodieWriteResult) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) WriteStatus(org.apache.hudi.client.WriteStatus)

Example 100 with HoodieTable

use of org.apache.hudi.table.HoodieTable in project hudi by apache.

the class TestCopyOnWriteRollbackActionExecutor method testCopyOnWriteRollbackActionExecutor.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testCopyOnWriteRollbackActionExecutor(boolean isUsingMarkers) throws IOException {
    // 1. prepare data and assert data result
    List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
    List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
    HoodieWriteConfig cfg = getConfigBuilder().withRollbackUsingMarkers(isUsingMarkers).withAutoCommit(false).build();
    this.twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, !isUsingMarkers);
    metaClient.reloadActiveTimeline();
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    performRollbackAndValidate(isUsingMarkers, cfg, table, firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices);
}
Also used : FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTable(org.apache.hudi.table.HoodieTable) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

HoodieTable (org.apache.hudi.table.HoodieTable)133 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)105 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)76 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)75 List (java.util.List)64 Test (org.junit.jupiter.api.Test)63 ArrayList (java.util.ArrayList)58 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)57 WriteStatus (org.apache.hudi.client.WriteStatus)49 Path (org.apache.hadoop.fs.Path)48 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)46 Option (org.apache.hudi.common.util.Option)46 IOException (java.io.IOException)44 Map (java.util.Map)44 Collectors (java.util.stream.Collectors)44 SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient)43 HashMap (java.util.HashMap)41 Pair (org.apache.hudi.common.util.collection.Pair)39 HoodieKey (org.apache.hudi.common.model.HoodieKey)38 HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable)38