
Example 1 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class FlinkDeleteHelper, the method execute:

@Override
public HoodieWriteMetadata<List<WriteStatus>> execute(
        String instantTime,
        List<HoodieKey> keys,
        HoodieEngineContext context,
        HoodieWriteConfig config,
        HoodieTable<EmptyHoodieRecordPayload, List<HoodieRecord<EmptyHoodieRecordPayload>>, List<HoodieKey>, List<WriteStatus>> table,
        BaseCommitActionExecutor<EmptyHoodieRecordPayload, List<HoodieRecord<EmptyHoodieRecordPayload>>, List<HoodieKey>, List<WriteStatus>, R> deleteExecutor) {
    try {
        HoodieWriteMetadata<List<WriteStatus>> result = null;
        List<HoodieKey> dedupedKeys = keys;
        final int parallelism = config.getDeleteShuffleParallelism();
        if (config.shouldCombineBeforeDelete()) {
            // De-dupe/merge if needed
            dedupedKeys = deduplicateKeys(keys, table, parallelism);
        }
        List<HoodieRecord<EmptyHoodieRecordPayload>> dedupedRecords = dedupedKeys.stream()
                .map(key -> new HoodieAvroRecord<>(key, new EmptyHoodieRecordPayload()))
                .collect(Collectors.toList());
        Instant beginTag = Instant.now();
        // perform index lookup to get the existing location of each record
        List<HoodieRecord<EmptyHoodieRecordPayload>> taggedRecords = HoodieList.getList(
                table.getIndex().tagLocation(HoodieList.of(dedupedRecords), context, table));
        Duration tagLocationDuration = Duration.between(beginTag, Instant.now());
        // filter out non-existent keys/records
        List<HoodieRecord<EmptyHoodieRecordPayload>> taggedValidRecords = taggedRecords.stream()
                .filter(HoodieRecord::isCurrentLocationKnown)
                .collect(Collectors.toList());
        if (!taggedValidRecords.isEmpty()) {
            result = deleteExecutor.execute(taggedValidRecords);
            result.setIndexLookupDuration(tagLocationDuration);
        } else {
            // if the entire set of keys is non-existent
            deleteExecutor.saveWorkloadProfileMetadataToInflight(
                    new WorkloadProfile(Pair.of(new HashMap<>(), new WorkloadStat())), instantTime);
            result = new HoodieWriteMetadata<>();
            result.setWriteStatuses(Collections.emptyList());
            deleteExecutor.commitOnAutoCommit(result);
        }
        return result;
    } catch (Throwable e) {
        if (e instanceof HoodieUpsertException) {
            throw (HoodieUpsertException) e;
        }
        throw new HoodieUpsertException("Failed to delete for commit time " + instantTime, e);
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieList(org.apache.hudi.common.data.HoodieList) HashSet(java.util.HashSet) WriteStatus(org.apache.hudi.client.WriteStatus) List(java.util.List) Duration(java.time.Duration) WorkloadProfile(org.apache.hudi.table.WorkloadProfile) HoodieKey(org.apache.hudi.common.model.HoodieKey) WorkloadStat(org.apache.hudi.table.WorkloadStat) EmptyHoodieRecordPayload(org.apache.hudi.common.model.EmptyHoodieRecordPayload) LinkedList(java.util.LinkedList) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) Collections(java.util.Collections) Pair(org.apache.hudi.common.util.collection.Pair)
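
For context, the deduplicateKeys helper called above merges duplicate delete keys before the index lookup. A minimal sketch of what such list-based de-duplication could look like, assuming HoodieKey equality over recordKey and partitionPath (an illustration only, not the actual FlinkDeleteHelper implementation, which also accounts for global vs. non-global indexes):

// Illustrative sketch only: collapses exact-duplicate HoodieKeys.
public static List<HoodieKey> deduplicateKeysSketch(List<HoodieKey> keys) {
    // HoodieKey implements equals/hashCode, so distinct() removes duplicates
    return keys.stream().distinct().collect(Collectors.toList());
}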

Example 2 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class HoodieFlinkWriteClient, the method insertOverwriteTable:

/**
 * Removes all existing records of the Hoodie table and inserts the given HoodieRecords into the table.
 *
 * @param records     HoodieRecords to insert
 * @param instantTime Instant time of the commit
 * @return list of WriteStatus to inspect errors and counts
 */
public List<WriteStatus> insertOverwriteTable(List<HoodieRecord<T>> records, final String instantTime) {
    HoodieTable table = initTable(WriteOperationType.INSERT_OVERWRITE_TABLE, Option.ofNullable(instantTime));
    table.validateInsertSchema();
    preWrite(instantTime, WriteOperationType.INSERT_OVERWRITE_TABLE, table.getMetaClient());
    // create the write handle if it does not already exist
    final HoodieWriteHandle<?, ?, ?, ?> writeHandle =
            getOrCreateWriteHandle(records.get(0), getConfig(), instantTime, table, records.listIterator());
    HoodieWriteMetadata<List<WriteStatus>> result =
            ((HoodieFlinkTable<T>) table).insertOverwriteTable(context, writeHandle, instantTime, records);
    return postWrite(result, instantTime, table);
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieFlinkTable(org.apache.hudi.table.HoodieFlinkTable)
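
As the Javadoc notes, the returned WriteStatus list exposes per-write errors and counts. A hedged caller-side sketch (assumed usage code, not part of HoodieFlinkWriteClient; writeClient, records, and instantTime are placeholders):

List<WriteStatus> statuses = writeClient.insertOverwriteTable(records, instantTime);
// Fail fast if any write task reported record-level errors.
boolean hasErrors = statuses.stream().anyMatch(WriteStatus::hasErrors);
// Sum the records written across all statuses.
long totalWritten = statuses.stream().mapToLong(s -> s.getStat().getNumWrites()).sum();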

Example 3 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestCopyOnWriteActionExecutor, the method testMakeNewPath:

@Test
public void testMakeNewPath() {
    String fileName = UUID.randomUUID().toString();
    String partitionPath = "2016/05/04";
    String instantTime = makeNewCommitTime();
    HoodieWriteConfig config = makeHoodieClientConfig();
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable table = HoodieSparkTable.create(config, context, metaClient);
    Pair<Path, String> newPathWithWriteToken = jsc.parallelize(Arrays.asList(1)).map(x -> {
        HoodieRecord record = mock(HoodieRecord.class);
        when(record.getPartitionPath()).thenReturn(partitionPath);
        String writeToken = FSUtils.makeWriteToken(
                TaskContext.getPartitionId(), TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
        HoodieCreateHandle io = new HoodieCreateHandle(config, instantTime, table, partitionPath, fileName, supplier);
        return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
    }).collect().get(0);
    assertEquals(
            newPathWithWriteToken.getKey().toString(),
            Paths.get(this.basePath, partitionPath,
                    FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString());
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieCreateHandle(org.apache.hudi.io.HoodieCreateHandle) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
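
The assertion above depends on Hudi's base-file naming convention. A small illustration of the expected shape, assuming FSUtils.makeDataFileName produces "<fileId>_<writeToken>_<instantTime>.parquet" with the default parquet extension (an assumption for illustration, not taken from this test):

String fileId = "f1-uuid";            // hypothetical values for illustration
String writeToken = "1-0-1";          // partitionId-stageId-taskAttemptId
String instant = "20160504123000";
// Expected base-file name under the assumption above:
String expectedName = String.format("%s_%s_%s.parquet", fileId, writeToken, instant);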

Example 4 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class CompactionTestBase, the method executeCompaction:

protected void executeCompaction(String compactionInstantTime, SparkRDDWriteClient client, HoodieTable table, HoodieWriteConfig cfg, int expectedNumRecs, boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
    client.compact(compactionInstantTime);
    assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, compactionInstantTime).doesMarkerDirExist());
    List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
    assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
    assertFalse(fileSliceList.stream().anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)), "Verify all file-slices have base-instant same as compaction instant");
    assertFalse(fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent()), "Verify all file-slices have data-files");
    if (hasDeltaCommitAfterPendingCompaction) {
        assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() == 0), "Verify all file-slices have at least one log-file");
    } else {
        assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() > 0), "Verify all file-slices have no log-files");
    }
    // verify that there is a commit
    table = getHoodieTable(HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).setLoadActiveTimelineOnLoad(true).build(), cfg);
    HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
    String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
    assertEquals(latestCompactionCommitTime, compactionInstantTime, "Expect compaction instant time to be the latest commit time");
    assertEquals(expectedNumRecs, HoodieClientTestUtils.countRecordsOptionallySince(jsc, basePath, sqlContext, timeline, Option.of("000")), "Must contain expected records");
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Option(org.apache.hudi.common.util.Option) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) FileSystemViewStorageType(org.apache.hudi.common.table.view.FileSystemViewStorageType) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) TRIP_EXAMPLE_SCHEMA(org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieClientTestUtils(org.apache.hudi.testutils.HoodieClientTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair)
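
For context, a hedged sketch of how a caller might schedule and run the compaction that executeCompaction verifies (assumed usage of the SparkRDDWriteClient API, not this test base's actual setup code):

// scheduleCompaction returns the instant time of the newly requested compaction, if any.
Option<String> compactionInstant = client.scheduleCompaction(Option.empty());
if (compactionInstant.isPresent()) {
    // Run the compaction, then pass the instant to executeCompaction(...) for verification.
    client.compact(compactionInstant.get());
}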

Example 5 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class CompactionTestBase, the method validateDeltaCommit:

/**
 * HELPER METHODS FOR TESTING.
 */
protected void validateDeltaCommit(String latestDeltaCommit, final Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> fgIdToCompactionOperation, HoodieWriteConfig cfg) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    HoodieTable table = getHoodieTable(metaClient, cfg);
    List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
    fileSliceList.forEach(fileSlice -> {
        Pair<String, HoodieCompactionOperation> opPair = fgIdToCompactionOperation.get(fileSlice.getFileGroupId());
        if (opPair != null) {
            assertEquals(fileSlice.getBaseInstantTime(), opPair.getKey(), "Expect baseInstant to match the compaction instant");
            assertTrue(fileSlice.getLogFiles().count() > 0, "Expect at least one log file to be present where the latest delta commit was written");
            assertFalse(fileSlice.getBaseFile().isPresent(), "Expect no data-file to be present");
        } else {
            assertTrue(fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0, "Expect baseInstant to be less than or equal to latestDeltaCommit");
        }
    });
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation)
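
A hedged sketch of how the fgIdToCompactionOperation map consumed by validateDeltaCommit might be built (assumed caller code; CompactionUtils.getAllPendingCompactionOperations reads pending compaction plans from the timeline):

Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> fgIdToOp =
        CompactionUtils.getAllPendingCompactionOperations(metaClient);
validateDeltaCommit(latestDeltaCommit, fgIdToOp, cfg);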

Aggregations

Type (package): usage count

HoodieTable (org.apache.hudi.table.HoodieTable): 133
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 105
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 76
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 75
List (java.util.List): 64
Test (org.junit.jupiter.api.Test): 63
ArrayList (java.util.ArrayList): 58
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 57
WriteStatus (org.apache.hudi.client.WriteStatus): 49
Path (org.apache.hadoop.fs.Path): 48
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 46
Option (org.apache.hudi.common.util.Option): 46
IOException (java.io.IOException): 44
Map (java.util.Map): 44
Collectors (java.util.stream.Collectors): 44
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 43
HashMap (java.util.HashMap): 41
Pair (org.apache.hudi.common.util.collection.Pair): 39
HoodieKey (org.apache.hudi.common.model.HoodieKey): 38
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 38