
Example 71 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class BaseHoodieWriteClient, method initTable:

/**
 * Instantiates and initializes an instance of {@link HoodieTable}, performing crucial bootstrapping
 * operations such as:
 *
 * <ul>
 *   <li>Checking whether upgrade/downgrade is required</li>
 *   <li>Bootstrapping the Metadata Table (if required)</li>
 *   <li>Initializing metrics contexts</li>
 * </ul>
 *
 * NOTE: This method is engine-agnostic and SHOULD NOT be overloaded; please check
 * {@link #doInitTable(HoodieTableMetaClient, Option)} instead.
 */
protected final HoodieTable initTable(WriteOperationType operationType, Option<String> instantTime) {
    HoodieTableMetaClient metaClient = createMetaClient(true);
    // Set up the write schema for deletes
    if (operationType == WriteOperationType.DELETE) {
        setWriteSchemaForDeletes(metaClient);
    }
    HoodieTable table;
    this.txnManager.beginTransaction();
    try {
        tryUpgrade(metaClient, instantTime);
        table = doInitTable(metaClient, instantTime);
    } finally {
        this.txnManager.endTransaction();
    }
    // Validate table properties
    metaClient.validateTableProperties(config.getProps(), operationType);
    // Make sure that FS View is in sync
    table.getHoodieView().sync();
    switch (operationType) {
        case INSERT:
        case INSERT_PREPPED:
        case UPSERT:
        case UPSERT_PREPPED:
        case BULK_INSERT:
        case BULK_INSERT_PREPPED:
        case INSERT_OVERWRITE:
        case INSERT_OVERWRITE_TABLE:
            setWriteTimer(table);
            break;
        case CLUSTER:
            clusteringTimer = metrics.getClusteringCtx();
            break;
        case COMPACT:
            compactionTimer = metrics.getCompactionCtx();
            break;
        default:
    }
    return table;
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTable(org.apache.hudi.table.HoodieTable)
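
For context, initTable() is not invoked directly by applications; it runs at the start of every public write operation. Below is a minimal, hedged sketch of an upsert that exercises this initialization path (assuming 0.10.x-era Hudi and Spark APIs; generics are kept raw for brevity, and the class and method names are illustrative):

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.spark.api.java.JavaRDD;

public class InitTableUsageSketch {
    // Illustrative helper: a single upsert round-trip that triggers initTable().
    public static void upsertOnce(HoodieSparkEngineContext context,
                                  HoodieWriteConfig config,
                                  JavaRDD<HoodieRecord> records) {
        try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, config)) {
            String instantTime = client.startCommit();
            // upsert() internally calls initTable(WriteOperationType.UPSERT, Option.of(instantTime)),
            // running the upgrade check, metadata-table bootstrap, and metrics setup shown above.
            JavaRDD<WriteStatus> statuses = client.upsert(records, instantTime);
            client.commit(instantTime, statuses);
        }
    }
}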

Example 72 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class BaseHoodieWriteClient, method rollback:

/**
 * Rolls back the inflight record changes with the given commit time.
 *
 * @param commitInstantTime instant time of the commit
 * @param pendingRollbackInfo pending rollback instant and plan, if a rollback failed on a previous attempt
 * @param skipLocking if this is triggered by another parent transaction, locking can be skipped
 * @throws HoodieRollbackException if the rollback cannot be performed successfully
 * @deprecated will be removed in the future in favor of {@link BaseHoodieWriteClient#restoreToInstant(String)}
 */
@Deprecated
public boolean rollback(final String commitInstantTime, Option<HoodiePendingRollbackInfo> pendingRollbackInfo, boolean skipLocking) throws HoodieRollbackException {
    LOG.info("Begin rollback of instant " + commitInstantTime);
    final String rollbackInstantTime = pendingRollbackInfo.map(entry -> entry.getRollbackInstant().getTimestamp()).orElse(HoodieActiveTimeline.createNewInstantTime());
    final Timer.Context timerContext = this.metrics.getRollbackCtx();
    try {
        HoodieTable<T, I, K, O> table = createTable(config, hadoopConf);
        Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants().filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime)).findFirst());
        if (commitInstantOpt.isPresent()) {
            LOG.info("Scheduling Rollback at instant time :" + rollbackInstantTime);
            Option<HoodieRollbackPlan> rollbackPlanOption = pendingRollbackInfo.map(entry -> Option.of(entry.getRollbackPlan())).orElseGet(() -> table.scheduleRollback(context, rollbackInstantTime, commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers()));
            if (rollbackPlanOption.isPresent()) {
                // execute rollback
                HoodieRollbackMetadata rollbackMetadata = table.rollback(context, rollbackInstantTime, commitInstantOpt.get(), true, skipLocking);
                if (timerContext != null) {
                    long durationInMs = metrics.getDurationInMs(timerContext.stop());
                    metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted());
                }
                return true;
            } else {
                throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitInstantTime);
            }
        } else {
            LOG.warn("Cannot find instant " + commitInstantTime + " in the timeline, for rollback");
            return false;
        }
    } catch (Exception e) {
        throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitInstantTime, e);
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodiePendingRollbackInfo(org.apache.hudi.common.HoodiePendingRollbackInfo) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) Option(org.apache.hudi.common.util.Option) Timer(com.codahale.metrics.Timer)
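
A hedged usage sketch follows (same 0.10.x-era client API assumed): the public single-argument rollback(String) delegates to the deprecated overload above with an empty pendingRollbackInfo and skipLocking = false.

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.exception.HoodieRollbackException;

public class RollbackUsageSketch {
    // Illustrative helper: roll back one instant by its timestamp.
    // Returns false when the instant is not found on the active timeline.
    public static boolean rollbackInstant(SparkRDDWriteClient client, String instantTime)
            throws HoodieRollbackException {
        // Delegates to rollback(instantTime, Option.empty(), false) shown above.
        return client.rollback(instantTime);
    }
}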

Example 73 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class BaseHoodieWriteClient, method commitStats:

public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Option<Map<String, String>> extraMetadata, String commitActionType, Map<String, List<String>> partitionToReplaceFileIds) {
    // Skip the commit when empty commits are not allowed and there is nothing to commit
    if (!config.allowEmptyCommit() && stats.isEmpty()) {
        return true;
    }
    LOG.info("Committing " + instantTime + " action " + commitActionType);
    // Create a Hoodie table that encapsulates the commits and files visible
    HoodieTable table = createTable(config, hadoopConf);
    HoodieCommitMetadata metadata = CommitUtils.buildMetadata(stats, partitionToReplaceFileIds, extraMetadata, operationType, config.getWriteSchema(), commitActionType);
    HoodieInstant inflightInstant = new HoodieInstant(State.INFLIGHT, table.getMetaClient().getCommitActionType(), instantTime);
    HeartbeatUtils.abortIfHeartbeatExpired(instantTime, table, heartbeatClient, config);
    this.txnManager.beginTransaction(Option.of(inflightInstant), lastCompletedTxnAndMetadata.isPresent() ? Option.of(lastCompletedTxnAndMetadata.get().getLeft()) : Option.empty());
    try {
        preCommit(inflightInstant, metadata);
        commit(table, commitActionType, instantTime, metadata, stats);
        postCommit(table, metadata, instantTime, extraMetadata);
        LOG.info("Committed " + instantTime);
        releaseResources();
    } catch (IOException e) {
        throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime, e);
    } finally {
        this.txnManager.endTransaction(Option.of(inflightInstant));
    }
    // Run table services outside of the lock: compaction and clustering can be time-consuming
    // and do not need to hold the lock for their entire execution
    runTableServicesInline(table, metadata, extraMetadata);
    emitCommitMetrics(instantTime, metadata, commitActionType);
    // callback if needed.
    if (config.writeCommitCallbackOn()) {
        if (null == commitCallback) {
            commitCallback = HoodieCommitCallbackFactory.create(config);
        }
        commitCallback.call(new HoodieWriteCommitCallbackMessage(instantTime, config.getTableName(), config.getBasePath(), stats));
    }
    return true;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieWriteCommitCallbackMessage(org.apache.hudi.callback.common.HoodieWriteCommitCallbackMessage) HoodieTable(org.apache.hudi.table.HoodieTable) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)
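
To illustrate where the stats come from, here is a hedged sketch (illustrative names; the Spark client performs the equivalent inside its own commit() overloads) of mapping distributed write results to HoodieWriteStat and completing the instant via commitStats():

import java.util.List;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.util.Option;
import org.apache.spark.api.java.JavaRDD;

public class CommitStatsUsageSketch {
    public static boolean commitWrites(SparkRDDWriteClient client,
                                       String instantTime,
                                       JavaRDD<WriteStatus> writeStatuses) {
        // Collect per-file write statistics from the distributed write results.
        List<HoodieWriteStat> stats = writeStatuses.map(WriteStatus::getStat).collect();
        // The shorter commitStats() overload forwards to the method shown above with an
        // empty partition-to-replaced-file-ids map; "commit" is the COW commit action type.
        return client.commitStats(instantTime, stats, Option.empty(), "commit");
    }
}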

Example 74 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestHoodieBackedMetadata, method validateMetadata:

private void validateMetadata(SparkRDDWriteClient testClient) throws IOException {
    HoodieWriteConfig config = testClient.getConfig();
    SparkRDDWriteClient client;
    if (config.isEmbeddedTimelineServerEnabled()) {
        testClient.close();
        client = new SparkRDDWriteClient(testClient.getEngineContext(), testClient.getConfig());
    } else {
        client = testClient;
    }
    HoodieTableMetadata tableMetadata = metadata(client);
    assertNotNull(tableMetadata, "MetadataReader should have been initialized");
    if (!config.isMetadataTableEnabled()) {
        return;
    }
    HoodieTimer timer = new HoodieTimer().startTimer();
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    // Partitions should match
    FileSystemBackedTableMetadata fsBackedTableMetadata = new FileSystemBackedTableMetadata(engineContext, new SerializableConfiguration(hadoopConf), config.getBasePath(), config.shouldAssumeDatePartitioning());
    List<String> fsPartitions = fsBackedTableMetadata.getAllPartitionPaths();
    List<String> metadataPartitions = tableMetadata.getAllPartitionPaths();
    Collections.sort(fsPartitions);
    Collections.sort(metadataPartitions);
    assertEquals(fsPartitions.size(), metadataPartitions.size(), "Partitions should match");
    assertTrue(fsPartitions.equals(metadataPartitions), "Partitions should match");
    // Files within each partition should match
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable table = HoodieSparkTable.create(config, engineContext);
    TableFileSystemView tableView = table.getHoodieView();
    List<String> fullPartitionPaths = fsPartitions.stream().map(partition -> basePath + "/" + partition).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = tableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    assertEquals(fsPartitions.size(), partitionToFilesMap.size());
    fsPartitions.forEach(partition -> {
        try {
            Path partitionPath;
            if (partition.equals("")) {
                // Should be the non-partitioned case
                partitionPath = new Path(basePath);
            } else {
                partitionPath = new Path(basePath, partition);
            }
            FileStatus[] fsStatuses = FSUtils.getAllDataFilesInPartition(fs, partitionPath);
            FileStatus[] metaStatuses = tableMetadata.getAllFilesInPartition(partitionPath);
            List<String> fsFileNames = Arrays.stream(fsStatuses).map(s -> s.getPath().getName()).collect(Collectors.toList());
            List<String> metadataFilenames = Arrays.stream(metaStatuses).map(s -> s.getPath().getName()).collect(Collectors.toList());
            Collections.sort(fsFileNames);
            Collections.sort(metadataFilenames);
            assertEquals(fsStatuses.length, partitionToFilesMap.get(partitionPath.toString()).length);
            // File sizes should be valid
            Arrays.stream(metaStatuses).forEach(s -> assertTrue(s.getLen() > 0));
            if ((fsFileNames.size() != metadataFilenames.size()) || (!fsFileNames.equals(metadataFilenames))) {
                LOG.info("*** File system listing = " + Arrays.toString(fsFileNames.toArray()));
                LOG.info("*** Metadata listing = " + Arrays.toString(metadataFilenames.toArray()));
                for (String fileName : fsFileNames) {
                    if (!metadataFilenames.contains(fileName)) {
                        LOG.error(partition + "FsFilename " + fileName + " not found in Meta data");
                    }
                }
                for (String fileName : metadataFilenames) {
                    if (!fsFileNames.contains(fileName)) {
                        LOG.error(partition + "Metadata file " + fileName + " not found in original FS");
                    }
                }
            }
            // Block sizes should be valid
            Arrays.stream(metaStatuses).forEach(s -> assertTrue(s.getBlockSize() > 0));
            List<Long> fsBlockSizes = Arrays.stream(fsStatuses).map(FileStatus::getBlockSize).collect(Collectors.toList());
            Collections.sort(fsBlockSizes);
            List<Long> metadataBlockSizes = Arrays.stream(metaStatuses).map(FileStatus::getBlockSize).collect(Collectors.toList());
            Collections.sort(metadataBlockSizes);
            assertEquals(fsBlockSizes, metadataBlockSizes);
            assertEquals(fsFileNames.size(), metadataFilenames.size(), "Files within partition " + partition + " should match");
            assertTrue(fsFileNames.equals(metadataFilenames), "Files within partition " + partition + " should match");
            // FileSystemView should expose the same data
            List<HoodieFileGroup> fileGroups = tableView.getAllFileGroups(partition).collect(Collectors.toList());
            fileGroups.addAll(tableView.getAllReplacedFileGroups(partition).collect(Collectors.toList()));
            fileGroups.forEach(g -> LogManager.getLogger(TestHoodieBackedMetadata.class).info(g));
            fileGroups.forEach(g -> g.getAllBaseFiles().forEach(b -> LogManager.getLogger(TestHoodieBackedMetadata.class).info(b)));
            fileGroups.forEach(g -> g.getAllFileSlices().forEach(s -> LogManager.getLogger(TestHoodieBackedMetadata.class).info(s)));
            long numFiles = fileGroups.stream().mapToLong(g -> g.getAllBaseFiles().count() + g.getAllFileSlices().mapToLong(s -> s.getLogFiles().count()).sum()).sum();
            assertEquals(metadataFilenames.size(), numFiles);
        } catch (IOException e) {
            e.printStackTrace();
            assertTrue(false, "Exception should not be raised: " + e);
        }
    });
    HoodieBackedTableMetadataWriter metadataWriter = metadataWriter(client);
    assertNotNull(metadataWriter, "MetadataWriter should have been initialized");
    // Validate write config for metadata table
    HoodieWriteConfig metadataWriteConfig = metadataWriter.getWriteConfig();
    assertFalse(metadataWriteConfig.isMetadataTableEnabled(), "The metadata table should not have its own metadata table");
    // Metadata table should be in sync with the dataset
    HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePath).build();
    // Metadata table is MOR
    assertEquals(metadataMetaClient.getTableType(), HoodieTableType.MERGE_ON_READ, "Metadata Table should be MOR");
    // Metadata table is HFile format
    assertEquals(metadataMetaClient.getTableConfig().getBaseFileFormat(), HoodieFileFormat.HFILE, "Metadata Table base file format should be HFile");
    // Metadata table has a fixed number of partitions
    // Cannot use FSUtils.getAllFoldersWithPartitionMetaFile for this, as that function
    // filters out all directories under the .hoodie folder.
    List<String> metadataTablePartitions = FSUtils.getAllPartitionPaths(engineContext, HoodieTableMetadata.getMetadataTableBasePath(basePath), false, false);
    assertEquals(metadataWriter.getEnabledPartitionTypes().size(), metadataTablePartitions.size());
    final Map<String, MetadataPartitionType> metadataEnabledPartitionTypes = new HashMap<>();
    metadataWriter.getEnabledPartitionTypes().forEach(e -> metadataEnabledPartitionTypes.put(e.getPartitionPath(), e));
    // Metadata table should automatically compact and clean
    // versions are +1, since auto-clean / compaction happens at the end of commits
    int numFileVersions = metadataWriteConfig.getCleanerFileVersionsRetained() + 1;
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metadataMetaClient, metadataMetaClient.getActiveTimeline());
    metadataTablePartitions.forEach(partition -> {
        List<FileSlice> latestSlices = fsView.getLatestFileSlices(partition).collect(Collectors.toList());
        assertTrue(latestSlices.stream().map(FileSlice::getBaseFile).count() <= metadataEnabledPartitionTypes.get(partition).getFileGroupCount(), "Should have a single latest base file per file group");
        assertTrue(latestSlices.size() <= metadataEnabledPartitionTypes.get(partition).getFileGroupCount(), "Should have a single latest file slice per file group");
        assertTrue(latestSlices.size() <= (numFileVersions * metadataEnabledPartitionTypes.get(partition).getFileGroupCount()), "Should limit file slice to " + numFileVersions + " per file group, but was " + latestSlices.size());
    });
    LOG.info("Validation time=" + timer.endTimer());
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTableMetadata(org.apache.hudi.metadata.HoodieTableMetadata) FileSystemBackedTableMetadata(org.apache.hudi.metadata.FileSystemBackedTableMetadata) HoodieBackedTableMetadataWriter(org.apache.hudi.metadata.HoodieBackedTableMetadataWriter) MetadataPartitionType(org.apache.hudi.metadata.MetadataPartitionType) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) FSUtils(org.apache.hudi.common.fs.FSUtils) FileStatus(org.apache.hadoop.fs.FileStatus) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException)
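
The heart of this validation is comparing a direct file-system listing against the listing served by the metadata table. A minimal, hedged sketch of that comparison (assuming 0.10.x-era metadata APIs; the spillable-map path is an arbitrary placeholder):

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.metadata.FileSystemBackedTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadata;

public class PartitionListingCheckSketch {
    // Illustrative helper: true when the metadata table and the file system
    // agree on the set of partition paths.
    public static boolean partitionsMatch(HoodieEngineContext engineContext,
                                          SerializableConfiguration conf,
                                          String basePath) throws IOException {
        // Ground truth: list partitions directly from the file system.
        FileSystemBackedTableMetadata fsMetadata =
                new FileSystemBackedTableMetadata(engineContext, conf, basePath, false);
        List<String> fsPartitions = fsMetadata.getAllPartitionPaths();
        // Listing served out of the metadata table ("/tmp" is a placeholder spill path).
        HoodieTableMetadata tableMetadata = HoodieTableMetadata.create(
                engineContext, HoodieMetadataConfig.newBuilder().enable(true).build(),
                basePath, "/tmp");
        List<String> metadataPartitions = tableMetadata.getAllPartitionPaths();
        Collections.sort(fsPartitions);
        Collections.sort(metadataPartitions);
        return fsPartitions.equals(metadataPartitions);
    }
}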

Example 75 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestHoodieBackedMetadata, method testMetadataRecordKeyExcludeFromPayload:

/**
 * 1. Verify the metadata table record key deduplication feature. When record key
 * deduplication is enabled, verify that the metadata record payload on disk has an
 * empty key; otherwise, verify that the key is valid.
 * 2. Verify that populating meta fields works irrespective of the record key deduplication config.
 * 3. Verify that table services like compaction benefit from the record key deduplication feature.
 */
@ParameterizedTest
@MethodSource("testMetadataRecordKeyExcludeFromPayloadArgs")
public void testMetadataRecordKeyExcludeFromPayload(final HoodieTableType tableType, final boolean enableMetaFields) throws Exception {
    initPath();
    writeConfig = getWriteConfigBuilder(true, true, false).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).withPopulateMetaFields(enableMetaFields).withMaxNumDeltaCommitsBeforeCompaction(3).build()).build();
    init(tableType, writeConfig);
    // 2nd commit
    doWriteOperation(testTable, "0000001", INSERT);
    final HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePath).build();
    HoodieWriteConfig metadataTableWriteConfig = getMetadataWriteConfig(writeConfig);
    metadataMetaClient.reloadActiveTimeline();
    final HoodieTable table = HoodieSparkTable.create(metadataTableWriteConfig, context, metadataMetaClient);
    // Compaction has not yet kicked in. Verify all the log files
    // for the metadata records persisted on disk as per the config.
    assertDoesNotThrow(() -> {
        verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "0000001", enableMetaFields);
    }, "Metadata table should have valid log files!");
    // Verify no base file created yet.
    assertThrows(IllegalStateException.class, () -> {
        verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table, enableMetaFields);
    }, "Metadata table should not have a base file yet!");
    // 2 more commits
    doWriteOperation(testTable, "0000002", UPSERT);
    doWriteOperation(testTable, "0000004", UPSERT);
    // Compaction should be triggered by now. Let's verify the log files
    // if any for the metadata records persisted on disk as per the config.
    assertDoesNotThrow(() -> {
        verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "0000002", enableMetaFields);
    }, "Metadata table should have valid log files!");
    // Verify the base file created by the just completed compaction.
    assertDoesNotThrow(() -> {
        verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table, enableMetaFields);
    }, "Metadata table should have a valid base file!");
    // 2 more commits to trigger one more compaction, along with a clean
    doWriteOperation(testTable, "0000005", UPSERT);
    doClean(testTable, "0000006", Arrays.asList("0000004"));
    doWriteOperation(testTable, "0000007", UPSERT);
    assertDoesNotThrow(() -> {
        verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "7", enableMetaFields);
    }, "Metadata table should have valid log files!");
    assertDoesNotThrow(() -> {
        verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table, enableMetaFields);
    }, "Metadata table should have a valid base file!");
    validateMetadata(testTable);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)
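
For reference, the configuration knobs this test exercises look roughly like the following hedged sketch (builder methods taken from the snippet above; the base path is a placeholder):

import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.config.HoodieWriteConfig;

public class MetadataDedupConfigSketch {
    public static HoodieWriteConfig buildConfig(String basePath, boolean populateMetaFields) {
        return HoodieWriteConfig.newBuilder()
                .withPath(basePath)
                .withMetadataConfig(HoodieMetadataConfig.newBuilder()
                        .enable(true)
                        // When meta fields are not populated, the record key is
                        // excluded from the payload on disk (key deduplication).
                        .withPopulateMetaFields(populateMetaFields)
                        // Compact the metadata table every 3 delta commits.
                        .withMaxNumDeltaCommitsBeforeCompaction(3)
                        .build())
                .build();
    }
}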

Aggregations

HoodieTable (org.apache.hudi.table.HoodieTable)133 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)105 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)76 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)75 List (java.util.List)64 Test (org.junit.jupiter.api.Test)63 ArrayList (java.util.ArrayList)58 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)57 WriteStatus (org.apache.hudi.client.WriteStatus)49 Path (org.apache.hadoop.fs.Path)48 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)46 Option (org.apache.hudi.common.util.Option)46 IOException (java.io.IOException)44 Map (java.util.Map)44 Collectors (java.util.stream.Collectors)44 SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient)43 HashMap (java.util.HashMap)41 Pair (org.apache.hudi.common.util.collection.Pair)39 HoodieKey (org.apache.hudi.common.model.HoodieKey)38 HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable)38