
Example 36 with Option

Use of org.apache.hudi.common.util.Option in the Apache Hudi project.

The class BaseRestoreActionExecutor, method getInstantsToRollback:

private List<HoodieInstant> getInstantsToRollback(HoodieInstant restoreInstant) throws IOException {
    List<HoodieInstant> instantsToRollback = new ArrayList<>();
    HoodieRestorePlan restorePlan = RestoreUtils.getRestorePlan(table.getMetaClient(), restoreInstant);
    for (HoodieInstantInfo instantInfo : restorePlan.getInstantsToRollback()) {
        // If the restore crashed mid-way, some commits may already be rolled back while others are not,
        // so we skip any commits that were fully rolled back in a previous attempt.
        Option<HoodieInstant> rollbackInstantOpt = table.getActiveTimeline().getWriteTimeline()
            .filter(instant -> instant.getTimestamp().equals(instantInfo.getCommitTime())
                && instant.getAction().equals(instantInfo.getAction()))
            .firstInstant();
        if (rollbackInstantOpt.isPresent()) {
            instantsToRollback.add(rollbackInstantOpt.get());
        } else {
            LOG.warn("Ignoring already rolledback instant " + instantInfo.toString());
        }
    }
    return instantsToRollback;
}
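The method works because firstInstant() returns Hudi's Option rather than a nullable HoodieInstant, so the already-rolled-back case is an explicit branch. Below is a minimal, self-contained sketch of the same present-or-skip pattern; the String timeline and the firstMatching helper are illustrative stand-ins, not Hudi APIs.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hudi.common.util.Option;

public class PresentOrSkipExample {

    // Stand-in for timeline.filter(...).firstInstant(): first element with the given prefix.
    static Option<String> firstMatching(List<String> timeline, String prefix) {
        return timeline.stream()
            .filter(entry -> entry.startsWith(prefix))
            .findFirst()
            .map(Option::of)
            .orElse(Option.empty());
    }

    public static void main(String[] args) {
        List<String> timeline = Arrays.asList("001.commit", "002.deltacommit");
        List<String> instantsToRollback = new ArrayList<>();
        for (String requested : Arrays.asList("001", "003")) {
            Option<String> match = firstMatching(timeline, requested);
            if (match.isPresent()) {
                // Still on the timeline: needs to be rolled back.
                instantsToRollback.add(match.get());
            } else {
                // Already rolled back in a previous attempt: skip it.
                System.out.println("Ignoring already rolled back instant " + requested);
            }
        }
        System.out.println(instantsToRollback); // [001.commit]
    }
}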

Example 37 with Option

Use of org.apache.hudi.common.util.Option in the Apache Hudi project.

The class BaseRestoreActionExecutor, method execute:

@Override
public HoodieRestoreMetadata execute() {
    HoodieTimer restoreTimer = new HoodieTimer();
    restoreTimer.startTimer();
    Option<HoodieInstant> restoreInstant = table.getRestoreTimeline()
        .filterInflightsAndRequested()
        .filter(instant -> instant.getTimestamp().equals(instantTime))
        .firstInstant();
    if (!restoreInstant.isPresent()) {
        throw new HoodieRollbackException("No pending restore instants found to execute restore");
    }
    try {
        List<HoodieInstant> instantsToRollback = getInstantsToRollback(restoreInstant.get());
        ValidationUtils.checkArgument(restoreInstant.get().getState().equals(HoodieInstant.State.REQUESTED)
            || restoreInstant.get().getState().equals(HoodieInstant.State.INFLIGHT));
        Map<String, List<HoodieRollbackMetadata>> instantToMetadata = new HashMap<>();
        if (restoreInstant.get().isRequested()) {
            table.getActiveTimeline().transitionRestoreRequestedToInflight(restoreInstant.get());
        }
        instantsToRollback.forEach(instant -> {
            instantToMetadata.put(instant.getTimestamp(), Collections.singletonList(rollbackInstant(instant)));
            LOG.info("Deleted instant " + instant);
        });
        return finishRestore(instantToMetadata, instantsToRollback, restoreTimer.endTimer());
    } catch (IOException io) {
        throw new HoodieRestoreException("Unable to restore instant " + restoreInstant.get(), io);
    }
}
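The Option here acts as a guard: the restore only proceeds if a pending restore instant is actually present, otherwise the executor fails fast. A small sketch of that require-present-or-throw idiom with Hudi's Option; the getOrThrow helper and the instant names are hypothetical, not part of Hudi.

import org.apache.hudi.common.util.Option;

public class RequirePresentExample {

    // Hypothetical helper: unwrap an Option or fail with a descriptive message,
    // mirroring the "No pending restore instants found" guard above.
    static <T> T getOrThrow(Option<T> opt, String message) {
        if (!opt.isPresent()) {
            throw new IllegalStateException(message);
        }
        return opt.get();
    }

    public static void main(String[] args) {
        Option<String> pending = Option.of("20220101000000.restore.requested");
        System.out.println("Executing " + getOrThrow(pending, "No pending restore instants found"));

        try {
            getOrThrow(Option.empty(), "No pending restore instants found");
        } catch (IllegalStateException e) {
            System.out.println("Guard fired: " + e.getMessage());
        }
    }
}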

Example 38 with Option

Use of org.apache.hudi.common.util.Option in the Apache Hudi project.

The class BaseRollbackActionExecutor, method execute:

@Override
public HoodieRollbackMetadata execute() {
    table.getMetaClient().reloadActiveTimeline();
    Option<HoodieInstant> rollbackInstant = table.getRollbackTimeline()
        .filterInflightsAndRequested()
        .filter(instant -> instant.getTimestamp().equals(instantTime))
        .firstInstant();
    if (!rollbackInstant.isPresent()) {
        throw new HoodieRollbackException("No pending rollback instants found to execute rollback");
    }
    try {
        HoodieRollbackPlan rollbackPlan = RollbackUtils.getRollbackPlan(table.getMetaClient(), rollbackInstant.get());
        return runRollback(table, rollbackInstant.get(), rollbackPlan);
    } catch (IOException e) {
        throw new HoodieIOException("Failed to fetch rollback plan for commit " + instantTime, e);
    }
}
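Beyond isPresent()/get(), Hudi's Option also exposes map and orElse (mirroring java.util.Optional), which can simplify lookups like the one above when absence has a sensible default instead of an exception. A short sketch; the instant name is made up:

import org.apache.hudi.common.util.Option;

public class OptionMapExample {

    // Derive the timestamp portion of an instant file name, or a marker when absent.
    static String timestampOrNone(Option<String> instant) {
        return instant
            .map(name -> name.substring(0, name.indexOf('.')))
            .orElse("NONE");
    }

    public static void main(String[] args) {
        System.out.println(timestampOrNone(Option.of("20220102120000.rollback.inflight"))); // 20220102120000
        System.out.println(timestampOrNone(Option.empty()));                                // NONE
    }
}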

Example 39 with Option

Use of org.apache.hudi.common.util.Option in the Apache Hudi project.

The class PartitionAwareClusteringPlanStrategy, method generateClusteringPlan:

@Override
public Option<HoodieClusteringPlan> generateClusteringPlan() {
    HoodieTableMetaClient metaClient = getHoodieTable().getMetaClient();
    LOG.info("Scheduling clustering for " + metaClient.getBasePath());
    HoodieWriteConfig config = getWriteConfig();
    List<String> partitionPaths = FSUtils.getAllPartitionPaths(getEngineContext(), config.getMetadataConfig(), metaClient.getBasePath());
    // get matched partitions if set
    partitionPaths = getMatchedPartitions(config, partitionPaths);
    // filter the partition paths if needed to reduce list status
    partitionPaths = filterPartitionPaths(partitionPaths);
    if (partitionPaths.isEmpty()) {
        // In case no partitions could be picked, return no clustering plan
        return Option.empty();
    }
    List<HoodieClusteringGroup> clusteringGroups = getEngineContext().flatMap(partitionPaths, partitionPath -> {
        List<FileSlice> fileSlicesEligible = getFileSlicesEligibleForClustering(partitionPath).collect(Collectors.toList());
        return buildClusteringGroupsForPartition(partitionPath, fileSlicesEligible).limit(getWriteConfig().getClusteringMaxNumGroups());
    }, partitionPaths.size()).stream()
        .limit(getWriteConfig().getClusteringMaxNumGroups())
        .collect(Collectors.toList());
    if (clusteringGroups.isEmpty()) {
        LOG.info("No data available to cluster");
        return Option.empty();
    }
    HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
        .setStrategyClassName(getWriteConfig().getClusteringExecutionStrategyClass())
        .setStrategyParams(getStrategyParams())
        .build();
    return Option.of(HoodieClusteringPlan.newBuilder()
        .setStrategy(strategy)
        .setInputGroups(clusteringGroups)
        .setExtraMetadata(getExtraMetadata())
        .setVersion(getPlanVersion())
        .setPreserveHoodieMetadata(getWriteConfig().isPreserveHoodieCommitMetadataForClustering())
        .build());
}
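generateClusteringPlan() uses Option as its return type so that callers can tell "nothing to schedule" (Option.empty()) apart from a real plan (Option.of(plan)) without null checks. A minimal sketch of the same return-type pattern; the planWork method and its inputs are hypothetical:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.hudi.common.util.Option;

public class OptionalPlanExample {

    // Hypothetical planner: an empty Option means "no work to schedule".
    static Option<List<String>> planWork(List<String> partitionPaths) {
        if (partitionPaths.isEmpty()) {
            return Option.empty();
        }
        return Option.of(partitionPaths);
    }

    public static void main(String[] args) {
        System.out.println(planWork(Arrays.asList("2022/01/01", "2022/01/02")).isPresent()); // true
        System.out.println(planWork(Collections.emptyList()).isPresent());                   // false
    }
}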

Example 40 with Option

Use of org.apache.hudi.common.util.Option in the Apache Hudi project.

The class TestHoodieIndex, method testSimpleTagLocationAndUpdate:

@ParameterizedTest
@MethodSource("indexTypeParams")
public void testSimpleTagLocationAndUpdate(IndexType indexType, boolean populateMetaFields, boolean enableMetadataIndex) throws Exception {
    setUp(indexType, populateMetaFields, enableMetadataIndex);
    String newCommitTime = "001";
    int totalRecords = 10 + random.nextInt(20);
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, totalRecords);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    // Test tagLocation without any entries in index
    JavaRDD<HoodieRecord> javaRDD = tagLocation(index, writeRecords, hoodieTable);
    assert (javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size() == 0);
    // Insert totalRecords records
    writeClient.startCommitWithTime(newCommitTime);
    JavaRDD<WriteStatus> writeStatues = writeClient.upsert(writeRecords, newCommitTime);
    Assertions.assertNoWriteErrors(writeStatues.collect());
    // Now tagLocation for these records, index should not tag them since it was a failed
    // commit
    javaRDD = tagLocation(index, writeRecords, hoodieTable);
    assert (javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size() == 0);
    // Now commit this & update location of records inserted and validate no errors
    writeClient.commit(newCommitTime, writeStatues);
    // Now tagLocation for these records, index should tag them correctly
    metaClient = HoodieTableMetaClient.reload(metaClient);
    hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    javaRDD = tagLocation(index, writeRecords, hoodieTable);
    Map<String, String> recordKeyToPartitionPathMap = new HashMap<>();
    List<HoodieRecord> hoodieRecords = writeRecords.collect();
    hoodieRecords.forEach(entry -> recordKeyToPartitionPathMap.put(entry.getRecordKey(), entry.getPartitionPath()));
    assertEquals(totalRecords, javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size());
    assertEquals(totalRecords, javaRDD.map(record -> record.getKey().getRecordKey()).distinct().count());
    assertEquals(totalRecords, javaRDD.filter(record -> (record.getCurrentLocation() != null
        && record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count());
    javaRDD.foreach(entry -> assertEquals(recordKeyToPartitionPathMap.get(entry.getRecordKey()), entry.getPartitionPath(), "PartitionPath mismatch"));
    JavaRDD<HoodieKey> hoodieKeyJavaRDD = writeRecords.map(entry -> entry.getKey());
    JavaPairRDD<HoodieKey, Option<Pair<String, String>>> recordLocations = getRecordLocations(hoodieKeyJavaRDD, hoodieTable);
    List<HoodieKey> hoodieKeys = hoodieKeyJavaRDD.collect();
    assertEquals(totalRecords, recordLocations.collect().size());
    assertEquals(totalRecords, recordLocations.map(record -> record._1).distinct().count());
    recordLocations.foreach(entry -> assertTrue(hoodieKeys.contains(entry._1), "Missing HoodieKey"));
    recordLocations.foreach(entry -> assertEquals(recordKeyToPartitionPathMap.get(entry._1.getRecordKey()), entry._1.getPartitionPath(), "PartitionPath mismatch"));
}
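In this test, getRecordLocations returns each key paired with an Option<Pair<String, String>>: present when the index knows the record's partition path and file id, empty when it does not. The sketch below shows the same shape with a plain Map instead of a JavaPairRDD; the keys and ids are made up, and it assumes Hudi's Pair exposes of/getLeft/getRight as in the versions I have checked.

import java.util.HashMap;
import java.util.Map;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;

public class RecordLocationExample {

    public static void main(String[] args) {
        // key -> Option of (partitionPath, fileId); Option.empty() means "location unknown".
        Map<String, Option<Pair<String, String>>> locations = new HashMap<>();
        locations.put("key-1", Option.of(Pair.of("2022/01/01", "file-abc")));
        locations.put("key-2", Option.empty());

        locations.forEach((key, location) -> {
            if (location.isPresent()) {
                System.out.println(key + " -> " + location.get().getLeft() + "/" + location.get().getRight());
            } else {
                System.out.println(key + " -> location unknown");
            }
        });
    }
}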

Aggregations

Option (org.apache.hudi.common.util.Option): 105
List (java.util.List): 84
IOException (java.io.IOException): 70
Collectors (java.util.stream.Collectors): 69
Map (java.util.Map): 67
ArrayList (java.util.ArrayList): 61
Path (org.apache.hadoop.fs.Path): 59
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 59
Pair (org.apache.hudi.common.util.collection.Pair): 59
HashMap (java.util.HashMap): 58
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 58
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 56
LogManager (org.apache.log4j.LogManager): 54
Logger (org.apache.log4j.Logger): 54
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 53
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 46
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 44
Arrays (java.util.Arrays): 43
FSUtils (org.apache.hudi.common.fs.FSUtils): 43
Collections (java.util.Collections): 39