Search in sources :

Example 1 with HoodieRollbackStat

use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

the class TestMarkerBasedRollbackStrategy method testCopyOnWriteRollbackWithTestTable.

/**
 * Rolls back inflight commit "001" through {@link MarkerBasedRollbackStrategy} and
 * {@link BaseRollbackHelper}, then checks the remaining files and the reported delete stats.
 */
@Test
public void testCopyOnWriteRollbackWithTestTable() throws Exception {
    // given: base files written by commits "000"/"001" and marker files registered for "001"
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    String f0 = testTable.addRequestedCommit("000")
        .getFileIdsWithBaseFilesInPartitions("partA")
        .get("partA");
    String f1 = testTable.addCommit("001")
        .withBaseFilesInPartition("partA", f0)
        .getFileIdsWithBaseFilesInPartitions("partB")
        .get("partB");
    // "f2" only gets a marker below; no base file is added for it in this test
    String f2 = "f2";
    testTable.forCommit("001")
        .withMarkerFile("partA", f0, IOType.MERGE)
        .withMarkerFile("partB", f1, IOType.CREATE)
        .withMarkerFile("partA", f2, IOType.CREATE);

    // when: rollback requests are derived from the markers and then executed
    HoodieTable hoodieTable = HoodieSparkTable.create(getConfig(), context, metaClient);
    MarkerBasedRollbackStrategy rollbackStrategy = new MarkerBasedRollbackStrategy(hoodieTable, context, getConfig(), "002");
    HoodieInstant inflightInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001");
    List<HoodieRollbackRequest> rollbackRequests = rollbackStrategy.getRollbackRequests(inflightInstant);
    BaseRollbackHelper rollbackHelper = new BaseRollbackHelper(hoodieTable.getMetaClient(), getConfig());
    List<HoodieRollbackStat> stats = rollbackHelper.performRollback(context, inflightInstant, rollbackRequests);

    // then: ensure files are deleted correctly, non-existent files reported as failed deletes
    assertEquals(2, stats.size());
    FileStatus[] partAFiles = testTable.listAllFilesInPartition("partA");
    FileStatus[] partBFiles = testTable.listAllFilesInPartition("partB");
    assertEquals(0, partBFiles.length);
    assertEquals(1, partAFiles.length);
    int totalSuccessfulDeletes = stats.stream().mapToInt(stat -> stat.getSuccessDeleteFiles().size()).sum();
    int totalFailedDeletes = stats.stream().mapToInt(stat -> stat.getFailedDeleteFiles().size()).sum();
    assertEquals(2, totalSuccessfulDeletes);
    assertEquals(1, totalFailedDeletes);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BaseRollbackHelper(org.apache.hudi.table.action.rollback.BaseRollbackHelper) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieRollbackRequest(org.apache.hudi.avro.model.HoodieRollbackRequest) MarkerBasedRollbackStrategy(org.apache.hudi.table.action.rollback.MarkerBasedRollbackStrategy) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 2 with HoodieRollbackStat

use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

the class TestMarkerBasedRollbackStrategy method testCopyOnWriteRollback.

/**
 * Builds a write config with marker-based rollback enabled, delegates the scenario to
 * {@code testRun}, and verifies the rollback stats it returns.
 *
 * @param useFileListingMetadata value passed to the metadata config's {@code enable(...)} and to {@code testRun}.
 */
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testCopyOnWriteRollback(boolean useFileListingMetadata) throws Exception {
    HoodieWriteConfig writeConfig = getConfigBuilder()
        .withRollbackUsingMarkers(true)
        .withAutoCommit(false)
        .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(useFileListingMetadata).build())
        .withPath(basePath)
        .build();
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    try (SparkRDDWriteClient writeClient = new SparkRDDWriteClient(engineContext, writeConfig)) {
        // rollback 2nd commit and ensure stats reflect the info.
        List<HoodieRollbackStat> rollbackStats = testRun(useFileListingMetadata, writeConfig, writeClient);
        assertEquals(3, rollbackStats.size());
        // every stat must report exactly one deleted file, no failures and no command blocks
        rollbackStats.forEach(rollbackStat -> {
            assertEquals(1, rollbackStat.getSuccessDeleteFiles().size());
            assertEquals(0, rollbackStat.getFailedDeleteFiles().size());
            assertEquals(0, rollbackStat.getCommandBlocksCount().size());
        });
    }
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 3 with HoodieRollbackStat

use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

the class TestCopyOnWriteRollbackActionExecutor method testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile.

/**
 * Rolls back commit "002" with marker-based rollback disabled (file-listing mode) and verifies
 * the per-partition rollback stats as well as the files left in the test table.
 */
@Test
public void testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile() throws Exception {
    final String p1 = "2015/03/16";
    final String p2 = "2015/03/17";
    final String p3 = "2016/03/15";
    // Let's create some commit files and base files
    HoodieTestTable testTable = HoodieTestTable.of(metaClient).withPartitionMetaFiles(p1, p2, p3).addCommit("001").withBaseFilesInPartition(p1, "id11").withBaseFilesInPartition(p2, "id12").withLogFile(p1, "id11", 3).addCommit("002").withBaseFilesInPartition(p1, "id21").withBaseFilesInPartition(p2, "id22");
    HoodieWriteConfig writeConfig = getConfigBuilder().withRollbackUsingMarkers(false).build();
    HoodieTable table = this.getHoodieTable(metaClient, writeConfig);
    HoodieInstant needRollBackInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "002");
    // execute CopyOnWriteRollbackActionExecutor with filelisting mode
    BaseRollbackPlanActionExecutor copyOnWriteRollbackPlanActionExecutor = new BaseRollbackPlanActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, false, table.getConfig().shouldRollbackUsingMarkers());
    HoodieRollbackPlan rollbackPlan = (HoodieRollbackPlan) copyOnWriteRollbackPlanActionExecutor.execute().get();
    CopyOnWriteRollbackActionExecutor copyOnWriteRollbackActionExecutor = new CopyOnWriteRollbackActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, true, false);
    List<HoodieRollbackStat> hoodieRollbackStats = copyOnWriteRollbackActionExecutor.executeRollback(rollbackPlan);
    // assert hoodieRollbackStats: one stat per partition (expected value goes first so failure messages read correctly)
    assertEquals(3, hoodieRollbackStats.size());
    for (HoodieRollbackStat stat : hoodieRollbackStats) {
        switch(stat.getPartitionPath()) {
            case p1:
                // only the base file written by commit "002" is expected to be deleted
                assertEquals(1, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.emptyMap(), stat.getCommandBlocksCount());
                assertEquals(testTable.forCommit("002").getBaseFilePath(p1, "id21").toString(), this.fs.getScheme() + ":" + stat.getSuccessDeleteFiles().get(0));
                break;
            case p2:
                assertEquals(1, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.emptyMap(), stat.getCommandBlocksCount());
                assertEquals(testTable.forCommit("002").getBaseFilePath(p2, "id22").toString(), this.fs.getScheme() + ":" + stat.getSuccessDeleteFiles().get(0));
                break;
            case p3:
                // no base files were added to p3, so nothing is deleted there
                assertEquals(0, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.emptyMap(), stat.getCommandBlocksCount());
                break;
            default:
                fail("Unexpected partition: " + stat.getPartitionPath());
        }
    }
    // commit "001" and its base files must be untouched
    assertTrue(testTable.inflightCommitExists("001"));
    assertTrue(testTable.commitExists("001"));
    assertTrue(testTable.baseFileExists(p1, "001", "id11"));
    assertTrue(testTable.baseFileExists(p2, "001", "id12"));
    // commit "002" and its base files must be gone
    assertFalse(testTable.inflightCommitExists("002"));
    assertFalse(testTable.commitExists("002"));
    assertFalse(testTable.baseFileExists(p1, "002", "id21"));
    assertFalse(testTable.baseFileExists(p2, "002", "id22"));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 4 with HoodieRollbackStat

use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

the class TestTimelineUtils method getRollbackMetadataInstance.

/**
 * Builds a {@link HoodieRollbackMetadata} describing the rollback of a single instant in one partition.
 *
 * @param basePath   not used by this method.
 * @param partition  partition path recorded in the rollback stat.
 * @param commitTs   timestamp of the rolled-back instant; also passed as the first argument to the metadata conversion.
 * @param count      number of deleted file names to generate ("file-1" .. "file-count").
 * @param actionType action of the rolled-back instant.
 * @return the metadata produced by {@code TimelineMetadataUtils.convertRollbackMetadata}.
 */
private HoodieRollbackMetadata getRollbackMetadataInstance(String basePath, String partition, String commitTs, int count, String actionType) {
    // names are 1-based: file-1, file-2, ..., file-<count>
    List<String> deletedFileNames = new ArrayList<>();
    for (int idx = 0; idx < count; idx++) {
        deletedFileNames.add("file-" + (idx + 1));
    }

    // a single stat: all generated files deleted successfully, no failures, no command blocks
    List<HoodieRollbackStat> statsForPartition = new ArrayList<>();
    statsForPartition.add(new HoodieRollbackStat(partition, deletedFileNames, Collections.emptyList(), Collections.emptyMap()));

    List<HoodieInstant> rolledBackInstants = new ArrayList<>();
    rolledBackInstants.add(new HoodieInstant(false, actionType, commitTs));

    return TimelineMetadataUtils.convertRollbackMetadata(commitTs, Option.empty(), rolledBackInstants, statsForPartition);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) ArrayList(java.util.ArrayList)

Example 5 with HoodieRollbackStat

use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

the class BaseRollbackHelper method maybeDeleteAndCollectStats.

/**
 * Deletes the requested files and collects rollback stats, or only collects stats when
 * {@code doDelete} is {@code false}.
 *
 * <p>Each rollback request is handled in one of three ways:
 * <ul>
 *   <li>it lists files to delete: they are handed to {@code deleteFiles} and one pair is emitted per returned stat;</li>
 *   <li>otherwise, it lists log blocks to delete: a log writer is opened for the request's file id and,
 *       when {@code doDelete} is set, a {@link HoodieCommandBlock} is appended;</li>
 *   <li>otherwise: a stat carrying only the partition path is emitted.</li>
 * </ul>
 *
 * @param context           instance of {@link HoodieEngineContext} to use.
 * @param instantToRollback {@link HoodieInstant} of interest for which deletion or collect stats is requested.
 * @param rollbackRequests  List of {@link SerializableHoodieRollbackRequest} to be operated on.
 * @param doDelete          {@code true} if deletion has to be done. {@code false} if only stats are to be collected w/o performing any deletes.
 * @param numPartitions     passed through as the last argument of {@code context.flatMap}; presumably the parallelism to use — confirm against {@link HoodieEngineContext}.
 * @return pairs of partition path and the stats collected with or w/o actual deletions.
 */
List<Pair<String, HoodieRollbackStat>> maybeDeleteAndCollectStats(HoodieEngineContext context, HoodieInstant instantToRollback, List<SerializableHoodieRollbackRequest> rollbackRequests, boolean doDelete, int numPartitions) {
    return context.flatMap(rollbackRequests, (SerializableFunction<SerializableHoodieRollbackRequest, Stream<Pair<String, HoodieRollbackStat>>>) rollbackRequest -> {
        List<String> filesToBeDeleted = rollbackRequest.getFilesToBeDeleted();
        if (!filesToBeDeleted.isEmpty()) {
            List<HoodieRollbackStat> rollbackStats = deleteFiles(metaClient, filesToBeDeleted, doDelete);
            List<Pair<String, HoodieRollbackStat>> partitionToRollbackStats = new ArrayList<>();
            rollbackStats.forEach(entry -> partitionToRollbackStats.add(Pair.of(entry.getPartitionPath(), entry)));
            return partitionToRollbackStats.stream();
        } else if (!rollbackRequest.getLogBlocksToBeDeleted().isEmpty()) {
            HoodieLogFormat.Writer writer = null;
            try {
                String fileId = rollbackRequest.getFileId();
                String latestBaseInstant = rollbackRequest.getLatestBaseInstant();
                writer = HoodieLogFormat.newWriterBuilder().onParentPath(FSUtils.getPartitionPath(metaClient.getBasePath(), rollbackRequest.getPartitionPath())).withFileId(fileId).overBaseCommit(latestBaseInstant).withFs(metaClient.getFs()).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
                // the writer is opened even in stats-only mode; the block is appended only when deleting
                if (doDelete) {
                    Map<HoodieLogBlock.HeaderMetadataType, String> header = generateHeader(instantToRollback.getTimestamp());
                    writer.appendBlock(new HoodieCommandBlock(header));
                }
            } catch (InterruptedException ie) {
                // restore the interrupt status so code further up the stack can still observe the interruption
                Thread.currentThread().interrupt();
                throw new HoodieRollbackException("Failed to rollback for instant " + instantToRollback, ie);
            } catch (IOException io) {
                throw new HoodieRollbackException("Failed to rollback for instant " + instantToRollback, io);
            } finally {
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (IOException io) {
                    throw new HoodieIOException("Error appending rollback block", io);
                }
            }
            // reaching this point means build() succeeded, so writer is non-null; the log file is reported with a block count of 1
            Map<FileStatus, Long> filesToNumBlocksRollback = Collections.singletonMap(metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()), 1L);
            return Collections.singletonList(Pair.of(rollbackRequest.getPartitionPath(), HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()).withRollbackBlockAppendResults(filesToNumBlocksRollback).build())).stream();
        } else {
            // nothing to delete and no log blocks: emit a stat that only names the partition
            return Collections.singletonList(Pair.of(rollbackRequest.getPartitionPath(), HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()).build())).stream();
        }
    }, numPartitions);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) PathFilter(org.apache.hadoop.fs.PathFilter) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) SerializableFunction(org.apache.hudi.common.function.SerializableFunction) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieRollbackRequest(org.apache.hudi.avro.model.HoodieRollbackRequest) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) 
HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) Stream(java.util.stream.Stream) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

HoodieRollbackStat (org.apache.hudi.common.HoodieRollbackStat)14 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)9 ArrayList (java.util.ArrayList)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 IOException (java.io.IOException)4 HashMap (java.util.HashMap)4 Test (org.junit.jupiter.api.Test)4 Collections (java.util.Collections)3 List (java.util.List)3 Map (java.util.Map)3 Collectors (java.util.stream.Collectors)3 HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata)3 HoodieTimer (org.apache.hudi.common.util.HoodieTimer)3 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)3 HoodieIOException (org.apache.hudi.exception.HoodieIOException)3 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)3 HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata)2 HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan)2 HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata)2 HoodieRestoreMetadata (org.apache.hudi.avro.model.HoodieRestoreMetadata)2