
Example 1 with HoodieRequestedReplaceMetadata

Use of org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata in project hudi by apache.

From class TestMetadataConversionUtils, method createReplace:

private void createReplace(String instantTime, WriteOperationType writeOperationType, Boolean isClustering) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create a replace instant that marks fileId2 as replaced (deleted)
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId(fileId1);
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // some cases requestedReplaceMetadata will be null
    // e.g. insert_overwrite_table or insert_overwrite without clustering
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = null;
    HoodieCommitMetadata inflightReplaceMetadata = null;
    if (isClustering) {
        requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
        requestedReplaceMetadata.setOperationType(writeOperationType.name());
        HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
        HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
        HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
        clusteringGroup.setSlices(Arrays.asList(sliceInfo));
        clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
        requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
        requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    } else {
        // inflightReplaceMetadata will be null in clustering but not null
        // in insert_overwrite or insert_overwrite_table
        inflightReplaceMetadata = new HoodieCommitMetadata();
        inflightReplaceMetadata.setOperationType(writeOperationType);
        inflightReplaceMetadata.setCompacted(false);
    }
    HoodieTestTable.of(metaClient)
        .addReplaceCommit(instantTime, Option.ofNullable(requestedReplaceMetadata),
            Option.ofNullable(inflightReplaceMetadata), replaceMetadata)
        .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used: HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), List (java.util.List), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan)
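
For context, a minimal sketch of how this helper might be called from a test in the same class; the instant times below are illustrative assumptions, not values from the source:

// hypothetical callers, one per branch of the isClustering flag
createReplace("001", WriteOperationType.CLUSTER, true);            // requested metadata carries a clustering plan
createReplace("002", WriteOperationType.INSERT_OVERWRITE, false);  // only inflight commit metadata is written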

Example 2 with HoodieRequestedReplaceMetadata

Use of org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata in project hudi by apache.

From class TestSimpleConcurrentFileWritesConflictResolutionStrategy, method createReplace:

private void createReplace(String instantTime, WriteOperationType writeOperationType) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create a replace instant that marks fileId2 as replaced (deleted)
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId(fileId1);
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // build the requested replace metadata with a clustering plan targeting fileId1
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
    HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
    HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
    HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
    sliceInfo.setFileId(fileId1);
    sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    clusteringGroup.setSlices(Arrays.asList(sliceInfo));
    clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
    requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
    requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    HoodieTestTable.of(metaClient)
        .addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.empty(), replaceMetadata)
        .withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used: HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), HashMap (java.util.HashMap), List (java.util.List), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan)
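
The fileId and partitionPath set on HoodieSliceInfo above are what let a conflict resolution strategy discover which file groups a pending clustering plan touches. A minimal sketch, not from the source and assuming java.util.Set and java.util.stream.Collectors are imported, of reading those file ids back out of the plan built above:

// collect every file id targeted by the clustering plan's input groups
Set<String> pendingFileIds = clusteringPlan.getInputGroups().stream()
    .flatMap(group -> group.getSlices().stream())
    .map(HoodieSliceInfo::getFileId)
    .collect(Collectors.toSet());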

Example 3 with HoodieRequestedReplaceMetadata

Use of org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata in project hudi by apache.

From class TestCleaner, method generateReplaceCommitMetadata:

private Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> generateReplaceCommitMetadata(String instantTime, String partition, String replacedFileId, String newFileId) {
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.toString());
    requestedReplaceMetadata.setVersion(1);
    HoodieSliceInfo sliceInfo = HoodieSliceInfo.newBuilder().setFileId(replacedFileId).build();
    List<HoodieClusteringGroup> clusteringGroups = new ArrayList<>();
    clusteringGroups.add(HoodieClusteringGroup.newBuilder()
        .setVersion(1)
        .setNumOutputFileGroups(1)
        .setMetrics(Collections.emptyMap())
        .setSlices(Collections.singletonList(sliceInfo))
        .build());
    requestedReplaceMetadata.setExtraMetadata(Collections.emptyMap());
    requestedReplaceMetadata.setClusteringPlan(HoodieClusteringPlan.newBuilder()
        .setVersion(1)
        .setExtraMetadata(Collections.emptyMap())
        .setStrategy(HoodieClusteringStrategy.newBuilder().setStrategyClassName("").setVersion(1).build())
        .setInputGroups(clusteringGroups)
        .build());
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    replaceMetadata.addReplaceFileId(partition, replacedFileId);
    replaceMetadata.setOperationType(WriteOperationType.CLUSTER);
    if (!StringUtils.isNullOrEmpty(newFileId)) {
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partition);
        writeStat.setPath(partition + "/" + getBaseFilename(instantTime, newFileId));
        writeStat.setFileId(newFileId);
        writeStat.setTotalWriteBytes(1);
        writeStat.setFileSizeInBytes(1);
        replaceMetadata.addWriteStat(partition, writeStat);
    }
    return Pair.of(requestedReplaceMetadata, replaceMetadata);
}
Also used: HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), ArrayList (java.util.ArrayList), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)
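
A minimal usage sketch; the instant time, partition path, and file ids below are hypothetical:

// generate both halves of the replace commit metadata for a single replaced file group
Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> metadata =
    generateReplaceCommitMetadata("001", "2020/01/01", "replaced-file-id", "new-file-id");
HoodieRequestedReplaceMetadata requested = metadata.getLeft();
HoodieReplaceCommitMetadata committed = metadata.getRight();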

Example 4 with HoodieRequestedReplaceMetadata

Use of org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata in project hudi by apache.

From class TestUpsertPartitioner, method testUpsertPartitionerWithSmallFileHandlingAndClusteringPlan:

@Test
public void testUpsertPartitionerWithSmallFileHandlingAndClusteringPlan() throws Exception {
    final String testPartitionPath = DEFAULT_PARTITION_PATHS[0];
    // create a HoodieWriteConfig with both inline and async clustering disabled
    HoodieWriteConfig config = makeHoodieClientConfigBuilder()
        .withCompactionConfig(HoodieCompactionConfig.newBuilder().build())
        .withClusteringConfig(HoodieClusteringConfig.newBuilder()
            .withInlineClustering(false)
            .withAsyncClustering(false)
            .build())
        .withStorageConfig(HoodieStorageConfig.newBuilder()
            .hfileMaxFileSize(1000 * 1024)
            .parquetMaxFileSize(1000 * 1024)
            .build())
        .build();
    // create a file slice at instant 001 and build a clustering plan that includes it
    HoodieClusteringPlan clusteringPlan = ClusteringTestUtils.createClusteringPlan(metaClient, "001", "1");
    // create requested replace commit
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
        .setClusteringPlan(clusteringPlan)
        .setOperationType(WriteOperationType.CLUSTER.name())
        .build();
    FileCreateUtils.createRequestedReplaceCommit(basePath, "002", Option.of(requestedReplaceMetadata));
    // create file slice 003
    FileCreateUtils.createBaseFile(basePath, testPartitionPath, "003", "3", 1);
    FileCreateUtils.createCommit(basePath, "003");
    metaClient = HoodieTableMetaClient.reload(metaClient);
    // generate new data to be ingested
    HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[] { testPartitionPath });
    List<HoodieRecord> insertRecords = dataGenerator.generateInserts("004", 100);
    WorkloadProfile profile = new WorkloadProfile(buildProfile(jsc.parallelize(insertRecords)));
    HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient);
    // create UpsertPartitioner
    UpsertPartitioner partitioner = new UpsertPartitioner(profile, context, table, config);
    // at this point there are file slices from instants 001 and 003, and slice 001 belongs
    // to the pending clustering plan, so only the 003 file slice is eligible for ingestion
    assertEquals(1, partitioner.smallFiles.size(), "Should have 1 small file to be ingested.");
}
Also used: WorkloadProfile (org.apache.hudi.table.WorkloadProfile), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan), HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable), Test (org.junit.jupiter.api.Test)
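
Note the builder-style construction of HoodieRequestedReplaceMetadata here, versus the setter style in Examples 1 and 2. A roughly equivalent setter-style sketch follows; this is an assumption about the generated Avro API, not code from the source, and since the builder also fills schema defaults, version is assigned explicitly:

// setter-style counterpart of the builder call above (a sketch)
HoodieRequestedReplaceMetadata viaSetters = new HoodieRequestedReplaceMetadata();
viaSetters.setClusteringPlan(clusteringPlan);
viaSetters.setOperationType(WriteOperationType.CLUSTER.name());
viaSetters.setVersion(TimelineLayoutVersion.CURR_VERSION);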

Example 5 with HoodieRequestedReplaceMetadata

Use of org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata in project hudi by apache.

From class HoodieTestReplaceCommitMetadataGenerator, method createReplaceCommitFileWithMetadata:

public static void createReplaceCommitFileWithMetadata(String basePath, String commitTime, Option<Integer> writes, Option<Integer> updates, HoodieTableMetaClient metaclient) throws Exception {
    HoodieReplaceCommitMetadata replaceMetadata = generateReplaceCommitMetadata(basePath, commitTime,
        UUID.randomUUID().toString(), UUID.randomUUID().toString(), writes, updates);
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = getHoodieRequestedReplaceMetadata();
    HoodieTestTable.of(metaclient).addReplaceCommit(commitTime, Option.ofNullable(requestedReplaceMetadata), Option.empty(), replaceMetadata);
}
Also used: HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)
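
A hypothetical invocation; the commit time and the write/update counts below are illustrative:

// write a replace commit at instant "001" with 10 writes and 5 updates recorded
HoodieTestReplaceCommitMetadataGenerator.createReplaceCommitFileWithMetadata(
    basePath, "001", Option.of(10), Option.of(5), metaClient);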

Aggregations

HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata): 17
HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan): 9
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 9
HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata): 7
IOException (java.io.IOException): 6
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 6
ArrayList (java.util.ArrayList): 5
HashMap (java.util.HashMap): 5
List (java.util.List): 5
HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat): 5
Test (org.junit.jupiter.api.Test): 5
HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup): 4
HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo): 4
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 4
Collections (java.util.Collections): 3
Set (java.util.Set): 3
Collectors (java.util.stream.Collectors): 3
HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan): 3
WriteOperationType (org.apache.hudi.common.model.WriteOperationType): 3
Option (org.apache.hudi.common.util.Option): 3