Search in sources :

Example 91 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TestCommitUtils method testReplaceMetadataCreation.

@Test
public void testReplaceMetadataCreation() {
    List<HoodieWriteStat> writeStats = new ArrayList<>();
    writeStats.add(createWriteStat("p1", "f1"));
    writeStats.add(createWriteStat("p2", "f2"));
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add("f0");
    partitionToReplaceFileIds.put("p1", replacedFileIds);
    HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeStats, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT, TRIP_SCHEMA, HoodieTimeline.REPLACE_COMMIT_ACTION);
    assertTrue(commitMetadata instanceof HoodieReplaceCommitMetadata);
    HoodieReplaceCommitMetadata replaceCommitMetadata = (HoodieReplaceCommitMetadata) commitMetadata;
    assertEquals(1, replaceCommitMetadata.getPartitionToReplaceFileIds().size());
    assertEquals("f0", replaceCommitMetadata.getPartitionToReplaceFileIds().get("p1").get(0));
    assertEquals(2, commitMetadata.getPartitionToWriteStats().size());
    assertEquals("f1", commitMetadata.getPartitionToWriteStats().get("p1").get(0).getFileId());
    assertEquals("f2", commitMetadata.getPartitionToWriteStats().get("p2").get(0).getFileId());
    assertEquals(WriteOperationType.INSERT, commitMetadata.getOperationType());
    assertEquals(TRIP_SCHEMA, commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY));
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) Test(org.junit.jupiter.api.Test)

Example 92 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class CompactHelpers method createCompactionMetadata.

public HoodieCommitMetadata createCompactionMetadata(HoodieTable table, String compactionInstantTime, HoodieData<WriteStatus> writeStatuses, String schema) throws IOException {
    byte[] planBytes = table.getActiveTimeline().readCompactionPlanAsBytes(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get();
    HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeCompactionPlan(planBytes);
    List<HoodieWriteStat> updateStatusMap = writeStatuses.map(WriteStatus::getStat).collectAsList();
    HoodieCommitMetadata metadata = new HoodieCommitMetadata(true);
    for (HoodieWriteStat stat : updateStatusMap) {
        metadata.addWriteStat(stat.getPartitionPath(), stat);
    }
    metadata.addMetadata(org.apache.hudi.common.model.HoodieCommitMetadata.SCHEMA_KEY, schema);
    if (compactionPlan.getExtraMetadata() != null) {
        compactionPlan.getExtraMetadata().forEach(metadata::addMetadata);
    }
    return metadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan)

Example 93 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class BaseCommitActionExecutor method executeClustering.

protected HoodieWriteMetadata<HoodieData<WriteStatus>> executeClustering(HoodieClusteringPlan clusteringPlan) {
    HoodieInstant instant = HoodieTimeline.getReplaceCommitRequestedInstant(instantTime);
    // Mark instant as clustering inflight
    table.getActiveTimeline().transitionReplaceRequestedToInflight(instant, Option.empty());
    table.getMetaClient().reloadActiveTimeline();
    final Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
    HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata = ((ClusteringExecutionStrategy<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>>) ReflectionUtils.loadClass(config.getClusteringExecutionStrategyClass(), new Class<?>[] { HoodieTable.class, HoodieEngineContext.class, HoodieWriteConfig.class }, table, context, config)).performClustering(clusteringPlan, schema, instantTime);
    HoodieData<WriteStatus> writeStatusList = writeMetadata.getWriteStatuses();
    HoodieData<WriteStatus> statuses = updateIndex(writeStatusList, writeMetadata);
    writeMetadata.setWriteStats(statuses.map(WriteStatus::getStat).collectAsList());
    writeMetadata.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(clusteringPlan, writeMetadata));
    validateWriteResult(clusteringPlan, writeMetadata);
    commitOnAutoCommit(writeMetadata);
    if (!writeMetadata.getCommitMetadata().isPresent()) {
        HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeMetadata.getWriteStats().get(), writeMetadata.getPartitionToReplaceFileIds(), extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType());
        writeMetadata.setCommitMetadata(Option.of(commitMetadata));
    }
    return writeMetadata;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieData(org.apache.hudi.common.data.HoodieData) ClusteringExecutionStrategy(org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTable(org.apache.hudi.table.HoodieTable) WriteStatus(org.apache.hudi.client.WriteStatus)

Example 94 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TableSchemaResolver method getTableSchemaFromCommitMetadata.

/**
 * Gets the schema for a hoodie table in Avro format from the HoodieCommitMetadata of the last commit with valid schema.
 *
 * @return Avro schema for this table
 */
private Option<Schema> getTableSchemaFromCommitMetadata(boolean includeMetadataFields) {
    Option<Pair<HoodieInstant, HoodieCommitMetadata>> instantAndCommitMetadata = metaClient.getActiveTimeline().getLastCommitMetadataWithValidSchema();
    if (instantAndCommitMetadata.isPresent()) {
        HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight();
        String schemaStr = commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY);
        Schema schema = new Schema.Parser().parse(schemaStr);
        if (includeMetadataFields) {
            schema = HoodieAvroUtils.addMetadataFields(schema, hasOperationField);
        }
        return Option.of(schema);
    } else {
        return Option.empty();
    }
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Schema(org.apache.avro.Schema) Pair(org.apache.hudi.common.util.collection.Pair)

Example 95 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TableSchemaResolver method readSchemaFromLastCompaction.

/**
 * Read schema from a data file from the last compaction commit done.
 * @throws Exception
 */
public MessageType readSchemaFromLastCompaction(Option<HoodieInstant> lastCompactionCommitOpt) throws Exception {
    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    HoodieInstant lastCompactionCommit = lastCompactionCommitOpt.orElseThrow(() -> new Exception("Could not read schema from last compaction, no compaction commits found on path " + metaClient));
    // Read from the compacted file wrote
    HoodieCommitMetadata compactionMetadata = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(lastCompactionCommit).get(), HoodieCommitMetadata.class);
    String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny().orElseThrow(() -> new IllegalArgumentException("Could not find any data file written for compaction " + lastCompactionCommit + ", could not get schema for table " + metaClient.getBasePath()));
    return readSchemaFromBaseFile(filePath);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) InvalidTableException(org.apache.hudi.exception.InvalidTableException)

Aggregations

HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)139 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)64 ArrayList (java.util.ArrayList)54 HashMap (java.util.HashMap)49 List (java.util.List)48 HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)44 IOException (java.io.IOException)42 Test (org.junit.jupiter.api.Test)41 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)40 Map (java.util.Map)38 Path (org.apache.hadoop.fs.Path)36 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)34 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)34 File (java.io.File)26 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)26 Option (org.apache.hudi.common.util.Option)25 Schema (org.apache.avro.Schema)22 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)21 Collectors (java.util.stream.Collectors)20 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)20