
Example 1 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

Class TestMetadataConversionUtils, method createReplace:

private void createReplace(String instantTime, WriteOperationType writeOperationType, Boolean isClustering) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create a replace instant that marks fileId2 as replaced and records a write to fileId1
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId("file-1");
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // some cases requestedReplaceMetadata will be null
    // e.g. insert_overwrite_table or insert_overwrite without clustering
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = null;
    HoodieCommitMetadata inflightReplaceMetadata = null;
    if (isClustering) {
        requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
        requestedReplaceMetadata.setOperationType(writeOperationType.name());
        HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
        HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
        HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
        clusteringGroup.setSlices(Arrays.asList(sliceInfo));
        clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
        requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
        requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    } else {
        // inflightReplaceMetadata will be null in clustering but not null
        // in insert_overwrite or insert_overwrite_table
        inflightReplaceMetadata = new HoodieCommitMetadata();
        inflightReplaceMetadata.setOperationType(writeOperationType);
        inflightReplaceMetadata.setCompacted(false);
    }
    HoodieTestTable.of(metaClient).addReplaceCommit(instantTime, Option.ofNullable(requestedReplaceMetadata), Option.ofNullable(inflightReplaceMetadata), replaceMetadata).withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used: HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), List (java.util.List), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan)
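
For context, a minimal sketch of how a test might invoke the helper above for both branches; the instant times and operation types below are illustrative assumptions, not copied from the test class:

// Hypothetical invocations, assuming the createReplace helper shown above is in scope.
// Clustering path: the requested metadata carries a clustering plan, the inflight metadata stays null.
createReplace("001", WriteOperationType.CLUSTER, true);
// Non-clustering path (e.g. insert_overwrite): no requested metadata, an inflight commit metadata is written instead.
createReplace("002", WriteOperationType.INSERT_OVERWRITE, false);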

Example 2 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

Class HoodieFlinkWriteClient, method completeCompaction:

@Override
public void completeCompaction(HoodieCommitMetadata metadata, HoodieTable table, String compactionCommitTime) {
    this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction");
    List<HoodieWriteStat> writeStats = metadata.getWriteStats();
    final HoodieInstant compactionInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionCommitTime);
    try {
        this.txnManager.beginTransaction(Option.of(compactionInstant), Option.empty());
        finalizeWrite(table, compactionCommitTime, writeStats);
        // commit to the data table only after committing to the metadata table.
        // We do not do any conflict resolution here as we do for regular writes. The lock is taken to ensure that all writes to the metadata table happen
        // within a single lock (single writer); more than one concurrent write to the metadata table would conflict, since they all update the same partition.
        table.getMetadataWriter(compactionInstant.getTimestamp()).ifPresent(w -> ((HoodieTableMetadataWriter) w).update(metadata, compactionInstant.getTimestamp(), table.isTableServiceAction(compactionInstant.getAction())));
        LOG.info("Committing Compaction {} finished with result {}.", compactionCommitTime, metadata);
        CompactHelpers.getInstance().completeInflightCompaction(table, compactionCommitTime, metadata);
    } finally {
        this.txnManager.endTransaction(Option.of(compactionInstant));
    }
    if (compactionTimer != null) {
        long durationInMs = metrics.getDurationInMs(compactionTimer.stop());
        try {
            metrics.updateCommitMetrics(HoodieActiveTimeline.parseDateFromInstantTime(compactionCommitTime).getTime(), durationInMs, metadata, HoodieActiveTimeline.COMPACTION_ACTION);
        } catch (ParseException e) {
            throw new HoodieCommitException("Commit time is not of valid format. Failed to commit compaction " + config.getBasePath() + " at time " + compactionCommitTime, e);
        }
    }
    LOG.info("Compacted successfully on commit " + compactionCommitTime);
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieCommitException (org.apache.hudi.exception.HoodieCommitException), ParseException (java.text.ParseException)
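
The write stats consumed by completeCompaction come from a HoodieCommitMetadata. A minimal sketch of how such metadata might be assembled and read back; the partition path, file id and counts are made-up example values:

// Build commit metadata with one HoodieWriteStat per written file,
// mirroring what metadata.getWriteStats() returns above.
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
HoodieWriteStat stat = new HoodieWriteStat();
stat.setPartitionPath("2016/03/15");
stat.setFileId("file-1");
stat.setNumWrites(100);
stat.setTotalWriteBytes(1024);
commitMetadata.addWriteStat("2016/03/15", stat);
List<HoodieWriteStat> writeStats = commitMetadata.getWriteStats(); // flattened across partitions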

Example 3 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

Class TestSimpleConcurrentFileWritesConflictResolutionStrategy, method createReplace:

private void createReplace(String instantTime, WriteOperationType writeOperationType) throws Exception {
    String fileId1 = "file-1";
    String fileId2 = "file-2";
    // create a replace instant that marks fileId2 as replaced and records a write to fileId1
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    Map<String, List<String>> partitionFileIds = new HashMap<>();
    partitionFileIds.put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, Arrays.asList(fileId2));
    replaceMetadata.setPartitionToReplaceFileIds(partitionFileIds);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId("file-1");
    replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
    replaceMetadata.setOperationType(writeOperationType);
    // build the requested replace metadata with a clustering plan whose input slice targets fileId1
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.name());
    HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
    HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
    HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
    sliceInfo.setFileId(fileId1);
    sliceInfo.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    clusteringGroup.setSlices(Arrays.asList(sliceInfo));
    clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
    requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
    requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
    HoodieTestTable.of(metaClient).addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.empty(), replaceMetadata).withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
Also used: HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), HashMap (java.util.HashMap), List (java.util.List), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata), HoodieClusteringPlan (org.apache.hudi.avro.model.HoodieClusteringPlan)
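
A short, hedged follow-up showing how the replaced file ids recorded above could be read back from the completed replace metadata; the expected content assumes the single default first partition used by HoodieTestDataGenerator:

// Hypothetical check against the replaceMetadata built in createReplace.
Map<String, List<String>> replaced = replaceMetadata.getPartitionToReplaceFileIds();
// contains fileId2 under DEFAULT_FIRST_PARTITION_PATH, while the write stat records fileId1 as written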

Example 4 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

Class HoodieRowDataCreateHandle, method close:

/**
 * Closes the {@link HoodieRowDataCreateHandle} and returns an instance of {@link HoodieInternalWriteStatus} containing the stats and
 * status of the writes to this handle.
 *
 * @return the {@link HoodieInternalWriteStatus} containing the stats and status of the writes to this handle.
 * @throws IOException if closing the underlying file writer or reading the written file size fails
 */
public HoodieInternalWriteStatus close() throws IOException {
    fileWriter.close();
    HoodieWriteStat stat = writeStatus.getStat();
    stat.setPartitionPath(partitionPath);
    stat.setNumWrites(writeStatus.getTotalRecords());
    stat.setNumDeletes(0);
    stat.setNumInserts(writeStatus.getTotalRecords());
    stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
    stat.setFileId(fileId);
    stat.setPath(new Path(writeConfig.getBasePath()), path);
    long fileSizeInBytes = FSUtils.getFileSize(table.getMetaClient().getFs(), path);
    stat.setTotalWriteBytes(fileSizeInBytes);
    stat.setFileSizeInBytes(fileSizeInBytes);
    stat.setTotalWriteErrors(writeStatus.getFailedRowsSize());
    HoodieWriteStat.RuntimeStats runtimeStats = new HoodieWriteStat.RuntimeStats();
    runtimeStats.setTotalCreateTime(currTimer.endTimer());
    stat.setRuntimeStats(runtimeStats);
    return writeStatus;
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)
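
A sketch of how a caller might read back the stats populated in close(); the variable handle is a hypothetical HoodieRowDataCreateHandle instance, and the getters are standard HoodieWriteStat accessors:

// Hypothetical caller-side inspection of the returned write status.
HoodieInternalWriteStatus status = handle.close();
HoodieWriteStat writtenStat = status.getStat();
long inserts = writtenStat.getNumInserts();        // equals the handle's total record count
long bytes = writtenStat.getTotalWriteBytes();     // equals the on-disk file size set above
String relativePath = writtenStat.getPath();       // path relative to the table base path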

Example 5 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

Class TestCleaner, method generateReplaceCommitMetadata:

private Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> generateReplaceCommitMetadata(String instantTime, String partition, String replacedFileId, String newFileId) {
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
    requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.toString());
    requestedReplaceMetadata.setVersion(1);
    HoodieSliceInfo sliceInfo = HoodieSliceInfo.newBuilder().setFileId(replacedFileId).build();
    List<HoodieClusteringGroup> clusteringGroups = new ArrayList<>();
    clusteringGroups.add(HoodieClusteringGroup.newBuilder().setVersion(1).setNumOutputFileGroups(1).setMetrics(Collections.emptyMap()).setSlices(Collections.singletonList(sliceInfo)).build());
    requestedReplaceMetadata.setExtraMetadata(Collections.emptyMap());
    requestedReplaceMetadata.setClusteringPlan(HoodieClusteringPlan.newBuilder().setVersion(1).setExtraMetadata(Collections.emptyMap()).setStrategy(HoodieClusteringStrategy.newBuilder().setStrategyClassName("").setVersion(1).build()).setInputGroups(clusteringGroups).build());
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    replaceMetadata.addReplaceFileId(partition, replacedFileId);
    replaceMetadata.setOperationType(WriteOperationType.CLUSTER);
    if (!StringUtils.isNullOrEmpty(newFileId)) {
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partition);
        writeStat.setPath(partition + "/" + getBaseFilename(instantTime, newFileId));
        writeStat.setFileId(newFileId);
        writeStat.setTotalWriteBytes(1);
        writeStat.setFileSizeInBytes(1);
        replaceMetadata.addWriteStat(partition, writeStat);
    }
    return Pair.of(requestedReplaceMetadata, replaceMetadata);
}
Also used: HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieSliceInfo (org.apache.hudi.avro.model.HoodieSliceInfo), ArrayList (java.util.ArrayList), HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata), HoodieClusteringGroup (org.apache.hudi.avro.model.HoodieClusteringGroup), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)
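
A minimal sketch of how the returned pair might be consumed; the instant time, partition and file ids are made-up example arguments:

// Hypothetical usage of the helper above.
Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> replacePair =
    generateReplaceCommitMetadata("001", "2016/03/15", "replaced-file", "new-file");
HoodieRequestedReplaceMetadata requested = replacePair.getLeft();   // clustering plan for the requested instant
HoodieReplaceCommitMetadata completed = replacePair.getRight();     // replaced file ids plus the new file's write stat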

Aggregations

HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat): 74
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 42
List (java.util.List): 38
ArrayList (java.util.ArrayList): 33
HashMap (java.util.HashMap): 32
Map (java.util.Map): 32
Path (org.apache.hadoop.fs.Path): 28
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 23
IOException (java.io.IOException): 22
Option (org.apache.hudi.common.util.Option): 19
Collectors (java.util.stream.Collectors): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
WriteStatus (org.apache.hudi.client.WriteStatus): 17
HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata): 17
LogManager (org.apache.log4j.LogManager): 16
Logger (org.apache.log4j.Logger): 16
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 15
FileSlice (org.apache.hudi.common.model.FileSlice): 14
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 14