Search in sources :

Example 71 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

The method commit of the class SparkBootstrapCommitActionExecutor.

/**
 * Writes the bootstrap index from the collected write statuses and then delegates to the
 * three-argument {@code commit} overload with every collected {@link HoodieWriteStat}.
 *
 * @param extraMetadata optional extra metadata, forwarded unchanged to the delegated commit
 * @param result        write metadata whose write statuses are collected and cast to {@link BootstrapWriteStatus}
 */
@Override
protected void commit(Option<Map<String, String>> extraMetadata, HoodieWriteMetadata<HoodieData<WriteStatus>> result) {
    // The bootstrap index write and the commit must both happen within a single job DAG,
    // so the statuses are collected once and reused for both steps.
    Map<String, List<Pair<BootstrapFileMapping, HoodieWriteStat>>> statsByPartition = result.getWriteStatuses().collectAsList().stream()
        .map(status -> (BootstrapWriteStatus) status)
        .map(bootstrapStatus -> Pair.of(bootstrapStatus.getBootstrapSourceFileMapping(), bootstrapStatus.getStat()))
        .collect(Collectors.groupingBy(mappingAndStat -> mappingAndStat.getKey().getPartitionPath()));
    HoodieTableMetaClient metaClient = table.getMetaClient();
    try (BootstrapIndex.IndexWriter indexWriter = BootstrapIndex.getBootstrapIndex(metaClient).createWriter(metaClient.getTableConfig().getBootstrapBasePath().get())) {
        LOG.info("Starting to write bootstrap index for source " + config.getBootstrapSourceBasePath() + " in table " + config.getBasePath());
        indexWriter.begin();
        // One index entry per partition, holding only the source file mappings of that partition.
        for (Map.Entry<String, List<Pair<BootstrapFileMapping, HoodieWriteStat>>> partitionEntry : statsByPartition.entrySet()) {
            List<BootstrapFileMapping> partitionMappings = partitionEntry.getValue().stream()
                .map(Pair::getKey)
                .collect(Collectors.toList());
            indexWriter.appendNextPartition(partitionEntry.getKey(), partitionMappings);
        }
        indexWriter.finish();
        LOG.info("Finished writing bootstrap index for source " + config.getBootstrapSourceBasePath() + " in table " + config.getBasePath());
    }
    // Flatten the per-partition groups back into a single list of write stats for the commit.
    List<HoodieWriteStat> allWriteStats = statsByPartition.values().stream()
        .flatMap(perPartition -> perPartition.stream().map(Pair::getValue))
        .collect(Collectors.toList());
    commit(extraMetadata, result, allWriteStats);
    LOG.info("Committing metadata bootstrap !!");
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) BootstrapMode(org.apache.hudi.client.bootstrap.BootstrapMode) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BaseCommitActionExecutor(org.apache.hudi.table.action.commit.BaseCommitActionExecutor) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) KeyGeneratorInterface(org.apache.hudi.keygen.KeyGeneratorInterface) Logger(org.apache.log4j.Logger) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Duration(java.time.Duration) Map(java.util.Map) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapPartitionPathTranslator(org.apache.hudi.client.bootstrap.translator.BootstrapPartitionPathTranslator) Collection(java.util.Collection) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) WRITE_STATUS_STORAGE_LEVEL_VALUE(org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) BootstrapWriteStatus(org.apache.hudi.client.bootstrap.BootstrapWriteStatus) SparkBulkInsertCommitActionExecutor(org.apache.hudi.table.action.commit.SparkBulkInsertCommitActionExecutor) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) FullRecordBootstrapDataProvider(org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider) MetadataBootstrapHandlerFactory.getMetadataHandler(org.apache.hudi.table.action.bootstrap.MetadataBootstrapHandlerFactory.getMetadataHandler) 
HoodieBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieBootstrapSchemaProvider) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) BaseSparkCommitActionExecutor(org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) BootstrapModeSelector(org.apache.hudi.client.bootstrap.selector.BootstrapModeSelector) HoodieData(org.apache.hudi.common.data.HoodieData) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) Iterator(java.util.Iterator) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieSparkBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieSparkBootstrapSchemaProvider) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) SparkValidatorUtils(org.apache.hudi.client.utils.SparkValidatorUtils) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) 
HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) BootstrapWriteStatus(org.apache.hudi.client.bootstrap.BootstrapWriteStatus) List(java.util.List) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping)

Example 72 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

The method close of the class HoodieRowCreateHandle.

/**
 * Closes the underlying file writer, fills in the {@link HoodieWriteStat} held by this handle's
 * write status, and returns that write status.
 *
 * <p>Every written record is reported as an insert: the write and insert counts are both set to
 * the total record count, deletes are set to zero, and the previous commit is set to
 * {@link HoodieWriteStat#NULL_COMMIT}.
 *
 * @return the {@link HoodieInternalWriteStatus} of this handle, with its stat populated.
 * @throws IOException if an I/O error is raised by the underlying calls.
 */
public HoodieInternalWriteStatus close() throws IOException {
    fileWriter.close();

    HoodieWriteStat writeStat = writeStatus.getStat();
    writeStat.setPartitionPath(partitionPath);
    writeStat.setNumWrites(writeStatus.getTotalRecords());
    writeStat.setNumDeletes(0);
    writeStat.setNumInserts(writeStatus.getTotalRecords());
    writeStat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
    writeStat.setFileId(fileId);

    Path tableBasePath = new Path(writeConfig.getBasePath());
    writeStat.setPath(tableBasePath, path);

    // The same on-disk size is reported both as bytes written and as the file size.
    long writtenBytes = FSUtils.getFileSize(table.getMetaClient().getFs(), path);
    writeStat.setTotalWriteBytes(writtenBytes);
    writeStat.setFileSizeInBytes(writtenBytes);
    writeStat.setTotalWriteErrors(writeStatus.getFailedRowsSize());

    HoodieWriteStat.RuntimeStats timings = new HoodieWriteStat.RuntimeStats();
    timings.setTotalCreateTime(currTimer.endTimer());
    writeStat.setRuntimeStats(timings);

    return writeStatus;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat)

Example 73 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

The method completeCompaction of the class SparkRDDWriteClient.

/**
 * Completes an inflight compaction: finalizes the write, updates the table metadata and
 * completes the inflight compaction inside a transaction, then deletes the marker directory
 * and, when a compaction timer is present, reports commit metrics.
 *
 * @param metadata             commit metadata of the compaction, source of the write stats
 * @param table                table the compaction ran against
 * @param compactionCommitTime instant time of the compaction being completed
 */
@Override
protected void completeCompaction(HoodieCommitMetadata metadata, HoodieTable table, String compactionCommitTime) {
    this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction");
    List<HoodieWriteStat> compactionStats = metadata.getWriteStats();
    final HoodieInstant inflightInstant =
        new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionCommitTime);
    try {
        this.txnManager.beginTransaction(Option.of(inflightInstant), Option.empty());
        finalizeWrite(table, compactionCommitTime, compactionStats);
        // commit to data table after committing to metadata table.
        updateTableMetadata(table, metadata, inflightInstant);
        LOG.info("Committing Compaction " + compactionCommitTime + ". Finished with result " + metadata);
        CompactHelpers.getInstance().completeInflightCompaction(table, compactionCommitTime, metadata);
    } finally {
        // Always end the transaction, whether or not the steps above succeeded.
        this.txnManager.endTransaction(Option.of(inflightInstant));
    }

    // Marker cleanup uses the "quiet" delete variant and runs outside the transaction.
    WriteMarkersFactory.get(config.getMarkersType(), table, compactionCommitTime)
        .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());

    if (compactionTimer != null) {
        long elapsedMs = metrics.getDurationInMs(compactionTimer.stop());
        try {
            long commitEpochMs = HoodieActiveTimeline.parseDateFromInstantTime(compactionCommitTime).getTime();
            metrics.updateCommitMetrics(commitEpochMs, elapsedMs, metadata, HoodieActiveTimeline.COMPACTION_ACTION);
        } catch (ParseException e) {
            throw new HoodieCommitException("Commit time is not of valid format. Failed to commit compaction " + config.getBasePath() + " at time " + compactionCommitTime, e);
        }
    }
    LOG.info("Compacted successfully on commit " + compactionCommitTime);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) ParseException(java.text.ParseException)

Example 74 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

The method createHoodieWriteStat of the class TestHoodieRealtimeRecordReader.

/**
 * Builds a {@link HoodieWriteStat} test fixture with fixed counters (100 writes, 100 update
 * writes, 0 deletes, 100 log files compacted) and fixed runtime stats of 100 each.
 *
 * @param basePath      base directory against which {@code filePath} is resolved to read the file length
 * @param commitNumber  not used by this method
 * @param partitionPath partition path recorded on the stat
 * @param filePath      path recorded on the stat, also used to look up the file size
 * @param fileId        file id recorded on the stat
 * @return the populated write stat
 */
private HoodieWriteStat createHoodieWriteStat(java.nio.file.Path basePath, String commitNumber, String partitionPath, String filePath, String fileId) {
    // Fixed timing figures shared by every fixture.
    HoodieWriteStat.RuntimeStats timings = new HoodieWriteStat.RuntimeStats();
    timings.setTotalScanTime(100);
    timings.setTotalCreateTime(100);
    timings.setTotalUpsertTime(100);

    // Resolve the file under the base path so its length can be read from local disk.
    Path resolvedPath = new Path(basePath.toString(), filePath);
    File localFile = new File(resolvedPath.toString());

    HoodieWriteStat fixture = new HoodieWriteStat();
    fixture.setFileId(fileId);
    fixture.setPartitionPath(partitionPath);
    fixture.setPath(filePath);
    fixture.setFileSizeInBytes(localFile.length());
    fixture.setNumDeletes(0);
    fixture.setNumUpdateWrites(100);
    fixture.setNumWrites(100);
    fixture.setTotalLogFilesCompacted(100L);
    fixture.setRuntimeStats(timings);
    return fixture;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File)

Aggregations

HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)74 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)42 List (java.util.List)38 ArrayList (java.util.ArrayList)33 HashMap (java.util.HashMap)32 Map (java.util.Map)32 Path (org.apache.hadoop.fs.Path)28 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)24 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)23 IOException (java.io.IOException)22 Option (org.apache.hudi.common.util.Option)19 Collectors (java.util.stream.Collectors)18 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)18 WriteStatus (org.apache.hudi.client.WriteStatus)17 HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)17 LogManager (org.apache.log4j.LogManager)16 Logger (org.apache.log4j.Logger)16 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)15 FileSlice (org.apache.hudi.common.model.FileSlice)14 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)14