Search in sources:

Example 6 with HoodieTimer

Use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.

The class MetadataCommand, method create.

/**
 * CLI command that bootstraps the Hudi Metadata Table for the current table.
 *
 * @param master Spark master URL used to initialize the Spark context
 * @return a human-readable summary including how long creation took
 * @throws IOException if the filesystem cannot be inspected or the directory created
 */
@CliCommand(value = "metadata create", help = "Create the Metadata Table if it does not exist")
public String create(@CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master) throws IOException {
    HoodieCLI.getTableMetaClient();
    final Path metadataTablePath = new Path(getMetadataTableBasePath(HoodieCLI.basePath));
    try {
        // A non-empty metadata directory means a metadata table already exists; refuse to clobber it.
        if (HoodieCLI.fs.listStatus(metadataTablePath).length > 0) {
            throw new RuntimeException("Metadata directory (" + metadataTablePath.toString() + ") not empty.");
        }
    } catch (FileNotFoundException e) {
        // Directory does not exist yet: create it so the writer below can bootstrap the table.
        HoodieCLI.fs.mkdirs(metadataTablePath);
    }
    final HoodieTimer timer = new HoodieTimer().startTimer();
    final HoodieWriteConfig writeConfig = getWriteConfig();
    initJavaSparkContext(Option.of(master));
    SparkHoodieBackedTableMetadataWriter.create(HoodieCLI.conf, writeConfig, new HoodieSparkEngineContext(jsc));
    return String.format("Created Metadata Table in %s (duration=%.2f secs)", metadataTablePath, timer.endTimer() / 1000.0);
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) FileStatus(org.apache.hadoop.fs.FileStatus) FileNotFoundException(java.io.FileNotFoundException) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 7 with HoodieTimer

Use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.

The class MetadataCommand, method listPartitions.

/**
 * CLI command that prints every partition path recorded in the Metadata Table,
 * sorted in reverse lexical order.
 *
 * @param master Spark master URL used to initialize the Spark context
 * @return a formatted table of partition paths, or an error string when the
 *         Metadata Table is not enabled/initialized
 * @throws IOException if the metadata lookup fails
 */
@CliCommand(value = "metadata list-partitions", help = "List all partitions from metadata")
public String listPartitions(@CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master) throws IOException {
    HoodieCLI.getTableMetaClient();
    initJavaSparkContext(Option.of(master));
    final HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build();
    final HoodieBackedTableMetadata tableMetadata =
        new HoodieBackedTableMetadata(new HoodieSparkEngineContext(jsc), metadataConfig, HoodieCLI.basePath, "/tmp");
    if (!tableMetadata.enabled()) {
        return "[ERROR] Metadata Table not enabled/initialized\n\n";
    }
    final HoodieTimer lookupTimer = new HoodieTimer().startTimer();
    final List<String> partitions = tableMetadata.getAllPartitionPaths();
    LOG.debug("Took " + lookupTimer.endTimer() + " ms");
    // One single-column row per partition, reverse-sorted for display.
    final List<Comparable[]> rows = new ArrayList<>(partitions.size());
    partitions.stream()
        .sorted(Comparator.reverseOrder())
        .forEach(partition -> rows.add(new Comparable[] {partition}));
    final TableHeader header = new TableHeader().addTableHeaderField("partition");
    return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) TableHeader(org.apache.hudi.cli.TableHeader) ArrayList(java.util.ArrayList) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieBackedTableMetadata(org.apache.hudi.metadata.HoodieBackedTableMetadata) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 8 with HoodieTimer

Use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.

The class BaseTableMetadata, method fetchAllPartitionPaths.

/**
 * Returns a list of all partitions by looking up the special partition-list
 * record in the FILES partition of the metadata table.
 *
 * <p>Partition-less tables store the {@code NON_PARTITIONED_NAME} marker, which
 * is translated here into a single empty-string partition for callers.
 *
 * @return the partition paths, or an empty list when no record is present
 */
protected List<String> fetchAllPartitionPaths() {
    HoodieTimer timer = new HoodieTimer().startTimer();
    Option<HoodieRecord<HoodieMetadataPayload>> hoodieRecord = getRecordByKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath());
    metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_PARTITIONS_STR, timer.endTimer()));
    List<String> partitions = Collections.emptyList();
    if (hoodieRecord.isPresent()) {
        mayBeHandleSpuriousDeletes(hoodieRecord, "\"all partitions\"");
        partitions = hoodieRecord.get().getData().getFilenames();
        // Partition-less tables have a single empty partition
        if (partitions.contains(NON_PARTITIONED_NAME)) {
            // Defensive copy: getFilenames() may hand back an unmodifiable list, and
            // mutating such a list with remove()/add() throws UnsupportedOperationException.
            partitions = new java.util.ArrayList<>(partitions);
            partitions.remove(NON_PARTITIONED_NAME);
            partitions.add("");
        }
    }
    LOG.info("Listed partitions from metadata: #partitions=" + partitions.size());
    return partitions;
}
Also used : HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTimer(org.apache.hudi.common.util.HoodieTimer)

Example 9 with HoodieTimer

Use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.

The class BaseTableMetadata, method fetchAllFilesInPartition.

/**
 * Return all the files from the partition by looking up the partition's record
 * in the FILES partition of the metadata table.
 *
 * @param partitionPath The absolute path of the partition
 * @return the file statuses recorded for the partition, or an empty array when
 *         no metadata record exists for it
 * @throws IOException if the metadata lookup fails
 */
FileStatus[] fetchAllFilesInPartition(Path partitionPath) throws IOException {
    // Metadata records are keyed by the partition path relative to the table base path.
    String partitionName = FSUtils.getRelativePartitionPath(new Path(dataBasePath), partitionPath);
    if (partitionName.isEmpty()) {
        // A non-partitioned table uses the reserved marker key instead of "".
        partitionName = NON_PARTITIONED_NAME;
    }
    HoodieTimer timer = new HoodieTimer().startTimer();
    Option<HoodieRecord<HoodieMetadataPayload>> hoodieRecord = getRecordByKey(partitionName, MetadataPartitionType.FILES.getPartitionPath());
    metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_FILES_STR, timer.endTimer()));
    FileStatus[] statuses = {};
    if (hoodieRecord.isPresent()) {
        mayBeHandleSpuriousDeletes(hoodieRecord, partitionName);
        statuses = hoodieRecord.get().getData().getFileStatuses(hadoopConf.get(), partitionPath);
    }
    // Fixed log grammar: the count is plural ("#files"), so say "files".
    LOG.info("Listed files in partition from metadata: partition=" + partitionName + ", #files=" + statuses.length);
    return statuses;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTimer(org.apache.hudi.common.util.HoodieTimer)

Example 10 with HoodieTimer

Use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.

The class CleanActionExecutor, method runClean.

/**
 * Executes the given cleaner plan for the clean instant and transitions the
 * instant through the timeline (requested -> inflight -> complete), returning
 * the resulting clean metadata (timed via {@link HoodieTimer}).
 *
 * @param table        the table being cleaned
 * @param cleanInstant the clean instant; must be in REQUESTED or INFLIGHT state
 * @param cleanerPlan  the serialized plan describing what to delete
 * @return the clean metadata produced for this run (empty metadata if nothing was cleaned)
 * @throws HoodieIOException if any timeline/serialization I/O fails
 */
private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstant cleanInstant, HoodieCleanerPlan cleanerPlan) {
    ValidationUtils.checkArgument(cleanInstant.getState().equals(HoodieInstant.State.REQUESTED) || cleanInstant.getState().equals(HoodieInstant.State.INFLIGHT));
    try {
        final HoodieInstant inflightInstant;
        // Timer covers the whole clean run; its elapsed time is stored in the clean metadata below.
        final HoodieTimer timer = new HoodieTimer();
        timer.startTimer();
        if (cleanInstant.isRequested()) {
            // Move requested -> inflight, persisting the plan alongside the transition.
            inflightInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
        } else {
            // Already inflight (e.g. a retry of a previously failed clean).
            inflightInstant = cleanInstant;
        }
        List<HoodieCleanStat> cleanStats = clean(context, cleanerPlan);
        if (cleanStats.isEmpty()) {
            // Nothing was cleaned. NOTE(review): this early return leaves the instant
            // inflight (no transition to complete) — presumably handled/retried by a
            // later run; confirm against the caller's rollback/retry semantics.
            return HoodieCleanMetadata.newBuilder().build();
        }
        // Reload so the metadata conversion below sees the latest timeline state.
        table.getMetaClient().reloadActiveTimeline();
        HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(inflightInstant.getTimestamp(), Option.of(timer.endTimer()), cleanStats);
        if (!skipLocking) {
            // Guard the metadata write + completion transition with a transaction.
            this.txnManager.beginTransaction(Option.empty(), Option.empty());
        }
        writeTableMetadata(metadata, inflightInstant.getTimestamp());
        table.getActiveTimeline().transitionCleanInflightToComplete(inflightInstant, TimelineMetadataUtils.serializeCleanMetadata(metadata));
        LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");
        return metadata;
    } catch (IOException e) {
        throw new HoodieIOException("Failed to clean up after commit", e);
    } finally {
        if (!skipLocking) {
            // NOTE(review): endTransaction can run even when beginTransaction was not
            // called (empty-stats early return above) — assumes the transaction manager
            // tolerates an unmatched end; verify.
            this.txnManager.endTransaction(Option.empty());
        }
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Aggregations

HoodieTimer (org.apache.hudi.common.util.HoodieTimer)35 ArrayList (java.util.ArrayList)16 Path (org.apache.hadoop.fs.Path)15 IOException (java.io.IOException)14 HashMap (java.util.HashMap)12 Option (org.apache.hudi.common.util.Option)12 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)11 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)10 Map (java.util.Map)9 Pair (org.apache.hudi.common.util.collection.Pair)9 List (java.util.List)8 FileStatus (org.apache.hadoop.fs.FileStatus)8 HoodieIOException (org.apache.hudi.exception.HoodieIOException)7 LogManager (org.apache.log4j.LogManager)7 Logger (org.apache.log4j.Logger)7 Collectors (java.util.stream.Collectors)6 HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext)6 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)6 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)6 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)6