
Example 51 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From class SqlFileBasedTransformer, method apply.

@Override
public Dataset<Row> apply(final JavaSparkContext jsc, final SparkSession sparkSession, final Dataset<Row> rowDataset, final TypedProperties props) {
    final String sqlFile = props.getString(Config.TRANSFORMER_SQL_FILE);
    if (null == sqlFile) {
        throw new IllegalArgumentException("Missing required configuration : (" + Config.TRANSFORMER_SQL_FILE + ")");
    }
    final FileSystem fs = FSUtils.getFs(sqlFile, jsc.hadoopConfiguration(), true);
    // temp table names cannot contain dashes, so replace them with underscores
    final String tmpTable = TMP_TABLE.concat(UUID.randomUUID().toString().replace("-", "_"));
    LOG.info("Registering tmp table : " + tmpTable);
    rowDataset.registerTempTable(tmpTable);
    try (final Scanner scanner = new Scanner(fs.open(new Path(sqlFile)), "UTF-8")) {
        Dataset<Row> rows = null;
        // each SQL statement is separated by a semicolon, so use that as the delimiter
        scanner.useDelimiter(";");
        LOG.info("SQL Query for transformation : ");
        while (scanner.hasNext()) {
            String sqlStr = scanner.next();
            sqlStr = sqlStr.replaceAll(SRC_PATTERN, tmpTable).trim();
            if (!sqlStr.isEmpty()) {
                LOG.info(sqlStr);
                // keep overwriting the same dataset reference; the result of the last statement is returned
                rows = sparkSession.sql(sqlStr);
            }
        }
        return rows;
    } catch (final IOException ioe) {
        throw new HoodieIOException("Error reading transformer SQL file.", ioe);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Scanner (java.util.Scanner), HoodieIOException (org.apache.hudi.exception.HoodieIOException), FileSystem (org.apache.hadoop.fs.FileSystem), Row (org.apache.spark.sql.Row), IOException (java.io.IOException)
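
The two moving parts above are the dash-free temp table name and the semicolon-delimited Scanner that splits the SQL file into statements. Below is a minimal standalone sketch of just that parsing step; the inline SQL text and the <SRC> placeholder are illustrative assumptions, not copied from the Hudi source.

import java.util.Scanner;
import java.util.UUID;

public class SqlSplitSketch {
    public static void main(String[] args) {
        // Stand-in for the contents of the transformer SQL file (assumed example).
        String sql = "CREATE TEMPORARY VIEW step1 AS SELECT * FROM <SRC>;\n"
                + "SELECT id, name FROM step1 WHERE id > 0;";
        // Temp table names cannot contain dashes, so replace them with underscores.
        String tmpTable = "HOODIE_SRC_TMP_TABLE_" + UUID.randomUUID().toString().replace("-", "_");
        try (Scanner scanner = new Scanner(sql)) {
            // Each SQL statement is separated by a semicolon.
            scanner.useDelimiter(";");
            while (scanner.hasNext()) {
                String stmt = scanner.next().replaceAll("<SRC>", tmpTable).trim();
                if (!stmt.isEmpty()) {
                    // The real transformer would call sparkSession.sql(stmt) here.
                    System.out.println("Would run: " + stmt);
                }
            }
        }
    }
}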

Example 52 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From class CleanActionExecutor, method runClean.

private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstant cleanInstant, HoodieCleanerPlan cleanerPlan) {
    ValidationUtils.checkArgument(cleanInstant.getState().equals(HoodieInstant.State.REQUESTED) || cleanInstant.getState().equals(HoodieInstant.State.INFLIGHT));
    try {
        final HoodieInstant inflightInstant;
        final HoodieTimer timer = new HoodieTimer();
        timer.startTimer();
        if (cleanInstant.isRequested()) {
            inflightInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
        } else {
            inflightInstant = cleanInstant;
        }
        List<HoodieCleanStat> cleanStats = clean(context, cleanerPlan);
        if (cleanStats.isEmpty()) {
            return HoodieCleanMetadata.newBuilder().build();
        }
        table.getMetaClient().reloadActiveTimeline();
        HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(inflightInstant.getTimestamp(), Option.of(timer.endTimer()), cleanStats);
        if (!skipLocking) {
            this.txnManager.beginTransaction(Option.empty(), Option.empty());
        }
        writeTableMetadata(metadata, inflightInstant.getTimestamp());
        table.getActiveTimeline().transitionCleanInflightToComplete(inflightInstant, TimelineMetadataUtils.serializeCleanMetadata(metadata));
        LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");
        return metadata;
    } catch (IOException e) {
        throw new HoodieIOException("Failed to clean up after commit", e);
    } finally {
        if (!skipLocking) {
            this.txnManager.endTransaction(Option.empty());
        }
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat), HoodieIOException (org.apache.hudi.exception.HoodieIOException), HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata), HoodieTimer (org.apache.hudi.common.util.HoodieTimer), IOException (java.io.IOException)
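
The shape to note in runClean is the optional locking around the metadata write: the transaction begins only when skipLocking is false, and the finally block releases it even when the write throws. A minimal sketch of that pattern, using a plain ReentrantLock as a stand-in for Hudi's TransactionManager (an assumption made for illustration):

import java.util.concurrent.locks.ReentrantLock;

public class OptionalLockSketch {
    private final ReentrantLock txnLock = new ReentrantLock();
    private final boolean skipLocking;

    public OptionalLockSketch(boolean skipLocking) {
        this.skipLocking = skipLocking;
    }

    public void writeUnderLock(Runnable metadataWrite) {
        if (!skipLocking) {
            // beginTransaction(...) in the real executor
            txnLock.lock();
        }
        try {
            // e.g. persist clean metadata and transition the inflight instant to complete
            metadataWrite.run();
        } finally {
            if (!skipLocking) {
                // endTransaction(...) in the real executor
                txnLock.unlock();
            }
        }
    }
}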

Example 53 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From class CleanPlanActionExecutor, method requestClean.

/**
 * Generates List of files to be cleaned.
 *
 * @param context HoodieEngineContext
 * @return Cleaner Plan
 */
HoodieCleanerPlan requestClean(HoodieEngineContext context) {
    try {
        CleanPlanner<T, I, K, O> planner = new CleanPlanner<>(context, table, config);
        Option<HoodieInstant> earliestInstant = planner.getEarliestCommitToRetain();
        context.setJobStatus(this.getClass().getSimpleName(), "Obtaining list of partitions to be cleaned");
        List<String> partitionsToClean = planner.getPartitionPathsToClean(earliestInstant);
        if (partitionsToClean.isEmpty()) {
            LOG.info("Nothing to clean here. It is already clean");
            return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build();
        }
        LOG.info("Total Partitions to clean : " + partitionsToClean.size() + ", with policy " + config.getCleanerPolicy());
        int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
        LOG.info("Using cleanerParallelism: " + cleanerParallelism);
        context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned");
        Map<String, List<HoodieCleanFileInfo>> cleanOps = context
                .map(partitionsToClean,
                        partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean)),
                        cleanerParallelism)
                .stream()
                .collect(Collectors.toMap(Pair::getKey, y -> CleanerUtils.convertToHoodieCleanFileInfoList(y.getValue())));
        return new HoodieCleanerPlan(
                earliestInstant.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null),
                config.getCleanerPolicy().name(),
                CollectionUtils.createImmutableMap(),
                CleanPlanner.LATEST_CLEAN_PLAN_VERSION,
                cleanOps);
    } catch (IOException e) {
        throw new HoodieIOException("Failed to schedule clean operation", e);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieTable (org.apache.hudi.table.HoodieTable), HoodieCleaningPolicy (org.apache.hudi.common.model.HoodieCleaningPolicy), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), BaseActionExecutor (org.apache.hudi.table.action.BaseActionExecutor), HoodieCleanerPlan (org.apache.hudi.avro.model.HoodieCleanerPlan), CollectionUtils (org.apache.hudi.common.util.CollectionUtils), TimelineMetadataUtils (org.apache.hudi.common.table.timeline.TimelineMetadataUtils), Option (org.apache.hudi.common.util.Option), IOException (java.io.IOException), HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext), HoodieCleanFileInfo (org.apache.hudi.avro.model.HoodieCleanFileInfo), Collectors (java.util.stream.Collectors), HoodieActionInstant (org.apache.hudi.avro.model.HoodieActionInstant), HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload), Logger (org.apache.log4j.Logger), List (java.util.List), CleanerUtils (org.apache.hudi.common.util.CleanerUtils), Map (java.util.Map), HoodieIOException (org.apache.hudi.exception.HoodieIOException), LogManager (org.apache.log4j.LogManager), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), Pair (org.apache.hudi.common.util.collection.Pair)
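
The core of requestClean is a fan-out over partitions followed by a collect into a map of partition to delete-paths. Here is a minimal sketch of that step using a plain parallel stream in place of HoodieEngineContext.map, which in the real code dispatches to the engine (Spark, Flink); the partition names and planner output below are assumptions for illustration:

import java.util.AbstractMap.SimpleEntry;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class CleanFanOutSketch {
    // Placeholder for CleanPlanner.getDeletePaths(partition) (assumed output).
    static List<String> deletePathsFor(String partition) {
        return List.of(partition + "/old-file.parquet");
    }

    public static void main(String[] args) {
        List<String> partitionsToClean = List.of("2021/01/01", "2021/01/02");
        // Pair each partition with its delete paths, then collect to a map,
        // mirroring context.map(...).stream().collect(Collectors.toMap(...)).
        Map<String, List<String>> cleanOps = partitionsToClean.parallelStream()
                .map(p -> new SimpleEntry<>(p, deletePathsFor(p)))
                .collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
        System.out.println(cleanOps);
    }
}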

Example 54 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From class CleanPlanActionExecutor, method requestClean.

/**
 * Creates a Cleaner plan if there are files to be cleaned and stores them in instant file.
 * Cleaner Plan contains absolute file paths.
 *
 * @param startCleanTime Cleaner Instant Time
 * @return Cleaner Plan if generated
 */
protected Option<HoodieCleanerPlan> requestClean(String startCleanTime) {
    final HoodieCleanerPlan cleanerPlan = requestClean(context);
    if ((cleanerPlan.getFilePathsToBeDeletedPerPartition() != null)
            && !cleanerPlan.getFilePathsToBeDeletedPerPartition().isEmpty()
            && cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream().mapToInt(List::size).sum() > 0) {
        // Only create cleaner plan which does some work
        final HoodieInstant cleanInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.CLEAN_ACTION, startCleanTime);
        // Save to both aux and timeline folder
        try {
            table.getActiveTimeline().saveToCleanRequested(cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
            LOG.info("Requesting Cleaning with instant time " + cleanInstant);
        } catch (IOException e) {
            LOG.error("Got exception when saving cleaner requested file", e);
            throw new HoodieIOException(e.getMessage(), e);
        }
        return Option.of(cleanerPlan);
    }
    return Option.empty();
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieIOException (org.apache.hudi.exception.HoodieIOException), IOException (java.io.IOException), HoodieCleanerPlan (org.apache.hudi.avro.model.HoodieCleanerPlan)
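
The guard that decides whether to persist a plan boils down to: the per-partition map exists, is non-empty, and contains at least one file path. A minimal sketch of that check over a plain Map, standing in for the Avro-generated HoodieCleanerPlan (an assumption for illustration):

import java.util.List;
import java.util.Map;

public class PlanHasWorkSketch {
    // True only when at least one partition has at least one file to delete.
    static boolean hasWork(Map<String, List<String>> filePathsPerPartition) {
        return filePathsPerPartition != null
                && !filePathsPerPartition.isEmpty()
                && filePathsPerPartition.values().stream().mapToInt(List::size).sum() > 0;
    }

    public static void main(String[] args) {
        System.out.println(hasWork(null));                          // false
        System.out.println(hasWork(Map.of()));                      // false
        System.out.println(hasWork(Map.of("p1", List.of())));       // false
        System.out.println(hasWork(Map.of("p1", List.of("f1"))));   // true
    }
}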

Example 55 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From class BaseRollbackActionExecutor, method execute.

@Override
public HoodieRollbackMetadata execute() {
    table.getMetaClient().reloadActiveTimeline();
    Option<HoodieInstant> rollbackInstant = table.getRollbackTimeline()
            .filterInflightsAndRequested()
            .filter(instant -> instant.getTimestamp().equals(instantTime))
            .firstInstant();
    if (!rollbackInstant.isPresent()) {
        throw new HoodieRollbackException("No pending rollback instants found to execute rollback");
    }
    try {
        HoodieRollbackPlan rollbackPlan = RollbackUtils.getRollbackPlan(table.getMetaClient(), rollbackInstant.get());
        return runRollback(table, rollbackInstant.get(), rollbackPlan);
    } catch (IOException e) {
        throw new HoodieIOException("Failed to fetch rollback plan for commit " + instantTime, e);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieTable (org.apache.hudi.table.HoodieTable), BaseActionExecutor (org.apache.hudi.table.action.BaseActionExecutor), Option (org.apache.hudi.common.util.Option), HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext), TransactionManager (org.apache.hudi.client.transaction.TransactionManager), HoodieTimer (org.apache.hudi.common.util.HoodieTimer), Logger (org.apache.log4j.Logger), HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), ValidationUtils (org.apache.hudi.common.util.ValidationUtils), HoodieRollbackException (org.apache.hudi.exception.HoodieRollbackException), BootstrapIndex (org.apache.hudi.common.bootstrap.index.BootstrapIndex), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), WriteMarkersFactory (org.apache.hudi.table.marker.WriteMarkersFactory), HoodieHeartbeatClient (org.apache.hudi.client.heartbeat.HoodieHeartbeatClient), TimelineMetadataUtils (org.apache.hudi.common.table.timeline.TimelineMetadataUtils), IOException (java.io.IOException), HoodieRollbackPlan (org.apache.hudi.avro.model.HoodieRollbackPlan), Collectors (java.util.stream.Collectors), Objects (java.util.Objects), HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload), List (java.util.List), ClusteringUtils (org.apache.hudi.common.util.ClusteringUtils), HoodieIOException (org.apache.hudi.exception.HoodieIOException), LogManager (org.apache.log4j.LogManager), HoodieRollbackStat (org.apache.hudi.common.HoodieRollbackStat), Collections (java.util.Collections)
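
execute() follows a lookup-then-fail shape: find the pending rollback instant for the requested time, throw if absent, and only then wrap any IOException from plan loading. A minimal sketch of that shape with java.util.Optional in place of Hudi's Option, and a plain RuntimeException in place of HoodieRollbackException (assumptions for illustration):

import java.util.List;
import java.util.Optional;

public class PendingInstantSketch {
    public static void main(String[] args) {
        List<String> pendingRollbackTimes = List.of("20220101000000");
        String instantTime = "20220101000000";
        // Find the pending instant matching the requested timestamp.
        Optional<String> match = pendingRollbackTimes.stream()
                .filter(t -> t.equals(instantTime))
                .findFirst();
        // Fail fast when nothing is pending, mirroring the HoodieRollbackException above.
        String instant = match.orElseThrow(
                () -> new RuntimeException("No pending rollback instants found to execute rollback"));
        System.out.println("Rolling back " + instant);
    }
}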

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139 usages
IOException (java.io.IOException): 127 usages
Path (org.apache.hadoop.fs.Path): 45 usages
List (java.util.List): 31 usages
ArrayList (java.util.ArrayList): 30 usages
Option (org.apache.hudi.common.util.Option): 27 usages
Collectors (java.util.stream.Collectors): 26 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26 usages
Pair (org.apache.hudi.common.util.collection.Pair): 25 usages
LogManager (org.apache.log4j.LogManager): 25 usages
Logger (org.apache.log4j.Logger): 25 usages
Map (java.util.Map): 21 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 20 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 19 usages
HashSet (java.util.HashSet): 18 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18 usages
Set (java.util.Set): 17 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17 usages
HoodieException (org.apache.hudi.exception.HoodieException): 17 usages