Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class SqlFileBasedTransformer, method apply:
@Override
public Dataset<Row> apply(final JavaSparkContext jsc, final SparkSession sparkSession, final Dataset<Row> rowDataset, final TypedProperties props) {
  final String sqlFile = props.getString(Config.TRANSFORMER_SQL_FILE);
  if (null == sqlFile) {
    throw new IllegalArgumentException("Missing required configuration : (" + Config.TRANSFORMER_SQL_FILE + ")");
  }
  final FileSystem fs = FSUtils.getFs(sqlFile, jsc.hadoopConfiguration(), true);
  // Spark temp table names do not allow dashes, so replace them with underscores.
  final String tmpTable = TMP_TABLE.concat(UUID.randomUUID().toString().replace("-", "_"));
  LOG.info("Registering tmp table : " + tmpTable);
  rowDataset.registerTempTable(tmpTable);
  try (final Scanner scanner = new Scanner(fs.open(new Path(sqlFile)), "UTF-8")) {
    Dataset<Row> rows = null;
    // Each SQL statement is separated by a semicolon, so use that as the delimiter.
    scanner.useDelimiter(";");
    LOG.info("SQL Query for transformation : ");
    while (scanner.hasNext()) {
      String sqlStr = scanner.next();
      sqlStr = sqlStr.replaceAll(SRC_PATTERN, tmpTable).trim();
      if (!sqlStr.isEmpty()) {
        LOG.info(sqlStr);
        // Overwrite the same dataset object until the last statement, then return it.
        rows = sparkSession.sql(sqlStr);
      }
    }
    return rows;
  } catch (final IOException ioe) {
    throw new HoodieIOException("Error reading transformer SQL file.", ioe);
  }
}
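A hedged sketch of how this transformer is typically driven: the SQL file contains semicolon-delimited statements that reference the source dataset through the placeholder matched by SRC_PATTERN. The config key shown below is assumed from Hudi's DeltaStreamer conventions, and the file path, SQL contents, and surrounding variables (jsc, sparkSession, sourceDataset) are hypothetical.

// Minimal sketch, assuming the config key "hoodie.deltastreamer.transformer.sql.file"
// and a hypothetical /tmp/transform.sql containing a statement such as:
//   SELECT col1, col2 FROM <SRC> WHERE col1 IS NOT NULL;
TypedProperties props = new TypedProperties();
props.setProperty("hoodie.deltastreamer.transformer.sql.file", "/tmp/transform.sql");
SqlFileBasedTransformer transformer = new SqlFileBasedTransformer();
Dataset<Row> transformed = transformer.apply(jsc, sparkSession, sourceDataset, props);

Note that HoodieIOException is unchecked, so a caller is not forced to handle the unreadable-file case; it propagates unless caught explicitly.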
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class CleanActionExecutor, method runClean:
private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstant cleanInstant, HoodieCleanerPlan cleanerPlan) {
  ValidationUtils.checkArgument(cleanInstant.getState().equals(HoodieInstant.State.REQUESTED) || cleanInstant.getState().equals(HoodieInstant.State.INFLIGHT));
  try {
    final HoodieInstant inflightInstant;
    final HoodieTimer timer = new HoodieTimer();
    timer.startTimer();
    // Transition the requested clean instant to inflight before executing it.
    if (cleanInstant.isRequested()) {
      inflightInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
    } else {
      inflightInstant = cleanInstant;
    }
    List<HoodieCleanStat> cleanStats = clean(context, cleanerPlan);
    if (cleanStats.isEmpty()) {
      return HoodieCleanMetadata.newBuilder().build();
    }
    table.getMetaClient().reloadActiveTimeline();
    HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata(inflightInstant.getTimestamp(), Option.of(timer.endTimer()), cleanStats);
    // Write clean metadata under a transaction, unless locking is handled by the caller.
    if (!skipLocking) {
      this.txnManager.beginTransaction(Option.empty(), Option.empty());
    }
    writeTableMetadata(metadata, inflightInstant.getTimestamp());
    table.getActiveTimeline().transitionCleanInflightToComplete(inflightInstant, TimelineMetadataUtils.serializeCleanMetadata(metadata));
    LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");
    return metadata;
  } catch (IOException e) {
    throw new HoodieIOException("Failed to clean up after commit", e);
  } finally {
    if (!skipLocking) {
      this.txnManager.endTransaction(Option.empty());
    }
  }
}
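For context, a hedged sketch of how a caller usually reaches this code path: triggering a clean through the write client. The clean() call and the getTotalFilesDeleted() accessor below match Hudi's Spark client API in recent releases, but treat the exact signatures as assumptions; writeClient is a hypothetical, already-initialized client.

// Minimal sketch; HoodieIOException is unchecked, so catching it is optional
// and shown only to illustrate the failure mode of the timeline writes above.
try {
  HoodieCleanMetadata cleanMetadata = writeClient.clean();
  LOG.info("Cleaned " + cleanMetadata.getTotalFilesDeleted() + " files");
} catch (HoodieIOException e) {
  LOG.error("Clean failed while writing timeline metadata", e);
  throw e;
}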
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class CleanPlanActionExecutor, method requestClean:
/**
 * Generates the list of files to be cleaned.
 *
 * @param context HoodieEngineContext
 * @return Cleaner plan
 */
HoodieCleanerPlan requestClean(HoodieEngineContext context) {
  try {
    CleanPlanner<T, I, K, O> planner = new CleanPlanner<>(context, table, config);
    Option<HoodieInstant> earliestInstant = planner.getEarliestCommitToRetain();
    context.setJobStatus(this.getClass().getSimpleName(), "Obtaining list of partitions to be cleaned");
    List<String> partitionsToClean = planner.getPartitionPathsToClean(earliestInstant);
    if (partitionsToClean.isEmpty()) {
      LOG.info("Nothing to clean here. It is already clean");
      return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build();
    }
    LOG.info("Total Partitions to clean : " + partitionsToClean.size() + ", with policy " + config.getCleanerPolicy());
    int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
    LOG.info("Using cleanerParallelism: " + cleanerParallelism);
    context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned");
    Map<String, List<HoodieCleanFileInfo>> cleanOps = context
        .map(partitionsToClean, partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean)), cleanerParallelism)
        .stream()
        .collect(Collectors.toMap(Pair::getKey, y -> CleanerUtils.convertToHoodieCleanFileInfoList(y.getValue())));
    return new HoodieCleanerPlan(
        earliestInstant.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null),
        config.getCleanerPolicy().name(),
        CollectionUtils.createImmutableMap(),
        CleanPlanner.LATEST_CLEAN_PLAN_VERSION,
        cleanOps);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to schedule clean operation", e);
  }
}
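The plan's core payload is a map from partition path to the files to delete, built in parallel via the engine context. A minimal sketch of the same map-then-collect shape on plain Java streams (the engine-context call above distributes it over Spark instead); the partition and file names are hypothetical:

// Plain-streams sketch of the plan's shape; Pair is Hudi's
// org.apache.hudi.common.util.collection.Pair, as used above.
List<String> partitionsToClean = Arrays.asList("2021/01/01", "2021/01/02");
Map<String, List<String>> cleanOps = partitionsToClean.stream()
    .map(p -> Pair.of(p, Arrays.asList(p + "/file1.parquet")))
    .collect(Collectors.toMap(Pair::getKey, Pair::getValue));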
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class CleanPlanActionExecutor, method requestClean:
/**
 * Creates a cleaner plan if there are files to be cleaned, and stores it in the instant file.
 * The cleaner plan contains absolute file paths.
 *
 * @param startCleanTime Cleaner instant time
 * @return Cleaner plan, if generated
 */
protected Option<HoodieCleanerPlan> requestClean(String startCleanTime) {
  final HoodieCleanerPlan cleanerPlan = requestClean(context);
  if ((cleanerPlan.getFilePathsToBeDeletedPerPartition() != null)
      && !cleanerPlan.getFilePathsToBeDeletedPerPartition().isEmpty()
      && cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream().mapToInt(List::size).sum() > 0) {
    // Only persist a cleaner plan that actually does some work.
    final HoodieInstant cleanInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.CLEAN_ACTION, startCleanTime);
    // Save to both the aux and timeline folders.
    try {
      table.getActiveTimeline().saveToCleanRequested(cleanInstant, TimelineMetadataUtils.serializeCleanerPlan(cleanerPlan));
      LOG.info("Requesting Cleaning with instant time " + cleanInstant);
    } catch (IOException e) {
      LOG.error("Got exception when saving cleaner requested file", e);
      throw new HoodieIOException(e.getMessage(), e);
    }
    return Option.of(cleanerPlan);
  }
  return Option.empty();
}
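Because the result is wrapped in Hudi's Option, a caller can distinguish "nothing to clean" from a scheduled clean without a null check. A hedged sketch of consuming it, from inside a subclass since the method is protected; the instant timestamp is hypothetical:

// Minimal sketch; "20220101120000" stands in for a real instant time.
Option<HoodieCleanerPlan> planOpt = requestClean("20220101120000");
if (planOpt.isPresent()) {
  LOG.info("Clean requested for " + planOpt.get().getFilePathsToBeDeletedPerPartition().size() + " partitions");
} else {
  LOG.info("No files eligible for cleaning; nothing scheduled");
}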
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class BaseRollbackActionExecutor, method execute:
@Override
public HoodieRollbackMetadata execute() {
  table.getMetaClient().reloadActiveTimeline();
  Option<HoodieInstant> rollbackInstant = table.getRollbackTimeline()
      .filterInflightsAndRequested()
      .filter(instant -> instant.getTimestamp().equals(instantTime))
      .firstInstant();
  if (!rollbackInstant.isPresent()) {
    throw new HoodieRollbackException("No pending rollback instants found to execute rollback");
  }
  try {
    HoodieRollbackPlan rollbackPlan = RollbackUtils.getRollbackPlan(table.getMetaClient(), rollbackInstant.get());
    return runRollback(table, rollbackInstant.get(), rollbackPlan);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to fetch rollback plan for commit " + instantTime, e);
  }
}
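Both failure modes above are unchecked exceptions, so a caller that wants to react differently to a missing pending rollback versus an unreadable plan can branch on the exception type. A hedged sketch, assuming a write-client rollback entry point as in Hudi's client API; writeClient and the instant time are hypothetical:

// Minimal sketch distinguishing the two failure modes surfaced above.
try {
  boolean rolledBack = writeClient.rollback("20220101120000");
  LOG.info("Rollback succeeded: " + rolledBack);
} catch (HoodieRollbackException e) {
  LOG.error("No pending rollback instant to execute", e);
} catch (HoodieIOException e) {
  LOG.error("Could not read the rollback plan from the timeline", e);
}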