Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class HoodieTable, method validateSchema.
/**
 * Ensure that the current writerSchema is compatible with the latest schema of this dataset.
 *
 * When inserting/updating data, we read records using the last used schema and convert them to
 * GenericRecords with the writerSchema. Hence, we need to ensure that this conversion can take place without errors.
 */
private void validateSchema() throws HoodieUpsertException, HoodieInsertException {
  if (!config.getAvroSchemaValidate() || getActiveTimeline().getCommitsTimeline().filterCompletedInstants().empty()) {
    // Check not required
    return;
  }

  Schema tableSchema;
  Schema writerSchema;
  boolean isValid;
  try {
    TableSchemaResolver schemaResolver = new TableSchemaResolver(getMetaClient());
    writerSchema = HoodieAvroUtils.createHoodieWriteSchema(config.getSchema());
    tableSchema = HoodieAvroUtils.createHoodieWriteSchema(schemaResolver.getTableAvroSchemaWithoutMetadataFields());
    isValid = TableSchemaResolver.isSchemaCompatible(tableSchema, writerSchema);
  } catch (Exception e) {
    throw new HoodieException("Failed to read schema/check compatibility for base path " + metaClient.getBasePath(), e);
  }

  if (!isValid) {
    throw new HoodieException("Failed schema compatibility check for writerSchema :" + writerSchema
        + ", table schema :" + tableSchema + ", base path :" + metaClient.getBasePath());
  }
}
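The compatibility check above follows Avro's standard schema-resolution rules: data already written with the table schema must remain readable when the new writer schema is used as the reader schema. As a rough standalone illustration of that idea (not Hudi's actual implementation), here is a minimal sketch using plain Avro's SchemaCompatibility API; the record schemas are made up for illustration.

import org.apache.avro.Schema;
import org.apache.avro.SchemaCompatibility;
import org.apache.avro.SchemaCompatibility.SchemaCompatibilityType;

public class SchemaCompatCheckSketch {
  public static void main(String[] args) {
    // Hypothetical schema already committed to the table.
    Schema tableSchema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"trip\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"string\"},"
            + "{\"name\":\"fare\",\"type\":\"double\"}]}");
    // Hypothetical evolved writer schema: adds a nullable field with a default, which stays compatible.
    Schema writerSchema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"trip\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"string\"},"
            + "{\"name\":\"fare\",\"type\":\"double\"},"
            + "{\"name\":\"tip\",\"type\":[\"null\",\"double\"],\"default\":null}]}");
    // Can records written with tableSchema still be read using writerSchema as the reader schema?
    SchemaCompatibilityType result = SchemaCompatibility
        .checkReaderWriterCompatibility(writerSchema, tableSchema)
        .getType();
    System.out.println("Schemas compatible? " + (result == SchemaCompatibilityType.COMPATIBLE));
  }
}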
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class TableSchemaResolver, method getTableParquetSchemaFromDataFile.
/**
 * Gets the schema for a hoodie table. Depending on the type of table, the schema is read from a file
 * written in the latest commit. We assume that the schema has not changed within a single atomic write.
 *
 * @return Parquet schema for this table
 */
private MessageType getTableParquetSchemaFromDataFile() {
  HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
  Option<Pair<HoodieInstant, HoodieCommitMetadata>> instantAndCommitMetadata = activeTimeline.getLastCommitMetadataWithValidData();
  try {
    switch (metaClient.getTableType()) {
      case COPY_ON_WRITE:
        // For a COW table, the data files written must currently be in Parquet or ORC format.
        if (instantAndCommitMetadata.isPresent()) {
          HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight();
          String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny().get();
          return readSchemaFromBaseFile(filePath);
        } else {
          throw new IllegalArgumentException("Could not find any data file written for commit, "
              + "so could not get schema for table " + metaClient.getBasePath());
        }
      case MERGE_ON_READ:
        // Determine the file format based on the file name, and then extract the schema from it.
        if (instantAndCommitMetadata.isPresent()) {
          HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight();
          String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny().get();
          if (filePath.contains(HoodieFileFormat.HOODIE_LOG.getFileExtension())) {
            // This is a log file
            return readSchemaFromLogFile(new Path(filePath));
          } else {
            return readSchemaFromBaseFile(filePath);
          }
        } else {
          throw new IllegalArgumentException("Could not find any data file written for commit, "
              + "so could not get schema for table " + metaClient.getBasePath());
        }
      default:
        LOG.error("Unknown table type " + metaClient.getTableType());
        throw new InvalidTableException(metaClient.getBasePath());
    }
  } catch (IOException e) {
    throw new HoodieException("Failed to read data schema", e);
  }
}
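readSchemaFromBaseFile and readSchemaFromLogFile are not shown above. For the Parquet base-file case, a rough sketch of how such a helper could obtain the MessageType from the file footer, assuming parquet-hadoop is on the classpath (readParquetFooterSchema and its enclosing class are hypothetical names, not Hudi's implementation):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.schema.MessageType;

public class ParquetFooterSchemaSketch {
  // Read the Parquet schema (MessageType) from a base file's footer metadata.
  public static MessageType readParquetFooterSchema(Configuration conf, Path parquetFilePath) throws IOException {
    try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(parquetFilePath, conf))) {
      return reader.getFooter().getFileMetaData().getSchema();
    }
  }
}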
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class TableSchemaResolver, method getTableSchemaFromCommitMetadata.
/**
* Gets the schema for a hoodie table in Avro format from the HoodieCommitMetadata of the instant.
*
* @return Avro schema for this table
*/
private Option<Schema> getTableSchemaFromCommitMetadata(HoodieInstant instant, boolean includeMetadataFields) {
  try {
    HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    byte[] data = timeline.getInstantDetails(instant).get();
    HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class);
    String existingSchemaStr = metadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY);
    if (StringUtils.isNullOrEmpty(existingSchemaStr)) {
      return Option.empty();
    }
    Schema schema = new Schema.Parser().parse(existingSchemaStr);
    if (includeMetadataFields) {
      schema = HoodieAvroUtils.addMetadataFields(schema, hasOperationField);
    }
    return Option.of(schema);
  } catch (Exception e) {
    throw new HoodieException("Failed to read schema from commit metadata", e);
  }
}
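Callers normally do not invoke these private methods directly: TableSchemaResolver first tries the commit metadata and then falls back to reading a data file. A minimal caller sketch, assuming the public getTableAvroSchema() accessor and the HoodieTableMetaClient builder of recent Hudi releases (the base path is made up, and exact signatures may vary across versions):

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.exception.HoodieException;

public class TableSchemaLookupSketch {
  public static Schema resolveTableSchema(String basePath) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath(basePath)   // e.g. "/tmp/hoodie/sample-table" (hypothetical)
        .build();
    try {
      return new TableSchemaResolver(metaClient).getTableAvroSchema();
    } catch (Exception e) {
      throw new HoodieException("Could not resolve table schema for " + basePath, e);
    }
  }
}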
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class FileStatusDTO, method fromFileStatus.
public static FileStatusDTO fromFileStatus(FileStatus fileStatus) {
  if (null == fileStatus) {
    return null;
  }

  FileStatusDTO dto = new FileStatusDTO();
  try {
    dto.path = FilePathDTO.fromPath(fileStatus.getPath());
    dto.length = fileStatus.getLen();
    dto.isdir = fileStatus.isDirectory();
    dto.blockReplication = fileStatus.getReplication();
    dto.blocksize = fileStatus.getBlockSize();
    dto.modificationTime = fileStatus.getModificationTime();
    dto.accessTime = fileStatus.getAccessTime();
    dto.symlink = fileStatus.isSymlink() ? FilePathDTO.fromPath(fileStatus.getSymlink()) : null;
    safeReadAndSetMetadata(dto, fileStatus);
  } catch (IOException ioe) {
    throw new HoodieException(ioe);
  }
  return dto;
}
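A rough usage sketch for the converter above, marshalling a partition listing into DTOs (for example before shipping it through the embedded timeline server); listPartitionAsDto and the partition path are hypothetical, and FileStatusDTO is assumed to be the class shown above:

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileStatusDtoSketch {
  // Convert every FileStatus under a partition directory into a serializable FileStatusDTO.
  public static List<FileStatusDTO> listPartitionAsDto(Configuration conf, Path partitionPath) throws IOException {
    FileSystem fs = partitionPath.getFileSystem(conf);
    return Arrays.stream(fs.listStatus(partitionPath))
        .map(FileStatusDTO::fromFileStatus)
        .collect(Collectors.toList());
  }
}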
Use of org.apache.hudi.exception.HoodieException in project hudi by apache.
The class HoodieTestSuiteJob, method runTestSuite.
public void runTestSuite() {
  try {
    WorkflowDag workflowDag = createWorkflowDag();
    log.info("Workflow Dag => " + DagUtils.convertDagToYaml(workflowDag));
    long startTime = System.currentTimeMillis();
    WriterContext writerContext = new WriterContext(jsc, props, cfg, keyGenerator, sparkSession);
    writerContext.initContext(jsc);
    startOtherServicesIfNeeded(writerContext);
    if (this.cfg.saferSchemaEvolution) {
      // Roll back the most recent upsert/insert, by default.
      int numRollbacks = 2;
      // If the root node is a RollbackNode, take num_rollbacks from its config.
      List<DagNode> root = workflowDag.getNodeList();
      if (!root.isEmpty() && root.get(0) instanceof RollbackNode) {
        numRollbacks = root.get(0).getConfig().getNumRollbacks();
      }
      int version = getSchemaVersionFromCommit(numRollbacks - 1);
      SaferSchemaDagScheduler dagScheduler = new SaferSchemaDagScheduler(workflowDag, writerContext, jsc, version);
      dagScheduler.schedule();
    } else {
      DagScheduler dagScheduler = new DagScheduler(workflowDag, writerContext, jsc);
      dagScheduler.schedule();
    }
    log.info("Finished scheduling all tasks, Time taken {}", System.currentTimeMillis() - startTime);
  } catch (Exception e) {
    log.error("Failed to run Test Suite ", e);
    throw new HoodieException("Failed to run Test Suite ", e);
  } finally {
    stopQuietly();
  }
}
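Across all of the snippets on this page, HoodieException is used the same way: checked or unexpected failures are wrapped into this unchecked type so callers deal with a single exception, while cleanup still runs in a finally block. A generic sketch of that shape (runGuarded is a hypothetical helper, not Hudi API):

import org.apache.hudi.exception.HoodieException;

public class GuardedRunSketch {
  // Run a unit of work, rethrow any failure as HoodieException, and always perform cleanup.
  public static void runGuarded(Runnable work, Runnable cleanup) {
    try {
      work.run();
    } catch (Exception e) {
      throw new HoodieException("Failed to run work", e);
    } finally {
      cleanup.run();
    }
  }
}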