
Example 96 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From class HoodieTable, method validateSchema.

/**
 * Ensure that the current writerSchema is compatible with the latest schema of this dataset.
 *
 * When inserting/updating data, we read records using the last used schema and convert them to
 * GenericRecords with the writerSchema. Hence, we need to ensure that this conversion can take place without errors.
 */
private void validateSchema() throws HoodieUpsertException, HoodieInsertException {
    if (!config.getAvroSchemaValidate() || getActiveTimeline().getCommitsTimeline().filterCompletedInstants().empty()) {
        // Check not required
        return;
    }
    Schema tableSchema;
    Schema writerSchema;
    boolean isValid;
    try {
        TableSchemaResolver schemaResolver = new TableSchemaResolver(getMetaClient());
        writerSchema = HoodieAvroUtils.createHoodieWriteSchema(config.getSchema());
        tableSchema = HoodieAvroUtils.createHoodieWriteSchema(schemaResolver.getTableAvroSchemaWithoutMetadataFields());
        isValid = TableSchemaResolver.isSchemaCompatible(tableSchema, writerSchema);
    } catch (Exception e) {
        throw new HoodieException("Failed to read schema/check compatibility for base path " + metaClient.getBasePath(), e);
    }
    if (!isValid) {
        throw new HoodieException("Failed schema compatibility check for writerSchema :" + writerSchema + ", table schema :" + tableSchema + ", base path :" + metaClient.getBasePath());
    }
}
Also used : Schema(org.apache.avro.Schema) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) HoodieException(org.apache.hudi.exception.HoodieException) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) TimeoutException(java.util.concurrent.TimeoutException) HoodieInsertException(org.apache.hudi.exception.HoodieInsertException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)
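
The compatibility check above delegates to TableSchemaResolver.isSchemaCompatible. As a minimal, self-contained sketch of the same kind of reader/writer check, Avro's own SchemaCompatibility can be used directly; the class and schemas below are illustrative, not Hudi's actual implementation.

import org.apache.avro.Schema;
import org.apache.avro.SchemaCompatibility;
import org.apache.avro.SchemaCompatibility.SchemaCompatibilityType;

public class SchemaCompatDemo {
    // Illustrative stand-in for TableSchemaResolver.isSchemaCompatible:
    // checks whether data written with the table schema can be read with the writer schema.
    static boolean isCompatible(Schema tableSchema, Schema writerSchema) {
        return SchemaCompatibility.checkReaderWriterCompatibility(writerSchema, tableSchema)
                .getType() == SchemaCompatibilityType.COMPATIBLE;
    }

    public static void main(String[] args) {
        Schema v1 = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"r\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"}]}");
        Schema v2 = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"r\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"string\"},"
            + "{\"name\":\"age\",\"type\":[\"null\",\"int\"],\"default\":null}]}");
        // Adding a nullable field with a default keeps the evolved schema compatible.
        System.out.println(isCompatible(v1, v2)); // true
    }
}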

Example 97 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From class TableSchemaResolver, method getTableParquetSchemaFromDataFile.

/**
 * Gets the schema for a hoodie table. Depending on the type of table, read from any file written in the latest
 * commit. We will assume that the schema has not changed within a single atomic write.
 *
 * @return Parquet schema for this table
 */
private MessageType getTableParquetSchemaFromDataFile() {
    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    Option<Pair<HoodieInstant, HoodieCommitMetadata>> instantAndCommitMetadata = activeTimeline.getLastCommitMetadataWithValidData();
    try {
        switch (metaClient.getTableType()) {
            case COPY_ON_WRITE:
                // For a COW table, any data file written must currently be in Parquet or ORC format.
                if (instantAndCommitMetadata.isPresent()) {
                    HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight();
                    String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny().get();
                    return readSchemaFromBaseFile(filePath);
                } else {
                    throw new IllegalArgumentException("Could not find any data file written for commit, " + "so could not get schema for table " + metaClient.getBasePath());
                }
            case MERGE_ON_READ:
                // Determine the file format based on the file name, and then extract schema from it.
                if (instantAndCommitMetadata.isPresent()) {
                    HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight();
                    String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny().get();
                    if (filePath.contains(HoodieFileFormat.HOODIE_LOG.getFileExtension())) {
                        // this is a log file
                        return readSchemaFromLogFile(new Path(filePath));
                    } else {
                        return readSchemaFromBaseFile(filePath);
                    }
                } else {
                    throw new IllegalArgumentException("Could not find any data file written for commit, " + "so could not get schema for table " + metaClient.getBasePath());
                }
            default:
                LOG.error("Unknown table type " + metaClient.getTableType());
                throw new InvalidTableException(metaClient.getBasePath());
        }
    } catch (IOException e) {
        throw new HoodieException("Failed to read data schema", e);
    }
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Path(org.apache.hadoop.fs.Path) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) InvalidTableException(org.apache.hudi.exception.InvalidTableException) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) Pair(org.apache.hudi.common.util.collection.Pair)
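
readSchemaFromBaseFile is not shown above; for a Parquet base file the schema can be pulled straight from the file footer. A minimal sketch using parquet-hadoop follows; the class name and file path are placeholders, and this is one way to implement the read rather than Hudi's exact code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.schema.MessageType;

import java.io.IOException;

public class ParquetSchemaDemo {
    // Illustrative stand-in for readSchemaFromBaseFile on a Parquet file:
    // the schema lives in the footer, so no data pages are scanned.
    static MessageType readParquetSchema(Configuration conf, Path path) throws IOException {
        try (ParquetFileReader reader =
                 ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
            return reader.getFooter().getFileMetaData().getSchema();
        }
    }

    public static void main(String[] args) throws IOException {
        // The path is a placeholder; point it at any Parquet base file.
        MessageType schema = readParquetSchema(new Configuration(), new Path("/tmp/part-000.parquet"));
        System.out.println(schema);
    }
}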

Example 98 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From class TableSchemaResolver, method getTableSchemaFromCommitMetadata.

/**
 * Gets the schema for a hoodie table in Avro format from the HoodieCommitMetadata of the instant.
 *
 * @return Avro schema for this table
 */
private Option<Schema> getTableSchemaFromCommitMetadata(HoodieInstant instant, boolean includeMetadataFields) {
    try {
        HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
        byte[] data = timeline.getInstantDetails(instant).get();
        HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(data, HoodieCommitMetadata.class);
        String existingSchemaStr = metadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY);
        if (StringUtils.isNullOrEmpty(existingSchemaStr)) {
            return Option.empty();
        }
        Schema schema = new Schema.Parser().parse(existingSchemaStr);
        if (includeMetadataFields) {
            schema = HoodieAvroUtils.addMetadataFields(schema, hasOperationField);
        }
        return Option.of(schema);
    } catch (Exception e) {
        throw new HoodieException("Failed to read schema from commit metadata", e);
    }
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Schema(org.apache.avro.Schema) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) InvalidTableException(org.apache.hudi.exception.InvalidTableException)
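
A minimal sketch of the lookup-then-parse step above, using a plain Map in place of HoodieCommitMetadata and java.util.Optional in place of Hudi's Option; the "schema" key mirrors what SCHEMA_KEY is assumed to hold.

import org.apache.avro.Schema;

import java.util.Map;
import java.util.Optional;

public class CommitSchemaDemo {
    // Illustrative sketch: the commit's extra-metadata map may or may not
    // carry a schema entry, so absence maps to an empty Optional.
    static Optional<Schema> schemaFromExtraMetadata(Map<String, String> extraMetadata) {
        String schemaStr = extraMetadata.get("schema"); // assumed value of HoodieCommitMetadata.SCHEMA_KEY
        if (schemaStr == null || schemaStr.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(new Schema.Parser().parse(schemaStr));
    }

    public static void main(String[] args) {
        Map<String, String> meta = Map.of("schema",
            "{\"type\":\"record\",\"name\":\"r\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"}]}");
        schemaFromExtraMetadata(meta).ifPresent(s -> System.out.println(s.getFullName()));
    }
}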

Example 99 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From class FileStatusDTO, method fromFileStatus.

public static FileStatusDTO fromFileStatus(FileStatus fileStatus) {
    if (null == fileStatus) {
        return null;
    }
    FileStatusDTO dto = new FileStatusDTO();
    try {
        dto.path = FilePathDTO.fromPath(fileStatus.getPath());
        dto.length = fileStatus.getLen();
        dto.isdir = fileStatus.isDirectory();
        dto.blockReplication = fileStatus.getReplication();
        dto.blocksize = fileStatus.getBlockSize();
        dto.modificationTime = fileStatus.getModificationTime();
        dto.accessTime = fileStatus.getAccessTime();
        dto.symlink = fileStatus.isSymlink() ? FilePathDTO.fromPath(fileStatus.getSymlink()) : null;
        safeReadAndSetMetadata(dto, fileStatus);
    } catch (IOException ioe) {
        throw new HoodieException(ioe);
    }
    return dto;
}
Also used : HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException)
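
A short usage sketch, assuming FileStatusDTO is on the classpath (the import path below is the one assumed for hudi-common; the /tmp path is a placeholder): fetch a FileStatus from the local filesystem and convert it.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
// Assumed import path; adjust if it differs in your Hudi version.
import org.apache.hudi.common.table.timeline.dto.FileStatusDTO;

public class FileStatusDtoDemo {
    public static void main(String[] args) throws Exception {
        // Grab a FileStatus from the local filesystem and convert it.
        // fromFileStatus tolerates null input and wraps IO failures in HoodieException.
        FileSystem fs = FileSystem.getLocal(new Configuration());
        FileStatus status = fs.getFileStatus(new Path("/tmp"));
        FileStatusDTO dto = FileStatusDTO.fromFileStatus(status);
        System.out.println(dto != null); // true for an existing path
    }
}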

Example 100 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From class HoodieTestSuiteJob, method runTestSuite.

public void runTestSuite() {
    try {
        WorkflowDag workflowDag = createWorkflowDag();
        log.info("Workflow Dag => " + DagUtils.convertDagToYaml(workflowDag));
        long startTime = System.currentTimeMillis();
        WriterContext writerContext = new WriterContext(jsc, props, cfg, keyGenerator, sparkSession);
        writerContext.initContext(jsc);
        startOtherServicesIfNeeded(writerContext);
        if (this.cfg.saferSchemaEvolution) {
            // Roll back the most recent upserts/inserts; two by default.
            int numRollbacks = 2;
            // If the root node is a RollbackNode, use its configured num_rollbacks.
            List<DagNode> root = workflowDag.getNodeList();
            if (!root.isEmpty() && root.get(0) instanceof RollbackNode) {
                numRollbacks = root.get(0).getConfig().getNumRollbacks();
            }
            int version = getSchemaVersionFromCommit(numRollbacks - 1);
            SaferSchemaDagScheduler dagScheduler = new SaferSchemaDagScheduler(workflowDag, writerContext, jsc, version);
            dagScheduler.schedule();
        } else {
            DagScheduler dagScheduler = new DagScheduler(workflowDag, writerContext, jsc);
            dagScheduler.schedule();
        }
        log.info("Finished scheduling all tasks, Time taken {}", System.currentTimeMillis() - startTime);
    } catch (Exception e) {
        log.error("Failed to run Test Suite ", e);
        throw new HoodieException("Failed to run Test Suite ", e);
    } finally {
        stopQuietly();
    }
}
Also used : WriterContext(org.apache.hudi.integ.testsuite.dag.WriterContext) DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) SaferSchemaDagScheduler(org.apache.hudi.integ.testsuite.dag.scheduler.SaferSchemaDagScheduler) DagScheduler(org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler) RollbackNode(org.apache.hudi.integ.testsuite.dag.nodes.RollbackNode) WorkflowDag(org.apache.hudi.integ.testsuite.dag.WorkflowDag) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException)
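
All five examples share one idiom: catch a checked exception at the operation boundary, wrap it in the unchecked HoodieException with a contextual message, and clean up in finally. A stripped-down sketch of that pattern (the class and methods are illustrative):

import org.apache.hudi.exception.HoodieException;

import java.io.IOException;

public class WrapAndRethrowDemo {
    // Catch the checked exception where the operation runs, wrap it with context,
    // and let callers handle a single unchecked exception type.
    static void runJob() {
        try {
            doIo();
        } catch (IOException e) {
            throw new HoodieException("Failed to run job", e);
        } finally {
            // Cleanup runs whether or not the job failed (cf. stopQuietly() above).
        }
    }

    static void doIo() throws IOException {
        throw new IOException("simulated failure");
    }

    public static void main(String[] args) {
        try {
            runJob();
        } catch (HoodieException e) {
            System.out.println(e.getMessage() + " <- " + e.getCause().getMessage());
        }
    }
}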

Aggregations

HoodieException (org.apache.hudi.exception.HoodieException): 171
IOException (java.io.IOException): 87
Path (org.apache.hadoop.fs.Path): 45
Schema (org.apache.avro.Schema): 35
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 35
List (java.util.List): 30
ArrayList (java.util.ArrayList): 27
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 23
Collectors (java.util.stream.Collectors): 21
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
Option (org.apache.hudi.common.util.Option): 19
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18
Map (java.util.Map): 16
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 16
GenericRecord (org.apache.avro.generic.GenericRecord): 15
Arrays (java.util.Arrays): 14
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 14
Logger (org.apache.log4j.Logger): 14
FileStatus (org.apache.hadoop.fs.FileStatus): 13
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 13