
Example 6 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class HoodieClientTestUtils, method read.

/**
 * Reads the given paths under the Hudi table as a DataFrame.
 */
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs, String... paths) {
    List<String> filteredPaths = new ArrayList<>();
    try {
        List<HoodieBaseFile> latestFiles = getLatestBaseFiles(basePath, fs, paths);
        for (HoodieBaseFile file : latestFiles) {
            filteredPaths.add(file.getPath());
        }
        if (filteredPaths.isEmpty()) {
            return sqlContext.emptyDataFrame();
        }
        String[] filteredPathsToRead = filteredPaths.toArray(new String[filteredPaths.size()]);
        if (filteredPathsToRead[0].endsWith(HoodieFileFormat.PARQUET.getFileExtension())) {
            return sqlContext.read().parquet(filteredPathsToRead);
        } else if (filteredPathsToRead[0].endsWith(HoodieFileFormat.ORC.getFileExtension())) {
            return sqlContext.read().orc(filteredPathsToRead);
        }
        return sqlContext.emptyDataFrame();
    } catch (Exception e) {
        throw new HoodieException("Error reading hoodie table as a dataframe", e);
    }
}
Also used: HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile), ArrayList (java.util.ArrayList), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException)
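
A minimal usage sketch; the JavaSparkContext, FileSystem, base path, and partition path below are illustrative assumptions, not part of the utility itself:

// Hypothetical test usage; jsc, fs and basePath are assumed to be set up elsewhere.
SQLContext sqlContext = new SQLContext(jsc);
Dataset<Row> rows = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, basePath + "/2022/01/01");
// read(...) returns an empty DataFrame when no base files match the given paths.
System.out.println("rows read: " + rows.count());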

Example 7 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class HoodieClientTestUtils, method readCommit.

public static Dataset<Row> readCommit(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline, String instantTime, boolean filterByCommitTime) {
    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
    if (!commitTimeline.containsInstant(commitInstant)) {
        throw new HoodieException("No commit exists at " + instantTime);
    }
    try {
        HashMap<String, String> paths = getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant));
        LOG.info("Path :" + paths.values());
        Dataset<Row> unFilteredRows = null;
        if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.PARQUET)) {
            unFilteredRows = sqlContext.read().parquet(paths.values().toArray(new String[paths.size()]));
        } else if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.ORC)) {
            unFilteredRows = sqlContext.read().orc(paths.values().toArray(new String[paths.size()]));
        }
        if (unFilteredRows != null) {
            if (filterByCommitTime) {
                return unFilteredRows.filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime));
            } else {
                return unFilteredRows;
            }
        } else {
            return sqlContext.emptyDataFrame();
        }
    } catch (Exception e) {
        throw new HoodieException("Error reading commit " + instantTime, e);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieException (org.apache.hudi.exception.HoodieException), Row (org.apache.spark.sql.Row), IOException (java.io.IOException)
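
A hedged sketch of driving readCommit from a table's timeline; the meta client setup and the instant time are assumptions for illustration:

// Build a completed-commits timeline for the table, then read back one commit's rows.
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(jsc.hadoopConfiguration())
    .setBasePath(basePath)
    .build();
HoodieTimeline commitTimeline = metaClient.getCommitTimeline().filterCompletedInstants();
// filterByCommitTime = true keeps only rows whose _hoodie_commit_time equals the instant.
Dataset<Row> rows = HoodieClientTestUtils.readCommit(basePath, sqlContext, commitTimeline, "20220101000000", true);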

Example 8 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class TestWriteMarkersBase, method createInvalidFile.

private void createInvalidFile(String partitionPath, String invalidFileName) {
    Path path = FSUtils.getPartitionPath(markerFolderPath.toString(), partitionPath);
    Path invalidFilePath = new Path(path, invalidFileName);
    try {
        fs.create(invalidFilePath, false).close();
    } catch (IOException e) {
        throw new HoodieException("Failed to create invalid file " + invalidFilePath, e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException)
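
The create-then-close idiom above relies on FileSystem.create(path, false) failing when the file already exists. A minimal standalone sketch (the path below is made up):

// Overwrite flag false: creation throws an IOException if the file already exists,
// so accidental double-creation surfaces as an error rather than a silent overwrite.
Path marker = new Path("/tmp/markers/2020/06/01/extra_file.txt");
try (FSDataOutputStream out = fs.create(marker, false)) {
    // intentionally left empty; only the file's existence matters to the test
}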

Example 9 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class HoodieAvroUtils, method getNestedFieldVal.

/**
 * Obtains the value of the provided field, referenced via dot notation, e.g. a.b.c.
 */
public static Object getNestedFieldVal(GenericRecord record, String fieldName, boolean returnNullIfNotFound, boolean consistentLogicalTimestampEnabled) {
    String[] parts = fieldName.split("\\.");
    GenericRecord valueNode = record;
    int i = 0;
    try {
        for (; i < parts.length; i++) {
            String part = parts[i];
            Object val = valueNode.get(part);
            if (val == null) {
                break;
            }
            // if this is the last part of the name, convert and return its value
            if (i == parts.length - 1) {
                Schema fieldSchema = valueNode.getSchema().getField(part).schema();
                return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled);
            } else {
                // VC: Need a test here
                if (!(val instanceof GenericRecord)) {
                    throw new HoodieException("Cannot find a record at part value :" + part);
                }
                valueNode = (GenericRecord) val;
            }
        }
    } catch (AvroRuntimeException e) {
        // Avro 1.10+ throws AvroRuntimeException ("Not a valid schema field") for a
        // missing field instead of returning null as earlier versions did, so swallow
        // the exception when returnNullIfNotFound is true.
        if (!returnNullIfNotFound) {
            throw e;
        }
    }
    if (returnNullIfNotFound) {
        return null;
    } else if (valueNode.getSchema().getField(parts[i]) == null) {
        throw new HoodieException(fieldName + "(Part -" + parts[i] + ") field not found in record. Acceptable fields were :" + valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList()));
    } else {
        throw new HoodieException("The value of " + parts[i] + " can not be null");
    }
}
Also used: Field (org.apache.avro.Schema.Field), SerializableSchema (org.apache.hudi.common.config.SerializableSchema), Schema (org.apache.avro.Schema), HoodieException (org.apache.hudi.exception.HoodieException), AvroRuntimeException (org.apache.avro.AvroRuntimeException), GenericRecord (org.apache.avro.generic.GenericRecord)
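
A short sketch of looking up a nested field; the record, schema shape, and field path are hypothetical:

// Assume record conforms to a schema shaped like {"user": {"address": {"zip": string}}}.
Object zip = HoodieAvroUtils.getNestedFieldVal(record, "user.address.zip", true, false);
if (zip == null) {
    // Either the leaf field or an intermediate record was absent; with
    // returnNullIfNotFound = false this lookup would have thrown instead.
}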

Example 10 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class KafkaConnectHdfsProvider, method getCheckpoint.

@Override
public String getCheckpoint() throws HoodieException {
    final KafkaConnectPathFilter filter = new KafkaConnectPathFilter();
    ArrayList<FileStatus> fileStatus;
    try {
        fileStatus = listAllFileStatus(this.path, filter);
    } catch (IOException e) {
        throw new HoodieException("Failed to list files under " + this.path, e);
    }
    if (fileStatus.isEmpty()) {
        throw new HoodieException("No valid Kafka Connect HDFS file found under: " + this.path.getName());
    }
    final String topic = fileStatus.get(0).getPath().getName().split(FILENAME_SEPARATOR)[0];
    int maxPartition = -1;
    final HashMap<Integer, Integer> checkpointMap = new HashMap<>();
    for (final FileStatus status : fileStatus) {
        final String filename = status.getPath().getName();
        final String[] groups = filename.split(FILENAME_SEPARATOR);
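        // Assumed filename layout: <topic>+<kafkaPartition>+<startOffset>+<endOffset>;
        // groups[1] is the Kafka partition and groups[3] the file's ending offset.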
        final int partition = Integer.parseInt(groups[1]);
        final int offsetUpper = Integer.parseInt(groups[3]);
        maxPartition = Math.max(maxPartition, partition);
        if (checkpointMap.containsKey(partition)) {
            checkpointMap.put(partition, Math.max(checkpointMap.get(partition), offsetUpper));
        } else {
            checkpointMap.put(partition, offsetUpper);
        }
    }
    if (checkpointMap.size() != maxPartition + 1) {
        throw new HoodieException("Missing partition from the file scan, " + "max partition found(start from 0): " + maxPartition + " total partitions number appear in " + this.path.getName() + " is: " + checkpointMap.size() + " total partitions number expected: " + (maxPartition + 1));
    }
    return buildCheckpointStr(topic, checkpointMap);
}
Also used: FileStatus (org.apache.hadoop.fs.FileStatus), HashMap (java.util.HashMap), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException)
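
For context, a sketch of the filename convention the parser assumes; the sample name, separator, and extension are assumptions based on the Kafka Connect HDFS sink's default naming, not values taken from the code above:

// Hypothetical committed filename: <topic>+<partition>+<startOffset>+<endOffset>.parquet
String name = "trips+1+400+499.parquet";
String[] groups = name.replace(".parquet", "").split("\\+");
String topic = groups[0];                      // "trips"
int partition = Integer.parseInt(groups[1]);   // 1
int offsetUpper = Integer.parseInt(groups[3]); // 499, the checkpoint candidate for partition 1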

Aggregations

HoodieException (org.apache.hudi.exception.HoodieException): 171
IOException (java.io.IOException): 87
Path (org.apache.hadoop.fs.Path): 45
Schema (org.apache.avro.Schema): 35
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 35
List (java.util.List): 30
ArrayList (java.util.ArrayList): 27
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 23
Collectors (java.util.stream.Collectors): 21
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
Option (org.apache.hudi.common.util.Option): 19
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18
Map (java.util.Map): 16
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 16
GenericRecord (org.apache.avro.generic.GenericRecord): 15
Arrays (java.util.Arrays): 14
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 14
Logger (org.apache.log4j.Logger): 14
FileStatus (org.apache.hadoop.fs.FileStatus): 13
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 13