Use of org.apache.hudi.exception.HoodieException in project hudi by Apache.
The class HoodieClientTestUtils, method read().
/**
 * Reads the paths under the hoodie table out as a DataFrame.
 */
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs, String... paths) {
  List<String> filteredPaths = new ArrayList<>();
  try {
    List<HoodieBaseFile> latestFiles = getLatestBaseFiles(basePath, fs, paths);
    for (HoodieBaseFile file : latestFiles) {
      filteredPaths.add(file.getPath());
    }
    if (filteredPaths.isEmpty()) {
      return sqlContext.emptyDataFrame();
    }
    String[] filteredPathsToRead = filteredPaths.toArray(new String[filteredPaths.size()]);
    if (filteredPathsToRead[0].endsWith(HoodieFileFormat.PARQUET.getFileExtension())) {
      return sqlContext.read().parquet(filteredPathsToRead);
    } else if (filteredPathsToRead[0].endsWith(HoodieFileFormat.ORC.getFileExtension())) {
      return sqlContext.read().orc(filteredPathsToRead);
    }
    return sqlContext.emptyDataFrame();
  } catch (Exception e) {
    throw new HoodieException("Error reading hoodie table as a dataframe", e);
  }
}
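For context, a minimal usage sketch, assuming a local Spark session and an illustrative table path (neither is taken from the Hudi test suite); any failure inside read() surfaces as the wrapped HoodieException.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.exception.HoodieException;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;

public class ReadExample {
  public static void main(String[] args) {
    // Illustrative local Spark setup; a real Hudi test would use its own harness.
    SparkSession spark = SparkSession.builder().master("local[2]").appName("read-example").getOrCreate();
    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
    SQLContext sqlContext = spark.sqlContext();
    String basePath = "/tmp/hoodie/sample-table"; // hypothetical table location
    try {
      FileSystem fs = FileSystem.get(jsc.hadoopConfiguration());
      // Reads only the latest base files under the given partition path(s).
      Dataset<Row> rows = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, basePath + "/2016/03/15");
      rows.show();
    } catch (Exception e) {
      // IO problems are rethrown by the helper as HoodieException with the original cause attached.
      throw new HoodieException("Sample read failed", e);
    } finally {
      spark.stop();
    }
  }
}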
Use of org.apache.hudi.exception.HoodieException in project hudi by Apache.
The class HoodieClientTestUtils, method readCommit().
public static Dataset<Row> readCommit(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline, String instantTime, boolean filterByCommitTime) {
  HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
  if (!commitTimeline.containsInstant(commitInstant)) {
    throw new HoodieException("No commit exists at " + instantTime);
  }
  try {
    HashMap<String, String> paths = getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant));
    LOG.info("Path :" + paths.values());
    Dataset<Row> unFilteredRows = null;
    if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.PARQUET)) {
      unFilteredRows = sqlContext.read().parquet(paths.values().toArray(new String[paths.size()]));
    } else if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.ORC)) {
      unFilteredRows = sqlContext.read().orc(paths.values().toArray(new String[paths.size()]));
    }
    if (unFilteredRows != null) {
      if (filterByCommitTime) {
        return unFilteredRows.filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime));
      } else {
        return unFilteredRows;
      }
    } else {
      return sqlContext.emptyDataFrame();
    }
  } catch (Exception e) {
    throw new HoodieException("Error reading commit " + instantTime, e);
  }
}
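A hedged sketch of how readCommit might be called once a commit timeline is available; the helper name countRecordsInCommit and the choice to treat a failure as zero rows are illustrative, not part of the Hudi code.

import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.exception.HoodieException;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

class ReadCommitExample {
  // Hypothetical helper: counts the rows written by a single commit.
  static long countRecordsInCommit(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline, String instantTime) {
    try {
      // filterByCommitTime = true keeps only rows whose _hoodie_commit_time equals instantTime.
      Dataset<Row> rows = HoodieClientTestUtils.readCommit(basePath, sqlContext, commitTimeline, instantTime, true);
      return rows.count();
    } catch (HoodieException e) {
      // Thrown both when the instant is absent from the timeline and when the underlying read fails.
      return 0L;
    }
  }
}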
Use of org.apache.hudi.exception.HoodieException in project hudi by Apache.
The class TestWriteMarkersBase, method createInvalidFile().
private void createInvalidFile(String partitionPath, String invalidFileName) {
  Path path = FSUtils.getPartitionPath(markerFolderPath.toString(), partitionPath);
  Path invalidFilePath = new Path(path, invalidFileName);
  try {
    fs.create(invalidFilePath, false).close();
  } catch (IOException e) {
    throw new HoodieException("Failed to create invalid file " + invalidFilePath, e);
  }
}
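The create-then-close idiom above is all it takes to drop an empty placeholder file on a Hadoop FileSystem. Below is a small standalone sketch against the local filesystem; the class name, path, and file name are assumptions for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieException;

class EmptyFileExample {
  static void createEmptyFile(FileSystem fs, Path file) {
    try {
      // create(path, false) fails if the file already exists; closing the stream leaves a zero-byte file.
      fs.create(file, false).close();
    } catch (IOException e) {
      throw new HoodieException("Failed to create file " + file, e);
    }
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    createEmptyFile(fs, new Path("/tmp/markers/illustrative-invalid-file"));
  }
}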
Use of org.apache.hudi.exception.HoodieException in project hudi by Apache.
The class HoodieAvroUtils, method getNestedFieldVal().
/**
 * Obtain value of the provided field, denoted by dot notation. e.g: a.b.c
 */
public static Object getNestedFieldVal(GenericRecord record, String fieldName, boolean returnNullIfNotFound, boolean consistentLogicalTimestampEnabled) {
  String[] parts = fieldName.split("\\.");
  GenericRecord valueNode = record;
  int i = 0;
  try {
    for (; i < parts.length; i++) {
      String part = parts[i];
      Object val = valueNode.get(part);
      if (val == null) {
        break;
      }
      // return, if last part of name
      if (i == parts.length - 1) {
        Schema fieldSchema = valueNode.getSchema().getField(part).schema();
        return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled);
      } else {
        // VC: Need a test here
        if (!(val instanceof GenericRecord)) {
          throw new HoodieException("Cannot find a record at part value :" + part);
        }
        valueNode = (GenericRecord) val;
      }
    }
  } catch (AvroRuntimeException e) {
    // So when returnNullIfNotFound is true, catch this exception.
    if (!returnNullIfNotFound) {
      throw e;
    }
  }
  if (returnNullIfNotFound) {
    return null;
  } else if (valueNode.getSchema().getField(parts[i]) == null) {
    throw new HoodieException(fieldName + "(Part -" + parts[i] + ") field not found in record. Acceptable fields were :" + valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList()));
  } else {
    throw new HoodieException("The value of " + parts[i] + " can not be null");
  }
}
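To make the dot-notation traversal concrete, here is a hedged sketch that builds a small nested Avro record and resolves the path a.b through getNestedFieldVal; the schema and field names are invented for illustration.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.avro.HoodieAvroUtils;

class NestedFieldExample {
  public static void main(String[] args) {
    String schemaJson = "{\"type\":\"record\",\"name\":\"Outer\",\"fields\":["
        + "{\"name\":\"a\",\"type\":{\"type\":\"record\",\"name\":\"Inner\",\"fields\":["
        + "{\"name\":\"b\",\"type\":\"long\"}]}}]}";
    Schema schema = new Schema.Parser().parse(schemaJson);
    GenericRecord inner = new GenericData.Record(schema.getField("a").schema());
    inner.put("b", 42L);
    GenericRecord outer = new GenericData.Record(schema);
    outer.put("a", inner);
    // Walks "a" then "b"; with returnNullIfNotFound = false a missing field raises HoodieException instead.
    Object value = HoodieAvroUtils.getNestedFieldVal(outer, "a.b", false, false);
    System.out.println(value); // 42
  }
}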
Use of org.apache.hudi.exception.HoodieException in project hudi by Apache.
The class KafkaConnectHdfsProvider, method getCheckpoint().
@Override
public String getCheckpoint() throws HoodieException {
  final KafkaConnectPathFilter filter = new KafkaConnectPathFilter();
  ArrayList<FileStatus> fileStatus;
  try {
    fileStatus = listAllFileStatus(this.path, filter);
  } catch (IOException e) {
    throw new HoodieException(e.toString());
  }
  if (fileStatus.size() == 0) {
    throw new HoodieException("No valid Kafka Connect Hdfs file found under:" + this.path.getName());
  }
  final String topic = fileStatus.get(0).getPath().getName().split(FILENAME_SEPARATOR)[0];
  int maxPartition = -1;
  final HashMap<Integer, Integer> checkpointMap = new HashMap<>();
  for (final FileStatus status : fileStatus) {
    final String filename = status.getPath().getName();
    final String[] groups = filename.split(FILENAME_SEPARATOR);
    final int partition = Integer.parseInt(groups[1]);
    final int offsetUpper = Integer.parseInt(groups[3]);
    maxPartition = Math.max(maxPartition, partition);
    if (checkpointMap.containsKey(partition)) {
      checkpointMap.put(partition, Math.max(checkpointMap.get(partition), offsetUpper));
    } else {
      checkpointMap.put(partition, offsetUpper);
    }
  }
  if (checkpointMap.size() != maxPartition + 1) {
    throw new HoodieException("Missing partition from the file scan, " + "max partition found(start from 0): " + maxPartition + " total partitions number appear in " + this.path.getName() + " is: " + checkpointMap.size() + " total partitions number expected: " + (maxPartition + 1));
  }
  return buildCheckpointStr(topic, checkpointMap);
}
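A self-contained sketch of the same per-partition aggregation over Kafka Connect style file names; the sample names, the '+' separator, and printing instead of calling buildCheckpointStr are assumptions made for illustration.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class KafkaConnectCheckpointExample {
  public static void main(String[] args) {
    // Assumed Kafka Connect HDFS naming: <topic>+<partition>+<startOffset>+<endOffset>.parquet
    List<String> fileNames = Arrays.asList(
        "trips+0+0000+0099.parquet",
        "trips+0+0100+0199.parquet",
        "trips+1+0000+0049.parquet");
    Map<Integer, Integer> checkpointMap = new HashMap<>();
    String topic = null;
    for (String name : fileNames) {
      String[] groups = name.split("[+.]");
      topic = groups[0];
      int partition = Integer.parseInt(groups[1]);
      int offsetUpper = Integer.parseInt(groups[3]);
      // Keep only the highest end offset seen per partition, mirroring getCheckpoint().
      checkpointMap.merge(partition, offsetUpper, Math::max);
    }
    System.out.println(topic + " -> " + checkpointMap); // trips -> {0=199, 1=49}
  }
}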