Use of org.apache.hadoop.mapred.InvalidInputException in the Apache Hive project:
class BucketizedHiveInputFormat, method listStatus.
/**
 * Recursively lists status for all files starting from the directory dir.
 *
 * <p>The path is first expanded with {@code FileSystem.globStatus} using
 * {@code FileUtils.HIDDEN_FILES_PATH_FILTER}; every match is then passed to
 * {@code FileUtils.listStatusRecursively}, which accumulates entries into the result list.
 *
 * @param job job configuration used to resolve the file system that owns {@code dir}
 * @param dir path (or glob pattern) to start listing from
 * @return the accumulated file statuses as an array
 * @throws IOException if the file system cannot be accessed; an {@link InvalidInputException}
 *         is thrown when the glob lookup returns {@code null} ("Input path does not exist")
 *         or an empty array ("matches 0 files")
 */
protected FileStatus[] listStatus(JobConf job, Path dir) throws IOException {
  ArrayList<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  FileSystem fs = dir.getFileSystem(job);

  FileStatus[] matches = fs.globStatus(dir, FileUtils.HIDDEN_FILES_PATH_FILTER);
  if (matches == null) {
    // A null result is reported as a missing input path.
    errors.add(new IOException("Input path does not exist: " + dir));
  } else if (matches.length == 0) {
    // An empty result is reported as a pattern that matched nothing.
    errors.add(new IOException("Input Pattern " + dir + " matches 0 files"));
  } else {
    for (FileStatus globStat : matches) {
      FileUtils.listStatusRecursively(fs, globStat, result);
    }
  }

  // Errors are collected in a list because InvalidInputException is constructed from one.
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }

  // Rendering the whole listing as a string can be costly for large inputs,
  // so only build the message when debug logging is actually enabled.
  if (LOG.isDebugEnabled()) {
    LOG.debug("Matches for " + dir + ": " + result);
  }
  LOG.info("Total input paths to process : " + result.size() + " from dir " + dir);
  return result.toArray(new FileStatus[result.size()]);
}
Aggregations