Use of org.apache.hadoop.mapreduce.lib.input.InvalidInputException in project angel (by Tencent):
class BalanceInputFormat, method listStatus0.
/**
 * Lists all input files for the single input directory configured under
 * {@code INPUT_DIR}, expanding glob patterns and (optionally) recursing into
 * sub-directories.
 *
 * @param conf job configuration; {@code INPUT_DIR} must name the input path,
 *             {@code INPUT_DIR_RECURSIVE} controls directory recursion
 * @return the matched file statuses (directories are expanded, never returned)
 * @throws IOException if no input path is configured
 * @throws InvalidInputException if the configured path does not exist or the
 *         pattern matches no files
 */
protected List<FileStatus> listStatus0(Configuration conf) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    // Fail fast with the intended IOException when no input path is configured.
    // (Building new Path("") first — as the previous version did — throws
    // IllegalArgumentException "Can not create a Path from an empty string"
    // before any length check can run, and a one-element array can never have
    // length 0 anyway.)
    String inputDir = conf.get(INPUT_DIR, "");
    if (inputDir.isEmpty()) {
        throw new IOException("No input paths specified in job");
    }
    Path[] dirs = new Path[] { new Path(StringUtils.unEscapeString(inputDir)) };
    // get tokens for all the required FileSystems..
    // TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
    // job.getConfiguration());
    // Whether we need to recursively look into the directory structure
    boolean recursive = conf.getBoolean(INPUT_DIR_RECURSIVE, false);
    List<IOException> errors = new ArrayList<IOException>();
    // Creates a MultiPathFilter over the user-provided filters; the list is
    // intentionally empty here, so the composite filter accepts every path.
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(conf);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            // globStatus returns null when the non-glob parent does not exist
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    // Expand one level; recurse further only when configured.
                    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
                    while (iter.hasNext()) {
                        LocatedFileStatus stat = iter.next();
                        if (inputFilter.accept(stat.getPath())) {
                            if (recursive && stat.isDirectory()) {
                                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                            } else {
                                result.add(stat);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    // Surface all path problems at once rather than failing on the first.
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}
Use of org.apache.hadoop.mapreduce.lib.input.InvalidInputException in project hazelcast (by Hazelcast):
class ReadHadoopNewApiP, method getSplits.
/**
 * Computes the Hadoop input splits for the given configuration.
 * <p>
 * When the configured input directory does not exist, behavior depends on
 * {@code HadoopSources.IGNORE_FILE_NOT_FOUND} (default {@code true}): either an
 * empty split list is returned (the source emits 0 items) or a
 * {@link JetException} is thrown.
 *
 * @param configuration Hadoop job configuration naming the input directory
 * @return the computed splits, possibly empty
 * @throws Exception if split computation fails for any other reason
 */
private static <K, V> List<InputSplit> getSplits(Configuration configuration) throws Exception {
    InputFormat<K, V> format = extractInputFormat(configuration);
    Job job = Job.getInstance(configuration);
    try {
        return format.getSplits(job);
    } catch (InvalidInputException missingInput) {
        String directory = configuration.get(INPUT_DIR, "");
        // Strict mode: surface the missing input to the caller.
        if (!configuration.getBoolean(HadoopSources.IGNORE_FILE_NOT_FOUND, true)) {
            throw new JetException("The input " + directory + " matches no files");
        }
        // Lenient mode (default): log and act as an empty source.
        Logger.getLogger(ReadHadoopNewApiP.class)
                .fine("The directory '" + directory + "' does not exist. This source will emit 0 items.");
        return emptyList();
    }
}
Use of org.apache.hadoop.mapreduce.lib.input.InvalidInputException in project shifu (by ShifuML):
class ShifuInputFormat, method listCrossValidationStatus.
/**
 * Lists all cross-validation input files for the job, expanding glob patterns
 * and (optionally) recursing into sub-directories. Hidden files and any paths
 * rejected by the job's input path filter are skipped.
 *
 * @param job the job context supplying input paths, credentials and configuration
 * @return the matched file statuses (directories are expanded, never returned)
 * @throws IOException if no input paths are specified
 * @throws InvalidInputException if any input path does not exist or a pattern
 *         matches no files
 */
protected List<FileStatus> listCrossValidationStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());
    // Whether we need to recursively look into the directory structure
    boolean recursive = job.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.recursive", false);
    List<IOException> errors = new ArrayList<IOException>();
    // Creates a MultiPathFilter combining the hiddenFileFilter and the
    // user-provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            // globStatus returns null when the non-glob parent does not exist
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                // isDirectory() replaces the deprecated isDir() with identical
                // behavior, so the @SuppressWarnings("deprecation") is no
                // longer needed.
                if (globStat.isDirectory()) {
                    FileStatus[] fss = fs.listStatus(globStat.getPath());
                    for (FileStatus fileStatus : fss) {
                        if (inputFilter.accept(fileStatus.getPath())) {
                            if (recursive && fileStatus.isDirectory()) {
                                addInputPathRecursive(result, fs, fileStatus.getPath(), inputFilter);
                            } else {
                                result.add(fileStatus);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    // Surface all path problems at once rather than failing on the first.
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total validation paths to process : " + result.size());
    return result;
}
Aggregations