Use of org.apache.hadoop.fs.LocatedFileStatus in project carbondata by apache.
The class AbstractDFSCarbonFile, method listFiles (recursive listing with a file filter).
/**
 * Lists files recursively and applies the given file filter to the result.
 */
@Override
public List<CarbonFile> listFiles(boolean recursive, CarbonFileFilter fileFilter) throws IOException {
  List<CarbonFile> carbonFiles = new ArrayList<>();
  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (null != fileStatus && fileStatus.isDirectory()) {
    RemoteIterator<LocatedFileStatus> listStatus = fileSystem.listFiles(fileStatus.getPath(), recursive);
    while (listStatus.hasNext()) {
      LocatedFileStatus locatedFileStatus = listStatus.next();
      CarbonFile carbonFile = FileFactory.getCarbonFile(locatedFileStatus.getPath().toString());
      if (fileFilter.accept(carbonFile)) {
        carbonFiles.add(carbonFile);
      }
    }
  }
  return carbonFiles;
}
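For reference, here is a minimal self-contained sketch of the same RemoteIterator pattern against a bare Hadoop FileSystem, outside CarbonData. The class name, suffix, and command-line argument are illustrative, not part of the project above.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class RecursiveLister {
  /** Lists all regular files under dir whose names end with the given suffix. */
  public static List<Path> listBySuffix(FileSystem fs, Path dir, String suffix)
      throws IOException {
    List<Path> matches = new ArrayList<>();
    // listFiles(path, true) walks the tree lazily; the iterator yields regular
    // files only, so no explicit directory check is needed here.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      if (status.getPath().getName().endsWith(suffix)) {
        matches.add(status.getPath());
      }
    }
    return matches;
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    for (Path p : listBySuffix(fs, new Path(args[0]), ".carbondata")) {
      System.out.println(p);
    }
  }
}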
Use of org.apache.hadoop.fs.LocatedFileStatus in project carbondata by apache.
The class AbstractDFSCarbonFile, method listFiles (recursive listing bounded by a maximum count).
/**
 * Lists files recursively, returning at most maxCount entries.
 */
@Override
public CarbonFile[] listFiles(boolean recursive, int maxCount) throws IOException {
  List<CarbonFile> carbonFiles = new ArrayList<>();
  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (null != fileStatus && fileStatus.isDirectory()) {
    RemoteIterator<LocatedFileStatus> listStatus = fileSystem.listFiles(path, recursive);
    int counter = 0;
    // Checking the bound before hasNext() stops the iterator early instead of
    // exhausting the (potentially paged) remote listing.
    while (counter < maxCount && listStatus.hasNext()) {
      LocatedFileStatus locatedFileStatus = listStatus.next();
      CarbonFile carbonFile = FileFactory.getCarbonFile(locatedFileStatus.getPath().toString());
      carbonFiles.add(carbonFile);
      counter++;
    }
  }
  return carbonFiles.toArray(new CarbonFile[0]);
}
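A hedged usage sketch for the two overloads above, assuming a CarbonFile handle obtained through FileFactory and that CarbonFileFilter is a single-method interface usable as a lambda; the store path, filter suffix, bound, and import paths are illustrative and may differ by CarbonData version.

import java.io.IOException;
import java.util.List;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;

public final class ListingDemo {
  static void demo() throws IOException {
    // Illustrative only: the path, filter, and bound are hypothetical.
    CarbonFile storeDir =
        FileFactory.getCarbonFile("hdfs://namenode:8020/user/carbon/store");

    // First overload: recursive listing filtered down to CarbonData index files.
    List<CarbonFile> indexFiles =
        storeDir.listFiles(true, file -> file.getName().endsWith(".carbonindex"));

    // Second overload: recursive listing capped at 100 entries.
    CarbonFile[] firstHundred = storeDir.listFiles(true, 100);
  }
}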
Use of org.apache.hadoop.fs.LocatedFileStatus in project hive by apache.
The class HdfsUtils, method listLocatedStatus.
/**
 * Lists the direct children of path (listLocatedStatus is not recursive),
 * keeping only entries accepted by the filter when one is supplied.
 */
public static List<FileStatus> listLocatedStatus(final FileSystem fs, final Path path, final PathFilter filter) throws IOException {
  RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path);
  List<FileStatus> result = new ArrayList<FileStatus>();
  while (itr.hasNext()) {
    FileStatus stat = itr.next();
    if (filter == null || filter.accept(stat.getPath())) {
      result.add(stat);
    }
  }
  return result;
}
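A hedged usage sketch for the helper above; it assumes the HdfsUtils class shown here is on the classpath, and the warehouse path and hidden-file filter are illustrative. PathFilter has a single accept(Path) method, so a lambda works.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ListLocatedStatusDemo {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Skip hidden/staging entries such as ".hive-staging" and "_SUCCESS".
    PathFilter visibleOnly =
        p -> !p.getName().startsWith(".") && !p.getName().startsWith("_");
    List<FileStatus> statuses =
        HdfsUtils.listLocatedStatus(fs, new Path("/warehouse/sales"), visibleOnly);
    for (FileStatus status : statuses) {
      System.out.println(status.getPath() + "\t" + status.getLen());
    }
  }
}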
Use of org.apache.hadoop.fs.LocatedFileStatus in project druid by druid-io.
The class HdfsDataSegmentPuller, method getSegmentFiles.
public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
  try {
    final FileSystem fs = path.getFileSystem(config);
    if (fs.isDirectory(path)) {
      try {
        return RetryUtils.retry(
            new Callable<FileUtils.FileCopyResult>() {
              @Override
              public FileUtils.FileCopyResult call() throws Exception {
                if (!fs.exists(path)) {
                  throw new SegmentLoadingException("No files found at [%s]", path.toString());
                }
                final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
                final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                while (children.hasNext()) {
                  final LocatedFileStatus child = children.next();
                  final Path childPath = child.getPath();
                  final String fname = childPath.getName();
                  if (fs.isDirectory(childPath)) {
                    log.warn("[%s] is a child directory, skipping", childPath.toString());
                  } else {
                    final File outFile = new File(outDir, fname);
                    // Actual copy
                    fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                    result.addFile(outFile);
                  }
                }
                log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
                return result;
              }
            },
            shouldRetryPredicate(),
            DEFAULT_RETRY_COUNT
        );
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    } else if (CompressionUtils.isZip(path.getName())) {
      // -------- zip ---------
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          },
          outDir,
          shouldRetryPredicate(),
          false
      );
      log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
      return result;
    } else if (CompressionUtils.isGz(path.getName())) {
      // -------- gzip ---------
      final String fname = path.getName();
      final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          },
          outFile
      );
      log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outFile.getAbsolutePath());
      return result;
    } else {
      throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
  }
}
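The directory branch above, reduced to a standalone sketch without the retry wrapper or byte accounting; the class and method names are illustrative. Note that with the default FileSystem.listFiles implementation a non-recursive listing yields only regular files, so the isDirectory guard in the original is defensive.

import java.io.File;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class FlatDirCopy {
  /** Copies the immediate files of srcDir into outDir, ignoring subdirectories. */
  public static void copyFlat(FileSystem fs, Path srcDir, File outDir) throws IOException {
    // listFiles(path, false) iterates only the files that are direct children.
    RemoteIterator<LocatedFileStatus> children = fs.listFiles(srcDir, false);
    while (children.hasNext()) {
      Path childPath = children.next().getPath();
      File outFile = new File(outDir, childPath.getName());
      fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
    }
  }
}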
Use of org.apache.hadoop.fs.LocatedFileStatus in project druid by druid-io.
The class HdfsTaskLogs, method killOlderThan.
@Override
public void killOlderThan(long timestamp) throws IOException {
  Path taskLogDir = new Path(config.getDirectory());
  FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
  if (fs.exists(taskLogDir)) {
    if (!fs.isDirectory(taskLogDir)) {
      throw new IOException(String.format("taskLogDir [%s] must be a directory.", taskLogDir));
    }
    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(taskLogDir);
    while (iter.hasNext()) {
      LocatedFileStatus file = iter.next();
      if (file.getModificationTime() < timestamp) {
        Path p = file.getPath();
        log.info("Deleting hdfs task log [%s].", p.toUri().toString());
        fs.delete(p, true);
      }
      if (Thread.currentThread().isInterrupted()) {
        throw new IOException(new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
      }
    }
  }
}
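A hedged usage sketch for the cleanup method above; the seven-day retention window and wrapper class are illustrative.

import java.io.IOException;
import java.util.concurrent.TimeUnit;

public final class TaskLogRetention {
  /** Deletes task logs older than the given number of days (illustrative policy). */
  static void enforceRetention(HdfsTaskLogs taskLogs, int days) throws IOException {
    // killOlderThan compares against FileStatus.getModificationTime(), which is
    // epoch milliseconds, so the cutoff must be in the same unit.
    long cutoff = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(days);
    taskLogs.killOlderThan(cutoff);
  }
}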