Use of org.apache.hadoop.fs.LocatedFileStatus in project druid by druid-io: class HdfsDataSegmentPuller, method getSegmentFiles.
public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir)
    throws SegmentLoadingException
{
  try {
    final FileSystem fs = path.getFileSystem(config);
    if (fs.isDirectory(path)) {
      // -------- directory: copy each child file --------
      try {
        return RetryUtils.retry(
            new Callable<FileUtils.FileCopyResult>()
            {
              @Override
              public FileUtils.FileCopyResult call() throws Exception
              {
                if (!fs.exists(path)) {
                  throw new SegmentLoadingException("No files found at [%s]", path.toString());
                }
                final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                while (children.hasNext()) {
                  final LocatedFileStatus child = children.next();
                  final Path childPath = child.getPath();
                  final String fname = childPath.getName();
                  if (fs.isDirectory(childPath)) {
                    log.warn("[%s] is a child directory, skipping", childPath.toString());
                  } else {
                    final File outFile = new File(outDir, fname);
                    // Actual copy
                    fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                    result.addFile(outFile);
                  }
                }
                log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
                return result;
              }
            },
            shouldRetryPredicate(),
            DEFAULT_RETRY_COUNT
        );
      }
      catch (Exception e) {
        throw Throwables.propagate(e);
      }
    } else if (CompressionUtils.isZip(path.getName())) {
      // -------- zip --------
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(
          new ByteSource()
          {
            @Override
            public InputStream openStream() throws IOException
            {
              return getInputStream(path);
            }
          },
          outDir,
          shouldRetryPredicate(),
          false
      );
      log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
      return result;
    } else if (CompressionUtils.isGz(path.getName())) {
      // -------- gzip --------
      final String fname = path.getName();
      final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(
          new ByteSource()
          {
            @Override
            public InputStream openStream() throws IOException
            {
              return getInputStream(path);
            }
          },
          outFile
      );
      log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outFile.getAbsolutePath());
      return result;
    } else {
      throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
    }
  }
  catch (IOException e) {
    throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
  }
}
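The heart of the directory branch is the non-recursive child listing. Below is a minimal, self-contained sketch of the same pattern; the class and method names (HdfsFlatDirCopy, copyFlatDir) are illustrative, not Druid APIs, and it uses listLocatedStatus rather than listFiles so that child directories actually appear in the iteration and the skip is observable.

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class HdfsFlatDirCopy
{
  // Copies the immediate child files of an HDFS directory into a local
  // directory, skipping child directories; returns the number of files copied.
  public static int copyFlatDir(Configuration conf, Path src, File outDir) throws IOException
  {
    final FileSystem fs = src.getFileSystem(conf);
    int copied = 0;
    final RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(src);
    while (children.hasNext()) {
      final LocatedFileStatus child = children.next();
      if (child.isDirectory()) {
        continue; // mirror the Druid code: child directories are skipped
      }
      final File outFile = new File(outDir, child.getPath().getName());
      fs.copyToLocalFile(child.getPath(), new Path(outFile.toURI()));
      copied++;
    }
    return copied;
  }
}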
Use of org.apache.hadoop.fs.LocatedFileStatus in project druid by druid-io: class HdfsTaskLogs, method killOlderThan.
@Override
public void killOlderThan(long timestamp) throws IOException
{
  Path taskLogDir = new Path(config.getDirectory());
  FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
  if (fs.exists(taskLogDir)) {
    if (!fs.isDirectory(taskLogDir)) {
      throw new IOException(String.format("taskLogDir [%s] must be a directory.", taskLogDir));
    }

    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(taskLogDir);
    while (iter.hasNext()) {
      LocatedFileStatus file = iter.next();
      if (file.getModificationTime() < timestamp) {
        Path p = file.getPath();
        log.info("Deleting hdfs task log [%s].", p.toUri().toString());
        fs.delete(p, true);
      }

      if (Thread.currentThread().isInterrupted()) {
        throw new IOException(new InterruptedException("Thread interrupted. Couldn't delete all task logs."));
      }
    }
  }
}
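killOlderThan is a retention sweep: list the directory once and delete anything whose modification time falls before the cutoff. A standalone sketch of the same idea follows; the class name, method name, and the path used in main are hypothetical, not Druid code.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class HdfsRetention
{
  // Deletes every entry directly under 'dir' whose modification time is
  // older than 'cutoffMillis' (epoch millis); returns how many were deleted.
  public static int deleteOlderThan(Configuration conf, Path dir, long cutoffMillis)
      throws IOException
  {
    final FileSystem fs = dir.getFileSystem(conf);
    int deleted = 0;
    final RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir);
    while (iter.hasNext()) {
      final LocatedFileStatus status = iter.next();
      if (status.getModificationTime() < cutoffMillis) {
        fs.delete(status.getPath(), true); // recursive, as in the Druid code
        deleted++;
      }
    }
    return deleted;
  }

  public static void main(String[] args) throws IOException
  {
    // Hypothetical invocation: purge entries older than seven days.
    final long cutoff = System.currentTimeMillis() - 7L * 24 * 60 * 60 * 1000;
    final int n = deleteOlderThan(new Configuration(), new Path("/tmp/task-logs"), cutoff);
    System.out.println("Deleted " + n + " entries");
  }
}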
Use of org.apache.hadoop.fs.LocatedFileStatus in project hadoop by apache: class ViewFileSystem, method listLocatedStatus.
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f, final PathFilter filter)
    throws FileNotFoundException, IOException {
  final InodeTree.ResolveResult<FileSystem> res = fsState.resolve(getUriPath(f), true);
  final RemoteIterator<LocatedFileStatus> statusIter =
      res.targetFileSystem.listLocatedStatus(res.remainingPath);

  if (res.isInternalDir()) {
    return statusIter;
  }

  // Wrap the underlying iterator so each status is re-rooted into the view
  // ("chrooted") before being handed back to the caller.
  return new RemoteIterator<LocatedFileStatus>() {
    @Override
    public boolean hasNext() throws IOException {
      return statusIter.hasNext();
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      final LocatedFileStatus status = statusIter.next();
      return (LocatedFileStatus) fixFileStatus(status, getChrootedPath(res, status, f));
    }
  };
}
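The anonymous class at the end illustrates a reusable pattern: wrap a RemoteIterator and transform each element lazily, without materializing the listing. A generic sketch of that wrapper is below; the RemoteIterators class and its map helper are illustrative, not a claim about Hadoop's public API. A custom IOFunction interface is used because java.util.function.Function cannot throw the checked IOException that RemoteIterator's methods declare.

import java.io.IOException;

import org.apache.hadoop.fs.RemoteIterator;

public final class RemoteIterators
{
  // A Function-like interface whose apply may throw IOException, matching
  // the checked-exception contract of RemoteIterator.
  public interface IOFunction<A, B>
  {
    B apply(A input) throws IOException;
  }

  // Lazily transforms each element of a RemoteIterator; nothing is listed
  // or converted until the caller iterates.
  public static <A, B> RemoteIterator<B> map(final RemoteIterator<A> source, final IOFunction<A, B> fn)
  {
    return new RemoteIterator<B>()
    {
      @Override
      public boolean hasNext() throws IOException
      {
        return source.hasNext();
      }

      @Override
      public B next() throws IOException
      {
        return fn.apply(source.next());
      }
    };
  }
}

With such a helper, the wrapper in listLocatedStatus above would reduce to a single map(statusIter, status -> ...) call.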
Use of org.apache.hadoop.fs.LocatedFileStatus in project hadoop by apache: class AbstractContractRootDirectoryTest, method testSimpleRootListing.
@Test
public void testSimpleRootListing() throws IOException {
  describe("test the nonrecursive root listing calls");
  FileSystem fs = getFileSystem();
  Path root = new Path("/");
  FileStatus[] statuses = fs.listStatus(root);
  List<LocatedFileStatus> locatedStatusList = toList(fs.listLocatedStatus(root));
  assertEquals(statuses.length, locatedStatusList.size());
  // listFiles(root, false) returns only files, so the list may be shorter
  // than the listStatus() result when the root contains directories.
  List<LocatedFileStatus> fileList = toList(fs.listFiles(root, false));
  assertTrue(fileList.size() <= statuses.length);
}
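toList here is a contract-test support utility; a minimal equivalent simply drains the RemoteIterator eagerly into a list. The class name in this sketch is illustrative.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.RemoteIterator;

public final class IteratorUtils
{
  // Drains a RemoteIterator eagerly into a List.
  public static <T> List<T> toList(RemoteIterator<T> iterator) throws IOException
  {
    final List<T> list = new ArrayList<>();
    while (iterator.hasNext()) {
      list.add(iterator.next());
    }
    return list;
  }
}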
Use of org.apache.hadoop.fs.LocatedFileStatus in project hadoop by apache: class AbstractContractGetFileStatusTest, method testListFilesFile.
@Test
public void testListFilesFile() throws Throwable {
  describe("test listFiles(path, false) on a file");
  Path f = touchf("listfilesfile");
  List<LocatedFileStatus> statusList = toList(getFileSystem().listFiles(f, false));
  assertEquals("size of file list returned", 1, statusList.size());
  assertIsNamedFile(f, statusList.get(0));
  List<LocatedFileStatus> statusList2 =
      toListThroughNextCallsAlone(getFileSystem().listFiles(f, false));
  assertEquals("size of file list returned through next() calls", 1, statusList2.size());
  assertIsNamedFile(f, statusList2.get(0));
}
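toListThroughNextCallsAlone drives the iterator through next() alone, so the test also verifies that the end of iteration is signalled correctly even when hasNext() is never consulted. A plausible implementation of such a helper is sketched below; the actual Hadoop test-utility code may differ, and the class name is illustrative.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;

import org.apache.hadoop.fs.RemoteIterator;

public final class NextOnlyDrain
{
  // Drains a RemoteIterator by calling next() until it throws
  // NoSuchElementException, never consulting hasNext().
  public static <T> List<T> toListThroughNextCallsAlone(RemoteIterator<T> iterator)
      throws IOException
  {
    final List<T> list = new ArrayList<>();
    try {
      while (true) {
        list.add(iterator.next());
      }
    }
    catch (NoSuchElementException endOfIteration) {
      // expected: marks the end of the listing
    }
    return list;
  }
}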