Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.
From the class TestCopyUtils, method testFSCallsFailOnParentExceptions:
@Test
public void testFSCallsFailOnParentExceptions() throws Exception {
    // Static mocks so the UGI and ReplChangeManager calls below can be stubbed.
    mockStatic(UserGroupInformation.class);
    mockStatic(ReplChangeManager.class);
    when(UserGroupInformation.getCurrentUser()).thenReturn(mock(UserGroupInformation.class));
    HiveConf conf = mock(HiveConf.class);
    // NOTE(review): calling set() on a Mockito mock is a no-op; the retry delay
    // presumably comes from default stubbing — confirm this is intentional.
    conf.set(HiveConf.ConfVars.REPL_RETRY_INTIAL_DELAY.varname, "1s");
    FileSystem fs = mock(FileSystem.class);
    Path source = mock(Path.class);
    Path destination = mock(Path.class);
    ContentSummary cs = mock(ContentSummary.class);
    // Each FS call is stubbed to throw a parent-class (non-retryable) exception
    // on the first invocation and to succeed on any subsequent one; the
    // times(1) verifications below prove the failure was NOT retried.
    Exception exception = new org.apache.hadoop.fs.PathPermissionException("Failed");
    when(ReplChangeManager.checksumFor(source, fs)).thenThrow(exception).thenReturn("dummy");
    when(fs.exists(same(source))).thenThrow(exception).thenReturn(true);
    when(fs.delete(same(source), anyBoolean())).thenThrow(exception).thenReturn(true);
    when(fs.mkdirs(same(source))).thenThrow(exception).thenReturn(true);
    when(fs.rename(same(source), same(destination))).thenThrow(exception).thenReturn(true);
    when(fs.getContentSummary(same(source))).thenThrow(exception).thenReturn(cs);
    CopyUtils copyUtils = new CopyUtils(UserGroupInformation.getCurrentUser().getUserName(), conf, fs);
    CopyUtils copyUtilsSpy = Mockito.spy(copyUtils);
    // NOTE(review): these try/catch blocks would pass silently if no exception
    // were thrown at all; consider a fail() after each call under test.
    try {
        copyUtilsSpy.exists(fs, source);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).exists(source);
    try {
        copyUtils.delete(fs, source, true);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).delete(source, true);
    try {
        copyUtils.mkdirs(fs, source);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).mkdirs(source);
    try {
        copyUtils.rename(fs, source, destination);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).rename(source, destination);
    try {
        copyUtilsSpy.getContentSummary(fs, source);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).getContentSummary(source);
    try {
        copyUtilsSpy.checkSumFor(source, fs);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(copyUtilsSpy, Mockito.times(1)).checkSumFor(source, fs);
}
Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.
From the class FileUtils, method copy:
/**
 * Copies a file or directory between filesystems.
 *
 * When the source lives on HDFS and exceeds both the configured
 * file-count and total-size thresholds, the copy is delegated to a
 * distributed copy (distcp) job; otherwise a plain FileUtil copy runs.
 *
 * @param srcFS        source filesystem
 * @param src          path to copy from
 * @param dstFS        destination filesystem
 * @param dst          path to copy to
 * @param deleteSource whether to remove the source after a successful copy
 * @param overwrite    whether an existing destination may be overwritten
 * @param conf         configuration supplying the distcp thresholds
 * @return true if the copy succeeded
 * @throws IOException on filesystem errors
 */
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, boolean overwrite, Configuration conf) throws IOException {
    boolean usedDistcp = false;
    boolean succeeded = false;
    // Large HDFS sources are handed off to distcp instead of a local copy.
    if (srcFS.getUri().getScheme().equals("hdfs")) {
        ContentSummary summary = srcFS.getContentSummary(src);
        long maxFiles = MetastoreConf.getLongVar(conf, ConfVars.REPL_COPYFILE_MAXNUMFILES);
        long maxBytes = MetastoreConf.getLongVar(conf, ConfVars.REPL_COPYFILE_MAXSIZE);
        // Both thresholds must be exceeded before distcp is launched.
        if (summary.getFileCount() > maxFiles && summary.getLength() > maxBytes) {
            LOG.info("Source is " + summary.getLength() + " bytes. (MAX: " + MetastoreConf.getLongVar(conf, ConfVars.REPL_COPYFILE_MAXSIZE) + ")");
            LOG.info("Source is " + summary.getFileCount() + " files. (MAX: " + MetastoreConf.getLongVar(conf, ConfVars.REPL_COPYFILE_MAXNUMFILES) + ")");
            LOG.info("Launch distributed copy (distcp) job.");
            usedDistcp = true;
            succeeded = distCp(srcFS, Collections.singletonList(src), dst, deleteSource, null, conf);
        }
    }
    if (!usedDistcp) {
        // Note : Currently, this implementation does not "fall back" to regular copy if distcp
        // is tried and it fails. We depend upon that behaviour in cases like replication,
        // wherein if distcp fails, there is good reason to not plod along with a trivial
        // implementation, and fail instead.
        succeeded = FileUtil.copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf);
    }
    return succeeded;
}
Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.
From the class PreUpgradeTool, method getDataSize:
/**
 * Returns the total size, in bytes, of the data under the given location.
 *
 * @param location path to a partition (or table if not partitioned) dir
 * @param conf     configuration used to resolve the location's filesystem
 * @return total length in bytes as reported by the filesystem's content summary
 * @throws IOException if the filesystem cannot be reached or queried
 */
private static long getDataSize(Path location, HiveConf conf) throws IOException {
    return location.getFileSystem(conf).getContentSummary(location).getLength();
}
Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.
From the class HiveHarFileSystem, method getContentSummary:
/**
 * Computes a content summary (length, file count, directory count) for the
 * given path by walking the directory tree recursively.
 */
@Override
public ContentSummary getContentSummary(Path f) throws IOException {
    // HarFileSystem has a bug where this method does not work properly
    // if the underlying FS is HDFS. See MAPREDUCE-1877 for more
    // information. This method is from FileSystem.
    FileStatus status = getFileStatus(f);
    if (!status.isDir()) {
        // A plain file: its own length, one file, zero directories.
        return new ContentSummary(status.getLen(), 1, 0);
    }
    // A directory: start the directory count at 1 for f itself, then
    // accumulate the totals of every child (recursing into subdirectories).
    long totalLength = 0;
    long totalFiles = 0;
    long totalDirs = 1;
    for (FileStatus child : listStatus(f)) {
        ContentSummary childSummary;
        if (child.isDir()) {
            childSummary = getContentSummary(child.getPath());
        } else {
            childSummary = new ContentSummary(child.getLen(), 1, 0);
        }
        totalLength += childSummary.getLength();
        totalFiles += childSummary.getFileCount();
        totalDirs += childSummary.getDirectoryCount();
    }
    return new ContentSummary(totalLength, totalFiles, totalDirs);
}
Aggregations