Search in sources :

Example 81 with ContentSummary

Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.

From the class TestCopyUtils, method testFSCallsFailOnParentExceptions.

/**
 * Verifies that each FileSystem-facing wrapper in {@link CopyUtils} (exists, delete,
 * mkdirs, rename, getContentSummary, checkSumFor) surfaces a non-retriable parent
 * exception ({@code PathPermissionException}) after exactly one underlying FS call,
 * i.e. the retry logic does not keep retrying on permission-style failures.
 */
@Test
public void testFSCallsFailOnParentExceptions() throws Exception {
    // Static mocks so UserGroupInformation.getCurrentUser() and
    // ReplChangeManager.checksumFor(...) can be stubbed below.
    mockStatic(UserGroupInformation.class);
    mockStatic(ReplChangeManager.class);
    when(UserGroupInformation.getCurrentUser()).thenReturn(mock(UserGroupInformation.class));
    HiveConf conf = mock(HiveConf.class);
    // NOTE(review): conf is a Mockito mock, so this set() is a no-op unless
    // HiveConf.set is stubbed; kept for documentation of the intended retry delay.
    conf.set(HiveConf.ConfVars.REPL_RETRY_INTIAL_DELAY.varname, "1s");
    FileSystem fs = mock(FileSystem.class);
    Path source = mock(Path.class);
    Path destination = mock(Path.class);
    ContentSummary cs = mock(ContentSummary.class);
    // A PathPermissionException is a "parent" (non-retriable) failure: the first
    // invocation of every stubbed FS call throws it, subsequent calls would succeed.
    Exception exception = new org.apache.hadoop.fs.PathPermissionException("Failed");
    when(ReplChangeManager.checksumFor(source, fs)).thenThrow(exception).thenReturn("dummy");
    when(fs.exists(same(source))).thenThrow(exception).thenReturn(true);
    when(fs.delete(same(source), anyBoolean())).thenThrow(exception).thenReturn(true);
    when(fs.mkdirs(same(source))).thenThrow(exception).thenReturn(true);
    when(fs.rename(same(source), same(destination))).thenThrow(exception).thenReturn(true);
    when(fs.getContentSummary(same(source))).thenThrow(exception).thenReturn(cs);
    CopyUtils copyUtils = new CopyUtils(UserGroupInformation.getCurrentUser().getUserName(), conf, fs);
    CopyUtils copyUtilsSpy = Mockito.spy(copyUtils);
    // Each operation below must propagate the stubbed exception as the cause and
    // must have hit the underlying FileSystem exactly once (no retries).
    try {
        copyUtilsSpy.exists(fs, source);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).exists(source);
    try {
        copyUtils.delete(fs, source, true);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).delete(source, true);
    try {
        copyUtils.mkdirs(fs, source);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).mkdirs(source);
    try {
        copyUtils.rename(fs, source, destination);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).rename(source, destination);
    try {
        copyUtilsSpy.getContentSummary(fs, source);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    Mockito.verify(fs, Mockito.times(1)).getContentSummary(source);
    try {
        copyUtilsSpy.checkSumFor(source, fs);
    } catch (Exception e) {
        assertEquals(exception.getClass(), e.getCause().getClass());
    }
    // checksumFor is a static on ReplChangeManager, so verify via the spy wrapper.
    Mockito.verify(copyUtilsSpy, Mockito.times(1)).checkSumFor(source, fs);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) ContentSummary(org.apache.hadoop.fs.ContentSummary) HiveConf(org.apache.hadoop.hive.conf.HiveConf) IOException(java.io.IOException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 82 with ContentSummary

Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.

From the class FileUtils, method copy.

/**
 * Copies files between filesystems.
 */
/**
 * Copies files between filesystems.
 * <p>
 * For HDFS sources that exceed both the configured file-count and byte-size
 * thresholds ({@code REPL_COPYFILE_MAXNUMFILES} / {@code REPL_COPYFILE_MAXSIZE}),
 * the copy is delegated to a distributed distcp job; otherwise a regular
 * {@link FileUtil#copy} is performed.
 *
 * @param srcFS        source filesystem
 * @param src          source file or directory
 * @param dstFS        destination filesystem
 * @param dst          destination path
 * @param deleteSource whether to delete {@code src} after a successful copy
 * @param overwrite    whether to overwrite an existing destination (regular copy only)
 * @param conf         configuration providing the distcp thresholds
 * @return true if the copy succeeded
 * @throws IOException on filesystem errors
 */
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, boolean overwrite, Configuration conf) throws IOException {
    boolean copied = false;
    boolean triedDistcp = false;
    /* Run distcp if source file/dir is too big */
    // Constant-first equals: URI.getScheme() may return null (relative URIs),
    // which would NPE with scheme.equals("hdfs").
    if ("hdfs".equals(srcFS.getUri().getScheme())) {
        ContentSummary srcContentSummary = srcFS.getContentSummary(src);
        // Hoist the config lookups so each threshold is read once.
        long maxNumFiles = MetastoreConf.getLongVar(conf, ConfVars.REPL_COPYFILE_MAXNUMFILES);
        long maxSize = MetastoreConf.getLongVar(conf, ConfVars.REPL_COPYFILE_MAXSIZE);
        // Both thresholds must be exceeded before paying the distcp job overhead.
        if (srcContentSummary.getFileCount() > maxNumFiles && srcContentSummary.getLength() > maxSize) {
            LOG.info("Source is " + srcContentSummary.getLength() + " bytes. (MAX: " + maxSize + ")");
            LOG.info("Source is " + srcContentSummary.getFileCount() + " files. (MAX: " + maxNumFiles + ")");
            LOG.info("Launch distributed copy (distcp) job.");
            triedDistcp = true;
            copied = distCp(srcFS, Collections.singletonList(src), dst, deleteSource, null, conf);
        }
    }
    if (!triedDistcp) {
        // Note : Currently, this implementation does not "fall back" to regular copy if distcp
        // is tried and it fails. We depend upon that behaviour in cases like replication,
        // wherein if distcp fails, there is good reason to not plod along with a trivial
        // implementation, and fail instead.
        copied = FileUtil.copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf);
    }
    return copied;
}
Also used : ContentSummary(org.apache.hadoop.fs.ContentSummary)

Example 83 with ContentSummary

Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.

From the class PreUpgradeTool, method getDataSize.

/**
 * @param location - path to a partition (or table if not partitioned) dir
 */
/**
 * Returns the total length in bytes of the data under the given location.
 *
 * @param location - path to a partition (or table if not partitioned) dir
 * @param conf     configuration used to resolve the filesystem for the path
 * @return total size in bytes as reported by the filesystem's content summary
 * @throws IOException if the filesystem cannot be reached or summarized
 */
private static long getDataSize(Path location, HiveConf conf) throws IOException {
    final FileSystem fileSystem = location.getFileSystem(conf);
    final ContentSummary summary = fileSystem.getContentSummary(location);
    return summary.getLength();
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) ContentSummary(org.apache.hadoop.fs.ContentSummary)

Example 84 with ContentSummary

Use of org.apache.hadoop.fs.ContentSummary in the Apache Hive project.

From the class HiveHarFileSystem, method getContentSummary.

@Override
public ContentSummary getContentSummary(Path f) throws IOException {
    // HarFileSystem has a bug where this method does not work properly
    // if the underlying FS is HDFS. See MAPREDUCE-1877 for more
    // information. This method is from FileSystem.
    FileStatus status = getFileStatus(f);
    if (!status.isDir()) {
        // f is a file
        return new ContentSummary(status.getLen(), 1, 0);
    }
    // f is a directory
    long[] summary = { 0, 0, 1 };
    for (FileStatus s : listStatus(f)) {
        ContentSummary c = s.isDir() ? getContentSummary(s.getPath()) : new ContentSummary(s.getLen(), 1, 0);
        summary[0] += c.getLength();
        summary[1] += c.getFileCount();
        summary[2] += c.getDirectoryCount();
    }
    return new ContentSummary(summary[0], summary[1], summary[2]);
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) ContentSummary(org.apache.hadoop.fs.ContentSummary)

Aggregations

ContentSummary (org.apache.hadoop.fs.ContentSummary)84 Path (org.apache.hadoop.fs.Path)60 Test (org.junit.Test)52 FileSystem (org.apache.hadoop.fs.FileSystem)21 IOException (java.io.IOException)13 Configuration (org.apache.hadoop.conf.Configuration)9 ArrayList (java.util.ArrayList)6 URI (java.net.URI)5 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)5 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)5 DSQuotaExceededException (org.apache.hadoop.hdfs.protocol.DSQuotaExceededException)5 QuotaExceededException (org.apache.hadoop.hdfs.protocol.QuotaExceededException)5 WebHdfsFileSystem (org.apache.hadoop.hdfs.web.WebHdfsFileSystem)5 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)5 JobConf (org.apache.hadoop.mapred.JobConf)5 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)5 OutputStream (java.io.OutputStream)4 HttpURLConnection (java.net.HttpURLConnection)4 List (java.util.List)4 Properties (java.util.Properties)4