Search in sources :

Example 31 with CopyListingFileStatus

use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.

the class TestDistCpUtils method testPreserveOnFileDownwardRecursion.

/**
 * Checks that preserving attributes onto a single file does not leak to
 * other paths: DistCpUtils.preserve is applied to /f0 only, after which
 * /f0 must match the source status while /d1/f1, /d1/d2/f2, /d1 and /d1/d2
 * must still differ from the source (directory times must still be the
 * values assigned during setup).
 */
@Test
public void testPreserveOnFileDownwardRecursion() throws IOException {
    FileSystem fs = FileSystem.get(config);
    EnumSet<FileAttribute> attributes = EnumSet.allOf(FileAttribute.class);
    // Remove ACL because tests run with dfs.namenode.acls.enabled false
    attributes.remove(FileAttribute.ACL);
    // src is the attribute donor; f0 is the only preserve target; the paths
    // under /d1 exist to prove they are left untouched.
    Path src = new Path("/tmp/src2");
    Path f0 = new Path("/f0");
    Path f1 = new Path("/d1/f1");
    Path f2 = new Path("/d1/d2/f2");
    Path d1 = new Path("/d1/");
    Path d2 = new Path("/d1/d2/");
    createFile(fs, src);
    createFile(fs, f0);
    createFile(fs, f1);
    createFile(fs, f2);
    // Source attributes: deliberately different from every other path below
    // (almostFullPerm / somebody / times 0 / replication 1).
    fs.setPermission(src, almostFullPerm);
    fs.setOwner(src, "somebody", "somebody-group");
    fs.setTimes(src, 0, 0);
    fs.setReplication(src, (short) 1);
    // Directory d1: distinct times (400) so they can be asserted exactly later.
    fs.setPermission(d1, fullPerm);
    fs.setOwner(d1, "anybody", "anybody-group");
    fs.setTimes(d1, 400, 400);
    fs.setReplication(d1, (short) 3);
    // Directory d2: distinct times (300) so they can be asserted exactly later.
    fs.setPermission(d2, fullPerm);
    fs.setOwner(d2, "anybody", "anybody-group");
    fs.setTimes(d2, 300, 300);
    fs.setReplication(d2, (short) 3);
    // Files f0, f1, f2 all start with the same non-source attributes.
    fs.setPermission(f0, fullPerm);
    fs.setOwner(f0, "anybody", "anybody-group");
    fs.setTimes(f0, 200, 200);
    fs.setReplication(f0, (short) 3);
    fs.setPermission(f1, fullPerm);
    fs.setOwner(f1, "anybody", "anybody-group");
    fs.setTimes(f1, 200, 200);
    fs.setReplication(f1, (short) 3);
    fs.setPermission(f2, fullPerm);
    fs.setOwner(f2, "anybody", "anybody-group");
    fs.setTimes(f2, 200, 200);
    fs.setReplication(f2, (short) 3);
    // Snapshot the source status and apply it to f0 only.
    CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));
    // NOTE(review): meaning of the trailing 'false' flag is not visible here - confirm against DistCpUtils.preserve.
    DistCpUtils.preserve(fs, f0, srcStatus, attributes, false);
    // NOTE(review): presumably needed so the replication change is reflected before reading status - confirm.
    cluster.triggerHeartbeats();
    // FileStatus.equals only compares path field, must explicitly compare all fields
    // attributes of src -> f0 ? should be yes
    CopyListingFileStatus f0Status = new CopyListingFileStatus(fs.getFileStatus(f0));
    Assert.assertTrue(srcStatus.getPermission().equals(f0Status.getPermission()));
    Assert.assertTrue(srcStatus.getOwner().equals(f0Status.getOwner()));
    Assert.assertTrue(srcStatus.getGroup().equals(f0Status.getGroup()));
    Assert.assertTrue(srcStatus.getAccessTime() == f0Status.getAccessTime());
    Assert.assertTrue(srcStatus.getModificationTime() == f0Status.getModificationTime());
    Assert.assertTrue(srcStatus.getReplication() == f0Status.getReplication());
    // attributes of src -> f1 ? should be no
    CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));
    Assert.assertFalse(srcStatus.getPermission().equals(f1Status.getPermission()));
    Assert.assertFalse(srcStatus.getOwner().equals(f1Status.getOwner()));
    Assert.assertFalse(srcStatus.getGroup().equals(f1Status.getGroup()));
    Assert.assertFalse(srcStatus.getAccessTime() == f1Status.getAccessTime());
    Assert.assertFalse(srcStatus.getModificationTime() == f1Status.getModificationTime());
    Assert.assertFalse(srcStatus.getReplication() == f1Status.getReplication());
    // attributes of src -> f2 ? should be no
    CopyListingFileStatus f2Status = new CopyListingFileStatus(fs.getFileStatus(f2));
    Assert.assertFalse(srcStatus.getPermission().equals(f2Status.getPermission()));
    Assert.assertFalse(srcStatus.getOwner().equals(f2Status.getOwner()));
    Assert.assertFalse(srcStatus.getGroup().equals(f2Status.getGroup()));
    Assert.assertFalse(srcStatus.getAccessTime() == f2Status.getAccessTime());
    Assert.assertFalse(srcStatus.getModificationTime() == f2Status.getModificationTime());
    Assert.assertFalse(srcStatus.getReplication() == f2Status.getReplication());
    // attributes of src -> d1 ? should be no
    // (times are checked against the literal 400 assigned during setup)
    CopyListingFileStatus d1Status = new CopyListingFileStatus(fs.getFileStatus(d1));
    Assert.assertFalse(srcStatus.getPermission().equals(d1Status.getPermission()));
    Assert.assertFalse(srcStatus.getOwner().equals(d1Status.getOwner()));
    Assert.assertFalse(srcStatus.getGroup().equals(d1Status.getGroup()));
    Assert.assertTrue(d1Status.getAccessTime() == 400);
    Assert.assertTrue(d1Status.getModificationTime() == 400);
    Assert.assertFalse(srcStatus.getReplication() == d1Status.getReplication());
    // attributes of src -> d2 ? should be no
    // (times are checked against the literal 300 assigned during setup)
    CopyListingFileStatus d2Status = new CopyListingFileStatus(fs.getFileStatus(d2));
    Assert.assertFalse(srcStatus.getPermission().equals(d2Status.getPermission()));
    Assert.assertFalse(srcStatus.getOwner().equals(d2Status.getOwner()));
    Assert.assertFalse(srcStatus.getGroup().equals(d2Status.getGroup()));
    Assert.assertTrue(d2Status.getAccessTime() == 300);
    Assert.assertTrue(d2Status.getModificationTime() == 300);
    Assert.assertFalse(srcStatus.getReplication() == d2Status.getReplication());
}
Also used : Path(org.apache.hadoop.fs.Path) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) FileAttribute(org.apache.hadoop.tools.DistCpOptions.FileAttribute) Test(org.junit.Test)

Example 32 with CopyListingFileStatus

use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.

the class TestCopyMapper method testSingleFileCopy.

/**
 * Exercises a single-file copy onto a target file of the same name that
 * already exists, in two phases:
 * <ol>
 *   <li>target final path set to the parent directory: the existing target's
 *       modification time must be unchanged (file skipped);</li>
 *   <li>target final path set to the file itself: the target's modification
 *       time must advance (file overwritten).</li>
 * </ol>
 * Any unexpected exception is printed and reported as a test failure.
 */
@Test(timeout = 40000)
public void testSingleFileCopy() {
    try {
        deleteState();
        touchFile(SOURCE_PATH + "/1");
        Path sourceFilePath = pathList.get(0);
        Path targetFilePath = new Path(sourceFilePath.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
        touchFile(targetFilePath.toString());
        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        // Mapper.Context is a non-generic inner class; only the enclosing Mapper is parameterized.
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();

        // Phase 1: final path is the parent directory.
        context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
                targetFilePath.getParent().toString());
        copyMapper.setup(context);
        final CopyListingFileStatus sourceFileStatus = new CopyListingFileStatus(fs.getFileStatus(sourceFilePath));
        long before = fs.getFileStatus(targetFilePath).getModificationTime();
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
        long after = fs.getFileStatus(targetFilePath).getModificationTime();
        Assert.assertTrue("File should have been skipped", before == after);

        // Phase 2: final path is the file itself.
        context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
                targetFilePath.toString());
        copyMapper.setup(context);
        before = fs.getFileStatus(targetFilePath).getModificationTime();
        try {
            // Short pause so a rewrite yields a strictly larger modification time.
            Thread.sleep(2);
        } catch (InterruptedException ignored) {
            // Best effort only; keep the interrupt visible to the caller.
            Thread.currentThread().interrupt();
        }
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
        after = fs.getFileStatus(targetFilePath).getModificationTime();
        Assert.assertTrue("File should have been overwritten.", before < after);
    } catch (Exception exception) {
        // Print first: Assert.fail always throws, so anything after it is unreachable.
        exception.printStackTrace();
        Assert.fail("Unexpected exception: " + exception.getMessage());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Mapper(org.apache.hadoop.mapreduce.Mapper) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) StubContext(org.apache.hadoop.tools.StubContext) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) Test(org.junit.Test)

Example 33 with CopyListingFileStatus

use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.

the class TestRetriableFileCopyCommand method testFailOnCloseError.

/**
 * Ensures that an exception thrown while closing the output stream is
 * propagated out of copyBytes rather than being lost: the exception caught
 * from copyBytes must be the very one the mocked stream throws on close().
 */
@SuppressWarnings("rawtypes")
@Test
public void testFailOnCloseError() throws Exception {
    // Mapper context stub: only getConfiguration() is given a canned answer.
    Mapper.Context mapperContext = mock(Mapper.Context.class);
    doReturn(new Configuration()).when(mapperContext).getConfiguration();

    // Output stream whose close() always fails with a known exception.
    Exception closeFailure = new IOException("boom");
    OutputStream failingStream = mock(OutputStream.class);
    doThrow(closeFailure).when(failingStream).close();

    // Temp file acting as the copy source.
    File tempFile = File.createTempFile(this.getClass().getSimpleName(), null);
    tempFile.deleteOnExit();
    Path tempPath = new Path(tempFile.toURI());
    FileStatus rawStatus = new FileStatus(1L, false, 1, 1024, 0, tempPath);
    CopyListingFileStatus sourceStatus = new CopyListingFileStatus(rawStatus);

    Exception thrown = null;
    try {
        RetriableFileCopyCommand command =
                new RetriableFileCopyCommand("testFailOnCloseError", FileAction.OVERWRITE);
        command.copyBytes(sourceStatus, 0, failingStream, 512, mapperContext);
    } catch (Exception e) {
        thrown = e;
    }

    assertNotNull("close didn't fail", thrown);
    assertEquals(closeFailure, thrown);
}
Also used : Path(org.apache.hadoop.fs.Path) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) OutputStream(java.io.OutputStream) IOException(java.io.IOException) File(java.io.File) IOException(java.io.IOException) Test(org.junit.Test)

Example 34 with CopyListingFileStatus

use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.

the class TestCopyMapper method testCopy.

/**
 * Runs the copy mapper over every path in pathList and verifies the result.
 *
 * <p>Checks performed: the copied data (via verifyCopy), the COPY counter
 * against the number of listed paths, the BYTESCOPIED counter against the
 * expected byte total for the chosen source data, and finally that a second
 * pass over already-copied files only produces "SKIP:" records.
 *
 * @param preserveChecksum when true, source data is created with a different
 *        checksum type and CHECKSUMTYPE is added to the preserved attributes
 * @throws Exception if setup, mapping or verification fails
 */
private void testCopy(boolean preserveChecksum) throws Exception {
    deleteState();
    if (preserveChecksum) {
        createSourceDataWithDifferentChecksumType();
    } else {
        createSourceData();
    }
    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    // Mapper.Context is a non-generic inner class; only the enclosing Mapper is parameterized.
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
    Configuration configuration = context.getConfiguration();
    // REPLICATION is always preserved; CHECKSUMTYPE only on request.
    EnumSet<DistCpOptions.FileAttribute> fileAttributes = EnumSet.of(DistCpOptions.FileAttribute.REPLICATION);
    if (preserveChecksum) {
        fileAttributes.add(DistCpOptions.FileAttribute.CHECKSUMTYPE);
    }
    configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(), DistCpUtils.packAttributes(fileAttributes));
    copyMapper.setup(context);
    for (Path path : pathList) {
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }
    // Check that the maps worked.
    verifyCopy(fs, preserveChecksum);
    Assert.assertEquals(pathList.size(), stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue());
    if (!preserveChecksum) {
        Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE, stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
    } else {
        Assert.assertEquals(nFiles * NON_DEFAULT_BLOCK_SIZE * 2, stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
    }
    // Second pass over existing files: every emitted value must be a skip record.
    testCopyingExistingFiles(fs, copyMapper, context);
    for (Text value : stubContext.getWriter().values()) {
        Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("SKIP:"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Mapper(org.apache.hadoop.mapreduce.Mapper) Configuration(org.apache.hadoop.conf.Configuration) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) StubContext(org.apache.hadoop.tools.StubContext) Text(org.apache.hadoop.io.Text)

Example 35 with CopyListingFileStatus

use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.

the class TestCopyMapper method testMakeDirFailure.

/**
 * Expects the copy mapper to throw when the target work path is a
 * webhdfs URI containing wildcard characters.
 *
 * <p>The test passes when any Exception is raised by setup or map; it fails
 * only if the map call returns normally.
 */
@Test(timeout = 40000)
public void testMakeDirFailure() {
    try {
        deleteState();
        createSourceData();
        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        // Mapper.Context is a non-generic inner class; only the enclosing Mapper is parameterized.
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
        Configuration configuration = context.getConfiguration();
        String workPath = new Path("webhdfs://localhost:1234/*/*/*/?/").makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
        configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        copyMapper.setup(context);
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))), new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);
        // Reaching this line means no exception was thrown. The AssertionError
        // raised here is an Error, so the catch below does not swallow it.
        Assert.fail("There should have been an exception.");
    } catch (Exception ignore) {
        // Expected: the mapper must fail against the bogus work path.
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Mapper(org.apache.hadoop.mapreduce.Mapper) Configuration(org.apache.hadoop.conf.Configuration) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) StubContext(org.apache.hadoop.tools.StubContext) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) Test(org.junit.Test)

Aggregations

CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus)44 Path (org.apache.hadoop.fs.Path)41 FileSystem (org.apache.hadoop.fs.FileSystem)36 Test (org.junit.Test)29 Text (org.apache.hadoop.io.Text)23 FileAttribute (org.apache.hadoop.tools.DistCpOptions.FileAttribute)20 StubContext (org.apache.hadoop.tools.StubContext)17 IOException (java.io.IOException)16 Mapper (org.apache.hadoop.mapreduce.Mapper)16 AccessControlException (org.apache.hadoop.security.AccessControlException)13 Configuration (org.apache.hadoop.conf.Configuration)11 DistCpOptions (org.apache.hadoop.tools.DistCpOptions)9 FileStatus (org.apache.hadoop.fs.FileStatus)8 FsPermission (org.apache.hadoop.fs.permission.FsPermission)6 SequenceFile (org.apache.hadoop.io.SequenceFile)5 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)5 ArrayList (java.util.ArrayList)3 OutputStream (java.io.OutputStream)2 AclEntry (org.apache.hadoop.fs.permission.AclEntry)2 FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit)2