Example 11 with StubContext

Use of org.apache.hadoop.tools.StubContext in project hadoop by apache.

The class TestCopyMapper, method testSingleFileCopy.

/**
   * If a single file is being copied to a location where a file of the same
   * name already exists, the copy shouldn't be skipped when the target
   * specifies the file path itself; when the target specifies only the
   * parent directory, the existing file is skipped.
   */
@Test(timeout = 40000)
public void testSingleFileCopy() {
    try {
        deleteState();
        touchFile(SOURCE_PATH + "/1");
        Path sourceFilePath = pathList.get(0);
        Path targetFilePath = new Path(sourceFilePath.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
        touchFile(targetFilePath.toString());
        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
        // Point the target at the parent directory; the copy of the existing file should be skipped.
        context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
                targetFilePath.getParent().toString());
        copyMapper.setup(context);
        final CopyListingFileStatus sourceFileStatus = new CopyListingFileStatus(fs.getFileStatus(sourceFilePath));
        long before = fs.getFileStatus(targetFilePath).getModificationTime();
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
        long after = fs.getFileStatus(targetFilePath).getModificationTime();
        Assert.assertTrue("File should have been skipped", before == after);
        // Point the target at the file path itself; the existing file should be overwritten.
        context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
                targetFilePath.toString());
        copyMapper.setup(context);
        before = fs.getFileStatus(targetFilePath).getModificationTime();
        // Pause briefly so an overwrite produces a strictly later modification time.
        try {
            Thread.sleep(2);
        } catch (Throwable ignore) {
        }
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
        after = fs.getFileStatus(targetFilePath).getModificationTime();
        Assert.assertTrue("File should have been overwritten.", before < after);
    } catch (Exception exception) {
        exception.printStackTrace();
        Assert.fail("Unexpected exception: " + exception.getMessage());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Mapper (org.apache.hadoop.mapreduce.Mapper), CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), StubContext (org.apache.hadoop.tools.StubContext), Text (org.apache.hadoop.io.Text), IOException (java.io.IOException), AccessControlException (org.apache.hadoop.security.AccessControlException), Test (org.junit.Test)
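
All five examples on this page share the same harness shape: build a StubContext, pull a Mapper.Context from it, configure the mapper through that context, drive setup() and map() directly, then inspect what the mapper did. A condensed sketch of that pattern follows, using only the StubContext accessors that appear in these examples (getContext(), getReporter(), getWriter()); the target path and map key are placeholders rather than values from the Hadoop tests, and the imports are the ones in the "Also used" lists.

// Sketch of the recurring harness pattern; placeholder values are marked.
StubContext stubContext = new StubContext(getConfiguration(), null, 0);
Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();

// Configure the mapper through the context's Configuration.
context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
        "/placeholder/target");  // placeholder path

// Drive the mapper directly; no MapReduce job is submitted.
CopyMapper copyMapper = new CopyMapper();
copyMapper.setup(context);
copyMapper.map(new Text("placeholder/relative/path"),  // placeholder key
        new CopyListingFileStatus(fs.getFileStatus(sourceFilePath)), context);  // fs, sourceFilePath as in Example 11

// Inspect counters and emitted records through the stub's accessors.
long copied = stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue();
for (Text value : stubContext.getWriter().values()) {
    System.out.println(value);
}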

Example 12 with StubContext

Use of org.apache.hadoop.tools.StubContext in project hadoop by apache.

The class TestCopyMapper, method testCopy.

private void testCopy(boolean preserveChecksum) throws Exception {
    deleteState();
    if (preserveChecksum) {
        createSourceDataWithDifferentChecksumType();
    } else {
        createSourceData();
    }
    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
    Configuration configuration = context.getConfiguration();
    EnumSet<DistCpOptions.FileAttribute> fileAttributes = EnumSet.of(DistCpOptions.FileAttribute.REPLICATION);
    if (preserveChecksum) {
        fileAttributes.add(DistCpOptions.FileAttribute.CHECKSUMTYPE);
    }
    configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(), DistCpUtils.packAttributes(fileAttributes));
    copyMapper.setup(context);
    for (Path path : pathList) {
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }
    // Check that the maps worked.
    verifyCopy(fs, preserveChecksum);
    Assert.assertEquals(pathList.size(), stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue());
    if (!preserveChecksum) {
        Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE, stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
    } else {
        Assert.assertEquals(nFiles * NON_DEFAULT_BLOCK_SIZE * 2, stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
    }
    testCopyingExistingFiles(fs, copyMapper, context);
    for (Text value : stubContext.getWriter().values()) {
        Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("SKIP:"));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Mapper (org.apache.hadoop.mapreduce.Mapper), Configuration (org.apache.hadoop.conf.Configuration), CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), StubContext (org.apache.hadoop.tools.StubContext), Text (org.apache.hadoop.io.Text)
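
The two BYTESCOPIED expectations encode how the fixtures are built: createSourceData() evidently writes nFiles files of DEFAULT_FILE_SIZE bytes each, while the different-checksum fixture writes each file as two blocks of NON_DEFAULT_BLOCK_SIZE. The two-block sizing is inferred from the assertion, since the setup helpers are not shown in this listing. As a worked check:

// Worked check of the counter expectations above; the constants are fields
// of TestCopyMapper, and the two-blocks-per-file sizing is inferred.
long expectedDefault  = nFiles * DEFAULT_FILE_SIZE;           // createSourceData()
long expectedChecksum = nFiles * NON_DEFAULT_BLOCK_SIZE * 2;  // two blocks per file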

Example 13 with StubContext

Use of org.apache.hadoop.tools.StubContext in project hadoop by apache.

The class TestCopyMapper, method testMakeDirFailure.

@Test(timeout = 40000)
public void testMakeDirFailure() {
    try {
        deleteState();
        createSourceData();
        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
        Configuration configuration = context.getConfiguration();
        // A target work path that cannot be created, so the mapper's directory setup must fail.
        String workPath = new Path("webhdfs://localhost:1234/*/*/*/?/").makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
        configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        copyMapper.setup(context);
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))), new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);
        Assert.assertTrue("There should have been an exception.", false);
    } catch (Exception ignore) {
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Mapper (org.apache.hadoop.mapreduce.Mapper), Configuration (org.apache.hadoop.conf.Configuration), CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), StubContext (org.apache.hadoop.tools.StubContext), Text (org.apache.hadoop.io.Text), IOException (java.io.IOException), AccessControlException (org.apache.hadoop.security.AccessControlException), Test (org.junit.Test)
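
The fail-inside-try plus empty-catch idiom predates better JUnit support. If the JUnit on the test classpath is 4.13 or newer (an assumption; the listing does not show the version), the same expectation can be written more directly:

// Equivalent expected-exception check using JUnit 4.13+ Assert.assertThrows
// (assumes that JUnit version is available).
Assert.assertThrows(Exception.class, () ->
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))),
                new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context));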

Example 14 with StubContext

Use of org.apache.hadoop.tools.StubContext in project hadoop by apache.

The class TestCopyMapper, method testFailCopyWithAccessControlException.

@Test(timeout = 40000)
public void testFailCopyWithAccessControlException() {
    try {
        deleteState();
        createSourceData();
        UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
        final CopyMapper copyMapper = new CopyMapper();
        final StubContext stubContext = tmpUser.doAs(new PrivilegedAction<StubContext>() {

            @Override
            public StubContext run() {
                try {
                    return new StubContext(getConfiguration(), null, 0);
                } catch (Exception e) {
                    LOG.error("Exception encountered ", e);
                    throw new RuntimeException(e);
                }
            }
        });
        EnumSet<DistCpOptions.FileAttribute> preserveStatus = EnumSet.allOf(DistCpOptions.FileAttribute.class);
        preserveStatus.remove(DistCpOptions.FileAttribute.ACL);
        preserveStatus.remove(DistCpOptions.FileAttribute.XATTR);
        final Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
        context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS, DistCpUtils.packAttributes(preserveStatus));
        touchFile(SOURCE_PATH + "/src/file");
        OutputStream out = cluster.getFileSystem().create(new Path(TARGET_PATH + "/src/file"));
        out.write("hello world".getBytes());
        out.close();
        // Make source and target read-only so the copy attempted as "guest" is denied.
        cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"), new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
        cluster.getFileSystem().setPermission(new Path(TARGET_PATH + "/src/file"), new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
        final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {

            @Override
            public FileSystem run() {
                try {
                    return FileSystem.get(configuration);
                } catch (IOException e) {
                    LOG.error("Exception encountered ", e);
                    Assert.fail("Test failed: " + e.getMessage());
                    throw new RuntimeException("Test ought to fail here");
                }
            }
        });
        tmpUser.doAs(new PrivilegedAction<Integer>() {

            @Override
            public Integer run() {
                try {
                    copyMapper.setup(context);
                    copyMapper.map(new Text("/src/file"), new CopyListingFileStatus(tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file"))), context);
                    Assert.fail("Didn't expect the file to be copied");
                } catch (AccessControlException ignore) {
                } catch (Exception e) {
                    // The AccessControlException arrives wrapped in other exceptions,
                    // so check the nested cause rather than the top-level exception
                    // itself.
                    if (e.getCause() == null || e.getCause().getCause() == null || !(e.getCause().getCause() instanceof AccessControlException)) {
                        throw new RuntimeException(e);
                    }
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Test failed: " + e.getMessage());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), DataOutputStream (java.io.DataOutputStream), OutputStream (java.io.OutputStream), AccessControlException (org.apache.hadoop.security.AccessControlException), Text (org.apache.hadoop.io.Text), IOException (java.io.IOException), Mapper (org.apache.hadoop.mapreduce.Mapper), DistCpOptions (org.apache.hadoop.tools.DistCpOptions), CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus), StubContext (org.apache.hadoop.tools.StubContext), FileSystem (org.apache.hadoop.fs.FileSystem), FsPermission (org.apache.hadoop.fs.permission.FsPermission), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation), Test (org.junit.Test)
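
The inner catch above unwraps exactly two layers of getCause() to find the AccessControlException, which breaks if the wrapping depth ever changes. An illustrative helper, a sketch rather than part of TestCopyMapper, that walks the whole cause chain instead:

// Illustrative helper, not part of the Hadoop test: true if any link in
// the cause chain is an AccessControlException.
private static boolean causedByAccessControl(Throwable t) {
    for (Throwable cause = t; cause != null; cause = cause.getCause()) {
        if (cause instanceof AccessControlException) {
            return true;
        }
    }
    return false;
}

With it, the inner catch reduces to: if (!causedByAccessControl(e)) throw new RuntimeException(e);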

Example 15 with StubContext

Use of org.apache.hadoop.tools.StubContext in project hadoop by apache.

The class TestCopyMapper, method doTestIgnoreFailures.

private void doTestIgnoreFailures(boolean ignoreFailures) {
    try {
        deleteState();
        createSourceData();
        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
        Configuration configuration = context.getConfiguration();
        configuration.setBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), ignoreFailures);
        configuration.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), true);
        configuration.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), true);
        copyMapper.setup(context);
        for (Path path : pathList) {
            final FileStatus fileStatus = fs.getFileStatus(path);
            if (!fileStatus.isDirectory()) {
                // Delete the source file out from under the mapper to force a copy failure.
                fs.delete(path, true);
                copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), new CopyListingFileStatus(fileStatus), context);
            }
        }
        if (ignoreFailures) {
            for (Text value : stubContext.getWriter().values()) {
                Assert.assertTrue(value.toString() + " was not marked as failed", value.toString().startsWith("FAIL:"));
            }
        }
        Assert.assertTrue("There should have been an exception.", ignoreFailures);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.assertTrue("Unexpected exception: " + e.getMessage(), !ignoreFailures);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Mapper (org.apache.hadoop.mapreduce.Mapper), FileStatus (org.apache.hadoop.fs.FileStatus), CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), StubContext (org.apache.hadoop.tools.StubContext), Text (org.apache.hadoop.io.Text), IOException (java.io.IOException), AccessControlException (org.apache.hadoop.security.AccessControlException)
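
doTestIgnoreFailures is a private helper, so TestCopyMapper must drive it from @Test methods, one per flag value. A sketch of what those wrappers presumably look like; the method names here are illustrative, since the listing does not show the real ones:

// Hypothetical wrappers; the actual @Test method names are not shown here.
@Test(timeout = 40000)
public void testIgnoreFailuresOn() {
    doTestIgnoreFailures(true);
}

@Test(timeout = 40000)
public void testIgnoreFailuresOff() {
    doTestIgnoreFailures(false);
}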

Aggregations

Types used across all StubContext examples on this page, with occurrence counts:

Path (org.apache.hadoop.fs.Path): 17
Text (org.apache.hadoop.io.Text): 17
CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus): 17
StubContext (org.apache.hadoop.tools.StubContext): 17
FileSystem (org.apache.hadoop.fs.FileSystem): 16
Mapper (org.apache.hadoop.mapreduce.Mapper): 15
IOException (java.io.IOException): 13
AccessControlException (org.apache.hadoop.security.AccessControlException): 13
Test (org.junit.Test): 11
Configuration (org.apache.hadoop.conf.Configuration): 8
DistCpOptions (org.apache.hadoop.tools.DistCpOptions): 8
FileStatus (org.apache.hadoop.fs.FileStatus): 5
FsPermission (org.apache.hadoop.fs.permission.FsPermission): 5
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation): 5
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 2
DataOutputStream (java.io.DataOutputStream): 1
OutputStream (java.io.OutputStream): 1
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1