Example 6 with StubContext

Use of org.apache.hadoop.tools.StubContext in the Apache hadoop project.

From the class TestUniformSizeInputFormat, method testGetSplits.

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);
    int sizePerMap = totalFileSize / nMaps;
    checkSplits(listFile, splits);
    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        // Passing null is safe here: the real TaskAttemptContext is supplied
        // below via initialize().
        RecordReader<Text, CopyListingFileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split, null);
        StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, 0);
        final TaskAttemptContext taskAttemptContext = stubContext.getContext();
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus[] fileStatus = fs.listStatus(sourcePath);
            // listStatus() on a plain file yields exactly one entry; more than
            // one indicates a directory, which contributes no bytes of its own.
            if (fileStatus.length > 1) {
                continue;
            }
            currentSplitSize += fileStatus[0].getLen();
        }
        // Consecutive splits should agree to within 10% of the ideal per-map
        // size; only the final split is allowed to deviate.
        Assert.assertTrue(previousSplitSize == -1 || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap || i == splits.size() - 1);
        doubleCheckedTotalSize += currentSplitSize;
    }
    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
Also used: Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) FileStatus(org.apache.hadoop.fs.FileStatus) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) DistCpOptions(org.apache.hadoop.tools.DistCpOptions) StubContext(org.apache.hadoop.tools.StubContext) FileSystem(org.apache.hadoop.fs.FileSystem)
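
StubContext itself is not shown on this page, but its surface can be read off the call sites: it wraps a Configuration, an optional RecordReader, and a task id, and hands back a usable task context via getContext() (plus setReader(), as Example 7 shows). Since the helper above is parameterized by the map count, the enclosing test class presumably drives it across several values. A minimal sketch of such a driver; the chosen map counts are illustrative assumptions, not taken from the original source:

// Hypothetical driver for the parameterized helper above. The map counts
// here are illustrative; the real test class may use different values.
@Test
public void testGetSplitsForSeveralMapCounts() throws Exception {
    for (int nMaps : new int[] { 1, 3, 9 }) {
        testGetSplits(nMaps);
    }
}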

Example 7 with StubContext

Use of org.apache.hadoop.tools.StubContext in the Apache hadoop project.

From the class TestDynamicInputFormat, method testGetSplits.

@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"), options);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<Text, CopyListingFileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    int nFiles = 0;
    int taskId = 0;
    for (InputSplit split : splits) {
        StubContext stubContext = new StubContext(jobContext.getConfiguration(), null, taskId);
        final TaskAttemptContext taskAttemptContext = stubContext.getContext();
        RecordReader<Text, CopyListingFileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        stubContext.setReader(recordReader);
        // The reader is initialized with the first split; a dynamic reader
        // acquires its work chunks at runtime rather than from the split
        // passed to initialize().
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);
        ++taskId;
    }
    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
Also used: Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) DistCpOptions(org.apache.hadoop.tools.DistCpOptions) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) StubContext(org.apache.hadoop.tools.StubContext) Test(org.junit.Test)
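
The invariant this loop asserts, progress confined to [0, 1] and never decreasing, is generic to any RecordReader. A small sketch of the same checks factored into a reusable helper; the helper name and shape are ours, not part of Hadoop:

// Hypothetical helper capturing the progress invariant asserted above:
// progress must lie in [0, 1] and must not decrease between records.
// It returns the latest value so the caller can thread it through the loop.
private static float assertMonotoneProgress(RecordReader<?, ?> reader, float previousProgress)
        throws IOException, InterruptedException {
    final float progress = reader.getProgress();
    Assert.assertTrue("progress went backwards", progress >= previousProgress);
    Assert.assertTrue("progress out of [0, 1]", progress >= 0.0f && progress <= 1.0f);
    return progress;
}

Inside the while-loop, the four progress assertions then collapse to previousProgressValue = assertMonotoneProgress(recordReader, previousProgressValue);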

Example 8 with StubContext

Use of org.apache.hadoop.tools.StubContext in the Apache hadoop project.

From the class TestCopyMapper, method testCopyFailOnBlockSizeDifference.

@Test(timeout = 40000)
public void testCopyFailOnBlockSizeDifference() {
    try {
        deleteState();
        createSourceDataWithDifferentBlockSize();
        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = stubContext.getContext();
        Configuration configuration = context.getConfiguration();
        EnumSet<DistCpOptions.FileAttribute> fileAttributes = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
        configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(), DistCpUtils.packAttributes(fileAttributes));
        copyMapper.setup(context);
        for (Path path : pathList) {
            final FileStatus fileStatus = fs.getFileStatus(path);
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), new CopyListingFileStatus(fileStatus), context);
        }
        Assert.fail("Copy should have failed because of block-size difference.");
    } catch (Exception exception) {
        // Check that the exception suggests the use of -pb/-skipCrc.
        Assert.assertTrue("Failure exception should have suggested the use of -pb.", exception.getCause().getCause().getMessage().contains("pb"));
        Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", exception.getCause().getCause().getMessage().contains("skipCrc"));
    }
}
Also used: Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) Mapper(org.apache.hadoop.mapreduce.Mapper) DistCpOptions(org.apache.hadoop.tools.DistCpOptions) FileSystem(org.apache.hadoop.fs.FileSystem) StubContext(org.apache.hadoop.tools.StubContext) Test(org.junit.Test)
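
The test establishes the negative case: with no attributes preserved, a block-size mismatch must fail the copy, and the error text must steer the user toward -pb or -skipCrc. The complementary positive setup just adds BLOCKSIZE to the preserved attributes. A sketch reusing the configuration code from the test above (BLOCKSIZE is a real DistCpOptions.FileAttribute constant; the expectation of success is our reading of the error message, not an assertion copied from this class):

// Sketch of the complementary setup: preserving block size (the -pb flag)
// should allow the same copy to succeed despite differing block sizes.
EnumSet<DistCpOptions.FileAttribute> preserved = EnumSet.of(DistCpOptions.FileAttribute.BLOCKSIZE);
configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(), DistCpUtils.packAttributes(preserved));
copyMapper.setup(context);
// ...same map() calls as above; no exception is expected in this configuration.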

Example 9 with StubContext

Use of org.apache.hadoop.tools.StubContext in the Apache hadoop project.

From the class TestCopyMapper, method doTestIgnoreFailuresDoubleWrapped.

/**
   * This test covers the case where the CopyReadException is double-wrapped and
   * the mapper should be able to ignore this nested read exception.
   * @see #doTestIgnoreFailures
   */
private void doTestIgnoreFailuresDoubleWrapped(final boolean ignoreFailures) {
    try {
        deleteState();
        createSourceData();
        final UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
        final CopyMapper copyMapper = new CopyMapper();
        final Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = tmpUser.doAs(new PrivilegedAction<Mapper<Text, CopyListingFileStatus, Text, Text>.Context>() {

            @Override
            public Mapper<Text, CopyListingFileStatus, Text, Text>.Context run() {
                try {
                    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
                    return stubContext.getContext();
                } catch (Exception e) {
                    LOG.error("Exception encountered when get stub context", e);
                    throw new RuntimeException(e);
                }
            }
        });
        touchFile(SOURCE_PATH + "/src/file");
        mkdirs(TARGET_PATH);
        cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"), new FsPermission(FsAction.NONE, FsAction.NONE, FsAction.NONE));
        cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short) 511));   // 511 decimal == 0777 octal (rwxrwxrwx)
        context.getConfiguration().setBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), ignoreFailures);
        final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {

            @Override
            public FileSystem run() {
                try {
                    return FileSystem.get(configuration);
                } catch (IOException e) {
                    LOG.error("Exception encountered when get FileSystem.", e);
                    throw new RuntimeException(e);
                }
            }
        });
        tmpUser.doAs(new PrivilegedAction<Integer>() {

            @Override
            public Integer run() {
                try {
                    copyMapper.setup(context);
                    copyMapper.map(new Text("/src/file"), new CopyListingFileStatus(tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file"))), context);
                    Assert.assertTrue("Should have thrown an IOException if not " + "ignoring failures", ignoreFailures);
                } catch (IOException e) {
                    LOG.error("Unexpected exception encountered. ", e);
                    Assert.assertFalse("Should not have thrown an IOException if " + "ignoring failures", ignoreFailures);
                // the IOException is not thrown again as it's expected
                } catch (Exception e) {
                    LOG.error("Exception encountered when the mapper copies file.", e);
                    throw new RuntimeException(e);
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Unexpected exception encountered. ", e);
        Assert.fail("Test failed: " + e.getMessage());
    }
}
Also used: StubContext(org.apache.hadoop.tools.StubContext) Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) Mapper(org.apache.hadoop.mapreduce.Mapper) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) FsPermission(org.apache.hadoop.fs.permission.FsPermission) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
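
"Double-wrapped" here means the CopyReadException arrives one level deeper than usual, nested inside another exception, and the mapper must still detect it when ignoreFailures is set. An illustrative construction of that shape; CopyReadException is the nested class org.apache.hadoop.tools.mapred.RetriableFileCopyCommand.CopyReadException, and the wrapping below is our sketch, not code from this test:

// Illustrative sketch of a "double-wrapped" read failure: the
// CopyReadException is itself wrapped, so inspecting only the outer
// cause would miss it.
IOException rootCause = new IOException("read failed");
Exception doubleWrapped = new RuntimeException(new RetriableFileCopyCommand.CopyReadException(rootCause));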

Example 10 with StubContext

Use of org.apache.hadoop.tools.StubContext in the Apache hadoop project.

From the class TestCopyMapper, method testCopyReadableFiles.

@Test(timeout = 40000)
public void testCopyReadableFiles() {
    try {
        deleteState();
        createSourceData();
        UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
        final CopyMapper copyMapper = new CopyMapper();
        final Mapper<Text, CopyListingFileStatus, Text, Text>.Context context = tmpUser.doAs(new PrivilegedAction<Mapper<Text, CopyListingFileStatus, Text, Text>.Context>() {

            @Override
            public Mapper<Text, CopyListingFileStatus, Text, Text>.Context run() {
                try {
                    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
                    return stubContext.getContext();
                } catch (Exception e) {
                    LOG.error("Exception encountered ", e);
                    throw new RuntimeException(e);
                }
            }
        });
        touchFile(SOURCE_PATH + "/src/file");
        mkdirs(TARGET_PATH);
        cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"), new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
        cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short) 511));   // 511 decimal == 0777 octal (rwxrwxrwx)
        final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {

            @Override
            public FileSystem run() {
                try {
                    return FileSystem.get(configuration);
                } catch (IOException e) {
                    LOG.error("Exception encountered ", e);
                    Assert.fail("Test failed: " + e.getMessage());
                    throw new RuntimeException("Test ought to fail here");
                }
            }
        });
        tmpUser.doAs(new PrivilegedAction<Integer>() {

            @Override
            public Integer run() {
                try {
                    copyMapper.setup(context);
                    copyMapper.map(new Text("/src/file"), new CopyListingFileStatus(tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file"))), context);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Test failed: " + e.getMessage());
    }
}
Also used: StubContext(org.apache.hadoop.tools.StubContext) Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) Mapper(org.apache.hadoop.mapreduce.Mapper) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) FsPermission(org.apache.hadoop.fs.permission.FsPermission) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)
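
A closing note on the permission values recurring in these tests: new FsPermission((short) 511) is the decimal spelling of octal 0777 (rwxrwxrwx), and the contrast between this test and doTestIgnoreFailuresDoubleWrapped is the source permission, READ for all here versus NONE there. A sketch making the three values explicit; the variable names are ours:

// The permission values used across the two tests above, spelled out.
FsPermission readableSource = new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ);   // r--r--r--
FsPermission unreadableSource = new FsPermission(FsAction.NONE, FsAction.NONE, FsAction.NONE); // ---------
FsPermission worldWritableTarget = new FsPermission((short) 0777);                             // same as (short) 511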

Aggregations

Path (org.apache.hadoop.fs.Path) 17
Text (org.apache.hadoop.io.Text) 17
CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus) 17
StubContext (org.apache.hadoop.tools.StubContext) 17
FileSystem (org.apache.hadoop.fs.FileSystem) 16
Mapper (org.apache.hadoop.mapreduce.Mapper) 15
IOException (java.io.IOException) 13
AccessControlException (org.apache.hadoop.security.AccessControlException) 13
Test (org.junit.Test) 11
Configuration (org.apache.hadoop.conf.Configuration) 8
DistCpOptions (org.apache.hadoop.tools.DistCpOptions) 8
FileStatus (org.apache.hadoop.fs.FileStatus) 5
FsPermission (org.apache.hadoop.fs.permission.FsPermission) 5
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation) 5
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl) 2
DataOutputStream (java.io.DataOutputStream) 1
OutputStream (java.io.OutputStream) 1
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 1