Use of org.apache.hadoop.mapreduce.Mapper in project hadoop by apache.
The class TestCopyMapper, method doTestIgnoreFailures: deletes each source file before mapping it, then checks that failures are reported as skips when ignored and surface as exceptions otherwise.
private void doTestIgnoreFailures(boolean ignoreFailures) {
  try {
    deleteState();
    createSourceData();
    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
        stubContext.getContext();
    Configuration configuration = context.getConfiguration();
    configuration.setBoolean(
        DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), ignoreFailures);
    configuration.setBoolean(
        DistCpOptionSwitch.OVERWRITE.getConfigLabel(), true);
    configuration.setBoolean(
        DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), true);
    copyMapper.setup(context);
    for (Path path : pathList) {
      final FileStatus fileStatus = fs.getFileStatus(path);
      if (!fileStatus.isDirectory()) {
        // Delete the source file so that the copy is guaranteed to fail.
        fs.delete(path, true);
        copyMapper.map(
            new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
            new CopyListingFileStatus(fileStatus), context);
      }
    }
    if (ignoreFailures) {
      // Every failed copy should have been recorded as a skip.
      for (Text value : stubContext.getWriter().values()) {
        Assert.assertTrue(value.toString() + " is not skipped",
            value.toString().startsWith("FAIL:"));
      }
    }
    Assert.assertTrue("There should have been an exception.", ignoreFailures);
  } catch (Exception e) {
    // An exception is expected only when failures are not ignored. Print
    // the stack trace before asserting, so it is visible on failure.
    e.printStackTrace();
    Assert.assertTrue("Unexpected exception: " + e.getMessage(),
        !ignoreFailures);
  }
}
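A plausible caller for this helper, exercising both branches; the method name and timeout are assumptions, mirroring the pattern of the other tests in this class rather than code shown above:

// Hypothetical caller (not in the snippet above): exercises both the
// ignore-failures and the fail-fast paths of doTestIgnoreFailures().
@Test(timeout = 40000)
public void testIgnoreFailures() {
  doTestIgnoreFailures(false);
  doTestIgnoreFailures(true);
}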
Use of org.apache.hadoop.mapreduce.Mapper in project hadoop by apache.
The class TestCopyMapper, method testPreserveBlockSizeAndReplicationImpl: copies the source tree with or without the BLOCKSIZE/REPLICATION preserve flags and checks the target's attributes accordingly.
private void testPreserveBlockSizeAndReplicationImpl(boolean preserve) {
  try {
    deleteState();
    createSourceData();
    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
        stubContext.getContext();
    Configuration configuration = context.getConfiguration();
    EnumSet<DistCpOptions.FileAttribute> fileAttributes =
        EnumSet.noneOf(DistCpOptions.FileAttribute.class);
    if (preserve) {
      fileAttributes.add(DistCpOptions.FileAttribute.BLOCKSIZE);
      fileAttributes.add(DistCpOptions.FileAttribute.REPLICATION);
    }
    configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
        DistCpUtils.packAttributes(fileAttributes));
    copyMapper.setup(context);
    for (Path path : pathList) {
      final FileStatus fileStatus = fs.getFileStatus(path);
      copyMapper.map(
          new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
          new CopyListingFileStatus(fileStatus), context);
    }
    // Check that block-size and replication match the source when
    // preserved, and differ from it when not.
    for (Path path : pathList) {
      final Path targetPath =
          new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
      final FileStatus source = fs.getFileStatus(path);
      final FileStatus target = fs.getFileStatus(targetPath);
      if (!source.isDirectory()) {
        Assert.assertTrue(preserve ||
            source.getBlockSize() != target.getBlockSize());
        Assert.assertTrue(preserve ||
            source.getReplication() != target.getReplication());
        Assert.assertTrue(!preserve ||
            source.getBlockSize() == target.getBlockSize());
        Assert.assertTrue(!preserve ||
            source.getReplication() == target.getReplication());
      }
    }
  } catch (Exception e) {
    // Print the stack trace before failing, so the cause is visible.
    e.printStackTrace();
    Assert.fail("Unexpected exception: " + e.getMessage());
  }
}
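As with the previous helper, the actual @Test entry points are not shown; a sketch of plausible callers, with the method names assumed from the class's naming pattern:

// Hypothetical callers (names assumed, not shown in the snippet above):
@Test(timeout = 40000)
public void testPreserveBlockSizeAndReplication() {
  testPreserveBlockSizeAndReplicationImpl(true);
}

@Test(timeout = 40000)
public void testPreserveNothing() {
  testPreserveBlockSizeAndReplicationImpl(false);
}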
Use of org.apache.hadoop.mapreduce.Mapper in project hadoop by apache.
The class TestCopyMapper, method testFileToDir: verifies that copying a source file over an existing target directory fails.
@Test(timeout = 40000)
public void testFileToDir() {
  try {
    deleteState();
    createSourceData();
    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
        stubContext.getContext();
    // Create a source *file* and a target *directory* at the same
    // relative path, so the copy must fail.
    touchFile(SOURCE_PATH + "/src/file");
    mkdirs(TARGET_PATH + "/src/file");
    try {
      copyMapper.setup(context);
      copyMapper.map(new Text("/src/file"),
          new CopyListingFileStatus(
              fs.getFileStatus(new Path(SOURCE_PATH + "/src/file"))),
          context);
    } catch (IOException e) {
      Assert.assertTrue(e.getMessage().startsWith("Can't replace"));
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Test failed: " + e.getMessage());
  }
}
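The inverse scenario, a source directory colliding with an existing target file, would follow the same shape; a sketch of what that mirror-image case could look like, with the method name and details assumed rather than taken from the snippet above:

// Hypothetical mirror-image test (name assumed): a source directory
// colliding with an existing target file should also be rejected.
@Test(timeout = 40000)
public void testDirToFile() {
  try {
    deleteState();
    createSourceData();
    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
        stubContext.getContext();
    mkdirs(SOURCE_PATH + "/src/file");
    touchFile(TARGET_PATH + "/src/file");
    try {
      copyMapper.setup(context);
      copyMapper.map(new Text("/src/file"),
          new CopyListingFileStatus(
              fs.getFileStatus(new Path(SOURCE_PATH + "/src/file"))),
          context);
    } catch (IOException e) {
      Assert.assertTrue(e.getMessage().startsWith("Can't replace"));
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Test failed: " + e.getMessage());
  }
}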
Use of org.apache.hadoop.mapreduce.Mapper in project hadoop by apache.
The class TestDistCpSyncReverseBase, method testAndVerify: runs a snapshot-diff-based sync, builds the copy listing, replays it through CopyMapper, and verifies the result.
private void testAndVerify(int numDeletedAndModified) throws Exception {
  SnapshotDiffReport report = dfs.getSnapshotDiffReport(target, "s2", "s1");
  System.out.println(report);
  final FsShell shell = new FsShell(conf);
  lsrSource("Before sync source: ", shell, source);
  lsr("Before sync target: ", shell, target);
  DistCpSync distCpSync = new DistCpSync(options, conf);
  // Do the sync.
  distCpSync.sync();
  lsr("After sync target: ", shell, target);
  // Make sure the source path has been updated to the snapshot path.
  final Path spath = new Path(source,
      HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s1");
  Assert.assertEquals(spath, options.getSourcePaths().get(0));
  // Build the copy listing.
  final Path listingPath = new Path("/tmp/META/fileList.seq");
  CopyListing listing =
      new SimpleCopyListing(conf, new Credentials(), distCpSync);
  listing.buildListing(listingPath, options);
  Map<Text, CopyListingFileStatus> copyListing = getListing(listingPath);
  CopyMapper copyMapper = new CopyMapper();
  StubContext stubContext = new StubContext(conf, null, 0);
  Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
      stubContext.getContext();
  // Enable append.
  context.getConfiguration().setBoolean(
      DistCpOptionSwitch.APPEND.getConfigLabel(), true);
  copyMapper.setup(context);
  for (Map.Entry<Text, CopyListingFileStatus> entry : copyListing.entrySet()) {
    copyMapper.map(entry.getKey(), entry.getValue(), context);
  }
  // Verify that only modified and created files/directories were listed.
  Assert.assertEquals(numDeletedAndModified, copyListing.size());
  lsr("After Copy target: ", shell, target);
  // Verify that source and target now have the same structure.
  verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
}
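The getListing helper is not shown above. A minimal sketch of one plausible implementation, assuming the listing is the usual Text to CopyListingFileStatus SequenceFile that SimpleCopyListing writes; the method body here is an assumption, not the project's actual helper:

// Plausible sketch of the getListing helper (assumed, not shown above):
// reads the copy listing back out of the SequenceFile into a map.
private Map<Text, CopyListingFileStatus> getListing(Path listingPath)
    throws IOException {
  Map<Text, CopyListingFileStatus> entries = new HashMap<>();
  try (SequenceFile.Reader reader = new SequenceFile.Reader(
      conf, SequenceFile.Reader.file(listingPath))) {
    Text key = new Text();
    CopyListingFileStatus value = new CopyListingFileStatus();
    while (reader.next(key, value)) {
      entries.put(key, value);
      key = new Text();                     // fresh instances, since the
      value = new CopyListingFileStatus();  // reader reuses Writables
    }
  }
  return entries;
}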
Use of org.apache.hadoop.mapreduce.Mapper in project ignite by apache.
The class HadoopV2MapTask, method run0: sets up the input split and record reader, instantiates the job's Mapper reflectively, and runs it over Ignite's Hadoop context.
/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
  OutputFormat outputFormat = null;
  Exception err = null;
  JobContextImpl jobCtx = taskCtx.jobContext();
  if (taskCtx.taskInfo().hasMapperIndex())
    HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
  else
    HadoopMapperUtils.clearMapperIndex();
  try {
    HadoopV2Context hadoopCtx = hadoopContext();
    InputSplit nativeSplit = hadoopCtx.getInputSplit();
    if (nativeSplit == null)
      throw new IgniteCheckedException("Input split cannot be null.");
    InputFormat inFormat = ReflectionUtils.newInstance(
        jobCtx.getInputFormatClass(), hadoopCtx.getConfiguration());
    RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);
    reader.initialize(nativeSplit, hadoopCtx);
    hadoopCtx.reader(reader);
    HadoopJobInfo jobInfo = taskCtx.job().info();
    // A map-only job (no combiner or reducer) writes its output directly,
    // so it needs a writer; otherwise output goes to the shuffle.
    outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer()
        ? null : prepareWriter(jobCtx);
    Mapper mapper = ReflectionUtils.newInstance(
        jobCtx.getMapperClass(), hadoopCtx.getConfiguration());
    try {
      // Run the user's mapper over the wrapped Ignite context.
      mapper.run(new WrappedMapper().getMapContext(hadoopCtx));
      taskCtx.onMapperFinished();
    } finally {
      closeWriter();
    }
    commit(outputFormat);
  } catch (InterruptedException e) {
    err = e;
    Thread.currentThread().interrupt();
    throw new IgniteInterruptedCheckedException(e);
  } catch (Exception e) {
    err = e;
    throw new IgniteCheckedException(e);
  } finally {
    HadoopMapperUtils.clearMapperIndex();
    if (err != null)
      abort(outputFormat);
  }
}
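For reference, the mapper.run(...) call above drives Hadoop's default Mapper.run loop, which pulls key/value pairs from the wrapped context until the record reader is exhausted. It looks like this in org.apache.hadoop.mapreduce.Mapper (reproduced from memory, so treat it as a sketch):

// Default run() loop of org.apache.hadoop.mapreduce.Mapper, which the
// call above drives via the wrapped Ignite context (sketch from memory).
public void run(Context context) throws IOException, InterruptedException {
  setup(context);
  try {
    while (context.nextKeyValue()) {
      map(context.getCurrentKey(), context.getCurrentValue(), context);
    }
  } finally {
    cleanup(context);
  }
}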