Use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.
From the class TestDynamicInputFormat, method getOptions:
private static DistCpOptions getOptions() throws Exception {
  Path sourcePath = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/source");
  Path targetPath = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/target");
  List<Path> sourceList = new ArrayList<Path>();
  sourceList.add(sourcePath);
  DistCpOptions options = new DistCpOptions(sourceList, targetPath);
  options.setMaxMaps(NUM_SPLITS);
  return options;
}
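The helper above relies on a shared test fixture that the snippet does not show: the cluster field and the NUM_SPLITS constant. A minimal sketch of what that setup might look like, assuming a MiniDFSCluster started once per class; the NUM_SPLITS value below is a placeholder, not taken from the snippet:

  private static MiniDFSCluster cluster;
  private static final int NUM_SPLITS = 7;  // hypothetical value, not from the snippet

  @BeforeClass
  public static void setup() throws Exception {
    // Start a single-node HDFS mini cluster that the tests read from and write to.
    cluster = new MiniDFSCluster.Builder(new Configuration())
        .numDataNodes(1)
        .format(true)
        .build();
  }

  @AfterClass
  public static void tearDown() {
    if (cluster != null) {
      cluster.shutdown();
    }
  }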
Use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.
From the class TestUniformSizeInputFormat, method testGetSplits:
public void testGetSplits(int nMaps) throws Exception {
  DistCpOptions options = getOptions(nMaps);
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
  // Build the copy listing that the input format will split across maps.
  Path listFile = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/testGetSplits_1/fileList.seq");
  CopyListing.getCopyListing(configuration, CREDENTIALS, options)
      .buildListing(listFile, options);
  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
  List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);
  int sizePerMap = totalFileSize / nMaps;
  checkSplits(listFile, splits);
  int doubleCheckedTotalSize = 0;
  int previousSplitSize = -1;
  for (int i = 0; i < splits.size(); ++i) {
    InputSplit split = splits.get(i);
    int currentSplitSize = 0;
    RecordReader<Text, CopyListingFileStatus> recordReader =
        uniformSizeInputFormat.createRecordReader(split, null);
    StubContext stubContext =
        new StubContext(jobContext.getConfiguration(), recordReader, 0);
    final TaskAttemptContext taskAttemptContext = stubContext.getContext();
    recordReader.initialize(split, taskAttemptContext);
    while (recordReader.nextKeyValue()) {
      Path sourcePath = recordReader.getCurrentValue().getPath();
      FileSystem fs = sourcePath.getFileSystem(configuration);
      FileStatus[] fileStatus = fs.listStatus(sourcePath);
      // Skip entries that expand to more than one status (i.e. directories).
      if (fileStatus.length > 1) {
        continue;
      }
      currentSplitSize += fileStatus[0].getLen();
    }
    // Consecutive splits should differ by less than 10% of the ideal size per
    // map; the last split is allowed to be smaller.
    Assert.assertTrue(previousSplitSize == -1
        || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
        || i == splits.size() - 1);
    // Remember this split's size for the next iteration's comparison.
    previousSplitSize = currentSplitSize;
    doubleCheckedTotalSize += currentSplitSize;
  }
  // The splits together must account for every byte in the source listing.
  Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
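The method above takes the map count as a parameter, so it is presumably driven by one or more @Test methods in the same class. A hedged sketch of such a driver; the specific map counts are illustrative only and not taken from the snippet:

  @Test
  public void testGetSplits() throws Exception {
    // Exercise the helper with a handful of map counts (placeholder values).
    for (int nMaps : new int[] {1, 3, 9}) {
      testGetSplits(nMaps);
    }
  }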
Use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.
From the class TestUniformSizeInputFormat, method getOptions:
private static DistCpOptions getOptions(int nMaps) throws Exception {
  Path sourcePath = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/source");
  Path targetPath = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/target");
  List<Path> sourceList = new ArrayList<Path>();
  sourceList.add(sourcePath);
  final DistCpOptions distCpOptions = new DistCpOptions(sourceList, targetPath);
  distCpOptions.setMaxMaps(nMaps);
  return distCpOptions;
}
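Both getOptions helpers build DistCpOptions directly through its constructor and setters. For reference, roughly equivalent options could also be produced from distcp-style command-line arguments via OptionsParser; the sketch below is an assumption about that parser API (with -m for the maximum number of maps) and reuses the sourcePath, targetPath, and nMaps values from the helper above:

  // Sketch only: builds comparable options from CLI-style arguments.
  DistCpOptions parsed = OptionsParser.parse(new String[] {
      "-m", String.valueOf(nMaps),
      sourcePath.toString(),
      targetPath.toString()});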
Use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.
From the class TestDynamicInputFormat, method testGetSplits:
@Test
public void testGetSplits() throws Exception {
  DistCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options)
      .buildListing(new Path(cluster.getFileSystem().getUri().toString()
          + "/tmp/testDynInputFormat/fileList.seq"), options);
  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  DynamicInputFormat<Text, CopyListingFileStatus> inputFormat =
      new DynamicInputFormat<Text, CopyListingFileStatus>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  int nFiles = 0;
  int taskId = 0;
  for (InputSplit split : splits) {
    StubContext stubContext =
        new StubContext(jobContext.getConfiguration(), null, taskId);
    final TaskAttemptContext taskAttemptContext = stubContext.getContext();
    RecordReader<Text, CopyListingFileStatus> recordReader =
        inputFormat.createRecordReader(split, taskAttemptContext);
    stubContext.setReader(recordReader);
    recordReader.initialize(splits.get(0), taskAttemptContext);
    float previousProgressValue = 0f;
    while (recordReader.nextKeyValue()) {
      CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
      String source = fileStatus.getPath().toString();
      System.out.println(source);
      // Every record must correspond to a file created during test setup.
      Assert.assertTrue(expectedFilePaths.contains(source));
      // Progress must be non-decreasing and stay within [0, 1].
      final float progress = recordReader.getProgress();
      Assert.assertTrue(progress >= previousProgressValue);
      Assert.assertTrue(progress >= 0.0f);
      Assert.assertTrue(progress <= 1.0f);
      previousProgressValue = progress;
      ++nFiles;
    }
    Assert.assertTrue(recordReader.getProgress() == 1.0f);
    ++taskId;
  }
  // Across all splits, the number of records read must equal the number of
  // files created during setup.
  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
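expectedFilePaths is a fixture field populated while the source files are created; it is not shown in the snippet. A minimal sketch of how that bookkeeping might look, assuming a hypothetical createFile helper invoked from the class setup:

  private static final List<String> expectedFilePaths = new ArrayList<String>();

  // Hypothetical helper: creates a file on the mini cluster and records the
  // fully qualified path that the record reader is expected to report.
  private static void createFile(String path) throws Exception {
    FileSystem fileSystem = cluster.getFileSystem();
    OutputStream out = fileSystem.create(new Path(path));
    out.close();
    expectedFilePaths.add(
        fileSystem.listStatus(new Path(path))[0].getPath().toString());
  }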
Use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.
From the class TestCopyCommitter, method testDeleteMissingFlatInterleavedFiles:
@Test
public void testDeleteMissingFlatInterleavedFiles() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();
  String sourceBase;
  String targetBase;
  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);
    sourceBase = "/tmp1/" + String.valueOf(rand.nextLong());
    targetBase = "/tmp1/" + String.valueOf(rand.nextLong());
    // Interleave the file names so that some exist only in the source
    // (1, 3, 8) and some only in the target (2, A).
    TestDistCpUtils.createFile(fs, sourceBase + "/1");
    TestDistCpUtils.createFile(fs, sourceBase + "/3");
    TestDistCpUtils.createFile(fs, sourceBase + "/4");
    TestDistCpUtils.createFile(fs, sourceBase + "/5");
    TestDistCpUtils.createFile(fs, sourceBase + "/7");
    TestDistCpUtils.createFile(fs, sourceBase + "/8");
    TestDistCpUtils.createFile(fs, sourceBase + "/9");
    TestDistCpUtils.createFile(fs, targetBase + "/2");
    TestDistCpUtils.createFile(fs, targetBase + "/4");
    TestDistCpUtils.createFile(fs, targetBase + "/5");
    TestDistCpUtils.createFile(fs, targetBase + "/7");
    TestDistCpUtils.createFile(fs, targetBase + "/9");
    TestDistCpUtils.createFile(fs, targetBase + "/A");
    DistCpOptions options =
        new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
    options.setSyncFolder(true);
    options.setDeleteMissing(true);
    options.appendToConf(conf);
    CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
    Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
    listing.buildListing(listingFile, options);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
    // Committing with delete-missing enabled should remove the target files
    // that have no counterpart in the source (2 and A), leaving four files.
    committer.commitJob(jobContext);
    if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
      Assert.fail("Source and target folders are not in sync");
    }
    Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
    // Test for idempotent commit: a second commitJob must leave the target unchanged.
    committer.commitJob(jobContext);
    if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
      Assert.fail("Source and target folders are not in sync");
    }
    Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
  } catch (IOException e) {
    LOG.error("Exception encountered while testing for delete missing", e);
    Assert.fail("Delete missing failure");
  } finally {
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
  }
}
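The getTaskAttemptContext helper and the config field belong to the surrounding test class and are not shown above. A minimal sketch of what the helper might look like, assuming the mapreduce TaskAttemptContextImpl API; the identifier string and numeric ids are placeholder values:

  // Hypothetical helper: wraps the given configuration in a task attempt
  // context with a fixed, arbitrary TaskAttemptID.
  private static TaskAttemptContext getTaskAttemptContext(Configuration conf) {
    return new TaskAttemptContextImpl(conf,
        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
  }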