Search in sources :

Example 1 with CopyMapper

use of org.apache.hadoop.tools.mapred.CopyMapper in project hadoop by apache.

the class TestDistCpSyncReverseBase method testSync.

/**
   * Test the basic functionality.
   */
@Test
public void testSync() throws Exception {
    if (isSrcNotSameAsTgt) {
        initData(source);
    }
    initData(target);
    enableAndCreateFirstSnapshot();
    final FsShell shell = new FsShell(conf);
    lsrSource("Before source: ", shell, source);
    lsr("Before target: ", shell, target);
    // make changes under target
    int numDeletedModified = changeData(target);
    createSecondSnapshotAtTarget();
    SnapshotDiffReport report = dfs.getSnapshotDiffReport(target, "s2", "s1");
    System.out.println(report);
    DistCpSync distCpSync = new DistCpSync(options, conf);
    lsr("Before sync target: ", shell, target);
    // do the sync
    Assert.assertTrue(distCpSync.sync());
    lsr("After sync target: ", shell, target);
    // make sure the source path has been updated to the snapshot path
    final Path spath = new Path(source, HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s1");
    Assert.assertEquals(spath, options.getSourcePaths().get(0));
    // build copy listing
    final Path listingPath = new Path("/tmp/META/fileList.seq");
    CopyListing listing = new SimpleCopyListing(conf, new Credentials(), distCpSync);
    listing.buildListing(listingPath, options);
    Map<Text, CopyListingFileStatus> copyListing = getListing(listingPath);
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(conf, null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context<Text, CopyListingFileStatus, Text, Text> context = stubContext.getContext();
    // Enable append
    context.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true);
    copyMapper.setup(context);
    for (Map.Entry<Text, CopyListingFileStatus> entry : copyListing.entrySet()) {
        copyMapper.map(entry.getKey(), entry.getValue(), context);
    }
    lsrSource("After mapper source: ", shell, source);
    lsr("After mapper target: ", shell, target);
    // verify that we only list modified and created files/directories
    Assert.assertEquals(numDeletedModified, copyListing.size());
    // verify that we only copied new appended data of f2 and the new file f1
    Assert.assertEquals(blockSize * 3, stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
    // verify the source and target now has the same structure
    verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
}
Also used : Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) FsShell(org.apache.hadoop.fs.FsShell) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) Mapper(org.apache.hadoop.mapreduce.Mapper) SnapshotDiffReport(org.apache.hadoop.hdfs.protocol.SnapshotDiffReport) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) HashMap(java.util.HashMap) Map(java.util.Map) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 2 with CopyMapper

use of org.apache.hadoop.tools.mapred.CopyMapper in project hadoop by apache.

the class TestDistCpSyncReverseBase method testAndVerify.

private void testAndVerify(int numDeletedAndModified) throws Exception {
    SnapshotDiffReport report = dfs.getSnapshotDiffReport(target, "s2", "s1");
    System.out.println(report);
    final FsShell shell = new FsShell(conf);
    lsrSource("Before sync source: ", shell, source);
    lsr("Before sync target: ", shell, target);
    DistCpSync distCpSync = new DistCpSync(options, conf);
    // do the sync
    distCpSync.sync();
    lsr("After sync target: ", shell, target);
    // make sure the source path has been updated to the snapshot path
    final Path spath = new Path(source, HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s1");
    Assert.assertEquals(spath, options.getSourcePaths().get(0));
    // build copy listing
    final Path listingPath = new Path("/tmp/META/fileList.seq");
    CopyListing listing = new SimpleCopyListing(conf, new Credentials(), distCpSync);
    listing.buildListing(listingPath, options);
    Map<Text, CopyListingFileStatus> copyListing = getListing(listingPath);
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(conf, null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context<Text, CopyListingFileStatus, Text, Text> context = stubContext.getContext();
    // Enable append
    context.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true);
    copyMapper.setup(context);
    for (Map.Entry<Text, CopyListingFileStatus> entry : copyListing.entrySet()) {
        copyMapper.map(entry.getKey(), entry.getValue(), context);
    }
    // verify that we only list modified and created files/directories
    Assert.assertEquals(numDeletedAndModified, copyListing.size());
    lsr("After Copy target: ", shell, target);
    // verify the source and target now has the same structure
    verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
}
Also used : Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) FsShell(org.apache.hadoop.fs.FsShell) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) Mapper(org.apache.hadoop.mapreduce.Mapper) SnapshotDiffReport(org.apache.hadoop.hdfs.protocol.SnapshotDiffReport) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) HashMap(java.util.HashMap) Map(java.util.Map) Credentials(org.apache.hadoop.security.Credentials)

Example 3 with CopyMapper

use of org.apache.hadoop.tools.mapred.CopyMapper in project hadoop by apache.

the class TestDistCpSync method testSync.

/**
   * Test the basic functionality.
   */
@Test
public void testSync() throws Exception {
    initData(source);
    initData(target);
    enableAndCreateFirstSnapshot();
    // make changes under source
    int numCreatedModified = changeData(source);
    dfs.createSnapshot(source, "s2");
    // before sync, make some further changes on source. this should not affect
    // the later distcp since we're copying (s2-s1) to target
    final Path toDelete = new Path(source, "foo/d1/foo/f1");
    dfs.delete(toDelete, true);
    final Path newdir = new Path(source, "foo/d1/foo/newdir");
    dfs.mkdirs(newdir);
    SnapshotDiffReport report = dfs.getSnapshotDiffReport(source, "s1", "s2");
    System.out.println(report);
    DistCpSync distCpSync = new DistCpSync(options, conf);
    // do the sync
    Assert.assertTrue(distCpSync.sync());
    // make sure the source path has been updated to the snapshot path
    final Path spath = new Path(source, HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
    Assert.assertEquals(spath, options.getSourcePaths().get(0));
    // build copy listing
    final Path listingPath = new Path("/tmp/META/fileList.seq");
    CopyListing listing = new SimpleCopyListing(conf, new Credentials(), distCpSync);
    listing.buildListing(listingPath, options);
    Map<Text, CopyListingFileStatus> copyListing = getListing(listingPath);
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(conf, null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context<Text, CopyListingFileStatus, Text, Text> context = stubContext.getContext();
    // Enable append
    context.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true);
    copyMapper.setup(context);
    for (Map.Entry<Text, CopyListingFileStatus> entry : copyListing.entrySet()) {
        copyMapper.map(entry.getKey(), entry.getValue(), context);
    }
    // verify that we only list modified and created files/directories
    Assert.assertEquals(numCreatedModified, copyListing.size());
    // verify that we only copied new appended data of f2 and the new file f1
    Assert.assertEquals(BLOCK_SIZE * 3, stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
    // verify the source and target now has the same structure
    verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
}
Also used : Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) Mapper(org.apache.hadoop.mapreduce.Mapper) SnapshotDiffReport(org.apache.hadoop.hdfs.protocol.SnapshotDiffReport) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) HashMap(java.util.HashMap) Map(java.util.Map) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 4 with CopyMapper

use of org.apache.hadoop.tools.mapred.CopyMapper in project hadoop by apache.

the class TestDistCpSync method testAndVerify.

private void testAndVerify(int numCreatedModified) throws Exception {
    SnapshotDiffReport report = dfs.getSnapshotDiffReport(source, "s1", "s2");
    System.out.println(report);
    DistCpSync distCpSync = new DistCpSync(options, conf);
    // do the sync
    Assert.assertTrue(distCpSync.sync());
    // make sure the source path has been updated to the snapshot path
    final Path spath = new Path(source, HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
    Assert.assertEquals(spath, options.getSourcePaths().get(0));
    // build copy listing
    final Path listingPath = new Path("/tmp/META/fileList.seq");
    CopyListing listing = new SimpleCopyListing(conf, new Credentials(), distCpSync);
    listing.buildListing(listingPath, options);
    Map<Text, CopyListingFileStatus> copyListing = getListing(listingPath);
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(conf, null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context<Text, CopyListingFileStatus, Text, Text> context = stubContext.getContext();
    // Enable append
    context.getConfiguration().setBoolean(DistCpOptionSwitch.APPEND.getConfigLabel(), true);
    copyMapper.setup(context);
    for (Map.Entry<Text, CopyListingFileStatus> entry : copyListing.entrySet()) {
        copyMapper.map(entry.getKey(), entry.getValue(), context);
    }
    // verify that we only list modified and created files/directories
    Assert.assertEquals(numCreatedModified, copyListing.size());
    // verify the source and target now has the same structure
    verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
}
Also used : Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) Mapper(org.apache.hadoop.mapreduce.Mapper) SnapshotDiffReport(org.apache.hadoop.hdfs.protocol.SnapshotDiffReport) CopyMapper(org.apache.hadoop.tools.mapred.CopyMapper) HashMap(java.util.HashMap) Map(java.util.Map) Credentials(org.apache.hadoop.security.Credentials)

Aggregations

HashMap (java.util.HashMap)4 Map (java.util.Map)4 Path (org.apache.hadoop.fs.Path)4 SnapshotDiffReport (org.apache.hadoop.hdfs.protocol.SnapshotDiffReport)4 Text (org.apache.hadoop.io.Text)4 Mapper (org.apache.hadoop.mapreduce.Mapper)4 Credentials (org.apache.hadoop.security.Credentials)4 CopyMapper (org.apache.hadoop.tools.mapred.CopyMapper)4 FsShell (org.apache.hadoop.fs.FsShell)2 Test (org.junit.Test)2