Example 31 with DistCpOptions

use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.

the class TestOptionsParser method testParseNumListstatusThreads.

@Test
public void testParseNumListstatusThreads() {
    DistCpOptions options = OptionsParser.parse(new String[] { "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
    // If command line argument isn't set, we expect .getNumListstatusThreads
    // option to be zero (so that we know when to override conf properties).
    Assert.assertEquals(0, options.getNumListstatusThreads());
    options = OptionsParser.parse(new String[] { "--numListstatusThreads", "12", "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
    Assert.assertEquals(12, options.getNumListstatusThreads());
    options = OptionsParser.parse(new String[] { "--numListstatusThreads", "0", "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
    Assert.assertEquals(0, options.getNumListstatusThreads());
    try {
        OptionsParser.parse(new String[] { "--numListstatusThreads", "hello", "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
        Assert.fail("Non numberic numListstatusThreads parsed");
    } catch (IllegalArgumentException ignore) {
    }
    // Thread counts above the maximum are capped at DistCpOptions.maxNumListstatusThreads.
    options = OptionsParser.parse(new String[] { "--numListstatusThreads", "100", "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
    Assert.assertEquals(DistCpOptions.maxNumListstatusThreads, options.getNumListstatusThreads());
}
Also used : DistCpOptions(org.apache.hadoop.tools.DistCpOptions) Test(org.junit.Test)
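
The test relies on zero meaning "not set on the command line". Below is a minimal sketch (not part of the Hadoop test above) of how a caller might use that convention to fall back to a configuration property; the property name distcp.example.liststatus.threads is hypothetical and only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.OptionsParser;

public class ListstatusThreadsExample {
    // Hypothetical key; DistCp itself uses its own internal config label.
    private static final String FALLBACK_KEY = "distcp.example.liststatus.threads";

    static int effectiveThreads(DistCpOptions options, Configuration conf) {
        int fromCli = options.getNumListstatusThreads();
        // 0 means --numListstatusThreads was not given, so defer to the configuration.
        return fromCli > 0 ? fromCli : conf.getInt(FALLBACK_KEY, 1);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt(FALLBACK_KEY, 8);
        DistCpOptions options = OptionsParser.parse(new String[] {
            "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
        System.out.println(effectiveThreads(options, conf)); // prints 8
    }
}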

Example 32 with DistCpOptions

use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.

the class TestOptionsParser method testOptionsAppendToConf.

@Test
public void testOptionsAppendToConf() {
    Configuration conf = new Configuration();
    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false));
    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
    Assert.assertEquals(conf.getRaw(DistCpOptionSwitch.BANDWIDTH.getConfigLabel()), null);
    DistCpOptions options = OptionsParser.parse(new String[] { "-atomic", "-i", "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
    options.appendToConf(conf);
    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false));
    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
    Assert.assertEquals(conf.getFloat(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1), -1.0, DELTA);
    conf = new Configuration();
    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false));
    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.DELETE_MISSING.getConfigLabel(), false));
    Assert.assertEquals(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()), null);
    options = OptionsParser.parse(new String[] { "-update", "-delete", "-pu", "-bandwidth", "11.2", "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
    options.appendToConf(conf);
    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false));
    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.DELETE_MISSING.getConfigLabel(), false));
    Assert.assertEquals(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()), "U");
    Assert.assertEquals(conf.getFloat(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1), 11.2, DELTA);
}
Also used : DistCpOptions(org.apache.hadoop.tools.DistCpOptions) Configuration(org.apache.hadoop.conf.Configuration) Test(org.junit.Test)
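
For context, here is a minimal sketch (separate from the test above) of how a downstream component might read back the values that appendToConf wrote into the job Configuration, assuming the same command-line flags as in the second half of the test; the HDFS URIs are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpOptionSwitch;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.OptionsParser;

public class AppendToConfExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        DistCpOptions options = OptionsParser.parse(new String[] {
            "-update", "-pu", "-bandwidth", "11.2",
            "hdfs://localhost:9820/source/first", "hdfs://localhost:9820/target/" });
        options.appendToConf(conf);

        // The switch enum exposes the configuration key for each option.
        boolean syncFolders =
            conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
        float bandwidth =
            conf.getFloat(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1);
        String preserve = conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel());

        System.out.println(syncFolders + " " + bandwidth + " " + preserve); // true 11.2 U
    }
}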

Example 33 with DistCpOptions

use of org.apache.hadoop.tools.DistCpOptions in project hadoop by apache.

the class CopyCommitter method deleteMissing.

// This method deletes "extra" files from the target, if they're not
// available at the source.
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");
    // Sort the source-file listing alphabetically.
    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
    // Similarly, create the listing of target-files. Sort alphabetically.
    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath).toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME) ? DistCpConstants.RAW_NONE_PATH : DistCpConstants.NONE_PATH;
    DistCpOptions options = new DistCpOptions(targets, resultNonePath);
    //
    // Set up options to be the same, from CopyListing.buildListing's perspective,
    // so as to collect similar listings as when doing the copy.
    //
    options.setOverwrite(overwrite);
    options.setSyncFolder(syncFolder);
    options.setTargetPathExists(targetPathExists);
    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(sortedSourceListing));
    SequenceFile.Reader targetReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(sortedTargetListing));
    // Walk both the source and target file listings.
    // Delete everything from the target that doesn't also exist at the source.
    long deletedEntries = 0;
    try {
        CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
        Text srcRelPath = new Text();
        CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
        Text trgtRelPath = new Text();
        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            // Skip sources that don't exist on target.
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }
            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;
            // Target doesn't exist at source. Delete.
            boolean result = targetFS.delete(trgtFileStatus.getPath(), true) || !targetFS.exists(trgtFileStatus.getPath());
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            taskAttemptContext.progress();
            taskAttemptContext.setStatus("Deleting missing files from target. [" + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) CopyListing(org.apache.hadoop.tools.CopyListing) GlobbedCopyListing(org.apache.hadoop.tools.GlobbedCopyListing) DistCpOptions(org.apache.hadoop.tools.DistCpOptions) SequenceFile(org.apache.hadoop.io.SequenceFile) CopyListingFileStatus(org.apache.hadoop.tools.CopyListingFileStatus) FileSystem(org.apache.hadoop.fs.FileSystem)
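
The core of deleteMissing is a merge walk over two sorted listings. The standalone sketch below illustrates that walk with plain sorted lists of relative paths standing in for the SequenceFile listings; the delete method is a hypothetical placeholder for targetFS.delete.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class SortedMergeDeleteSketch {
    public static void main(String[] args) {
        List<String> source = Arrays.asList("/a", "/c", "/d");       // sorted
        List<String> target = Arrays.asList("/a", "/b", "/c", "/e"); // sorted

        Iterator<String> src = source.iterator();
        String s = src.hasNext() ? src.next() : null;
        for (String t : target) {
            // Advance the source cursor past entries that sort before t.
            while (s != null && t.compareTo(s) > 0) {
                s = src.hasNext() ? src.next() : null;
            }
            if (s != null && t.equals(s)) {
                continue; // present at source; keep it
            }
            delete(t); // missing at source; remove from target
        }
    }

    private static void delete(String relPath) {
        System.out.println("would delete " + relPath); // prints /b and /e
    }
}

Because both listings are sorted, each listing is read exactly once, so the pass is linear in the combined size of the two listings.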

Example 34 with DistCpOptions

use of org.apache.hadoop.tools.DistCpOptions in project hive by apache.

the class HdfsUtils method runDistCp.

public static boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException {
    DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst).withSyncFolder(true).withCRC(true).preserve(FileAttribute.BLOCKSIZE).build();
    // Creates the command-line parameters for distcp
    List<String> params = constructDistCpParams(srcPaths, dst, conf);
    try {
        conf.setBoolean("mapred.mapper.new-api", true);
        DistCp distcp = new DistCp(conf, options);
        // added by HADOOP-10459
        return distcp.run(params.toArray(new String[params.size()])) == 0;
    } catch (Exception e) {
        throw new IOException("Cannot execute DistCp process: " + e, e);
    } finally {
        conf.setBoolean("mapred.mapper.new-api", false);
    }
}
Also used : DistCpOptions(org.apache.hadoop.tools.DistCpOptions) DistCp(org.apache.hadoop.tools.DistCp) IOException(java.io.IOException) LoginException(javax.security.auth.login.LoginException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(org.apache.hadoop.security.AccessControlException)
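
A minimal usage sketch, assuming a reachable HDFS at the illustrative localhost URIs, of driving DistCp programmatically the same way the Hive helper does: build DistCpOptions with the Builder, construct DistCp with them, and run it with command-line style arguments.

import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;

public class ProgrammaticDistCpSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path src = new Path("hdfs://localhost:9820/warehouse/src_table"); // illustrative
        Path dst = new Path("hdfs://localhost:9820/warehouse/dst_table"); // illustrative

        // Same Builder calls as in the Hive helper above.
        DistCpOptions options = new DistCpOptions.Builder(Collections.singletonList(src), dst)
            .withSyncFolder(true)
            .withCRC(true)
            .preserve(FileAttribute.BLOCKSIZE)
            .build();

        DistCp distcp = new DistCp(conf, options);
        // Exit code 0 means the copy succeeded, mirroring the helper's check.
        int exitCode = distcp.run(new String[] { "-update", src.toString(), dst.toString() });
        System.out.println(exitCode == 0 ? "DistCp succeeded" : "DistCp failed: " + exitCode);
    }
}

As in the helper, both the built options and the string arguments are supplied; keeping the two consistent is advisable, since run() parses its own arguments.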

Aggregations

DistCpOptions (org.apache.hadoop.tools.DistCpOptions) 34
Test (org.junit.Test) 22
Path (org.apache.hadoop.fs.Path) 13
Configuration (org.apache.hadoop.conf.Configuration) 10
IOException (java.io.IOException) 6
FileSystem (org.apache.hadoop.fs.FileSystem) 5
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl) 5
CopyListing (org.apache.hadoop.tools.CopyListing) 4
DistCp (org.apache.hadoop.tools.DistCp) 4
GlobbedCopyListing (org.apache.hadoop.tools.GlobbedCopyListing) 4
FileNotFoundException (java.io.FileNotFoundException) 3
ArrayList (java.util.ArrayList) 3
Text (org.apache.hadoop.io.Text) 3
CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus) 3
InvocationTargetException (java.lang.reflect.InvocationTargetException) 2
MalformedURLException (java.net.MalformedURLException) 2
AccessControlException (java.security.AccessControlException) 2
NoSuchAlgorithmException (java.security.NoSuchAlgorithmException) 2
StubContext (org.apache.hadoop.tools.StubContext) 2
NoSuchElementException (java.util.NoSuchElementException) 1