
Example 26 with FileContext

Use of org.apache.hadoop.fs.FileContext in project hadoop by apache.

The class TestCheckpoint, method testSaveNamespace.

/**
   * Tests save namespace.
   */
@Test
public void testSaveNamespace() throws IOException {
    MiniDFSCluster cluster = null;
    DistributedFileSystem fs = null;
    FileContext fc;
    try {
        Configuration conf = new HdfsConfiguration();
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes).format(true).build();
        cluster.waitActive();
        fs = (cluster.getFileSystem());
        fc = FileContext.getFileContext(cluster.getURI(0));
        // Saving image without safe mode should fail
        DFSAdmin admin = new DFSAdmin(conf);
        String[] args = new String[] { "-saveNamespace" };
        try {
            admin.run(args);
        } catch (IOException eIO) {
            assertTrue(eIO.getLocalizedMessage().contains("Safe mode should be turned ON"));
        } catch (Exception e) {
            throw new IOException(e);
        }
        // create new file
        Path file = new Path("namespace.dat");
        DFSTestUtil.createFile(fs, file, fileSize, fileSize, blockSize, replication, seed);
        checkFile(fs, file, replication);
        // create new link
        Path symlink = new Path("file.link");
        fc.createSymlink(file, symlink, false);
        assertTrue(fc.getFileLinkStatus(symlink).isSymlink());
        // verify that the edits file is NOT empty
        Collection<URI> editsDirs = cluster.getNameEditsDirs(0);
        for (URI uri : editsDirs) {
            File ed = new File(uri.getPath());
            assertTrue(new File(ed, "current/" + NNStorage.getInProgressEditsFileName(1)).length() > Integer.SIZE / Byte.SIZE);
        }
        // Saving image in safe mode should succeed
        fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
        try {
            admin.run(args);
        } catch (Exception e) {
            throw new IOException(e);
        }
        // TODO: Fix the test to not require a hard-coded transaction count.
        final int EXPECTED_TXNS_FIRST_SEG = 13;
        for (URI uri : editsDirs) {
            File ed = new File(uri.getPath());
            File curDir = new File(ed, "current");
            LOG.info("Files in " + curDir + ":\n  " + Joiner.on("\n  ").join(curDir.list()));
            // Verify that the first edits file got finalized
            File originalEdits = new File(curDir, NNStorage.getInProgressEditsFileName(1));
            assertFalse(originalEdits.exists());
            File finalizedEdits = new File(curDir, NNStorage.getFinalizedEditsFileName(1, EXPECTED_TXNS_FIRST_SEG));
            GenericTestUtils.assertExists(finalizedEdits);
            assertTrue(finalizedEdits.length() > Integer.SIZE / Byte.SIZE);
            GenericTestUtils.assertExists(new File(ed, "current/" + NNStorage.getInProgressEditsFileName(EXPECTED_TXNS_FIRST_SEG + 1)));
        }
        Collection<URI> imageDirs = cluster.getNameDirs(0);
        for (URI uri : imageDirs) {
            File imageDir = new File(uri.getPath());
            File savedImage = new File(imageDir, "current/" + NNStorage.getImageFileName(EXPECTED_TXNS_FIRST_SEG));
            assertTrue("Should have saved image at " + savedImage, savedImage.exists());
        }
        // restart cluster and verify file exists
        cluster.shutdown();
        cluster = null;
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes).format(false).build();
        cluster.waitActive();
        fs = (cluster.getFileSystem());
        checkFile(fs, file, replication);
        fc = FileContext.getFileContext(cluster.getURI(0));
        assertTrue(fc.getFileLinkStatus(symlink).isSymlink());
    } finally {
        if (fs != null)
            fs.close();
        cleanup(cluster);
        cluster = null;
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) MetricsRecordBuilder(org.apache.hadoop.metrics2.MetricsRecordBuilder) IOException(java.io.IOException) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) URI(java.net.URI) Util.fileAsURI(org.apache.hadoop.hdfs.server.common.Util.fileAsURI) ExitException(org.apache.hadoop.util.ExitUtil.ExitException) ParseException(org.apache.commons.cli.ParseException) DFSAdmin(org.apache.hadoop.hdfs.tools.DFSAdmin) RandomAccessFile(java.io.RandomAccessFile) EditLogFile(org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile) NameNodeFile(org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile) File(java.io.File) FileContext(org.apache.hadoop.fs.FileContext) Test(org.junit.Test)
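
The FileContext calls this test exercises are getFileContext(URI), createSymlink and getFileLinkStatus. Below is a minimal sketch of the same symlink round-trip outside the MiniDFSCluster harness; the NameNode URI and paths are placeholders, and symlink support is disabled by default in some Hadoop releases, so treat it as illustrative rather than a drop-in test. It needs only Configuration, FileContext, Path and java.net.URI.

private static void symlinkSketch() throws Exception {
    // Placeholder NameNode URI; substitute the real fs.defaultFS of the cluster under test.
    Configuration conf = new Configuration();
    FileContext fc = FileContext.getFileContext(new URI("hdfs://localhost:8020"), conf);
    Path target = new Path("/user/example/namespace.dat");
    Path link = new Path("/user/example/file.link");
    // Third argument = false: do not create missing parent directories, matching the test above.
    fc.createSymlink(target, link, false);
    // getFileLinkStatus does not follow the link, so isSymlink() reports on the link itself.
    System.out.println(link + " is a symlink: " + fc.getFileLinkStatus(link).isSymlink());
}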

Example 27 with FileContext

Use of org.apache.hadoop.fs.FileContext in project hadoop by apache.

The class TestDFSClientFailover, method testFileContextDoesntDnsResolveLogicalURI.

/**
   * Same test as above, but for FileContext.
   */
@Test
public void testFileContextDoesntDnsResolveLogicalURI() throws Exception {
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    NameService spyNS = spyOnNameService();
    String logicalHost = fs.getUri().getHost();
    Configuration haClientConf = fs.getConf();
    FileContext fc = FileContext.getFileContext(haClientConf);
    Path root = new Path("/");
    fc.listStatus(root);
    fc.listStatus(fc.makeQualified(root));
    fc.getDefaultFileSystem().getCanonicalServiceName();
    // Ensure that the logical hostname was never resolved.
    Mockito.verify(spyNS, Mockito.never()).lookupAllHostAddr(Mockito.eq(logicalHost));
}
Also used : Path(org.apache.hadoop.fs.Path) NameService(sun.net.spi.nameservice.NameService) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) FileContext(org.apache.hadoop.fs.FileContext) Test(org.junit.Test)
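
For reference, the plain pattern of obtaining a FileContext from a client Configuration and listing a directory is sketched below; note that FileContext.listStatus returns a RemoteIterator rather than the FileStatus[] that FileSystem.listStatus returns. The logical URI hdfs://mycluster is an assumption standing in for whatever fs.defaultFS the HA client configuration carries (imports: Configuration, FileContext, Path, FileStatus, RemoteIterator).

private static void listRootSketch() throws Exception {
    // Assumes fs.defaultFS is already set, e.g. to an HA logical URI such as hdfs://mycluster.
    Configuration conf = new Configuration();
    FileContext fc = FileContext.getFileContext(conf);
    // Unlike FileSystem.listStatus, FileContext.listStatus returns a RemoteIterator.
    RemoteIterator<FileStatus> it = fc.listStatus(new Path("/"));
    while (it.hasNext()) {
        System.out.println(it.next().getPath());
    }
}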

Example 28 with FileContext

Use of org.apache.hadoop.fs.FileContext in project tez by apache.

The class TestTezLocalCacheManager, method createFile.

// create a temporary file with the given content and return a LocalResource
private static LocalResource createFile(String content) throws IOException {
    FileContext fs = FileContext.getLocalFSFileContext();
    java.nio.file.Path tempFile = Files.createTempFile("test-cache-manager", ".txt");
    File temp = tempFile.toFile();
    temp.deleteOnExit();
    Path p = new Path("file:///" + tempFile.toAbsolutePath().toString());
    Files.write(tempFile, content.getBytes());
    RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
    LocalResource ret = recordFactory.newRecordInstance(LocalResource.class);
    URL yarnUrlFromPath = ConverterUtils.getYarnUrlFromPath(p);
    ret.setResource(yarnUrlFromPath);
    ret.setSize(content.getBytes().length);
    ret.setType(LocalResourceType.FILE);
    ret.setVisibility(LocalResourceVisibility.PRIVATE);
    ret.setTimestamp(fs.getFileStatus(p).getModificationTime());
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) RecordFactory(org.apache.hadoop.yarn.factories.RecordFactory) File(java.io.File) FileContext(org.apache.hadoop.fs.FileContext) URL(org.apache.hadoop.yarn.api.records.URL) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource)
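
A LocalResource built this way is normally handed to YARN by name in a ContainerLaunchContext, and the timestamp copied from fc.getFileStatus(p) must still match the file on disk when the node manager localizes it. The helper below is a hedged usage sketch, not part of the Tez test; the resource name is whatever the container should see in its working directory (imports: ContainerLaunchContext, Records from org.apache.hadoop.yarn.util, java.util.Collections).

// Illustrative only: attach a LocalResource (e.g. one returned by createFile above) to a
// container launch context under the given name.
private static ContainerLaunchContext attachResource(String name, LocalResource resource) {
    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    ctx.setLocalResources(Collections.singletonMap(name, resource));
    return ctx;
}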

Example 29 with FileContext

Use of org.apache.hadoop.fs.FileContext in project cdap by caskdata.

The class DynamicPartitioningOutputCommitter, method commitJob.

@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);
    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    outputDataset = taskContext.getDataset(outputDatasetName);
    DynamicPartitioner.PartitionWriteOption partitionWriteOption = DynamicPartitioner.PartitionWriteOption.valueOf(configuration.get(PartitionedFileSetArguments.DYNAMIC_PARTITIONER_WRITE_OPTION));
    Partitioning partitioning = outputDataset.getPartitioning();
    partitionsToAdd = new HashMap<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);
            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. It's relative path '{}' has fewer than two parts", path, relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE) {
                if (fs.exists(finalDir)) {
                    throw new FileAlreadyExistsException("Final output path already exists: " + finalDir);
                }
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.put(relativeDir, partitionKey);
        }
    }
    // need to remove any existing partitions, before moving temporary content to final output
    if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_OVERWRITE) {
        for (Map.Entry<String, PartitionKey> entry : partitionsToAdd.entrySet()) {
            if (outputDataset.getPartition(entry.getValue()) != null) {
                // this allows reinstating the existing files if there's a rollback.
                // alternative is to simply remove the files within the partition's location
                // upside to that is easily avoiding explore operations. one downside is that metadata is not removed then
                outputDataset.dropPartition(entry.getValue());
            }
        }
    }
    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
    // the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileContext fc = FileContext.getFileContext(configuration);
    // the finalOutput path doesn't have scheme or authority (but 'from' does)
    finalOutput = fc.makeQualified(finalOutput);
    for (FileStatus from : getAllCommittedTaskPaths(context)) {
        mergePaths(fc, from, finalOutput);
    }
    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(), PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);
    boolean allowAppend = partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND;
    // create all the necessary partitions
    for (Map.Entry<String, PartitionKey> entry : partitionsToAdd.entrySet()) {
        outputDataset.addPartition(entry.getValue(), entry.getKey(), metadata, true, allowAppend);
    }
    // delete the job-specific _temporary folder
    cleanupJob(context);
    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : partitionsToAdd.keySet()) {
            Path pathToMark = new Path(finalOutput, relativePath);
            createOrUpdate(fc, new Path(pathToMark, SUCCEEDED_FILE_NAME));
            // also create a _SUCCESS-<RunId>, if allowing append
            if (allowAppend) {
                createOrUpdate(fc, new Path(pathToMark, SUCCEEDED_FILE_NAME + "-" + taskContext.getProgramRunId().getRun()));
            }
        }
    }
}
Also used : BasicMapReduceTaskContext(co.cask.cdap.internal.app.runtime.batch.BasicMapReduceTaskContext) Path(org.apache.hadoop.fs.Path) MapReduceClassLoader(co.cask.cdap.internal.app.runtime.batch.MapReduceClassLoader) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) Configuration(org.apache.hadoop.conf.Configuration) Partitioning(co.cask.cdap.api.dataset.lib.Partitioning) FileSystem(org.apache.hadoop.fs.FileSystem) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) DynamicPartitioner(co.cask.cdap.api.dataset.lib.DynamicPartitioner) HashMap(java.util.HashMap) Map(java.util.Map) FileContext(org.apache.hadoop.fs.FileContext)
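
createOrUpdate is a helper defined elsewhere in this committer class. A plausible minimal version using FileContext.create is sketched below as an assumption about its behavior, not CDAP's actual implementation: it writes an empty marker such as _SUCCESS, overwriting any existing file and creating parent directories as needed (imports: java.util.EnumSet, CreateFlag and Options from org.apache.hadoop.fs).

// Hedged sketch of a createOrUpdate-style helper: write an empty marker file, overwriting
// any existing one and creating parents if necessary. Not necessarily CDAP's implementation.
private static void createOrUpdate(FileContext fc, Path markerPath) throws IOException {
    fc.create(markerPath,
              EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
              Options.CreateOpts.createParent()).close();
}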

Example 30 with FileContext

Use of org.apache.hadoop.fs.FileContext in project cdap by caskdata.

The class MapReduceContainerHelper, method getFrameworkURI.

/**
 * Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
 *
 * @param hConf the job configuration
 * @return the framework URI or {@code null} if not present or if the URI in the config is invalid.
 */
@Nullable
private static URI getFrameworkURI(Configuration hConf) {
    String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
    if (framework == null) {
        return null;
    }
    try {
        // Parse the path. It can contain '#' to represent the localized file name
        URI uri = new URI(framework);
        String localizedName = LocalizationUtils.getLocalizedName(uri);
        // The following resolution logic is copied from JobSubmitter in MR.
        FileSystem fs = FileSystem.get(hConf);
        Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
        FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
        frameworkPath = fc.resolvePath(frameworkPath);
        uri = frameworkPath.toUri();
        return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, localizedName);
    } catch (URISyntaxException e) {
        LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.", framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    } catch (IOException e) {
        LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.", framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    }
    return null;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) URI(java.net.URI) FileContext(org.apache.hadoop.fs.FileContext) Nullable(javax.annotation.Nullable)
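
A hedged usage sketch of getFrameworkURI from within the same class; the archive location is hypothetical. The part of mapreduce.application.framework.path after '#' names the directory the framework archive is localized under, and the method preserves it as the fragment of the returned URI.

// Illustrative only: a hypothetical framework archive location with a '#' localized name.
Configuration hConf = new Configuration();
hConf.set(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH,
        "hdfs://namenode:8020/apps/mapreduce/mapreduce.tar.gz#mr-framework");
// Resolves any symlinks in the path and keeps "mr-framework" as the URI fragment.
URI frameworkUri = getFrameworkURI(hConf);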

Aggregations

FileContext (org.apache.hadoop.fs.FileContext): 84
Path (org.apache.hadoop.fs.Path): 71
Test (org.junit.Test): 34
Configuration (org.apache.hadoop.conf.Configuration): 33
IOException (java.io.IOException): 29
File (java.io.File): 16
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 14
FileStatus (org.apache.hadoop.fs.FileStatus): 13
HashMap (java.util.HashMap): 12
FsPermission (org.apache.hadoop.fs.permission.FsPermission): 10
ArrayList (java.util.ArrayList): 9
FileSystem (org.apache.hadoop.fs.FileSystem): 8
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 8
ExecutorService (java.util.concurrent.ExecutorService): 7
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 7
URISyntaxException (java.net.URISyntaxException): 6
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 6
ExecutionException (java.util.concurrent.ExecutionException): 6
Future (java.util.concurrent.Future): 6
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 6