
Example 36 with LocalFileSystem

Use of org.apache.hadoop.fs.LocalFileSystem in project incubator-gobblin by apache.

In class MRJobLauncher, the method addJars (a simplified sketch of its bounded-retry upload follows the listing):

/**
 * Add framework or job-specific jars to the classpath through DistributedCache
 * so the mappers can use them.
 */
@SuppressWarnings("deprecation")
private void addJars(Path jarFileDir, String jarFileList, Configuration conf) throws IOException {
    LocalFileSystem lfs = FileSystem.getLocal(conf);
    for (String jarFile : SPLITTER.split(jarFileList)) {
        Path srcJarFile = new Path(jarFile);
        FileStatus[] fileStatusList = lfs.globStatus(srcJarFile);
        for (FileStatus status : fileStatusList) {
            // Each copy attempt may fail, e.g. because the file already exists or because another job instance
            // is still uploading the same jar (all Gobblin jobs share the same jar file directory).
            // retryCount caps the retries so that a persistent failure cannot hang the job.
            int retryCount = 0;
            boolean shouldFileBeAddedIntoDC = true;
            Path destJarFile = calculateDestJarFile(status, jarFileDir);
            // Keep uploading destJarFile to HDFS until it exists there and its size matches the local copy.
            while (!this.fs.exists(destJarFile) || fs.getFileStatus(destJarFile).getLen() != status.getLen()) {
                try {
                    if (this.fs.exists(destJarFile) && fs.getFileStatus(destJarFile).getLen() != status.getLen()) {
                        Thread.sleep(WAITING_TIME_ON_IMCOMPLETE_UPLOAD);
                        throw new IOException("Waiting for file to complete on uploading ... ");
                    }
                    // First argument false: do not delete the source file.
                    // Second argument false: do not overwrite an existing file on the target (the default is true).
                    // If the file already exists and the overwrite flag is false, an IOException is thrown.
                    this.fs.copyFromLocalFile(false, false, status.getPath(), destJarFile);
                } catch (IOException | InterruptedException e) {
                    LOG.warn("Path:" + destJarFile + " is not copied successfully. Will require retry.");
                    retryCount += 1;
                    if (retryCount >= this.jarFileMaximumRetry) {
                        LOG.error("The jar file:" + destJarFile + "failed in being copied into hdfs", e);
                        // If retry reaches upper limit, skip copying this file.
                        shouldFileBeAddedIntoDC = false;
                        break;
                    }
                }
            }
            if (shouldFileBeAddedIntoDC) {
                // Then add the jar file on HDFS to the classpath
                LOG.info(String.format("Adding %s to classpath", destJarFile));
                DistributedCache.addFileToClassPath(destJarFile, conf, this.fs);
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) IOException(java.io.IOException)
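
The bounded retry loop is the interesting design choice here: because all Gobblin jobs share one jar directory on HDFS, a copy can fail simply because another job instance is uploading the same jar at that moment, and the retry cap keeps such contention from hanging the launcher. Below is a minimal, illustrative sketch of that pattern using only standard Hadoop FileSystem calls; the constants, method name, and return-value convention are assumptions for the sketch, not Gobblin's actual API.

// Illustrative sketch only: the names and constants below are assumptions, not part of Gobblin.
private static final int MAX_UPLOAD_RETRIES = 5;
private static final long RETRY_WAIT_MS = 3000L;

static boolean uploadWithRetry(FileSystem fs, FileStatus localJar, Path dest) throws IOException {
    int attempts = 0;
    // Keep trying until the jar exists on the target FS with the expected size.
    while (!fs.exists(dest) || fs.getFileStatus(dest).getLen() != localJar.getLen()) {
        try {
            // delSrc=false, overwrite=false: never clobber a file another job may still be uploading.
            fs.copyFromLocalFile(false, false, localJar.getPath(), dest);
        } catch (IOException e) {
            if (++attempts >= MAX_UPLOAD_RETRIES) {
                return false; // give up; the caller can skip adding this jar to the classpath
            }
            try {
                Thread.sleep(RETRY_WAIT_MS);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while retrying upload of " + dest, ie);
            }
        }
    }
    return true;
}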

Example 37 with LocalFileSystem

Use of org.apache.hadoop.fs.LocalFileSystem in project incubator-gobblin by apache.

In class MRCompactor, the method copyDependencyJarsToHdfs (a note on makeQualified follows the listing):

/**
 * Copy dependency jars from local fs to HDFS.
 */
private void copyDependencyJarsToHdfs() throws IOException {
    if (!this.state.contains(ConfigurationKeys.JOB_JAR_FILES_KEY)) {
        return;
    }
    LocalFileSystem lfs = FileSystem.getLocal(this.conf);
    Path tmpJarFileDir = new Path(this.tmpOutputDir, "_gobblin_compaction_jars");
    this.state.setProp(COMPACTION_JARS, tmpJarFileDir.toString());
    this.fs.delete(tmpJarFileDir, true);
    for (String jarFile : this.state.getPropAsList(ConfigurationKeys.JOB_JAR_FILES_KEY)) {
        for (FileStatus status : lfs.globStatus(new Path(jarFile))) {
            Path tmpJarFile = new Path(this.fs.makeQualified(tmpJarFileDir), status.getPath().getName());
            this.fs.copyFromLocalFile(status.getPath(), tmpJarFile);
            LOG.info(String.format("%s will be added to classpath", tmpJarFile));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem)
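
A small aside on fs.makeQualified: it resolves the bare directory path against the scheme and authority of the destination FileSystem, so the copied jar lands under a fully qualified hdfs:// path rather than a scheme-less one. A minimal sketch of the effect; the namenode authority and jar name are made up for illustration.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf); // the cluster's default FileSystem
Path qualified = fs.makeQualified(new Path("/tmp/_gobblin_compaction_jars"));
// e.g. hdfs://namenode:8020/tmp/_gobblin_compaction_jars when HDFS is the default FS (authority assumed)
Path jarOnHdfs = new Path(qualified, "my-converters.jar"); // hypothetical jar name
System.out.println(jarOnHdfs);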

Example 38 with LocalFileSystem

Use of org.apache.hadoop.fs.LocalFileSystem in project incubator-gobblin by apache.

In class CompactionSource, the method copyJarDependencies (a sketch of the globStatus expansion follows the listing):

/**
 * Copy dependent jars to a temporary job directory on HDFS
 */
private void copyJarDependencies(State state) throws IOException {
    if (this.tmpJobDir == null) {
        throw new RuntimeException("Job directory is not created");
    }
    if (!state.contains(ConfigurationKeys.JOB_JAR_FILES_KEY)) {
        return;
    }
    // create sub-dir to save jar files
    LocalFileSystem lfs = FileSystem.getLocal(HadoopUtils.getConfFromState(state));
    Path tmpJarFileDir = new Path(this.tmpJobDir, MRCompactor.COMPACTION_JAR_SUBDIR);
    this.fs.mkdirs(tmpJarFileDir);
    state.setProp(MRCompactor.COMPACTION_JARS, tmpJarFileDir.toString());
    // copy jar files to hdfs
    for (String jarFile : state.getPropAsList(ConfigurationKeys.JOB_JAR_FILES_KEY)) {
        for (FileStatus status : lfs.globStatus(new Path(jarFile))) {
            Path tmpJarFile = new Path(this.fs.makeQualified(tmpJarFileDir), status.getPath().getName());
            this.fs.copyFromLocalFile(status.getPath(), tmpJarFile);
            log.info(String.format("%s will be added to classpath", tmpJarFile));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem)
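
Both of these copy helpers run every JOB_JAR_FILES_KEY entry through lfs.globStatus, so a single configuration value can be a wildcard such as a directory of jars rather than an explicit list. A short sketch of that expansion on the local file system; the directory and jar names are invented for illustration.

LocalFileSystem lfs = FileSystem.getLocal(new Configuration());
FileStatus[] matches = lfs.globStatus(new Path("/opt/gobblin/lib/*.jar")); // hypothetical path
// globStatus returns an empty array for a glob with no matches, and null for a plain path that does not exist
if (matches != null) {
    for (FileStatus status : matches) {
        System.out.println(status.getPath().getName()); // e.g. gobblin-core.jar, my-converters.jar
    }
}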

Example 39 with LocalFileSystem

Use of org.apache.hadoop.fs.LocalFileSystem in project incubator-gobblin by apache.

In class HadoopUtils, the method renamePathHandleLocalFSRace (a usage sketch follows the listing):

/**
 * Renames a src {@link Path} on fs {@link FileSystem} to a dst {@link Path}. If fs is a {@link LocalFileSystem} and
 * src is a directory then {@link File#renameTo} is called directly to avoid a directory rename race condition where
 * {@link org.apache.hadoop.fs.RawLocalFileSystem#rename} copies the conflicting src directory into dst resulting in
 * an extra nested level, such as /root/a/b/c/e/e where e is repeated.
 *
 * @param fs the {@link FileSystem} where the src {@link Path} exists
 * @param src the source {@link Path} which will be renamed
 * @param dst the {@link Path} to rename to
 * @return true if rename succeeded, false if rename failed.
 * @throws IOException if the rename failed for a reason other than the destination already existing.
 */
public static boolean renamePathHandleLocalFSRace(FileSystem fs, Path src, Path dst) throws IOException {
    if (DecoratorUtils.resolveUnderlyingObject(fs) instanceof LocalFileSystem && fs.isDirectory(src)) {
        LocalFileSystem localFs = (LocalFileSystem) DecoratorUtils.resolveUnderlyingObject(fs);
        File srcFile = localFs.pathToFile(src);
        File dstFile = localFs.pathToFile(dst);
        return srcFile.renameTo(dstFile);
    } else {
        return fs.rename(src, dst);
    }
}
Also used : RawLocalFileSystem(org.apache.hadoop.fs.RawLocalFileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) File(java.io.File)
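
A hedged usage sketch for the helper above, committing a locally staged directory; the paths and error handling are illustrative only, and for any non-local FileSystem the call simply delegates to fs.rename.

FileSystem fs = FileSystem.getLocal(new Configuration());
Path staged = new Path("/tmp/job-output/_staging");   // hypothetical source directory
Path committed = new Path("/tmp/job-output/final");   // hypothetical destination
if (!HadoopUtils.renamePathHandleLocalFSRace(fs, staged, committed)) {
    // File.renameTo returns false on conflict instead of nesting the source inside an existing destination.
    throw new IOException("Failed to rename " + staged + " to " + committed);
}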

Example 40 with LocalFileSystem

Use of org.apache.hadoop.fs.LocalFileSystem in project incubator-gobblin by apache.

In class InstrumentedLocalFileSystemTest, the method testFromConfigurationOverride (a sketch of the default, non-overridden behavior follows the listing):

@Test
public void testFromConfigurationOverride() throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("fs.file.impl", InstrumentedLocalFileSystem.class.getName());
    FileSystem fs = FileSystem.newInstance(new URI("file:///"), configuration);
    Assert.assertTrue(fs instanceof InstrumentedLocalFileSystem);
    Assert.assertTrue(DecoratorUtils.resolveUnderlyingObject(fs) instanceof LocalFileSystem);
    Assert.assertEquals(fs.getFileStatus(new Path("/tmp")).getPath(), new Path("file:///tmp"));
    Assert.assertEquals(fs.getUri().getScheme(), "file");
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) URI(java.net.URI) Test(org.testng.annotations.Test)
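
For contrast, a minimal sketch of the lookup without the fs.file.impl override, assuming stock Hadoop defaults: the same call returns the plain LocalFileSystem that the test's DecoratorUtils assertion ultimately unwraps to.

Configuration defaultConf = new Configuration();
FileSystem plainFs = FileSystem.newInstance(new URI("file:///"), defaultConf);
// With no override, the file:// scheme resolves to org.apache.hadoop.fs.LocalFileSystem
Assert.assertTrue(plainFs instanceof LocalFileSystem);
Assert.assertEquals(plainFs.getUri().getScheme(), "file");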

Aggregations

LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem) 121
Path (org.apache.hadoop.fs.Path) 77
Test (org.junit.Test) 64
Configuration (org.apache.hadoop.conf.Configuration) 57
FileSystem (org.apache.hadoop.fs.FileSystem) 35
IOException (java.io.IOException) 33
File (java.io.File) 23
NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration) 23
SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration) 23
SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration) 23
DefaultConfiguration (org.apache.accumulo.core.conf.DefaultConfiguration) 23
Key (org.apache.accumulo.core.data.Key) 22
Value (org.apache.accumulo.core.data.Value) 22
ArrayList (java.util.ArrayList) 19
ExecutorService (java.util.concurrent.ExecutorService) 15
Future (java.util.concurrent.Future) 15
Scanner (org.apache.accumulo.core.client.Scanner) 14
DataSegment (org.apache.druid.timeline.DataSegment) 13
DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher) 8
HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) 8