Search in sources :

Example 86 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project tez by apache.

the class TestFetcher method verifyFetchSucceeded.

protected void verifyFetchSucceeded(FetcherCallback callback, CompositeInputAttemptIdentifier srcAttempId, Configuration conf) throws IOException {
    String pathComponent = srcAttempId.getPathComponent();
    int len = pathComponent.length();
    long p = Long.valueOf(pathComponent.substring(len - 1, len));
    ArgumentCaptor<LocalDiskFetchedInput> capturedFetchedInput = ArgumentCaptor.forClass(LocalDiskFetchedInput.class);
    verify(callback).fetchSucceeded(eq(HOST), eq(srcAttempId.expand(0)), capturedFetchedInput.capture(), eq(p * 100), eq(p * 1000), anyLong());
    LocalDiskFetchedInput f = capturedFetchedInput.getValue();
    Assert.assertEquals("success callback filename", f.getInputFile().toString(), SHUFFLE_INPUT_FILE_PREFIX + pathComponent);
    Assert.assertTrue("success callback fs", f.getLocalFS() instanceof LocalFileSystem);
    Assert.assertEquals("success callback filesystem", f.getStartOffset(), p * 10);
    Assert.assertEquals("success callback compressed size", f.getSize(), p * 100);
    Assert.assertEquals("success callback input id", f.getInputAttemptIdentifier(), srcAttempId.expand(0));
    Assert.assertEquals("success callback type", f.getType(), FetchedInput.Type.DISK_DIRECT);
Also used : LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Matchers.anyString(org.mockito.Matchers.anyString)

Example 87 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.

the class VariableCPInstruction method writeScalarToHDFS.

/**
 * Helper function to write scalars to HDFS based on its value type.
 *
 * @param ec execution context
 * @param fname file name
 */
private void writeScalarToHDFS(ExecutionContext ec, String fname) {
    try {
        ScalarObject scalar = ec.getScalarInput(getInput1().getName(), getInput1().getValueType(), getInput1().isLiteral());
        MapReduceTool.writeObjectToHDFS(scalar.getValue(), fname);
        MapReduceTool.writeScalarMetaDataFile(fname + ".mtd", getInput1().getValueType());
        FileSystem fs = IOUtilFunctions.getFileSystem(fname);
        if (fs instanceof LocalFileSystem) {
            Path path = new Path(fname);
            IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
    } catch (IOException e) {
        throw new DMLRuntimeException(e);
Also used : Path(org.apache.hadoop.fs.Path) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 88 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.

the class WriterTextCellParallel method writeTextCellMatrixToHDFS.

protected void writeTextCellMatrixToHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock src, long rlen, long clen) throws IOException {
    // estimate output size and number of output blocks (min 1)
    int numPartFiles = (int) (OptimizerUtils.estimateSizeTextOutput(src.getNumRows(), src.getNumColumns(), src.getNonZeros(), OutputInfo.TextCellOutputInfo) / InfrastructureAnalyzer.getHDFSBlockSize());
    numPartFiles = Math.max(numPartFiles, 1);
    // determine degree of parallelism
    int numThreads = OptimizerUtils.getParallelTextWriteParallelism();
    numThreads = Math.min(numThreads, numPartFiles);
    // fall back to sequential write if dop is 1 (e.g., <128MB) in order to create single file
    if (numThreads <= 1 || src.getNonZeros() == 0) {
        super.writeTextCellMatrixToHDFS(path, job, fs, src, rlen, clen);
    // create directory for concurrent tasks
    MapReduceTool.createDirIfNotExistOnHDFS(path, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);
    // create and execute tasks
    try {
        ExecutorService pool = CommonThreadPool.get(numThreads);
        ArrayList<WriteTextTask> tasks = new ArrayList<>();
        int blklen = (int) Math.ceil((double) rlen / numThreads);
        for (int i = 0; i < numThreads & i * blklen < rlen; i++) {
            Path newPath = new Path(path, IOUtilFunctions.getPartFileName(i));
            tasks.add(new WriteTextTask(newPath, job, fs, src, i * blklen, (int) Math.min((i + 1) * blklen, rlen)));
        // wait until all tasks have been executed
        List<Future<Object>> rt = pool.invokeAll(tasks);
        // check for exceptions
        for (Future<Object> task : rt) task.get();
        // delete crc files if written to local file system
        if (fs instanceof LocalFileSystem) {
            for (int i = 0; i < numThreads & i * blklen < rlen; i++) IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, new Path(path, IOUtilFunctions.getPartFileName(i)));
    } catch (Exception e) {
        throw new IOException("Failed parallel write of text output.", e);
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) IOException(java.io.IOException) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future)

Example 89 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.

the class FrameWriterTextCellParallel method writeTextCellFrameToHDFS.

protected void writeTextCellFrameToHDFS(Path path, JobConf job, FrameBlock src, long rlen, long clen) throws IOException {
    // estimate output size and number of output blocks (min 1)
    int numPartFiles = Math.max((int) (OptimizerUtils.estimateSizeTextOutput(rlen, clen, rlen * clen, OutputInfo.TextCellOutputInfo) / InfrastructureAnalyzer.getHDFSBlockSize()), 1);
    // determine degree of parallelism
    int numThreads = OptimizerUtils.getParallelTextWriteParallelism();
    numThreads = Math.min(numThreads, numPartFiles);
    // fall back to sequential write if dop is 1 (e.g., <128MB) in order to create single file
    if (numThreads <= 1) {
        super.writeTextCellFrameToHDFS(path, job, src, rlen, clen);
    // create directory for concurrent tasks
    MapReduceTool.createDirIfNotExistOnHDFS(path, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    // create and execute tasks
    try {
        ExecutorService pool = CommonThreadPool.get(numThreads);
        ArrayList<WriteFileTask> tasks = new ArrayList<>();
        int blklen = (int) Math.ceil((double) rlen / numThreads);
        for (int i = 0; i < numThreads & i * blklen < rlen; i++) {
            Path newPath = new Path(path, IOUtilFunctions.getPartFileName(i));
            tasks.add(new WriteFileTask(newPath, job, fs, src, i * blklen, (int) Math.min((i + 1) * blklen, rlen)));
        // wait until all tasks have been executed
        List<Future<Object>> rt = pool.invokeAll(tasks);
        // check for exceptions
        for (Future<Object> task : rt) task.get();
        // delete crc files if written to local file system
        if (fs instanceof LocalFileSystem) {
            for (int i = 0; i < numThreads & i * blklen < rlen; i++) IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, new Path(path, IOUtilFunctions.getPartFileName(i)));
    } catch (Exception e) {
        throw new IOException("Failed parallel write of text output.", e);
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) IOException(java.io.IOException) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future)

Example 90 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.

the class IOUtilFunctions method deleteCrcFilesFromLocalFileSystem.

/**
 * Delete the CRC files from the local file system associated with a
 * particular file and its metadata file.
 *
 * @param fs
 *            the file system
 * @param path
 *            the path to a file
 * @throws IOException
 *             thrown if error occurred attempting to delete crc files
 */
public static void deleteCrcFilesFromLocalFileSystem(FileSystem fs, Path path) throws IOException {
    if (fs instanceof LocalFileSystem) {
        Path fnameCrc = new Path(path.getParent(), "." + path.getName() + ".crc");
        fs.delete(fnameCrc, false);
        Path fnameMtdCrc = new Path(path.getParent(), "." + path.getName() + ".mtd.crc");
        fs.delete(fnameMtdCrc, false);
Also used : Path(org.apache.hadoop.fs.Path) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem)


LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)120 Path (org.apache.hadoop.fs.Path)77 Test (org.junit.Test)63 Configuration (org.apache.hadoop.conf.Configuration)56 FileSystem (org.apache.hadoop.fs.FileSystem)35 IOException ( File ( NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration)23 SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration)23 SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)23 DefaultConfiguration (org.apache.accumulo.core.conf.DefaultConfiguration)23 Key ( Value ( ArrayList (java.util.ArrayList)19 ExecutorService (java.util.concurrent.ExecutorService)15 Future (java.util.concurrent.Future)15 Scanner (org.apache.accumulo.core.client.Scanner)14 DataSegment (org.apache.druid.timeline.DataSegment)13 DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher)8 HdfsDataSegmentPusher (