Use of org.apache.hadoop.fs.LocalFileSystem in project tez by apache.
In the class TestFetcher, the method verifyFetchSucceeded:
protected void verifyFetchSucceeded(FetcherCallback callback, CompositeInputAttemptIdentifier srcAttempId, Configuration conf) throws IOException {
  String pathComponent = srcAttempId.getPathComponent();
  int len = pathComponent.length();
  // the last digit of the path component encodes the expected offsets and sizes
  long p = Long.parseLong(pathComponent.substring(len - 1, len));
  ArgumentCaptor<LocalDiskFetchedInput> capturedFetchedInput = ArgumentCaptor.forClass(LocalDiskFetchedInput.class);
  verify(callback).fetchSucceeded(eq(HOST), eq(srcAttempId.expand(0)), capturedFetchedInput.capture(), eq(p * 100), eq(p * 1000), anyLong());
  LocalDiskFetchedInput f = capturedFetchedInput.getValue();
  Assert.assertEquals("success callback filename", SHUFFLE_INPUT_FILE_PREFIX + pathComponent, f.getInputFile().toString());
  Assert.assertTrue("success callback fs", f.getLocalFS() instanceof LocalFileSystem);
  Assert.assertEquals("success callback start offset", p * 10, f.getStartOffset());
  Assert.assertEquals("success callback compressed size", p * 100, f.getSize());
  Assert.assertEquals("success callback input id", srcAttempId.expand(0), f.getInputAttemptIdentifier());
  Assert.assertEquals("success callback type", FetchedInput.Type.DISK_DIRECT, f.getType());
}
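For context, the getLocalFS() assertion above passes because local-disk fetches go through Hadoop's local file system implementation. A minimal sketch (not Tez code; the class name LocalFsExample is made up for illustration) of how such a LocalFileSystem instance is typically obtained:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;

public class LocalFsExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // FileSystem.getLocal returns the checksummed local implementation,
    // i.e., an instance of org.apache.hadoop.fs.LocalFileSystem
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    System.out.println(localFs.getUri()); // file:///
  }
}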
Use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.
In the class VariableCPInstruction, the method writeScalarToHDFS:
/**
 * Helper function to write a scalar to HDFS based on its value type.
 *
 * @param ec execution context
 * @param fname file name
 */
private void writeScalarToHDFS(ExecutionContext ec, String fname) {
  try {
    ScalarObject scalar = ec.getScalarInput(getInput1().getName(), getInput1().getValueType(), getInput1().isLiteral());
    MapReduceTool.writeObjectToHDFS(scalar.getValue(), fname);
    MapReduceTool.writeScalarMetaDataFile(fname + ".mtd", getInput1().getValueType());
    // remove the .crc sidecar files that LocalFileSystem creates alongside the output
    FileSystem fs = IOUtilFunctions.getFileSystem(fname);
    if (fs instanceof LocalFileSystem) {
      Path path = new Path(fname);
      IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
    }
  }
  catch (IOException e) {
    throw new DMLRuntimeException(e);
  }
}
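The CRC cleanup is needed because LocalFileSystem is a checksummed file system: every file it writes gets a hidden .<name>.crc sidecar. As an alternative sketch (standard Hadoop API, not SystemML code; file name and class name are made up), checksum creation can also be disabled before writing:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class NoCrcExample {
  public static void main(String[] args) throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(new Configuration());
    fs.setWriteChecksum(false); // suppress .crc sidecar creation up front
    try (FSDataOutputStream out = fs.create(new Path("/tmp/scalar.out"))) {
      out.writeBytes("7.0"); // no /tmp/.scalar.out.crc is written
    }
  }
}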
Use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.
In the class WriterTextCellParallel, the method writeTextCellMatrixToHDFS:
@Override
protected void writeTextCellMatrixToHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock src, long rlen, long clen) throws IOException {
  // estimate output size and number of output blocks (min 1)
  int numPartFiles = (int) (OptimizerUtils.estimateSizeTextOutput(src.getNumRows(), src.getNumColumns(), src.getNonZeros(), OutputInfo.TextCellOutputInfo) / InfrastructureAnalyzer.getHDFSBlockSize());
  numPartFiles = Math.max(numPartFiles, 1);
  // determine degree of parallelism
  int numThreads = OptimizerUtils.getParallelTextWriteParallelism();
  numThreads = Math.min(numThreads, numPartFiles);
  // fall back to a sequential write if the degree of parallelism is 1 (e.g., output <128MB) in order to create a single file
  if (numThreads <= 1 || src.getNonZeros() == 0) {
    super.writeTextCellMatrixToHDFS(path, job, fs, src, rlen, clen);
    return;
  }
  // create directory for concurrent tasks
  MapReduceTool.createDirIfNotExistOnHDFS(path, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);
  // create and execute write tasks, one per contiguous block of rows
  try {
    ExecutorService pool = CommonThreadPool.get(numThreads);
    ArrayList<WriteTextTask> tasks = new ArrayList<>();
    int blklen = (int) Math.ceil((double) rlen / numThreads);
    for (int i = 0; i < numThreads && i * blklen < rlen; i++) {
      Path newPath = new Path(path, IOUtilFunctions.getPartFileName(i));
      tasks.add(new WriteTextTask(newPath, job, fs, src, i * blklen, (int) Math.min((i + 1) * blklen, rlen)));
    }
    // wait until all tasks have been executed
    List<Future<Object>> rt = pool.invokeAll(tasks);
    pool.shutdown();
    // check for exceptions (a failed task rethrows from Future.get)
    for (Future<Object> task : rt)
      task.get();
    // delete crc files if written to the local file system
    if (fs instanceof LocalFileSystem) {
      for (int i = 0; i < numThreads && i * blklen < rlen; i++)
        IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, new Path(path, IOUtilFunctions.getPartFileName(i)));
    }
  }
  catch (Exception e) {
    throw new IOException("Failed parallel write of text output.", e);
  }
}
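The row partitioning above assigns each task the half-open row range [i * blklen, min((i + 1) * blklen, rlen)). A small self-contained sketch of that split (the helper partitionRows and the class name are hypothetical, for illustration only):

import java.util.ArrayList;
import java.util.List;

public class RowPartitionExample {
  // mirrors the loop above: ceil(rlen/numThreads) rows per block, last block clipped to rlen
  static List<int[]> partitionRows(long rlen, int numThreads) {
    int blklen = (int) Math.ceil((double) rlen / numThreads);
    List<int[]> ranges = new ArrayList<>();
    for (int i = 0; i < numThreads && i * blklen < rlen; i++)
      ranges.add(new int[] { i * blklen, (int) Math.min((long) (i + 1) * blklen, rlen) });
    return ranges;
  }

  public static void main(String[] args) {
    // 10 rows over 4 threads -> 0..3, 3..6, 6..9, 9..10
    for (int[] r : partitionRows(10, 4))
      System.out.println(r[0] + ".." + r[1]);
  }
}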
Use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.
In the class FrameWriterTextCellParallel, the method writeTextCellFrameToHDFS:
@Override
protected void writeTextCellFrameToHDFS(Path path, JobConf job, FrameBlock src, long rlen, long clen) throws IOException {
  // estimate output size and number of output blocks (min 1)
  int numPartFiles = Math.max((int) (OptimizerUtils.estimateSizeTextOutput(rlen, clen, rlen * clen, OutputInfo.TextCellOutputInfo) / InfrastructureAnalyzer.getHDFSBlockSize()), 1);
  // determine degree of parallelism
  int numThreads = OptimizerUtils.getParallelTextWriteParallelism();
  numThreads = Math.min(numThreads, numPartFiles);
  // fall back to a sequential write if the degree of parallelism is 1 (e.g., output <128MB) in order to create a single file
  if (numThreads <= 1) {
    super.writeTextCellFrameToHDFS(path, job, src, rlen, clen);
    return;
  }
  // create directory for concurrent tasks
  MapReduceTool.createDirIfNotExistOnHDFS(path, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);
  FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
  // create and execute write tasks, one per contiguous block of rows
  try {
    ExecutorService pool = CommonThreadPool.get(numThreads);
    ArrayList<WriteFileTask> tasks = new ArrayList<>();
    int blklen = (int) Math.ceil((double) rlen / numThreads);
    for (int i = 0; i < numThreads && i * blklen < rlen; i++) {
      Path newPath = new Path(path, IOUtilFunctions.getPartFileName(i));
      tasks.add(new WriteFileTask(newPath, job, fs, src, i * blklen, (int) Math.min((i + 1) * blklen, rlen)));
    }
    // wait until all tasks have been executed
    List<Future<Object>> rt = pool.invokeAll(tasks);
    pool.shutdown();
    // check for exceptions (a failed task rethrows from Future.get)
    for (Future<Object> task : rt)
      task.get();
    // delete crc files if written to the local file system
    if (fs instanceof LocalFileSystem) {
      for (int i = 0; i < numThreads && i * blklen < rlen; i++)
        IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, new Path(path, IOUtilFunctions.getPartFileName(i)));
    }
  }
  catch (Exception e) {
    throw new IOException("Failed parallel write of text output.", e);
  }
}
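The "check for exceptions" loop in both writers works because a failure inside a Callable is captured by its Future and rethrown by Future.get(). A minimal self-contained sketch (hypothetical tasks, not SystemML's WriteFileTask) demonstrating that behavior:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class InvokeAllExample {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(2);
    List<Callable<Object>> tasks = Arrays.<Callable<Object>>asList(
      () -> "ok",
      () -> { throw new java.io.IOException("simulated write failure"); });
    List<Future<Object>> rt = pool.invokeAll(tasks); // blocks until all tasks finish
    pool.shutdown();
    for (Future<Object> task : rt)
      task.get(); // the failing task rethrows here as an ExecutionException
  }
}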
Use of org.apache.hadoop.fs.LocalFileSystem in project systemml by apache.
In the class IOUtilFunctions, the method deleteCrcFilesFromLocalFileSystem:
/**
 * Delete the CRC files from the local file system associated with a
 * particular file and its metadata file.
 *
 * @param fs the file system
 * @param path the path to a file
 * @throws IOException thrown if an error occurs while attempting to delete the CRC files
 */
public static void deleteCrcFilesFromLocalFileSystem(FileSystem fs, Path path) throws IOException {
  if (fs instanceof LocalFileSystem) {
    // LocalFileSystem stores checksums in hidden sidecar files named .<name>.crc
    Path fnameCrc = new Path(path.getParent(), "." + path.getName() + ".crc");
    fs.delete(fnameCrc, false);
    Path fnameMtdCrc = new Path(path.getParent(), "." + path.getName() + ".mtd.crc");
    fs.delete(fnameMtdCrc, false);
  }
}
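A hypothetical usage sketch of the helper (file paths and class name are made up; assumes SystemML's IOUtilFunctions is on the classpath): after writing /tmp/out.scalar and its .mtd metadata file locally, the call below removes the .out.scalar.crc and .out.scalar.mtd.crc sidecars.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.sysml.runtime.io.IOUtilFunctions;

public class DeleteCrcExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path out = new Path("/tmp/out.scalar");
    fs.create(out).close();                             // also writes /tmp/.out.scalar.crc
    fs.create(new Path("/tmp/out.scalar.mtd")).close(); // also writes /tmp/.out.scalar.mtd.crc
    IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, out); // both sidecars removed; no-op on non-local file systems
  }
}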