Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
In the class TestDataTransferProtocol, the method readFile:
void readFile(FileSystem fs, Path path, int fileLen) throws IOException {
  byte[] arr = new byte[fileLen];
  FSDataInputStream in = fs.open(path);
  in.readFully(arr);
}
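The helper above reads the whole file into memory with a single readFully call and, being a short-lived test helper, never closes the stream. Outside of a test the same read is safer with try-with-resources; a minimal sketch, reusing the fs, path, and fileLen parameters from the method above:

// Read a file of known length and close the stream automatically.
byte[] data = new byte[fileLen];
try (FSDataInputStream in = fs.open(path)) {
  // readFully throws EOFException if the file is shorter than fileLen.
  in.readFully(data);
}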
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
In the class TestCachingStrategy, the method readHdfsFile:
static long readHdfsFile(FileSystem fs, Path p, long length, Boolean dropBehind)
    throws Exception {
  FSDataInputStream fis = null;
  long totalRead = 0;
  try {
    fis = fs.open(p);
    if (dropBehind != null) {
      fis.setDropBehind(dropBehind);
    }
    byte[] buf = new byte[8196];
    while (length > 0) {
      int amt = (length > buf.length) ? buf.length : (int) length;
      int ret = fis.read(buf, 0, amt);
      if (ret == -1) {
        // Hit end-of-file: return how many bytes were actually read.
        return totalRead;
      }
      totalRead += ret;
      length -= ret;
    }
  } catch (IOException e) {
    LOG.error("ioexception", e);
  } finally {
    if (fis != null) {
      fis.close();
    }
  }
  // The loop is expected to exit through the EOF return above (callers pass a
  // length at least as large as the file), so this statement should never run.
  throw new RuntimeException("unreachable");
}
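This helper is meant to be called with a length at least as large as the file, so the loop terminates through the end-of-file branch and the method returns the number of bytes actually read. A sketch of such a call, assuming fs and a "/test" path from a running MiniDFSCluster as in the tests below:

// Read to end-of-file with drop-behind enabled; Long.MAX_VALUE ensures the
// loop ends at EOF rather than by exhausting the requested length.
long bytesRead = readHdfsFile(fs, new Path("/test"), Long.MAX_VALUE, true);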
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
In the class TestCachingStrategy, the method testFadviseSkippedForSmallReads:
@Test(timeout = 120000)
public void testFadviseSkippedForSmallReads() throws Exception {
  // start a cluster
  LOG.info("testFadviseSkippedForSmallReads");
  tracker.clear();
  Configuration conf = new HdfsConfiguration();
  conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_KEY, true);
  MiniDFSCluster cluster = null;
  String TEST_PATH = "/test";
  int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
  FSDataInputStream fis = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    // create new file
    createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, null);
    // Since the DataNode was configured with drop-behind, and we didn't
    // specify any policy, we should have done drop-behind.
    ExtendedBlock block = cluster.getNameNode().getRpcServer()
        .getBlockLocations(TEST_PATH, 0, Long.MAX_VALUE).get(0).getBlock();
    String fadvisedFileName = cluster.getBlockFile(0, block).getName();
    Stats stats = tracker.getStats(fadvisedFileName);
    stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
    stats.clear();
    stats.assertNotDroppedInRange(0, TEST_PATH_LEN);
    // read file
    fis = fs.open(new Path(TEST_PATH));
    byte[] buf = new byte[17];
    fis.readFully(4096, buf, 0, buf.length);
    // we should not have dropped anything because of the small read.
    stats = tracker.getStats(fadvisedFileName);
    stats.assertNotDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
  } finally {
    IOUtils.cleanup(null, fis);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
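The small read in this test uses the positional form of readFully, which reads at an absolute file offset and leaves the stream's current position untouched; only seek plus a sequential read moves the cursor. A brief sketch of the difference, assuming the same fs and "/test" path:

try (FSDataInputStream in = fs.open(new Path("/test"))) {
  byte[] buf = new byte[17];
  in.readFully(4096, buf, 0, buf.length); // positional read at offset 4096
  long pos = in.getPos();                 // still 0: positional reads do not move the cursor
  in.seek(4096);                          // sequential access needs an explicit seek
  in.readFully(buf);                      // now the cursor advances to 4096 + buf.length
}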
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
In the class TestLocalDistributedCacheManager, the method testDuplicateDownload:
@Test
public void testDuplicateDownload() throws Exception {
  JobConf conf = new JobConf();
  conf.setClass("fs.mock.impl", MockFileSystem.class, FileSystem.class);
  URI mockBase = new URI("mock://test-nn1/");
  when(mockfs.getUri()).thenReturn(mockBase);
  Path working = new Path("mock://test-nn1/user/me/");
  when(mockfs.getWorkingDirectory()).thenReturn(working);
  when(mockfs.resolvePath(any(Path.class))).thenAnswer(new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock args) throws Throwable {
      return (Path) args.getArguments()[0];
    }
  });
  final URI file = new URI("mock://test-nn1/user/me/file.txt#link");
  final Path filePath = new Path(file);
  File link = new File("link");
  when(mockfs.getFileStatus(any(Path.class))).thenAnswer(new Answer<FileStatus>() {
    @Override
    public FileStatus answer(InvocationOnMock args) throws Throwable {
      Path p = (Path) args.getArguments()[0];
      if ("file.txt".equals(p.getName())) {
        return new FileStatus(201, false, 1, 500, 101, 101,
            FsPermission.getDefault(), "me", "me", filePath);
      } else {
        throw new FileNotFoundException(p + " not supported by mocking");
      }
    }
  });
  when(mockfs.getConf()).thenReturn(conf);
  final FSDataInputStream in =
      new FSDataInputStream(new MockInputStream("This is a test file\n".getBytes()));
  when(mockfs.open(any(Path.class), anyInt())).thenAnswer(new Answer<FSDataInputStream>() {
    @Override
    public FSDataInputStream answer(InvocationOnMock args) throws Throwable {
      Path src = (Path) args.getArguments()[0];
      if ("file.txt".equals(src.getName())) {
        return in;
      } else {
        throw new FileNotFoundException(src + " not supported by mocking");
      }
    }
  });
  DistributedCache.addCacheFile(file, conf);
  DistributedCache.addCacheFile(file, conf);
  conf.set(MRJobConfig.CACHE_FILE_TIMESTAMPS, "101,101");
  conf.set(MRJobConfig.CACHE_FILES_SIZES, "201,201");
  conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "false,false");
  conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath());
  LocalDistributedCacheManager manager = new LocalDistributedCacheManager();
  try {
    manager.setup(conf);
    assertTrue(link.exists());
  } finally {
    manager.close();
  }
  assertFalse(link.exists());
}
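The mock file system above returns an FSDataInputStream wrapped around a test-only MockInputStream. FSDataInputStream rejects any wrapped stream that does not implement both Seekable and PositionedReadable, so such a mock has to provide both interfaces. A minimal sketch of a hypothetical in-memory backing stream (an illustration only, not the project's MockInputStream):

import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

class ByteArraySeekableStream extends ByteArrayInputStream
    implements Seekable, PositionedReadable {

  ByteArraySeekableStream(byte[] data) {
    super(data);
  }

  @Override
  public void seek(long newPos) {
    pos = (int) newPos; // ByteArrayInputStream exposes pos, buf, and count
  }

  @Override
  public long getPos() {
    return pos;
  }

  @Override
  public boolean seekToNewSource(long targetPos) {
    return false; // only one copy of the data exists
  }

  @Override
  public int read(long position, byte[] buffer, int offset, int length) {
    if (position >= count) {
      return -1;
    }
    int n = Math.min(length, count - (int) position);
    System.arraycopy(buf, (int) position, buffer, offset, n);
    return n;
  }

  @Override
  public void readFully(long position, byte[] buffer, int offset, int length)
      throws IOException {
    if (read(position, buffer, offset, length) < length) {
      throw new EOFException("not enough bytes at offset " + position);
    }
  }

  @Override
  public void readFully(long position, byte[] buffer) throws IOException {
    readFully(position, buffer, 0, buffer.length);
  }
}

With such a class, new FSDataInputStream(new ByteArraySeekableStream(bytes)) behaves like the mock stream used in the test.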
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
In the class MapTask, the method getSplitDetails:
@SuppressWarnings("unchecked")
private <T> T getSplitDetails(Path file, long offset) throws IOException {
  FileSystem fs = file.getFileSystem(conf);
  FSDataInputStream inFile = fs.open(file);
  inFile.seek(offset);
  String className = StringInterner.weakIntern(Text.readString(inFile));
  Class<T> cls;
  try {
    cls = (Class<T>) conf.getClassByName(className);
  } catch (ClassNotFoundException ce) {
    IOException wrap = new IOException("Split class " + className + " not found");
    wrap.initCause(ce);
    throw wrap;
  }
  SerializationFactory factory = new SerializationFactory(conf);
  Deserializer<T> deserializer = (Deserializer<T>) factory.getDeserializer(cls);
  deserializer.open(inFile);
  T split = deserializer.deserialize(null);
  long pos = inFile.getPos();
  getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
  inFile.close();
  return split;
}
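getSplitDetails reads back what the job-submission side wrote into the split file: the split class name followed by the split serialized with the same SerializationFactory machinery. A minimal sketch of a matching write path (an illustration only, not the project's actual split-file writer):

// Write a split record that getSplitDetails above can read back:
// class name first, then the serialized split bytes.
<T> long writeSplitDetails(FSDataOutputStream out, T split, Configuration conf)
    throws IOException {
  long start = out.getPos();
  Text.writeString(out, split.getClass().getName()); // read back via Text.readString
  SerializationFactory factory = new SerializationFactory(conf);
  @SuppressWarnings("unchecked")
  Serializer<T> serializer = factory.getSerializer((Class<T>) split.getClass());
  serializer.open(out);
  serializer.serialize(split);
  return out.getPos() - start; // raw split bytes, mirroring SPLIT_RAW_BYTES
}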