Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache: class TestFileConcurrentReader, method tailFile.
private long tailFile(Path file, long startPos) throws IOException {
  long numRead = 0;
  FSDataInputStream inputStream = fileSystem.open(file);
  inputStream.seek(startPos);
  int len = 4 * 1024;
  byte[] buf = new byte[len];
  int read;
  while ((read = inputStream.read(buf)) > -1) {
    LOG.info(String.format("read %d bytes", read));
    if (!validateSequentialBytes(buf, (int) (startPos + numRead), read)) {
      LOG.error(String.format("invalid bytes: [%s]\n", Arrays.toString(buf)));
      throw new ChecksumException(String.format("unable to validate bytes"), startPos);
    }
    numRead += read;
  }
  inputStream.close();
  return numRead + startPos - 1;
}
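The helper above reports a validation failure by throwing ChecksumException with the offset it started reading from. A minimal caller-side sketch, assuming only the standard ChecksumException API (a message plus a position, exposed via getPos()) and the tailFile helper above; the safeTail name and LOG field are illustrative:

private long safeTail(Path file, long startPos) throws IOException {
  try {
    return tailFile(file, startPos);
  } catch (ChecksumException ce) {
    // getPos() returns the position supplied to the ChecksumException constructor above.
    LOG.warn("checksum mismatch while tailing " + file + " at pos " + ce.getPos(), ce);
    return startPos; // let the caller decide whether to retry from the same offset
  }
}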
Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache: class TestFileConcurrentReader, method runTestUnfinishedBlockCRCError.
private void runTestUnfinishedBlockCRCError(final boolean transferToAllowed, final SyncType syncType, final int writeSize, Configuration conf) throws IOException {
  conf.setBoolean(DFSConfigKeys.DFS_DATANODE_TRANSFERTO_ALLOWED_KEY, transferToAllowed);
  init(conf);
  final Path file = new Path("/block-being-written-to");
  final int numWrites = 2000;
  final AtomicBoolean writerDone = new AtomicBoolean(false);
  final AtomicBoolean writerStarted = new AtomicBoolean(false);
  final AtomicBoolean error = new AtomicBoolean(false);
  final Thread writer = new Thread(new Runnable() {
    @Override
    public void run() {
      try {
        FSDataOutputStream outputStream = fileSystem.create(file);
        if (syncType == SyncType.APPEND) {
          outputStream.close();
          outputStream = fileSystem.append(file);
        }
        try {
          for (int i = 0; !error.get() && i < numWrites; i++) {
            final byte[] writeBuf = DFSTestUtil.generateSequentialBytes(i * writeSize, writeSize);
            outputStream.write(writeBuf);
            if (syncType == SyncType.SYNC) {
              outputStream.hflush();
            }
            writerStarted.set(true);
          }
        } catch (IOException e) {
          error.set(true);
          LOG.error("error writing to file", e);
        } finally {
          outputStream.close();
        }
        writerDone.set(true);
      } catch (Exception e) {
        LOG.error("error in writer", e);
        throw new RuntimeException(e);
      }
    }
  });
  Thread tailer = new Thread(new Runnable() {
    @Override
    public void run() {
      try {
        long startPos = 0;
        while (!writerDone.get() && !error.get()) {
          if (writerStarted.get()) {
            try {
              startPos = tailFile(file, startPos);
            } catch (IOException e) {
              LOG.error(String.format("error tailing file %s", file), e);
              throw new RuntimeException(e);
            }
          }
        }
      } catch (RuntimeException e) {
        if (e.getCause() instanceof ChecksumException) {
          error.set(true);
        }
        writer.interrupt();
        LOG.error("error in tailer", e);
        throw e;
      }
    }
  });
  writer.start();
  tailer.start();
  try {
    writer.join();
    tailer.join();
    assertFalse("error occurred, see log above", error.get());
  } catch (InterruptedException e) {
    LOG.info("interrupted waiting for writer or tailer to complete");
    Thread.currentThread().interrupt();
  }
}
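A hypothetical driver for the helper above (the test method name, write size, and plain Configuration are illustrative; a test class like this would typically define several variants covering transferTo on/off, hflush vs. append, and different write sizes):

@Test
public void testUnfinishedBlockCRCErrorWithSync() throws IOException {
  // Illustrative arguments only: transferTo allowed, hflush after each write, 4 KB writes.
  runTestUnfinishedBlockCRCError(true, SyncType.SYNC, 4 * 1024, new Configuration());
}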
Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache: class TestFsck, method testCorruptBlock.
@Test
public void testCorruptBlock() throws Exception {
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
  // Set short retry timeouts so this test runs faster
  conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
  FileSystem fs = null;
  DFSClient dfsClient = null;
  LocatedBlocks blocks = null;
  int replicaCount = 0;
  Random random = new Random();
  String outStr = null;
  short factor = 1;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  fs = cluster.getFileSystem();
  Path file1 = new Path("/testCorruptBlock");
  DFSTestUtil.createFile(fs, file1, 1024, factor, 0);
  // Wait until file replication has completed
  DFSTestUtil.waitReplication(fs, file1, factor);
  ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
  // Make sure filesystem is in healthy state
  outStr = runFsck(conf, 0, true, "/");
  System.out.println(outStr);
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
  // corrupt replicas
  File blockFile = cluster.getBlockFile(0, block);
  if (blockFile != null && blockFile.exists()) {
    RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
    FileChannel channel = raFile.getChannel();
    String badString = "BADBAD";
    int rand = random.nextInt((int) channel.size() / 2);
    raFile.seek(rand);
    raFile.write(badString.getBytes());
    raFile.close();
  }
  // Read the file to trigger reportBadBlocks
  try {
    IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf, true);
  } catch (IOException ie) {
    assertTrue(ie instanceof ChecksumException);
  }
  dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
  blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
  replicaCount = blocks.get(0).getLocations().length;
  while (replicaCount != factor) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException ignore) {
    }
    blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
    replicaCount = blocks.get(0).getLocations().length;
  }
  assertTrue(blocks.get(0).isCorrupt());
  // Check if fsck reports the same
  outStr = runFsck(conf, 1, true, "/");
  System.out.println(outStr);
  assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
  assertTrue(outStr.contains("testCorruptBlock"));
}
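The read inside the try block above is what surfaces the corruption: client-side checksum verification fails and the client reports the bad block to the namenode. A standalone sketch of that detection step, with an illustrative helper name; it assumes only the FileSystem, IOUtils, and ChecksumException APIs already used in this test:

// Returns true if reading the file trips client-side checksum verification.
static boolean readDetectsCorruption(FileSystem fs, Path path, Configuration conf) {
  try {
    IOUtils.copyBytes(fs.open(path), new IOUtils.NullOutputStream(), conf, true);
    return false; // read completed, no corruption observed
  } catch (ChecksumException ce) {
    return true;  // checksum mismatch detected while reading
  } catch (IOException other) {
    return false; // unrelated read failure; a real caller might rethrow
  }
}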
Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache: class TestPread, method testHedgedReadLoopTooManyTimes.
@Test
public void testHedgedReadLoopTooManyTimes() throws IOException {
  Configuration conf = new Configuration();
  int numHedgedReadPoolThreads = 5;
  final int hedgedReadTimeoutMillis = 50;
  conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, numHedgedReadPoolThreads);
  conf.setLong(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY, hedgedReadTimeoutMillis);
  conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 0);
  // Set up the InjectionHandler
  DFSClientFaultInjector.set(Mockito.mock(DFSClientFaultInjector.class));
  DFSClientFaultInjector injector = DFSClientFaultInjector.get();
  final int sleepMs = 100;
  Mockito.doAnswer(new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      if (true) {
        Thread.sleep(hedgedReadTimeoutMillis + sleepMs);
        if (DFSClientFaultInjector.exceptionNum.compareAndSet(0, 1)) {
          System.out.println("-------------- throw Checksum Exception");
          throw new ChecksumException("ChecksumException test", 100);
        }
      }
      return null;
    }
  }).when(injector).fetchFromDatanodeException();
  Mockito.doAnswer(new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      if (true) {
        Thread.sleep(sleepMs * 2);
      }
      return null;
    }
  }).when(injector).readFromDatanodeDelay();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).build();
  DistributedFileSystem fileSys = cluster.getFileSystem();
  DFSClient dfsClient = fileSys.getClient();
  FSDataOutputStream output = null;
  DFSInputStream input = null;
  String filename = "/hedgedReadMaxOut.dat";
  try {
    Path file = new Path(filename);
    output = fileSys.create(file, (short) 2);
    byte[] data = new byte[64 * 1024];
    output.write(data);
    output.flush();
    output.write(data);
    output.flush();
    output.write(data);
    output.flush();
    output.close();
    byte[] buffer = new byte[64 * 1024];
    input = dfsClient.open(filename);
    input.read(0, buffer, 0, 1024);
    input.close();
    assertEquals(3, input.getHedgedReadOpsLoopNumForTesting());
  } catch (BlockMissingException e) {
    assertTrue(false);
  } finally {
    Mockito.reset(injector);
    IOUtils.cleanup(null, input);
    IOUtils.cleanup(null, output);
    fileSys.close();
    cluster.shutdown();
  }
}
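Outside of fault injection, the hedged-read behavior exercised here is driven entirely by client configuration. A minimal sketch of enabling it, reusing the same keys the test sets above; the numeric values and variable names are illustrative:

Configuration clientConf = new Configuration();
// A thread pool size greater than zero enables hedged reads on the client.
clientConf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, 10);
// Start a hedged read against another datanode if the first read takes longer than this.
clientConf.setLong(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY, 250);
FileSystem fs = FileSystem.get(clientConf);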
Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache: class TestDFSAdmin, method testReportCommand.
@Test(timeout = 120000)
public void testReportCommand() throws Exception {
  redirectStream();
  /* init conf */
  final Configuration dfsConf = new HdfsConfiguration();
  dfsConf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 500); // 0.5s
  dfsConf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
  final Path baseDir = new Path(PathUtils.getTestDir(getClass()).getAbsolutePath(), GenericTestUtils.getMethodName());
  dfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.toString());
  final int numDn = 3;
  /* init cluster */
  try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(dfsConf).numDataNodes(numDn).build()) {
    miniCluster.waitActive();
    assertEquals(numDn, miniCluster.getDataNodes().size());
    /* local vars */
    final DFSAdmin dfsAdmin = new DFSAdmin(dfsConf);
    final DFSClient client = miniCluster.getFileSystem().getClient();
    /* run and verify report command */
    resetStream();
    assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
    verifyNodesAndCorruptBlocks(numDn, numDn, 0, client);
    /* shut down one DN */
    final List<DataNode> datanodes = miniCluster.getDataNodes();
    final DataNode last = datanodes.get(datanodes.size() - 1);
    last.shutdown();
    miniCluster.setDataNodeDead(last.getDatanodeId());
    /* run and verify report command */
    assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
    verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, client);
    /* corrupt one block */
    final short replFactor = 1;
    final long fileLength = 512L;
    final FileSystem fs = miniCluster.getFileSystem();
    final Path file = new Path(baseDir, "/corrupted");
    DFSTestUtil.createFile(fs, file, fileLength, replFactor, 12345L);
    DFSTestUtil.waitReplication(fs, file, replFactor);
    final ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file);
    final int blockFilesCorrupted = miniCluster.corruptBlockOnDataNodes(block);
    assertEquals("Fail to corrupt all replicas for block " + block, replFactor, blockFilesCorrupted);
    try {
      IOUtils.copyBytes(fs.open(file), new IOUtils.NullOutputStream(), conf, true);
      fail("Should have failed to read the file with corrupted blocks.");
    } catch (ChecksumException ignored) {
      // expected exception reading corrupt blocks
    }
    /*
     * Increase replication factor, this should invoke transfer request.
     * Receiving datanode fails on checksum and reports it to namenode
     */
    fs.setReplication(file, (short) (replFactor + 1));
    /* get block details and check if the block is corrupt */
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        LocatedBlocks blocks = null;
        try {
          miniCluster.triggerBlockReports();
          blocks = client.getNamenode().getBlockLocations(file.toString(), 0, Long.MAX_VALUE);
        } catch (IOException e) {
          return false;
        }
        return blocks != null && blocks.get(0).isCorrupt();
      }
    }, 1000, 60000);
    BlockManagerTestUtil.updateState(miniCluster.getNameNode().getNamesystem().getBlockManager());
    /* run and verify report command */
    resetStream();
    assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
    verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, client);
  }
}
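The anonymous Supplier passed to GenericTestUtils.waitFor above polls until the namenode marks the block corrupt. The same wait can be written more compactly with a lambda (assuming a Java 8+ functional Supplier); this is a sketch only, reusing the miniCluster, client, and file variables from the test body:

GenericTestUtils.waitFor(() -> {
  try {
    miniCluster.triggerBlockReports();
    LocatedBlocks lbs = client.getNamenode().getBlockLocations(file.toString(), 0, Long.MAX_VALUE);
    // Done once the first block of the file is reported corrupt.
    return lbs != null && lbs.get(0).isCorrupt();
  } catch (IOException e) {
    return false;
  }
}, 1000, 60000);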