Use of org.apache.hadoop.hdfs.util.Holder in project hadoop by apache.
Class TestFileAppendRestart, method testAppendRestart.
/**
* Regression test for HDFS-2991. Creates and appends to files
* where blocks start/end on block boundaries.
*/
@Test
public void testAppendRestart() throws Exception {
final Configuration conf = new HdfsConfiguration();
// Turn off persistent IPC, so that the DFSClient can survive NN restart
conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, 0);
MiniDFSCluster cluster = null;
FSDataOutputStream stream = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
FileSystem fs = cluster.getFileSystem();
File editLog = new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0).get(0), NNStorage.getInProgressEditsFileName(1));
EnumMap<FSEditLogOpCodes, Holder<Integer>> counts;
Path p1 = new Path("/block-boundaries");
writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
counts = FSImageTestUtil.countEditLogOpTypes(editLog);
// OP_ADD to create file
// OP_ADD_BLOCK for first block
// OP_CLOSE to close file
// OP_APPEND to reopen file
// OP_ADD_BLOCK for second block
// OP_CLOSE to close file
assertEquals(1, (int) counts.get(FSEditLogOpCodes.OP_ADD).held);
assertEquals(1, (int) counts.get(FSEditLogOpCodes.OP_APPEND).held);
assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_ADD_BLOCK).held);
assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_CLOSE).held);
Path p2 = new Path("/not-block-boundaries");
writeAndAppend(fs, p2, BLOCK_SIZE / 2, BLOCK_SIZE);
counts = FSImageTestUtil.countEditLogOpTypes(editLog);
// OP_ADD to create file
// OP_ADD_BLOCK for first block
// OP_CLOSE to close file
// OP_APPEND to re-establish the lease
// OP_UPDATE_BLOCKS from the updatePipeline call (increments genstamp of last block)
// OP_ADD_BLOCK at the start of the second block
// OP_CLOSE to close file
// Cumulative totals in the shared edit log after both files:
// 2 OP_ADDs, 2 OP_APPENDs, 1 OP_UPDATE_BLOCKS, 4 OP_ADD_BLOCKs, and 4 OP_CLOSEs
assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_ADD).held);
assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_APPEND).held);
assertEquals(1, (int) counts.get(FSEditLogOpCodes.OP_UPDATE_BLOCKS).held);
assertEquals(2 + 2, (int) counts.get(FSEditLogOpCodes.OP_ADD_BLOCK).held);
assertEquals(2 + 2, (int) counts.get(FSEditLogOpCodes.OP_CLOSE).held);
cluster.restartNameNode();
AppendTestUtil.check(fs, p1, 2 * BLOCK_SIZE);
AppendTestUtil.check(fs, p2, 3 * BLOCK_SIZE / 2);
} finally {
IOUtils.closeStream(stream);
if (cluster != null) {
cluster.shutdown();
}
}
}
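The assertions above read each count through the public held field because FSImageTestUtil.countEditLogOpTypes returns an EnumMap whose values are Holder<Integer> wrappers rather than plain Integers. As a rough sketch, inferred only from how the class is used in these examples (a public mutable held field and a one-argument constructor), Holder is essentially a mutable box; the real class may differ in detail:

// Minimal sketch of the shape of org.apache.hadoop.hdfs.util.Holder,
// inferred from its usage in these examples; not necessarily the verbatim source.
public class Holder<T> {
  public T held;

  public Holder(T held) {
    this.held = held;
  }

  @Override
  public String toString() {
    return String.valueOf(held);
  }
}

A mutable box like this lets callers bump a counter in place (holder.held++) instead of repeatedly replacing immutable Integer values in the map.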
Use of org.apache.hadoop.hdfs.util.Holder in project hadoop by apache.
Class FSEditLogLoader, method loadEditRecords.
long loadEditRecords(EditLogInputStream in, boolean closeOnExit, long expectedStartingTxId, StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
FSDirectory fsDir = fsNamesys.dir;
EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts = new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
if (LOG.isTraceEnabled()) {
LOG.trace("Acquiring write lock to replay edit log");
}
fsNamesys.writeLock();
fsDir.writeLock();
long[] recentOpcodeOffsets = new long[4];
Arrays.fill(recentOpcodeOffsets, -1);
long expectedTxId = expectedStartingTxId;
long numEdits = 0;
long lastTxId = in.getLastTxId();
long numTxns = (lastTxId - expectedStartingTxId) + 1;
StartupProgress prog = NameNode.getStartupProgress();
Step step = createStartupProgressStep(in);
prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
long lastLogTime = monotonicNow();
long lastInodeId = fsNamesys.dir.getLastInodeId();
try {
while (true) {
try {
FSEditLogOp op;
try {
op = in.readOp();
if (op == null) {
break;
}
} catch (Throwable e) {
// Handle a problem with our input
check203UpgradeFailure(in.getVersion(true), e);
String errorMessage = formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
FSImage.LOG.error(errorMessage, e);
if (recovery == null) {
// We only try to skip over problematic opcodes when in recovery mode.
throw new EditLogInputException(errorMessage, e, numEdits);
}
MetaRecoveryContext.editLogLoaderPrompt("We failed to read txId " + expectedTxId, recovery, "skipping the bad section in the log");
in.resync();
continue;
}
recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] = in.getPosition();
if (op.hasTransactionId()) {
if (op.getTransactionId() > expectedTxId) {
MetaRecoveryContext.editLogLoaderPrompt("There appears " + "to be a gap in the edit log. We expected txid " + expectedTxId + ", but got txid " + op.getTransactionId() + ".", recovery, "ignoring missing " + " transaction IDs");
} else if (op.getTransactionId() < expectedTxId) {
MetaRecoveryContext.editLogLoaderPrompt("There appears " + "to be an out-of-order edit in the edit log. We " + "expected txid " + expectedTxId + ", but got txid " + op.getTransactionId() + ".", recovery, "skipping the out-of-order edit");
continue;
}
}
try {
if (LOG.isTraceEnabled()) {
LOG.trace("op=" + op + ", startOpt=" + startOpt + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
}
long inodeId = applyEditLogOp(op, fsDir, startOpt, in.getVersion(true), lastInodeId);
if (lastInodeId < inodeId) {
lastInodeId = inodeId;
}
} catch (RollingUpgradeOp.RollbackException e) {
throw e;
} catch (Throwable e) {
LOG.error("Encountered exception on operation " + op, e);
if (recovery == null) {
throw e instanceof IOException ? (IOException) e : new IOException(e);
}
MetaRecoveryContext.editLogLoaderPrompt("Failed to " + "apply edit log operation " + op + ": error " + e.getMessage(), recovery, "applying edits");
}
// Now that the operation has been successfully decoded and
// applied, update our bookkeeping.
incrOpCount(op.opCode, opCounts, step, counter);
if (op.hasTransactionId()) {
lastAppliedTxId = op.getTransactionId();
expectedTxId = lastAppliedTxId + 1;
} else {
expectedTxId = lastAppliedTxId = expectedStartingTxId;
}
// log progress
if (op.hasTransactionId()) {
long now = monotonicNow();
if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
int percent = Math.round((float) deltaTxId / numTxns * 100);
LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns + " transactions completed. (" + percent + "%)");
lastLogTime = now;
}
}
numEdits++;
totalEdits++;
} catch (RollingUpgradeOp.RollbackException e) {
LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
break;
} catch (MetaRecoveryContext.RequestStopException e) {
MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + in.getPosition() + "/" + in.length());
break;
}
}
} finally {
fsNamesys.dir.resetLastInodeId(lastInodeId);
if (closeOnExit) {
in.close();
}
fsDir.writeUnlock();
fsNamesys.writeUnlock("loadEditRecords");
if (LOG.isTraceEnabled()) {
LOG.trace("replaying edit log finished");
}
if (FSImage.LOG.isDebugEnabled()) {
dumpOpCounts(opCounts);
}
}
return numEdits;
}
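loadEditRecords tallies every opcode through incrOpCount, whose body is not shown in this excerpt. A plausible sketch of such a helper, assuming only the EnumMap/Holder usage visible above and the startup-progress Counter obtained earlier (so this is not the verbatim Hadoop implementation):

// Hypothetical sketch of a per-opcode counter built on Holder<Integer>;
// the types (FSEditLogOpCodes, Step, Counter) are the ones used in loadEditRecords above.
private void incrOpCount(FSEditLogOpCodes opCode,
    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts,
    Step step, Counter counter) {
  Holder<Integer> holder = opCounts.get(opCode);
  if (holder == null) {
    // First occurrence of this opcode: start its count at 1.
    opCounts.put(opCode, new Holder<Integer>(1));
  } else {
    // Mutate the boxed value in place rather than re-inserting a new Integer.
    holder.held++;
  }
  // Advance the LOADING_EDITS progress counter for this step as well.
  counter.increment();
}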
Use of org.apache.hadoop.hdfs.util.Holder in project hadoop by apache.
Class TestQJMWithFaults, method testRandomized.
/**
* Test case in which three JournalNodes randomly flip flop between
* up and down states every time they get an RPC.
*
* The writer keeps track of the latest ACKed edit, and on every
* recovery operation, ensures that it recovers at least to that
* point or higher. Since at any given point, a majority of JNs
* may be injecting faults, any writer operation is allowed to fail,
* so long as the exception message indicates it failed due to injected
* faults.
*
* Given a random seed, the test should be entirely deterministic.
*/
@Test
public void testRandomized() throws Exception {
long seed;
Long userSpecifiedSeed = Long.getLong(RAND_SEED_PROPERTY);
if (userSpecifiedSeed != null) {
LOG.info("Using seed specified in system property");
seed = userSpecifiedSeed;
// If the user specifies a seed, gather all of the IPC trace
// information so that debugging is easier. This makes the test run
// about 25% slower, so it is only enabled when a seed is given.
GenericTestUtils.setLogLevel(ProtobufRpcEngine.LOG, Level.ALL);
} else {
seed = new Random().nextLong();
}
LOG.info("Random seed: " + seed);
Random r = new Random(seed);
MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build();
cluster.waitActive();
// Format the cluster using a non-faulty QJM.
QuorumJournalManager qjmForInitialFormat = createInjectableQJM(cluster);
qjmForInitialFormat.format(FAKE_NSINFO);
qjmForInitialFormat.close();
try {
long txid = 0;
long lastAcked = 0;
for (int i = 0; i < NUM_WRITER_ITERS; i++) {
LOG.info("Starting writer " + i + "\n-------------------");
QuorumJournalManager qjm = createRandomFaultyQJM(cluster, r);
try {
long recovered;
try {
recovered = QJMTestUtil.recoverAndReturnLastTxn(qjm);
} catch (Throwable t) {
LOG.info("Failed recovery", t);
checkException(t);
continue;
}
assertTrue("Recovered only up to txnid " + recovered + " but had gotten an ack for " + lastAcked, recovered >= lastAcked);
txid = recovered + 1;
// Periodically purge old data on disk so it's easier to look at failure cases.
if (txid > 100 && i % 10 == 1) {
qjm.purgeLogsOlderThan(txid - 100);
}
Holder<Throwable> thrown = new Holder<Throwable>(null);
for (int j = 0; j < SEGMENTS_PER_WRITER; j++) {
lastAcked = writeSegmentUntilCrash(cluster, qjm, txid, 4, thrown);
if (thrown.held != null) {
LOG.info("Failed write", thrown.held);
checkException(thrown.held);
break;
}
txid += 4;
}
} finally {
qjm.close();
}
}
} finally {
cluster.shutdown();
}
}
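In this test Holder<Throwable> serves as an out-parameter: writeSegmentUntilCrash (not shown above) records any injected-fault exception in thrown.held so the loop can inspect it after the call rather than catching it. A generic, hypothetical illustration of that pattern, with invented names:

// Hypothetical sketch of the Holder-as-out-parameter pattern; not the
// actual writeSegmentUntilCrash implementation.
static long writeEditsCapturingFailure(long firstTxId, int numTxns,
    Holder<Throwable> thrown) {
  long lastAcked = firstTxId - 1;
  try {
    for (long txid = firstTxId; txid < firstTxId + numTxns; txid++) {
      // ... write and sync one transaction here ...
      lastAcked = txid;
    }
  } catch (Throwable t) {
    // Surface the failure to the caller through the holder instead of throwing.
    thrown.held = t;
  }
  // Highest transaction known to have been acknowledged.
  return lastAcked;
}

The caller then checks thrown.held, as the loop above does, to decide whether the failure was an expected injected fault.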
Use of org.apache.hadoop.hdfs.util.Holder in project hadoop by apache.
Class TestEditLogFileInputStream, method testReadURL.
@Test
public void testReadURL() throws Exception {
HttpURLConnection conn = mock(HttpURLConnection.class);
doReturn(new ByteArrayInputStream(FAKE_LOG_DATA)).when(conn).getInputStream();
doReturn(HttpURLConnection.HTTP_OK).when(conn).getResponseCode();
doReturn(Integer.toString(FAKE_LOG_DATA.length)).when(conn).getHeaderField("Content-Length");
URLConnectionFactory factory = mock(URLConnectionFactory.class);
doReturn(conn).when(factory).openConnection(Mockito.<URL>any(), anyBoolean());
URL url = new URL("http://localhost/fakeLog");
EditLogInputStream elis = EditLogFileInputStream.fromUrl(factory, url, HdfsServerConstants.INVALID_TXID, HdfsServerConstants.INVALID_TXID, false);
// Read the edit log and verify that we got all of the data.
EnumMap<FSEditLogOpCodes, Holder<Integer>> counts = FSImageTestUtil.countEditLogOpTypes(elis);
assertThat(counts.get(FSEditLogOpCodes.OP_ADD).held, is(1));
assertThat(counts.get(FSEditLogOpCodes.OP_SET_GENSTAMP_V1).held, is(1));
assertThat(counts.get(FSEditLogOpCodes.OP_CLOSE).held, is(1));
// Check that length header was picked up.
assertEquals(FAKE_LOG_DATA.length, elis.length());
elis.close();
}
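Note that, judging by the tests above, countEditLogOpTypes only puts a Holder in the map for opcodes that actually occur, so counts.get(...) returns null for an op type that never appeared. When a test cannot assume an opcode is present, a small null-safe helper (not part of Hadoop, shown here only as a convenience sketch) avoids a NullPointerException on .held:

// Hypothetical convenience helper: treat "opcode never seen" as a count of zero.
static int countOf(EnumMap<FSEditLogOpCodes, Holder<Integer>> counts,
    FSEditLogOpCodes opCode) {
  Holder<Integer> holder = counts.get(opCode);
  return holder == null ? 0 : holder.held;
}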