use of java.util.concurrent.ExecutorService in project hbase by apache.
the class LoadIncrementalHFiles method loadHFileQueue.
/**
 * Used by the replication sink to load the hfiles from the source cluster. Two phases are run,
 * in order, on an executor obtained from {@code createExecutorService()}:
 * <ol>
 * <li>{@code groupOrSplitPhase(table, pool, queue, startEndKeys)} - only the first element of the
 * returned pair (the queue items grouped under {@link ByteBuffer} keys) is used</li>
 * <li>{@code bulkLoadPhase(table, conn, pool, queue, regionGroups, copyFile, null)}</li>
 * </ol>
 * The executor is shut down before this method returns, whether or not a phase threw.
 * @param table Table to which these hfiles should be loaded to
 * @param conn Connection to use
 * @param queue {@link LoadQueueItem} has hfiles yet to be loaded
 * @param startEndKeys starting and ending row keys of the region
 * @param copyFile forwarded unchanged to the bulk load phase
 * @throws IOException propagated from executor creation or from either phase
 */
public void loadHFileQueue(final Table table, final Connection conn, Deque<LoadQueueItem> queue, Pair<byte[][], byte[][]> startEndKeys, boolean copyFile) throws IOException {
  // Nothing needs cleaning up if creating the executor itself fails, so it is obtained before
  // entering the guarded region.
  final ExecutorService executor = createExecutorService();
  try {
    Multimap<ByteBuffer, LoadQueueItem> groupedItems =
        groupOrSplitPhase(table, executor, queue, startEndKeys).getFirst();
    bulkLoadPhase(table, conn, executor, queue, groupedItems, copyFile, null);
  } finally {
    if (executor != null) {
      executor.shutdown();
    }
  }
}
use of java.util.concurrent.ExecutorService in project hbase by apache.
the class RegionMover method unloadRegions.
/**
 * Moves the regions reported by {@code getRegions(conf, server)} off {@code server}, spreading
 * them round-robin over {@code regionServers}. The move is repeated in passes: each pass
 * re-reads the region list, drops everything already present in {@code movedRegions}, and
 * submits one move task per remaining region to a fresh fixed-size pool of
 * {@link #maxthreads} threads. The method returns once a pass finds nothing left to move.
 * @param admin used to read the per-region move timeout and handed to every move task
 * @param server the server whose regions are being moved away
 * @param regionServers candidate target servers; must not be empty when there is work to do
 * @param ack if true tasks are {@code MoveWithAck}, otherwise {@code MoveWithoutAck}
 * @param movedRegions regions to skip; also handed to every move task (presumably the tasks
 *          record completed moves here - confirm against the task classes)
 * @throws Exception if no target server is available, if a task reports failure, fails, is
 *           cancelled or does not finish within its wait, or if this thread is interrupted
 */
private void unloadRegions(Admin admin, String server, ArrayList<String> regionServers, boolean ack, List<HRegionInfo> movedRegions) throws Exception {
  List<HRegionInfo> regionsToMove = getRegions(this.conf, server);
  if (regionsToMove.isEmpty()) {
    LOG.info("No Regions to move....Quitting now");
    return;
  } else if (regionServers.isEmpty()) {
    LOG.warn("No Regions were moved - no servers available");
    throw new Exception("No online region servers");
  }
  while (true) {
    regionsToMove = getRegions(this.conf, server);
    regionsToMove.removeAll(movedRegions);
    if (regionsToMove.isEmpty()) {
      break;
    }
    LOG.info("Moving " + regionsToMove.size() + " regions from " + this.hostname + " to " + regionServers.size() + " servers using " + this.maxthreads + " threads .Ack Mode:" + ack);
    ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads);
    List<Future<Boolean>> taskList = new ArrayList<>();
    try {
      for (int counter = 0; counter < regionsToMove.size(); counter++) {
        // Round-robin over the target servers.
        String targetServer = regionServers.get(counter % regionServers.size());
        if (ack) {
          Future<Boolean> task = moveRegionsPool.submit(new MoveWithAck(admin, regionsToMove.get(counter), server, targetServer, movedRegions));
          taskList.add(task);
        } else {
          Future<Boolean> task = moveRegionsPool.submit(new MoveWithoutAck(admin, regionsToMove.get(counter), server, targetServer, movedRegions));
          taskList.add(task);
        }
      }
    } finally {
      // Always stop accepting work, even if submission failed part-way, so that the worker
      // threads can exit once the already-submitted tasks are done.
      moveRegionsPool.shutdown();
    }
    // Widen before multiplying: int * int could overflow for large region counts or waits.
    long timeoutInSeconds = (long) regionsToMove.size() * admin.getConfiguration().getInt(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX);
    try {
      if (!moveRegionsPool.awaitTermination(timeoutInSeconds, TimeUnit.SECONDS)) {
        moveRegionsPool.shutdownNow();
      }
    } catch (InterruptedException e) {
      moveRegionsPool.shutdownNow();
      // Restore the flag; the future.get() below will then fail fast with InterruptedException.
      Thread.currentThread().interrupt();
    }
    for (Future<Boolean> future : taskList) {
      try {
        // if even after shutdownNow threads are stuck we wait for 5 secs max
        if (!future.get(5, TimeUnit.SECONDS)) {
          LOG.error("Was Not able to move region....Exiting Now");
          throw new Exception("Could not move region Exception");
        }
      } catch (InterruptedException e) {
        LOG.error("Interrupted while waiting for Thread to Complete " + e.getMessage(), e);
        throw e;
      } catch (ExecutionException e) {
        LOG.error("Got Exception From Thread While moving region " + e.getMessage(), e);
        throw e;
      } catch (CancellationException e) {
        LOG.error("Thread for moving region cancelled. Timeout for cancellation:" + timeoutInSeconds + "secs", e);
        throw e;
      }
    }
  }
}
use of java.util.concurrent.ExecutorService in project hbase by apache.
the class RegionMover method unload.
/**
 * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}. In
 * noAck mode we do not make sure that region is successfully online on the target region
 * server, hence it is best effort. We do not unload regions to hostnames given in
 * {@link #excludeFile}.
 * <p>
 * The work runs as a single {@code Unload} task on a dedicated one-thread pool. This method
 * waits up to {@link #timeout} seconds for the pool to terminate, cancels it if the wait times
 * out or is interrupted, and then allows the task 5 more seconds to deliver its result.
 * @return true if unloading succeeded, false otherwise
 * @throws InterruptedException if the unloader thread was interrupted
 * @throws ExecutionException if the unload task itself threw
 * @throws TimeoutException if the task result is still unavailable after the final 5 second wait
 */
public boolean unload() throws InterruptedException, ExecutionException, TimeoutException {
  setConf();
  deleteFile(this.filename);
  ExecutorService executor = Executors.newFixedThreadPool(1);
  Future<Boolean> outcome = executor.submit(new Unload(this));
  // No further tasks will be submitted; the pool terminates once the unload task completes.
  executor.shutdown();
  try {
    boolean finishedInTime = executor.awaitTermination((long) this.timeout, TimeUnit.SECONDS);
    if (!finishedInTime) {
      LOG.warn("Timed out before finishing the unloading operation. Timeout:" + this.timeout + "sec");
      executor.shutdownNow();
    }
  } catch (InterruptedException e) {
    executor.shutdownNow();
    // Keep the interrupt visible; the get() below then throws InterruptedException.
    Thread.currentThread().interrupt();
  }
  try {
    return outcome.get(5, TimeUnit.SECONDS);
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while unloading Regions from " + this.hostname, e);
    throw e;
  } catch (ExecutionException e) {
    LOG.error("Error while unloading regions from RegionServer " + this.hostname, e);
    throw e;
  }
}
use of java.util.concurrent.ExecutorService in project hbase by apache.
the class RegionMover method loadRegions.
/**
 * Moves the given regions onto the region server identified by {@code hostname} and
 * {@code port}.
 * <p>
 * First polls (every 500 ms, for at most {@code SERVERSTART_WAIT_MAX_KEY} seconds) until
 * {@code stripServer} finds the target in the list returned by {@code getServers}. Then one
 * move task is submitted per region to a fixed pool of {@link #maxthreads} threads; regions
 * whose current server cannot be determined, or that already sit on the target, are skipped.
 * @param admin used to look up servers, region locations and configured wait times
 * @param hostname host name of the target region server
 * @param port port of the target region server
 * @param regionsToMove regions that should end up on the target server
 * @param ack if true tasks are {@code MoveWithAck}, otherwise {@code MoveWithoutAck}
 * @throws Exception if the target server does not show up in time, if a task reports failure,
 *           fails, is cancelled or does not finish within its wait, or if this thread is
 *           interrupted
 */
private void loadRegions(Admin admin, String hostname, int port, List<HRegionInfo> regionsToMove, boolean ack) throws Exception {
  String server = null;
  List<HRegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<HRegionInfo>());
  int maxWaitInSeconds = admin.getConfiguration().getInt(SERVERSTART_WAIT_MAX_KEY, DEFAULT_SERVERSTART_WAIT_MAX);
  // 1000L forces long arithmetic; the int product could overflow for large configured waits.
  long maxWait = EnvironmentEdgeManager.currentTime() + maxWaitInSeconds * 1000L;
  while ((EnvironmentEdgeManager.currentTime() < maxWait) && (server == null)) {
    try {
      ArrayList<String> regionServers = getServers(admin);
      // Remove the host Region server from target Region Servers list
      server = stripServer(regionServers, hostname, port);
      if (server != null) {
        // Found it: leave immediately rather than sleeping once more.
        break;
      }
    } catch (IOException e) {
      LOG.warn("Could not get list of region servers", e);
    } catch (Exception e) {
      LOG.info("hostname=" + hostname + " is not up yet, waiting");
    }
    try {
      Thread.sleep(500);
    } catch (InterruptedException e) {
      LOG.error("Interrupted while waiting for " + hostname + " to be up.Quitting now", e);
      throw e;
    }
  }
  if (server == null) {
    LOG.error("Host:" + hostname + " is not up.Giving up.");
    throw new Exception("Host to load regions not online");
  }
  // Log the mode that is actually used below (the parameter, not the shadowed field).
  LOG.info("Moving " + regionsToMove.size() + " regions to " + server + " using " + this.maxthreads + " threads.Ack mode:" + ack);
  ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads);
  List<Future<Boolean>> taskList = new ArrayList<>();
  try {
    for (HRegionInfo region : regionsToMove) {
      String currentServer = getServerNameForRegion(admin, region);
      if (currentServer == null) {
        LOG.warn("Could not get server for Region:" + region.getEncodedName() + " moving on");
        continue;
      } else if (server.equals(currentServer)) {
        LOG.info("Region " + region.getRegionNameAsString() + "already on target server=" + server);
        continue;
      }
      if (ack) {
        Future<Boolean> task = moveRegionsPool.submit(new MoveWithAck(admin, region, currentServer, server, movedRegions));
        taskList.add(task);
      } else {
        Future<Boolean> task = moveRegionsPool.submit(new MoveWithoutAck(admin, region, currentServer, server, movedRegions));
        taskList.add(task);
      }
    }
  } finally {
    // Always stop accepting work, even if a region lookup failed part-way, so that the worker
    // threads can exit once the already-submitted tasks are done.
    moveRegionsPool.shutdown();
  }
  // Widen before multiplying: int * int could overflow for large region counts or waits.
  long timeoutInSeconds = (long) regionsToMove.size() * admin.getConfiguration().getInt(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX);
  try {
    if (!moveRegionsPool.awaitTermination(timeoutInSeconds, TimeUnit.SECONDS)) {
      moveRegionsPool.shutdownNow();
    }
  } catch (InterruptedException e) {
    moveRegionsPool.shutdownNow();
    // Restore the flag; the future.get() below will then fail fast with InterruptedException.
    Thread.currentThread().interrupt();
  }
  for (Future<Boolean> future : taskList) {
    try {
      // if even after shutdownNow threads are stuck we wait for 5 secs max
      if (!future.get(5, TimeUnit.SECONDS)) {
        LOG.error("Was Not able to move region....Exiting Now");
        throw new Exception("Could not move region Exception");
      }
    } catch (InterruptedException e) {
      LOG.error("Interrupted while waiting for Thread to Complete " + e.getMessage(), e);
      throw e;
    } catch (ExecutionException e) {
      LOG.error("Got Exception From Thread While moving region " + e.getMessage(), e);
      throw e;
    } catch (CancellationException e) {
      LOG.error("Thread for moving region cancelled. Timeout for cancellation:" + timeoutInSeconds + "secs", e);
      throw e;
    }
  }
}
use of java.util.concurrent.ExecutorService in project hbase by apache.
the class TestFastFail method testFastFail.
/**
 * Exercises the client's preemptive fast-fail mode. A table is filled with {@code numRows}
 * rows, then {@code nThreads} workers each read a random row twice: once while the cluster is
 * healthy and once after region server 0 has been stopped. The assertions check that every
 * worker reached the second read, that the per-worker results agree with the shared counters,
 * that at least one {@link PreemptiveFastFailException} was seen, and that the number of slow
 * ("blocked") workers does not exceed the number of retrying callers counted by
 * {@code MyPreemptiveFastFailInterceptor}.
 */
@Ignore("Can go zombie -- see HBASE-14421; FIX")
@Test
public void testFastFail() throws IOException, InterruptedException {
  Admin admin = TEST_UTIL.getAdmin();
  final String tableName = name.getMethodName();
  HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(Bytes.toBytes(tableName)));
  desc.addFamily(new HColumnDescriptor(FAMILY));
  admin.createTable(desc, Bytes.toBytes("aaaa"), Bytes.toBytes("zzzz"), 32);
  final long numRows = 1000;
  Configuration conf = TEST_UTIL.getConfiguration();
  conf.setLong(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, SLEEPTIME * 100);
  conf.setInt(HConstants.HBASE_CLIENT_PAUSE, SLEEPTIME / 10);
  conf.setBoolean(HConstants.HBASE_CLIENT_FAST_FAIL_MODE_ENABLED, true);
  conf.setLong(HConstants.HBASE_CLIENT_FAST_FAIL_THREASHOLD_MS, 0);
  conf.setClass(HConstants.HBASE_CLIENT_FAST_FAIL_INTERCEPTOR_IMPL, MyPreemptiveFastFailInterceptor.class, PreemptiveFastFailInterceptor.class);
  // The connection is shared by all workers and closed when the test finishes or fails.
  try (Connection connection = ConnectionFactory.createConnection(conf)) {
    // Write numRows worth of data, so that the workers can arbitrarily read.
    List<Put> puts = new ArrayList<>();
    for (long i = 0; i < numRows; i++) {
      byte[] rowKey = longToByteArrayKey(i);
      Put put = new Put(rowKey);
      // value is the same as the row key
      byte[] value = rowKey;
      put.addColumn(FAMILY, QUALIFIER, value);
      puts.add(put);
    }
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
      table.put(puts);
      LOG.info("Written all puts.");
    }
    // The number of threads that are going to perform actions against the test table.
    int nThreads = 100;
    final CountDownLatch continueOtherHalf = new CountDownLatch(1);
    final CountDownLatch doneHalfway = new CountDownLatch(nThreads);
    final AtomicInteger numSuccessfullThreads = new AtomicInteger(0);
    final AtomicInteger numFailedThreads = new AtomicInteger(0);
    // The total time taken for the threads to perform the second put;
    final AtomicLong totalTimeTaken = new AtomicLong(0);
    final AtomicInteger numBlockedWorkers = new AtomicInteger(0);
    final AtomicInteger numPreemptiveFastFailExceptions = new AtomicInteger(0);
    int numThreadsReturnedFalse = 0;
    int numThreadsReturnedTrue = 0;
    int numThreadsThrewExceptions = 0;
    ExecutorService service = Executors.newFixedThreadPool(nThreads);
    try {
      List<Future<Boolean>> futures = new ArrayList<>();
      for (int i = 0; i < nThreads; i++) {
        futures.add(service.submit(new Callable<Boolean>() {
          /**
           * The workers are going to perform a couple of reads. The second read
           * will follow the killing of a regionserver so that we make sure that
           * some of threads go into PreemptiveFastFailExcception
           */
          @Override
          public Boolean call() throws Exception {
            try (Table table = connection.getTable(TableName.valueOf(tableName))) {
              // Add some jitter here. Masking the sign bit (instead of Math.abs) keeps the
              // value non-negative even when the generator returns MIN_VALUE.
              Thread.sleep((random.nextInt() & Integer.MAX_VALUE) % 250);
              byte[] row = longToByteArrayKey((random.nextLong() & Long.MAX_VALUE) % numRows);
              Get g = new Get(row);
              g.addColumn(FAMILY, QUALIFIER);
              try {
                table.get(g);
              } catch (Exception e) {
                LOG.debug("Get failed : ", e);
                doneHalfway.countDown();
                return false;
              }
              // Done with one get, proceeding to do the next one.
              doneHalfway.countDown();
              continueOtherHalf.await();
              long startTime = System.currentTimeMillis();
              g = new Get(row);
              g.addColumn(FAMILY, QUALIFIER);
              try {
                table.get(g);
                // The get was successful
                numSuccessfullThreads.addAndGet(1);
              } catch (Exception e) {
                if (e instanceof PreemptiveFastFailException) {
                  // We were issued a PreemptiveFastFailException
                  numPreemptiveFastFailExceptions.addAndGet(1);
                }
                // Irrespective of PFFE, the request failed.
                numFailedThreads.addAndGet(1);
                return false;
              } finally {
                long enTime = System.currentTimeMillis();
                totalTimeTaken.addAndGet(enTime - startTime);
                if ((enTime - startTime) >= SLEEPTIME) {
                  // Considering the slow workers as the blockedWorkers.
                  // This assumes that the threads go full throttle at performing
                  // actions. In case the thread scheduling itself is as slow as
                  // SLEEPTIME, then this test might fail and so, we might have
                  // set it to a higher number on slower machines.
                  numBlockedWorkers.addAndGet(1);
                }
              }
              return true;
            } catch (Exception e) {
              LOG.error("Caught unknown exception", e);
              doneHalfway.countDown();
              return false;
            }
          }
        }));
      }
      doneHalfway.await();
      // Kill a regionserver
      TEST_UTIL.getHBaseCluster().getRegionServer(0).getRpcServer().stop();
      TEST_UTIL.getHBaseCluster().getRegionServer(0).stop("Testing");
      // Let the threads continue going
      continueOtherHalf.countDown();
      Thread.sleep(2 * SLEEPTIME);
      // Start a RS in the cluster
      TEST_UTIL.getHBaseCluster().startRegionServer();
      for (Future<Boolean> f : futures) {
        try {
          // Fetch the result once and classify it.
          if (f.get()) {
            numThreadsReturnedTrue++;
          } else {
            numThreadsReturnedFalse++;
          }
        } catch (Exception e) {
          numThreadsThrewExceptions++;
        }
      }
    } finally {
      // Release the worker threads even when the test bails out early; workers still parked on
      // a latch are interrupted and end through their own catch block.
      service.shutdownNow();
    }
    LOG.debug("numThreadsReturnedFalse:" + numThreadsReturnedFalse + " numThreadsReturnedTrue:" + numThreadsReturnedTrue + " numThreadsThrewExceptions:" + numThreadsThrewExceptions + " numFailedThreads:" + numFailedThreads.get() + " numSuccessfullThreads:" + numSuccessfullThreads.get() + " numBlockedWorkers:" + numBlockedWorkers.get() + " totalTimeWaited: " + totalTimeTaken.get() / (numBlockedWorkers.get() == 0 ? Long.MAX_VALUE : numBlockedWorkers.get()) + " numPFFEs: " + numPreemptiveFastFailExceptions.get());
    assertEquals("The expected number of all the successfull and the failed " + "threads should equal the total number of threads that we spawned", nThreads, numFailedThreads.get() + numSuccessfullThreads.get());
    assertEquals("All the failures should be coming from the secondput failure", numFailedThreads.get(), numThreadsReturnedFalse);
    // Expected value goes first so a failure message reads "expected 0 but was N".
    assertEquals("Number of threads that threw execution exceptions " + "otherwise should be 0", 0, numThreadsThrewExceptions);
    assertEquals("The regionservers that returned true should equal to the" + " number of successful threads", numThreadsReturnedTrue, numSuccessfullThreads.get());
    assertTrue("There will be atleast one thread that retried instead of failing", MyPreemptiveFastFailInterceptor.numBraveSouls.get() > 0);
    assertTrue("There will be atleast one PreemptiveFastFail exception," + " otherwise, the test makes little sense." + "numPreemptiveFastFailExceptions: " + numPreemptiveFastFailExceptions.get(), numPreemptiveFastFailExceptions.get() > 0);
    assertTrue("Only few thread should ideally be waiting for the dead " + "regionserver to be coming back. numBlockedWorkers:" + numBlockedWorkers.get() + " threads that retried : " + MyPreemptiveFastFailInterceptor.numBraveSouls.get(), numBlockedWorkers.get() <= MyPreemptiveFastFailInterceptor.numBraveSouls.get());
  }
}
Aggregations