Example 1 with ExecutorCompletionService

use of java.util.concurrent.ExecutorCompletionService in project hadoop by apache.

the class ITestS3ADeleteManyFiles method testBulkRenameAndDelete.

  /**
   * CAUTION: If this test starts failing, please make sure that the
   * {@link org.apache.hadoop.fs.s3a.Constants#MAX_THREADS} configuration is not
   * set too low. Alternatively, consider reducing the
   * <code>scale.test.operation.count</code> parameter in
   * <code>getOperationCount()</code>.
   * @see #getOperationCount()
   */
// Scale test: creates getOperationCount() files under <test dir>/src on a 16-thread
// pool, renames src to final, checks the first, middle and last file on both sides
// of the rename, then deletes the destination directory.
// NOTE(review): this listing seems to have lost lines in extraction (closing braces,
// the tail of the anonymous Callable, the body of the finally block), so it does not
// compile as shown - check against the upstream Hadoop source before reusing it.
public void testBulkRenameAndDelete() throws Throwable {
    final Path scaleTestDir = path("testBulkRenameAndDelete");
    final Path srcDir = new Path(scaleTestDir, "src");
    final Path finalDir = new Path(scaleTestDir, "final");
    final long count = getOperationCount();
    final S3AFileSystem fs = getFileSystem();
    // Remove the test directory through ContractTestUtils.rm before creating any files.
    ContractTestUtils.rm(fs, scaleTestDir, true, false);
    // Size of the generated payload: read from the filesystem configuration, with a default.
    int testBufferSize = fs.getConf().getInt(ContractTestUtils.IO_CHUNK_BUFFER_SIZE, ContractTestUtils.DEFAULT_IO_CHUNK_BUFFER_SIZE);
    // use Executor to speed up file creation
    ExecutorService exec = Executors.newFixedThreadPool(16);
    final ExecutorCompletionService<Boolean> completionService = new ExecutorCompletionService<>(exec);
    try {
        // One byte array is built once and shared by every creation task.
        final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
        for (int i = 0; i < count; ++i) {
            final String fileName = "foo-" + i;
            // Each task writes one file and reports whether fs.exists() sees it afterwards.
            completionService.submit(new Callable<Boolean>() {

                public Boolean call() throws IOException {
                    ContractTestUtils.createFile(fs, new Path(srcDir, fileName), false, data);
                    return fs.exists(new Path(srcDir, fileName));
        // Collect exactly one completed future per submitted task, in completion order.
        for (int i = 0; i < count; ++i) {
            final Future<Boolean> future = completionService.take();
            try {
                // A false result is only logged; it does not fail the test at this point.
                if (!future.get()) {
                    LOG.warn("cannot create file");
            } catch (ExecutionException e) {
                // Log the task's underlying failure, then rethrow the wrapper unchanged.
                LOG.warn("Error while uploading file", e.getCause());
                throw e;
    // NOTE(review): the finally body is not visible; presumably it shuts down `exec` - confirm.
    } finally {
    // The number of entries listed in src before the rename must equal the number
    // listed in final after it.
    int nSrcFiles = fs.listStatus(srcDir).length;
    fs.rename(srcDir, finalDir);
    assertEquals(nSrcFiles, fs.listStatus(finalDir).length);
    // Spot-check first, middle and last file; count / 2 is long division and is
    // evaluated before the string concatenation.
    ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", new Path(srcDir, "foo-" + 0));
    ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", new Path(srcDir, "foo-" + count / 2));
    ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", new Path(srcDir, "foo-" + (count - 1)));
    ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", new Path(finalDir, "foo-" + 0));
    ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", new Path(finalDir, "foo-" + count / 2));
    ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", new Path(finalDir, "foo-" + (count - 1)));
    ContractTestUtils.assertDeleted(fs, finalDir, true, false);
Also used : Path(org.apache.hadoop.fs.Path) S3AFileSystem(org.apache.hadoop.fs.s3a.S3AFileSystem) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) IOException(java.io.IOException) ExecutorService(java.util.concurrent.ExecutorService) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 2 with ExecutorCompletionService

use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

the class HBaseInterClusterReplicationEndpoint method replicate.

  /**
   * Do the shipping logic
   */
// Ships the entries of replicateContext to the peer sinks, looping while the endpoint
// is running and the executor has not been shut down. Returns true once every entry
// has been counted as replicated; returns false when there are no sinks, when the
// replicated count does not match, or when the loop exits without success.
// NOTE(review): this listing has lost lines in extraction (closing braces and several
// short statements), so it does not compile as shown - compare with upstream HBase.
public boolean replicate(ReplicateContext replicateContext) {
    CompletionService<Integer> pool = new ExecutorCompletionService<>(this.exec);
    List<Entry> entries = replicateContext.getEntries();
    String walGroupId = replicateContext.getWalGroupId();
    int sleepMultiplier = 1;
    // Declared outside the retry loop, so it accumulates across retries.
    int numReplicated = 0;
    // NOTE(review): only the flag assignment is visible; a call that actually selects
    // the peers was presumably dropped from this listing - verify.
    if (!peersSelected && this.isRunning()) {
        peersSelected = true;
    int numSinks = replicationSinkMgr.getNumSinks();
    if (numSinks == 0) {
        LOG.warn("No replication sinks found, returning without replicating. The source should retry" + " with the same set of edits.");
        return false;
    // minimum of: configured threads, number of 100-waledit batches,
    //  and number of current sinks
    int n = Math.min(Math.min(this.maxThreads, entries.size() / 100 + 1), numSinks);
    List<List<Entry>> entryLists = new ArrayList<>(n);
    // NOTE(review): the n == 1 branch body is not visible; presumably it adds `entries`
    // as the single batch - confirm.
    if (n == 1) {
    } else {
        // Pre-size each of the n batches for an even split.
        for (int i = 0; i < n; i++) {
            entryLists.add(new ArrayList<>(entries.size() / n + 1));
        // now group by region
        // The bucket index is derived from the hash of the encoded region name, so all
        // entries of one region land in the same batch.
        for (Entry e : entries) {
            entryLists.get(Math.abs(Bytes.hashCode(e.getKey().getEncodedRegionName()) % n)).add(e);
    while (this.isRunning() && !exec.isShutdown()) {
        if (!isPeerEnabled()) {
            // NOTE(review): the body of this if is not visible in the listing.
            if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Replicating " + entries.size() + " entries of total size " + replicateContext.getSize());
            // NOTE(review): no increment of `futures` is visible; as shown, the
            // collection loop below would never run. Presumably a `futures++` next to
            // the submit call was lost - verify.
            int futures = 0;
            for (int i = 0; i < entryLists.size(); i++) {
                // Batches already emptied by an earlier successful attempt are skipped.
                if (!entryLists.get(i).isEmpty()) {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Submitting " + entryLists.get(i).size() + " entries of total size " + replicateContext.getSize());
                    // RuntimeExceptions encountered here bubble up and are handled in ReplicationSource
                    pool.submit(createReplicator(entryLists.get(i), i));
            // Holds a failure seen while collecting results; each new failure overwrites
            // the previous one, so only the last is thrown.
            IOException iox = null;
            for (int i = 0; i < futures; i++) {
                try {
                    // wait for all futures, remove successful parts
                    // (only the remaining parts will be retried)
                    Future<Integer> f = pool.take();
                    // The task's result is used as the index of its batch in entryLists.
                    int index = f.get().intValue();
                    int batchSize = entryLists.get(index).size();
                    entryLists.set(index, Collections.<Entry>emptyList());
                    // Now, we have marked the batch as done replicating, record its size
                    numReplicated += batchSize;
                } catch (InterruptedException ie) {
                    iox = new IOException(ie);
                } catch (ExecutionException ee) {
                    // cause must be an IOException
                    iox = (IOException) ee.getCause();
            if (iox != null) {
                // if we had any exceptions, try again
                throw iox;
            if (numReplicated != entries.size()) {
                // Something went wrong here and we don't know what, let's just fail and retry.
                LOG.warn("The number of edits replicated is different from the number received," + " failing for now.");
                return false;
            // update metrics
            // The age is taken from the write time of the last entry in the list.
            this.metrics.setAgeOfLastShippedOp(entries.get(entries.size() - 1).getKey().getWriteTime(), walGroupId);
            return true;
        } catch (IOException ioe) {
            // Didn't ship anything, but must still age the last time we did
            if (ioe instanceof RemoteException) {
                // Replace the wrapper with the exception it carries before classifying it.
                ioe = ((RemoteException) ioe).unwrapRemoteException();
                LOG.warn("Can't replicate because of an error on the remote cluster: ", ioe);
                if (ioe instanceof TableNotFoundException) {
                    // NOTE(review): the body of this if is not visible in the listing.
                    if (sleepForRetries("A table is missing in the peer cluster. " + "Replication cannot proceed without losing data.", sleepMultiplier)) {
                } else if (ioe instanceof SaslException) {
                    LOG.warn("Peer encountered SaslException, rechecking all sinks: ", ioe);
            } else {
                if (ioe instanceof SocketTimeoutException) {
                    // This exception means we waited for more than 60s and nothing
                    // happened, the cluster is alive and calling it right away
                    // even for a test just makes things worse.
                    sleepForRetries("Encountered a SocketTimeoutException. Since the " + "call to the remote cluster timed out, which is usually " + "caused by a machine failure or a massive slowdown", this.socketTimeoutMultiplier);
                } else if (ioe instanceof ConnectException) {
                    LOG.warn("Peer is unavailable, rechecking all sinks: ", ioe);
                } else {
                    LOG.warn("Can't replicate because of a local or network error: ", ioe);
            // NOTE(review): the body of this if is not visible in the listing.
            if (sleepForRetries("Since we are unable to replicate", sleepMultiplier)) {
    // in case we exited before replicating
    return false;
Also used : ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) IOException(java.io.IOException) SaslException(javax.security.sasl.SaslException) HBaseReplicationEndpoint(org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) Entry(org.apache.hadoop.hbase.wal.WAL.Entry) SocketTimeoutException(java.net.SocketTimeoutException) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionException(java.util.concurrent.ExecutionException) RemoteException(org.apache.hadoop.ipc.RemoteException) ConnectException(java.net.ConnectException)

Example 3 with ExecutorCompletionService

use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

the class SnapshotManifestV1 method loadRegionManifests.

// Lists the region directories under snapshotDir and submits one task per region to
// the given executor; each task loads the region info and builds that region's
// manifest from disk. Returns the collected manifests, or null when the listing
// returns null.
// NOTE(review): this listing has lost lines in extraction (closing braces, the body
// of the collection loop), so it does not compile as shown - compare with upstream.
static List<SnapshotRegionManifest> loadRegionManifests(final Configuration conf, final Executor executor, final FileSystem fs, final Path snapshotDir, final SnapshotDescription desc) throws IOException {
    FileStatus[] regions = FSUtils.listStatus(fs, snapshotDir, new FSUtils.RegionDirFilter(fs));
    // A null listing is reported at debug level and surfaced to the caller as null.
    if (regions == null) {
        LOG.debug("No regions under directory:" + snapshotDir);
        return null;
    final ExecutorCompletionService<SnapshotRegionManifest> completionService = new ExecutorCompletionService<>(executor);
    for (final FileStatus region : regions) {
        completionService.submit(new Callable<SnapshotRegionManifest>() {

            public SnapshotRegionManifest call() throws IOException {
                HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, region.getPath());
                return buildManifestFromDisk(conf, fs, snapshotDir, hri);
    ArrayList<SnapshotRegionManifest> regionsManifest = new ArrayList<>(regions.length);
    try {
        // NOTE(review): the loop body is not visible; presumably it takes one completed
        // future per region and adds its result to regionsManifest - verify.
        for (int i = 0; i < regions.length; ++i) {
    } catch (InterruptedException e) {
        // Only the message is carried over; the original exception is not attached as a cause.
        throw new InterruptedIOException(e.getMessage());
    } catch (ExecutionException e) {
        // NOTE(review): as shown, the IOException has neither message nor cause; a line
        // attaching e.getCause() may have been lost in extraction - verify.
        IOException ex = new IOException();
        throw ex;
    return regionsManifest;
Also used : InterruptedIOException(java.io.InterruptedIOException) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) SnapshotRegionManifest(org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) IOException(java.io.IOException) InterruptedIOException(java.io.InterruptedIOException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ExecutionException(java.util.concurrent.ExecutionException) FSUtils(org.apache.hadoop.hbase.util.FSUtils)

Example 4 with ExecutorCompletionService

use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

the class HRegion method initializeStores.

  /**
   * Open all Stores.
   * @param reporter
   * @param status
   * @return Highest sequenceId found out in a Store.
   * @throws IOException
   */
// Instantiates one HStore per column family in parallel on a dedicated thread pool,
// registers each store in this.stores, and returns
// max(highest store sequence id, highest store memstore timestamp + 1).
// NOTE(review): this listing has lost lines in extraction (closing braces, a log call
// prefix, the cleanup body), so it does not compile as shown - compare with upstream.
private long initializeStores(final CancelableProgressable reporter, MonitoredTask status) throws IOException {
    // Load in all the HStores.
    long maxSeqId = -1;
    // initialized to -1 so that we pick up MemstoreTS from column families
    long maxMemstoreTS = -1;
    // With no column families both values stay at -1 and the method returns 0.
    if (!htableDescriptor.getFamilies().isEmpty()) {
        // initialize the thread pool for opening stores in parallel.
        ThreadPoolExecutor storeOpenerThreadPool = getStoreOpenAndCloseThreadPool("StoreOpener-" + this.getRegionInfo().getShortNameToLog());
        CompletionService<HStore> completionService = new ExecutorCompletionService<>(storeOpenerThreadPool);
        // initialize each store in parallel
        for (final HColumnDescriptor family : htableDescriptor.getFamilies()) {
            status.setStatus("Instantiating store for column family " + family);
            completionService.submit(new Callable<HStore>() {

                public HStore call() throws IOException {
                    return instantiateHStore(family);
        // Stays false if any take()/get() below throws; the finally block checks it.
        boolean allStoresOpened = false;
        boolean hasSloppyStores = false;
        try {
            // Collect one finished store per family, in completion order.
            for (int i = 0; i < htableDescriptor.getFamilies().size(); i++) {
                Future<HStore> future = completionService.take();
                HStore store = future.get();
                // Stores are registered under their family name.
                this.stores.put(store.getFamily().getName(), store);
                if (store.isSloppyMemstore()) {
                    hasSloppyStores = true;
                long storeMaxSequenceId = store.getMaxSequenceId();
                maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), storeMaxSequenceId);
                // Track the largest sequence id seen across all stores.
                if (maxSeqId == -1 || storeMaxSequenceId > maxSeqId) {
                    maxSeqId = storeMaxSequenceId;
                // Track the largest memstore timestamp seen across all stores.
                long maxStoreMemstoreTS = store.getMaxMemstoreTS();
                if (maxStoreMemstoreTS > maxMemstoreTS) {
                    maxMemstoreTS = maxStoreMemstoreTS;
            allStoresOpened = true;
            // NOTE(review): the next line is a fragment; the call it belongs to
            // (presumably a log statement) was stripped in extraction - verify.
            if (hasSloppyStores) {
      "Setting FlushNonSloppyStoresFirstPolicy for the region=" + this);
        } catch (InterruptedException e) {
            // Converted to InterruptedIOException with the original attached as cause.
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            // The task's underlying failure becomes the cause of the IOException.
            throw new IOException(e.getCause());
        } finally {
            if (!allStoresOpened) {
                // something went wrong, close all opened stores
                LOG.error("Could not initialize all stores for the region=" + this);
                for (Store store : this.stores.values()) {
                    // NOTE(review): the try body and catch body are not visible;
                    // presumably each store is closed here - verify.
                    try {
                    } catch (IOException e) {
    return Math.max(maxSeqId, maxMemstoreTS + 1);
Also used : InterruptedIOException(java.io.InterruptedIOException) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) ExecutionException(java.util.concurrent.ExecutionException)

Example 5 with ExecutorCompletionService

use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

the class HStore method close.

// Clears the store files (and compacted files) from the store file manager, submits
// one task per cleared store file to a dedicated thread pool, and returns the
// collection of files that was cleared. The first failure seen while waiting for the
// tasks is thrown as an IOException.
// NOTE(review): this listing has lost lines in extraction (closing braces, the work
// done inside each task, the finally bodies, a log call prefix), so it does not
// compile as shown - compare with upstream HBase.
public ImmutableCollection<StoreFile> close() throws IOException {
    try {
        // Clear so metrics doesn't find them.
        ImmutableCollection<StoreFile> result = storeEngine.getStoreFileManager().clearFiles();
        Collection<StoreFile> compactedfiles = storeEngine.getStoreFileManager().clearCompactedFiles();
        // clear the compacted files
        // NOTE(review): the body of this if is not visible in the listing.
        if (compactedfiles != null && !compactedfiles.isEmpty()) {
        if (!result.isEmpty()) {
            // initialize the thread pool for closing store files in parallel.
            ThreadPoolExecutor storeFileCloserThreadPool = this.region.getStoreFileOpenAndCloseThreadPool("StoreFileCloserThread-" + this.getColumnFamilyName());
            // close each store file in parallel
            CompletionService<Void> completionService = new ExecutorCompletionService<>(storeFileCloserThreadPool);
            for (final StoreFile f : result) {
                completionService.submit(new Callable<Void>() {

                    public Void call() throws IOException {
                        // Defaults to true when there is no cache configuration.
                        // NOTE(review): no use of evictOnClose is visible; presumably it
                        // is passed to a close call on `f` that was dropped - verify.
                        boolean evictOnClose = cacheConf != null ? cacheConf.shouldEvictOnClose() : true;
                        return null;
            // Remembers only the first failure; later ones are ignored.
            IOException ioe = null;
            try {
                // Wait for one completed task per store file.
                for (int i = 0; i < result.size(); i++) {
                    try {
                        // NOTE(review): no future.get() is visible here; without it an
                        // ExecutionException could not be raised - presumably dropped.
                        Future<Void> future = completionService.take();
                    } catch (InterruptedException e) {
                        if (ioe == null) {
                            ioe = new InterruptedIOException();
                    } catch (ExecutionException e) {
                        if (ioe == null)
                            ioe = new IOException(e.getCause());
            // NOTE(review): the finally body is not visible; presumably it shuts down
            // the closer thread pool - verify.
            } finally {
            if (ioe != null)
                throw ioe;
        // NOTE(review): the next line is a fragment; the call it belongs to (presumably
        // a log statement) was stripped in extraction - verify.
        }"Closed " + this);
        return result;
    // NOTE(review): the finally body is not visible in the listing.
    } finally {
Also used : InterruptedIOException(java.io.InterruptedIOException) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) ExecutionException(java.util.concurrent.ExecutionException)


