Example 1 with ExecutorCompletionService

Use of java.util.concurrent.ExecutorCompletionService in project hadoop by apache.

From the class ITestS3ADeleteManyFiles, method testBulkRenameAndDelete.

/**
   * CAUTION: If this test starts failing, please make sure that the
   * {@link org.apache.hadoop.fs.s3a.Constants#MAX_THREADS} configuration is not
   * set too low. Alternatively, consider reducing the
   * <code>scale.test.operation.count</code> parameter in
   * <code>getOperationCount()</code>.
   *
   * @see #getOperationCount()
   */
@Test
public void testBulkRenameAndDelete() throws Throwable {
    final Path scaleTestDir = path("testBulkRenameAndDelete");
    final Path srcDir = new Path(scaleTestDir, "src");
    final Path finalDir = new Path(scaleTestDir, "final");
    final long count = getOperationCount();
    final S3AFileSystem fs = getFileSystem();
    ContractTestUtils.rm(fs, scaleTestDir, true, false);
    fs.mkdirs(srcDir);
    fs.mkdirs(finalDir);
    int testBufferSize = fs.getConf().getInt(ContractTestUtils.IO_CHUNK_BUFFER_SIZE, ContractTestUtils.DEFAULT_IO_CHUNK_BUFFER_SIZE);
    // use Executor to speed up file creation
    ExecutorService exec = Executors.newFixedThreadPool(16);
    final ExecutorCompletionService<Boolean> completionService = new ExecutorCompletionService<>(exec);
    try {
        final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
        for (int i = 0; i < count; ++i) {
            final String fileName = "foo-" + i;
            completionService.submit(new Callable<Boolean>() {

                @Override
                public Boolean call() throws IOException {
                    ContractTestUtils.createFile(fs, new Path(srcDir, fileName), false, data);
                    return fs.exists(new Path(srcDir, fileName));
                }
            });
        }
        for (int i = 0; i < count; ++i) {
            final Future<Boolean> future = completionService.take();
            try {
                if (!future.get()) {
                    LOG.warn("cannot create file");
                }
            } catch (ExecutionException e) {
                LOG.warn("Error while uploading file", e.getCause());
                throw e;
            }
        }
    } finally {
        exec.shutdown();
    }
    int nSrcFiles = fs.listStatus(srcDir).length;
    fs.rename(srcDir, finalDir);
    assertEquals(nSrcFiles, fs.listStatus(finalDir).length);
    ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", new Path(srcDir, "foo-" + 0));
    ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", new Path(srcDir, "foo-" + count / 2));
    ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", new Path(srcDir, "foo-" + (count - 1)));
    ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", new Path(finalDir, "foo-" + 0));
    ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", new Path(finalDir, "foo-" + count / 2));
    ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", new Path(finalDir, "foo-" + (count - 1)));
    ContractTestUtils.assertDeleted(fs, finalDir, true, false);
}
Also used: Path (org.apache.hadoop.fs.Path), S3AFileSystem (org.apache.hadoop.fs.s3a.S3AFileSystem), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), IOException (java.io.IOException), ExecutorService (java.util.concurrent.ExecutorService), ExecutionException (java.util.concurrent.ExecutionException), Test (org.junit.Test)
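
Stripped of the S3A specifics, the test follows the canonical completion-service pattern: submit N independent tasks, then call take() exactly N times, consuming results in completion order rather than submission order. A minimal self-contained sketch of that pattern (the class name, doWork, and the counts are illustrative, not Hadoop code):

import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class CompletionServiceSketch {

    public static void main(String[] args) throws Exception {
        final int count = 100;
        // Bounded pool: tasks run in parallel but never more than 16 at once.
        ExecutorService exec = Executors.newFixedThreadPool(16);
        CompletionService<Boolean> completionService = new ExecutorCompletionService<>(exec);
        try {
            for (int i = 0; i < count; i++) {
                final int id = i;
                completionService.submit(() -> doWork(id));
            }
            for (int i = 0; i < count; i++) {
                // take() blocks until the next *finished* task, whichever one that is.
                Future<Boolean> future = completionService.take();
                try {
                    if (!future.get()) {
                        System.err.println("a task reported failure");
                    }
                } catch (ExecutionException e) {
                    // get() rethrows the task's exception wrapped; the real failure is getCause().
                    throw e;
                }
            }
        } finally {
            exec.shutdown();
        }
    }

    private static boolean doWork(int id) {
        // Placeholder for real work, e.g. creating one file as in the test above.
        return true;
    }
}

The take()/get() split is the point: take() orders the completions, while get() is where a failed task's exception resurfaces as an ExecutionException.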

Example 2 with ExecutorCompletionService

Use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

From the class HBaseInterClusterReplicationEndpoint, method replicate.

/**
   * Do the shipping logic: partition the WAL entries into per-region batches,
   * ship them to the peer cluster in parallel, and retry any batches that fail.
   */
@Override
public boolean replicate(ReplicateContext replicateContext) {
    CompletionService<Integer> pool = new ExecutorCompletionService<>(this.exec);
    List<Entry> entries = replicateContext.getEntries();
    String walGroupId = replicateContext.getWalGroupId();
    int sleepMultiplier = 1;
    int numReplicated = 0;
    if (!peersSelected && this.isRunning()) {
        connectToPeers();
        peersSelected = true;
    }
    int numSinks = replicationSinkMgr.getNumSinks();
    if (numSinks == 0) {
        LOG.warn("No replication sinks found, returning without replicating. The source should retry" + " with the same set of edits.");
        return false;
    }
    // minimum of: configured threads, number of 100-waledit batches,
    //  and number of current sinks
    int n = Math.min(Math.min(this.maxThreads, entries.size() / 100 + 1), numSinks);
    List<List<Entry>> entryLists = new ArrayList<>(n);
    if (n == 1) {
        entryLists.add(entries);
    } else {
        for (int i = 0; i < n; i++) {
            entryLists.add(new ArrayList<>(entries.size() / n + 1));
        }
        // now group by region
        for (Entry e : entries) {
            entryLists.get(Math.abs(Bytes.hashCode(e.getKey().getEncodedRegionName()) % n)).add(e);
        }
    }
    while (this.isRunning() && !exec.isShutdown()) {
        if (!isPeerEnabled()) {
            if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
                sleepMultiplier++;
            }
            continue;
        }
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Replicating " + entries.size() + " entries of total size " + replicateContext.getSize());
            }
            int futures = 0;
            for (int i = 0; i < entryLists.size(); i++) {
                if (!entryLists.get(i).isEmpty()) {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Submitting " + entryLists.get(i).size() + " entries of total size " + replicateContext.getSize());
                    }
                    // RuntimeExceptions encountered here bubble up and are handled in ReplicationSource
                    pool.submit(createReplicator(entryLists.get(i), i));
                    futures++;
                }
            }
            IOException iox = null;
            for (int i = 0; i < futures; i++) {
                try {
                    // wait for all futures, remove successful parts
                    // (only the remaining parts will be retried)
                    Future<Integer> f = pool.take();
                    int index = f.get().intValue();
                    int batchSize = entryLists.get(index).size();
                    entryLists.set(index, Collections.<Entry>emptyList());
                    // Now, we have marked the batch as done replicating, record its size
                    numReplicated += batchSize;
                } catch (InterruptedException ie) {
                    iox = new IOException(ie);
                } catch (ExecutionException ee) {
                    // cause must be an IOException
                    iox = (IOException) ee.getCause();
                }
            }
            if (iox != null) {
                // if we had any exceptions, try again
                throw iox;
            }
            if (numReplicated != entries.size()) {
                // Something went wrong here and we don't know what, let's just fail and retry.
                LOG.warn("The number of edits replicated is different from the number received," + " failing for now.");
                return false;
            }
            // update metrics
            this.metrics.setAgeOfLastShippedOp(entries.get(entries.size() - 1).getKey().getWriteTime(), walGroupId);
            return true;
        } catch (IOException ioe) {
            // Didn't ship anything, but must still age the last time we did
            this.metrics.refreshAgeOfLastShippedOp(walGroupId);
            if (ioe instanceof RemoteException) {
                ioe = ((RemoteException) ioe).unwrapRemoteException();
                LOG.warn("Can't replicate because of an error on the remote cluster: ", ioe);
                if (ioe instanceof TableNotFoundException) {
                    if (sleepForRetries("A table is missing in the peer cluster. " + "Replication cannot proceed without losing data.", sleepMultiplier)) {
                        sleepMultiplier++;
                    }
                } else if (ioe instanceof SaslException) {
                    LOG.warn("Peer encountered SaslException, rechecking all sinks: ", ioe);
                    replicationSinkMgr.chooseSinks();
                }
            } else {
                if (ioe instanceof SocketTimeoutException) {
                    // This exception means we waited for more than 60s and nothing
                    // happened, the cluster is alive and calling it right away
                    // even for a test just makes things worse.
                    sleepForRetries("Encountered a SocketTimeoutException. Since the " + "call to the remote cluster timed out, which is usually " + "caused by a machine failure or a massive slowdown", this.socketTimeoutMultiplier);
                } else if (ioe instanceof ConnectException) {
                    LOG.warn("Peer is unavailable, rechecking all sinks: ", ioe);
                    replicationSinkMgr.chooseSinks();
                } else {
                    LOG.warn("Can't replicate because of a local or network error: ", ioe);
                }
            }
            if (sleepForRetries("Since we are unable to replicate", sleepMultiplier)) {
                sleepMultiplier++;
            }
        }
    }
    // in case we exited before replicating
    return false;
}
Also used: ArrayList (java.util.ArrayList), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), IOException (java.io.IOException), SaslException (javax.security.sasl.SaslException), HBaseReplicationEndpoint (org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint), TableNotFoundException (org.apache.hadoop.hbase.TableNotFoundException), Entry (org.apache.hadoop.hbase.wal.WAL.Entry), SocketTimeoutException (java.net.SocketTimeoutException), List (java.util.List), ExecutionException (java.util.concurrent.ExecutionException), RemoteException (org.apache.hadoop.ipc.RemoteException), ConnectException (java.net.ConnectException)
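
The distinctive move in this example is that each replicator returns the index of the batch it shipped, so the caller can blank out completed batches and leave only failed ones for the next retry pass. A reduced sketch of that bookkeeping, with a hypothetical ship() standing in for HBase's replicator:

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class RetryOnlyFailedBatches {

    public static void main(String[] args) throws Exception {
        List<List<String>> batches = new ArrayList<>(Arrays.asList(
                new ArrayList<>(Arrays.asList("a", "b")),
                new ArrayList<>(Arrays.asList("c")),
                new ArrayList<>(Arrays.asList("d", "e", "f"))));
        ExecutorService exec = Executors.newFixedThreadPool(4);
        CompletionService<Integer> pool = new ExecutorCompletionService<>(exec);
        try {
            int futures = 0;
            for (int i = 0; i < batches.size(); i++) {
                if (!batches.get(i).isEmpty()) {
                    final int index = i;
                    // Each task returns its own index so the caller knows which batch finished.
                    pool.submit(() -> {
                        ship(batches.get(index));
                        return index;
                    });
                    futures++;
                }
            }
            IOException first = null;
            for (int i = 0; i < futures; i++) {
                try {
                    int done = pool.take().get();
                    // Mark the batch as shipped; a retry pass skips empty lists.
                    batches.set(done, Collections.<String>emptyList());
                } catch (ExecutionException e) {
                    if (first == null) {
                        first = new IOException(e.getCause());
                    }
                }
            }
            if (first != null) {
                throw first; // the caller retries with only the surviving batches
            }
        } finally {
            exec.shutdown();
        }
    }

    private static void ship(List<String> batch) throws IOException {
        // Hypothetical stand-in for sending one batch to the peer cluster.
    }
}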

Example 3 with ExecutorCompletionService

Use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

From the class SnapshotManifestV1, method loadRegionManifests.

static List<SnapshotRegionManifest> loadRegionManifests(final Configuration conf, final Executor executor, final FileSystem fs, final Path snapshotDir, final SnapshotDescription desc) throws IOException {
    FileStatus[] regions = FSUtils.listStatus(fs, snapshotDir, new FSUtils.RegionDirFilter(fs));
    if (regions == null) {
        LOG.debug("No regions under directory: " + snapshotDir);
        return null;
    }
    final ExecutorCompletionService<SnapshotRegionManifest> completionService = new ExecutorCompletionService<>(executor);
    for (final FileStatus region : regions) {
        completionService.submit(new Callable<SnapshotRegionManifest>() {

            @Override
            public SnapshotRegionManifest call() throws IOException {
                HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, region.getPath());
                return buildManifestFromDisk(conf, fs, snapshotDir, hri);
            }
        });
    }
    ArrayList<SnapshotRegionManifest> regionsManifest = new ArrayList<>(regions.length);
    try {
        for (int i = 0; i < regions.length; ++i) {
            regionsManifest.add(completionService.take().get());
        }
    } catch (InterruptedException e) {
        throw new InterruptedIOException(e.getMessage());
    } catch (ExecutionException e) {
        IOException ex = new IOException();
        ex.initCause(e.getCause());
        throw ex;
    }
    return regionsManifest;
}
Also used: InterruptedIOException (java.io.InterruptedIOException), FileStatus (org.apache.hadoop.fs.FileStatus), ArrayList (java.util.ArrayList), SnapshotRegionManifest (org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), IOException (java.io.IOException), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), ExecutionException (java.util.concurrent.ExecutionException), FSUtils (org.apache.hadoop.hbase.util.FSUtils)
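
The collection loop shows a translation idiom common to IO-flavored APIs: InterruptedException becomes InterruptedIOException, and the ExecutionException's cause is rewrapped so callers see a single checked IOException. A minimal sketch of just that translation (collectAll and its task list are illustrative, not HBase API):

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;

public class CollectWithIOExceptions {

    static <T> List<T> collectAll(Executor executor, List<Callable<T>> tasks) throws IOException {
        CompletionService<T> cs = new ExecutorCompletionService<>(executor);
        for (Callable<T> task : tasks) {
            cs.submit(task);
        }
        List<T> results = new ArrayList<>(tasks.size());
        try {
            for (int i = 0; i < tasks.size(); i++) {
                results.add(cs.take().get());
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the flag before translating
            throw new InterruptedIOException(e.getMessage());
        } catch (ExecutionException e) {
            // Unwrap the task's failure and surface it under the method's checked type.
            throw new IOException(e.getCause());
        }
        return results;
    }
}

new IOException(cause) here does the same job as the new IOException() plus initCause(cause) pair in the snippet above.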

Example 4 with ExecutorCompletionService

Use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

From the class HRegion, method initializeStores.

/**
   * Open all Stores.
   * @param reporter callback used to report progress and check for cancellation
   * @param status the monitored task, updated with per-family status
   * @return the highest sequence id found across the Stores
   * @throws IOException if any store fails to open
   */
private long initializeStores(final CancelableProgressable reporter, MonitoredTask status) throws IOException {
    // Load in all the HStores.
    long maxSeqId = -1;
    // initialized to -1 so that we pick up MemstoreTS from column families
    long maxMemstoreTS = -1;
    if (!htableDescriptor.getFamilies().isEmpty()) {
        // initialize the thread pool for opening stores in parallel.
        ThreadPoolExecutor storeOpenerThreadPool = getStoreOpenAndCloseThreadPool("StoreOpener-" + this.getRegionInfo().getShortNameToLog());
        CompletionService<HStore> completionService = new ExecutorCompletionService<>(storeOpenerThreadPool);
        // initialize each store in parallel
        for (final HColumnDescriptor family : htableDescriptor.getFamilies()) {
            status.setStatus("Instantiating store for column family " + family);
            completionService.submit(new Callable<HStore>() {

                @Override
                public HStore call() throws IOException {
                    return instantiateHStore(family);
                }
            });
        }
        boolean allStoresOpened = false;
        boolean hasSloppyStores = false;
        try {
            for (int i = 0; i < htableDescriptor.getFamilies().size(); i++) {
                Future<HStore> future = completionService.take();
                HStore store = future.get();
                this.stores.put(store.getFamily().getName(), store);
                if (store.isSloppyMemstore()) {
                    hasSloppyStores = true;
                }
                long storeMaxSequenceId = store.getMaxSequenceId();
                maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), storeMaxSequenceId);
                if (maxSeqId == -1 || storeMaxSequenceId > maxSeqId) {
                    maxSeqId = storeMaxSequenceId;
                }
                long maxStoreMemstoreTS = store.getMaxMemstoreTS();
                if (maxStoreMemstoreTS > maxMemstoreTS) {
                    maxMemstoreTS = maxStoreMemstoreTS;
                }
            }
            allStoresOpened = true;
            if (hasSloppyStores) {
                htableDescriptor.setFlushPolicyClassName(FlushNonSloppyStoresFirstPolicy.class.getName());
                LOG.info("Setting FlushNonSloppyStoresFirstPolicy for the region=" + this);
            }
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e.getCause());
        } finally {
            storeOpenerThreadPool.shutdownNow();
            if (!allStoresOpened) {
                // something went wrong, close all opened stores
                LOG.error("Could not initialize all stores for the region=" + this);
                for (Store store : this.stores.values()) {
                    try {
                        store.close();
                    } catch (IOException e) {
                        LOG.warn(e.getMessage());
                    }
                }
            }
        }
    }
    return Math.max(maxSeqId, maxMemstoreTS + 1);
}
Also used: InterruptedIOException (java.io.InterruptedIOException), HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), IOException (java.io.IOException), MultipleIOException (org.apache.hadoop.io.MultipleIOException), DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException), TimeoutIOException (org.apache.hadoop.hbase.exceptions.TimeoutIOException), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor), ExecutionException (java.util.concurrent.ExecutionException)
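
The shape worth isolating here is open-all-or-close-all: on any failure, the finally block cancels whatever is still queued via shutdownNow() and closes every resource that did open, so a half-initialized region leaks nothing. A reduced sketch of that shape (Resource and the openers list are hypothetical):

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class OpenAllOrCloseAll {

    interface Resource {
        void close() throws IOException;
    }

    static List<Resource> openAll(List<Callable<Resource>> openers) throws IOException {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        CompletionService<Resource> cs = new ExecutorCompletionService<>(pool);
        for (Callable<Resource> opener : openers) {
            cs.submit(opener);
        }
        List<Resource> opened = new ArrayList<>(openers.size());
        boolean allOpened = false;
        try {
            for (int i = 0; i < openers.size(); i++) {
                opened.add(cs.take().get());
            }
            allOpened = true;
            return opened;
        } catch (InterruptedException e) {
            // Same initCause pattern as the snippet above.
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e.getCause());
        } finally {
            pool.shutdownNow(); // cancel openers still queued or running
            if (!allOpened) {
                // Roll back: close everything that opened before the failure.
                for (Resource r : opened) {
                    try {
                        r.close();
                    } catch (IOException e) {
                        // log and keep closing the rest
                    }
                }
            }
        }
    }
}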

Example 5 with ExecutorCompletionService

Use of java.util.concurrent.ExecutorCompletionService in project hbase by apache.

From the class HStore, method close.

@Override
public ImmutableCollection<StoreFile> close() throws IOException {
    this.archiveLock.lock();
    this.lock.writeLock().lock();
    try {
        // Clear so metrics doesn't find them.
        ImmutableCollection<StoreFile> result = storeEngine.getStoreFileManager().clearFiles();
        Collection<StoreFile> compactedfiles = storeEngine.getStoreFileManager().clearCompactedFiles();
        // clear the compacted files
        if (compactedfiles != null && !compactedfiles.isEmpty()) {
            removeCompactedfiles(compactedfiles);
        }
        if (!result.isEmpty()) {
            // initialize the thread pool for closing store files in parallel.
            ThreadPoolExecutor storeFileCloserThreadPool = this.region.getStoreFileOpenAndCloseThreadPool("StoreFileCloserThread-" + this.getColumnFamilyName());
            // close each store file in parallel
            CompletionService<Void> completionService = new ExecutorCompletionService<>(storeFileCloserThreadPool);
            for (final StoreFile f : result) {
                completionService.submit(new Callable<Void>() {

                    @Override
                    public Void call() throws IOException {
                        boolean evictOnClose = cacheConf != null ? cacheConf.shouldEvictOnClose() : true;
                        f.closeReader(evictOnClose);
                        return null;
                    }
                });
            }
            IOException ioe = null;
            try {
                for (int i = 0; i < result.size(); i++) {
                    try {
                        Future<Void> future = completionService.take();
                        future.get();
                    } catch (InterruptedException e) {
                        if (ioe == null) {
                            ioe = new InterruptedIOException();
                            ioe.initCause(e);
                        }
                    } catch (ExecutionException e) {
                        if (ioe == null)
                            ioe = new IOException(e.getCause());
                    }
                }
            } finally {
                storeFileCloserThreadPool.shutdownNow();
            }
            if (ioe != null)
                throw ioe;
        }
        LOG.info("Closed " + this);
        return result;
    } finally {
        this.lock.writeLock().unlock();
        this.archiveLock.unlock();
    }
}
Also used: InterruptedIOException (java.io.InterruptedIOException), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), IOException (java.io.IOException), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor), ExecutionException (java.util.concurrent.ExecutionException)
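
Note the drain-then-throw structure: the loop keeps calling take() after a failure, remembering only the first exception, so every close attempt runs before the method gives up. A compact sketch of the same shape (closeAll and the closers list are hypothetical):

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class DrainThenThrow {

    static void closeAll(List<Callable<Void>> closers) throws IOException {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        CompletionService<Void> cs = new ExecutorCompletionService<>(pool);
        for (Callable<Void> closer : closers) {
            cs.submit(closer);
        }
        IOException first = null;
        try {
            for (int i = 0; i < closers.size(); i++) {
                try {
                    cs.take().get();
                } catch (InterruptedException e) {
                    // Mirrors the snippet: convert and keep draining.
                    if (first == null) {
                        first = (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                } catch (ExecutionException e) {
                    // Keep draining; remember only the first failure.
                    if (first == null) {
                        first = new IOException(e.getCause());
                    }
                }
            }
        } finally {
            pool.shutdownNow();
        }
        if (first != null) {
            throw first; // every closer got its chance before we gave up
        }
    }
}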

Aggregations

ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)58 ExecutionException (java.util.concurrent.ExecutionException)27 ExecutorService (java.util.concurrent.ExecutorService)27 ArrayList (java.util.ArrayList)26 IOException (java.io.IOException)23 Future (java.util.concurrent.Future)18 Test (org.junit.Test)12 InterruptedIOException (java.io.InterruptedIOException)9 List (java.util.List)8 Path (org.apache.hadoop.fs.Path)8 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)6 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)6 Callable (java.util.concurrent.Callable)5 HashMap (java.util.HashMap)4 TimeoutException (java.util.concurrent.TimeoutException)4 File (java.io.File)3 Random (java.util.Random)3 FileData (com.alibaba.otter.shared.etl.model.FileData)2 UnknownHostException (java.net.UnknownHostException)2 Path (java.nio.file.Path)2